StarPU Handbook
sc_hypervisor_monitoring.h
Go to the documentation of this file.
1 /* StarPU --- Runtime system for heterogeneous multicore architectures.
2  *
3  * Copyright (C) 2011,2013-2014 Inria
4  * Copyright (C) 2013,2015,2017 CNRS
5  *
6  * StarPU is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU Lesser General Public License as published by
8  * the Free Software Foundation; either version 2.1 of the License, or (at
9  * your option) any later version.
10  *
11  * StarPU is distributed in the hope that it will be useful, but
12  * WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
14  *
15  * See the GNU Lesser General Public License in COPYING.LGPL for more details.
16  */
17 
18 #ifndef SC_HYPERVISOR_MONITORING_H
19 #define SC_HYPERVISOR_MONITORING_H
20 
21 #include <sc_hypervisor.h>
22 
23 #ifdef __cplusplus
24 extern "C"
25 {
26 #endif
27 
28 /* structure indicating when the move of workers has actually completed
29  (the moved workers are then visible in the new ctx) */
31 {
32  /* receiver context */
34  /* list of workers required to be moved */
36  /* number of workers required to be moved */
38  /* list of workers that actually got in the receiver ctx */
40 };
41 
42 /* wrapper attached to a sched_ctx storing monitoring information */
44 {
45  /* the sched_ctx it monitors */
46  unsigned sched_ctx;
47 
48  /* user configuration meant to limit resizing */
50 
51 
52  /* the start time of the resizing sample of the workers of this context*/
53  double start_time_w[STARPU_NMAXWORKERS];
54 
55  /* idle time of workers in this context */
56  double current_idle_time[STARPU_NMAXWORKERS];
57 
58  /* idle time from the last resize */
59  double idle_time[STARPU_NMAXWORKERS];
60 
61  /* time when the idle started */
62  double idle_start_time[STARPU_NMAXWORKERS];
63 
64  /* time during which the worker executed tasks */
65  double exec_time[STARPU_NMAXWORKERS];
66 
67  /* time when the worker started executing a task */
68  double exec_start_time[STARPU_NMAXWORKERS];
69 
70  /* list of workers that will leave this context (lazy resizing process) */
71  int worker_to_be_removed[STARPU_NMAXWORKERS];
72 
73  /* number of tasks pushed on each worker in this ctx */
74  int pushed_tasks[STARPU_NMAXWORKERS];
75 
76  /* number of tasks popped from each worker in this ctx */
77  int poped_tasks[STARPU_NMAXWORKERS];
78 
79  /* number of flops the context has to execute */
80  double total_flops;
81 
82  /* number of flops executed since the beginning until now */
83  double total_elapsed_flops[STARPU_NMAXWORKERS];
84 
85  /* number of flops executed since last resizing */
86  double elapsed_flops[STARPU_NMAXWORKERS];
87 
88  /* data quantity executed on each worker in this ctx */
89  size_t elapsed_data[STARPU_NMAXWORKERS];
90 
91  /* number of tasks executed on each worker in this ctx */
92  int elapsed_tasks[STARPU_NMAXWORKERS];
93 
94  /* the average speed of the type of workers when they belonged to this context */
95  /* 0 - cuda 1 - cpu */
96  double ref_speed[2];
97 
98  /* number of flops submitted to this ctx */
100 
101  /* number of flops that still have to be executed in this ctx */
103 
104  /* the start time of the resizing sample of this context*/
105  double start_time;
106 
107  /* the first time a task was pushed to this context*/
109 
110  /* the start time of the sample during which the hypervisor is not
111  allowed to react because reacting would be too expensive */
112  double hyp_react_start_time;
113 
114  /* the workers do not leave the current ctx until the receiver ctx
115  has acknowledged the reception of these workers */
116  struct sc_hypervisor_resize_ack resize_ack;
117 
118  /* mutex to protect the ack of workers */
119  starpu_pthread_mutex_t mutex;
120 
121  /* boolean indicating if the resizing strategy can see the
122  flops of all the execution or not */
124 
125  /* boolean indicating that a context is being sized */
126  unsigned to_be_sized;
127 
128  /* boolean indicating if we add the idle of this worker to
129  the idle of the context */
130  unsigned compute_idle[STARPU_NMAXWORKERS];
131 
132  /* boolean indicating if we add the entire idle time of this
133  worker to the idle time of the context or just half of it */
134  unsigned compute_partial_idle[STARPU_NMAXWORKERS];
135 
136  /* consider the max in the lp */
137  unsigned consider_max;
138 
139 
140 };
141 
142 /* return the wrapper of context that saves its monitoring information */
144 
145 /* get the list of registered contexts */
146 unsigned *sc_hypervisor_get_sched_ctxs();
147 
148 /* get the number of registered contexts */
150 
151 /* get the number of workers of a certain architecture in a context */
152 int sc_hypervisor_get_nworkers_ctx(unsigned sched_ctx, enum starpu_worker_archtype arch);
153 
154 /* get the number of flops executed by a context since last resizing (reset to 0 when a resizing is done)*/
156 
157 /* get the number of flops executed by a context since the beginning */
158 double sc_hypervisor_get_total_elapsed_flops_per_sched_ctx(struct sc_hypervisor_wrapper* sc_w);
159 
160 /* compute an average value of the cpu/cuda speed */
161 double sc_hypervisorsc_hypervisor_get_speed_per_worker_type(struct sc_hypervisor_wrapper* sc_w, enum starpu_worker_archtype arch);
162 
163 /* compute the actual speed of all workers of a specific type */
164 double sc_hypervisor_get_speed(struct sc_hypervisor_wrapper *sc_w, enum starpu_worker_archtype arch);
165 
166 #ifdef __cplusplus
167 }
168 #endif
169 
170 #endif
unsigned * sc_hypervisor_get_sched_ctxs()
Definition: sc_hypervisor_monitoring.h:43
int nmoved_workers
Definition: sc_hypervisor_monitoring.h:37
unsigned sched_ctx
Definition: sc_hypervisor_monitoring.h:46
struct sc_hypervisor_policy_config * config
Definition: sc_hypervisor_monitoring.h:49
Definition: sc_hypervisor_monitoring.h:30
double start_time
Definition: sc_hypervisor_monitoring.h:105
starpu_worker_archtype
Definition: starpu_worker.h:31
struct sc_hypervisor_wrapper * sc_hypervisor_get_wrapper(unsigned sched_ctx)
starpu_pthread_mutex_t mutex
Definition: sc_hypervisor_monitoring.h:119
int sc_hypervisor_get_nsched_ctxs()
double real_start_time
Definition: sc_hypervisor_monitoring.h:108
#define STARPU_NMAXWORKERS
Definition: starpu_config.h:102
double submitted_flops
Definition: sc_hypervisor_monitoring.h:99
double remaining_flops
Definition: sc_hypervisor_monitoring.h:102
double sc_hypervisor_get_elapsed_flops_per_sched_ctx(struct sc_hypervisor_wrapper *sc_w)
int receiver_sched_ctx
Definition: sc_hypervisor_monitoring.h:33
unsigned total_flops_available
Definition: sc_hypervisor_monitoring.h:123
int * moved_workers
Definition: sc_hypervisor_monitoring.h:35
int * acked_workers
Definition: sc_hypervisor_monitoring.h:39
Definition: sc_hypervisor_config.h:48
double total_flops
Definition: sc_hypervisor_monitoring.h:80