Coverage for cpyquickhelper/profiling/event_profiler.py: 98%
178 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-30 05:30 +0200
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-30 05:30 +0200
1"""
2@file
3@brief Profiling class.
4"""
5import sys
6import inspect
7import numpy
8import pandas
9from ._event_profiler import CEventProfiler # pylint: disable=E0611
10from ._event_profiler_c import ( # pylint: disable=E0611
11 _profiling_start, _profiling_stop,
12 _profiling_n_columns, _profiling_log_event,
13 _profiling_delete, _profiling_get_saved_maps,
14 _profiling_dump_and_clear,
15 _profiling_register_pyinstance)
18class EventProfiler:
19 """
20 This profiler profiles both memory and function calls.
21 It stores events and produces a timeline.
22 See :ref:`l-example-event-profling` for an example
23 of use.
25 :param size: size of the buffer to store events
26 :param impl: different implementation of the same
27 function (`'python'`, `'pybind11'`, `'c'`)
29 The profiler stores every event about function calls and returns,
30 and memory allocation. It does not give the time spent in every function
31 but a timeline with the series of events which occured.
32 There are two pieces:
34 * A C++ class which logs every event into a bug buffer,
35 every event is described by five int64 (id_frame, id_arg, event,
36 timestamp, value1, value2).
37 * A python class which wraps the first one.
39 Function calls are caught by using :func:`sys.setprofile`.
40 Memory calls are caught by using :epkg:`PyMem_SetAllocator`.
41 Once the profiled code has ended, results are returned by
42 @see meth retrieve_results. This function extends the information
43 stored in the C++ class with file names, function names, line numbers,
44 memory deallocated.
46 .. note::
47 The program crashes if the profiled code raises an exception.
48 The default memory allocator is not restored and any allocation
49 is logged into a buffer which was deleted. The exception must be
50 caught or class @see cl WithEventProfiler must be used.
52 About parameter *impl*, the question is related to the implementation
53 of method :meth:`log_event
54 <cpyquickhelper.profiling.event_profiler.EventProfiler.log_event>`.
55 A call to the python implementation takes 2-3 microseconds, a call
56 to the :epkg:`pybind11` implementation takes 1-2 microseconds.
57 """
59 _event_mapping = {
60 'call': 1,
61 'return': 2,
62 'c_call': 3,
63 'c_return': 4,
64 'exception': 5,
65 'c_exception': 6,
66 'profiler_call': 10,
67 'profiler_return': 11,
68 'profiler_start': 100,
69 'profiler_sort': 101,
70 'malloc': 1000,
71 'calloc': 1001,
72 'realloc': 1002,
73 'free': 1003,
74 'realloc_free': 1004,
75 }
77 def __init__(self, size=1000000, impl='python'):
78 self._started = False
79 self._prof = None
80 self._frames = {-1: inspect.currentframe()}
81 self._args = {-1: None, 0: None, id(None): None}
82 if impl != 'c':
83 self._buffer = CEventProfiler(size)
84 else:
85 self._buffer = None
86 self._size = size
87 self._events = []
88 self._impl = impl
89 self._copy_cache = False
90 if self._buffer is None:
91 nc = _profiling_n_columns()
92 else:
93 nc = self.n_columns
94 self._cache_copy = numpy.empty((size, nc), dtype=numpy.int64)
95 if impl == 'pybind11':
96 self._buffer.register_pyinstance(self)
98 @property
99 def n_columns(self):
100 """
101 Returns the number of stored informations by the profiler
102 in memory. This corresponds to the number of columns returned
103 by @see meth retrieve_raw_results.
104 """
105 if self._buffer is not None:
106 return CEventProfiler.n_columns()
107 return _profiling_n_columns()
109 def start(self):
110 """
111 Starts the profiling.
112 """
113 if self._started:
114 raise RuntimeError(
115 "The profiler was already started. It cannot be done again.")
116 self._frames[0] = inspect.currentframe()
117 self._started = True
118 self._copy_cache = False
119 if self._buffer is not None:
120 self._setup_profiler()
121 self._buffer.log_event(-1, -1, 100, 0, 0)
122 self._buffer.start()
123 else:
124 _profiling_start(int(self._size), 0)
125 self._setup_profiler()
126 _profiling_register_pyinstance(self)
128 def stop(self):
129 """
130 Stops the profiling.
131 """
132 if not self._started:
133 raise RuntimeError(
134 "The profiler was not started. It must be done first.")
135 self._restore_profiler()
136 if self._buffer is not None:
137 self._buffer.stop()
138 self._buffer.log_event(-1, -1, 101, 0, 0)
139 if self._impl == 'pybind11':
140 map_frame, map_arg = self._buffer.get_saved_maps()
141 self._frames.update(map_frame)
142 self._args.update(map_arg)
143 self._buffer.delete()
144 else:
145 map_frame, map_arg = _profiling_get_saved_maps()
146 self._empty_cache()
147 _profiling_delete()
148 _profiling_stop()
149 self._frames.update(map_frame)
150 self._args.update(map_arg)
151 self._started = False
153 def _setup_profiler(self):
154 """
155 This relies on :func:`sys.setprofile` and :func:`sys.getprofile`.
156 """
157 self._prof = sys.getprofile()
158 if self._impl == 'python':
159 sys.setprofile(self.log_event)
160 elif self._impl == 'pybind11':
161 sys.setprofile(self._buffer.c_log_event)
162 elif self._impl == 'c':
163 sys.setprofile(_profiling_log_event)
164 else:
165 raise ValueError( # pragma: no cover
166 f"Unexpected value for impl={self._impl!r}.")
168 def _restore_profiler(self):
169 """
170 This relies on :func:`sys.setprofile` and :func:`sys.getprofile`.
171 """
172 sys.setprofile(self._prof)
173 self._prof = None
175 def log_event(self, frame, event, arg):
176 """
177 Logs an event in the database.
179 :param frame: (frame), see :mod:`inspect`
180 :param event: (str)
181 kind of event
182 :param arg: None or...
183 """
184 idf = id(frame)
185 if idf not in self._frames:
186 self._frames[idf] = frame
187 if type(arg) == type(open): # pylint: disable=C0123
188 ida = id(arg)
189 else:
190 ida = id(None)
191 if ida not in self._args:
192 self._args[ida] = arg
193 r = self._buffer.log_event(
194 idf, ida, EventProfiler._event_mapping[event], 0, 0)
195 if not r:
196 self._empty_cache()
198 def _empty_cache(self):
199 """
200 Empties the cache. This function logs a couple of
201 events. The cache must contains enough place to
202 log them.
203 """
204 if self._copy_cache:
205 raise RuntimeError( # pragma: no cover
206 "Profiling cache being copied. "
207 "Increase the size of the cache.")
208 self._copy_cache = True
209 if self._buffer is None:
210 ptr = self._cache_copy.__array_interface__[ # pylint: disable=E1101
211 'data'][0]
212 # self._cache_copy.__array_struct__
213 size = _profiling_dump_and_clear(
214 ptr, self._cache_copy.size, 1)
215 else:
216 size = self._buffer.dump_and_clear(self._cache_copy, True)
217 # We hope here this function will not be called by another
218 # thread which could be able to fill
219 # the cache while it is being copied.
220 self._events.append(self._cache_copy[:size].copy())
221 self._copy_cache = False
222 return size
224 def retrieve_raw_results(self, empty_cache=True):
225 """
226 Retrieves the raw results. Difficult to interpret.
228 :param empty_cache: retrives the data from the C++ container and
229 stores it in a numpy array before
230 :return: numpy array
231 """
232 if empty_cache and self._buffer is not None:
233 self._empty_cache()
234 res = numpy.vstack(self._events)
235 # Filling information about the size of freed buffer.
236 # id_frame, id_arg, event, time, value1, value2
237 memo = {}
238 for i in range(res.shape[0]):
239 event = res[i, 2]
240 if event < 1000:
241 continue
242 ptr = res[i, 4]
243 if event <= 1002:
244 memo[ptr] = res[i, 5]
245 else:
246 if ptr not in memo:
247 # The system is freeing a non-existing pointer.
248 res[i, 5] = 0
249 else:
250 res[i, 5] = -memo[ptr]
251 del memo[ptr]
252 return res
254 def _choose(self, frame, arg, f_back=False):
255 """
256 Returns a string which represents the called function.
257 When the function is built-in, frame contains the function
258 calling the built-in, *arg* is then a pointer on this
259 function. This method returns its name.
260 """
261 if arg is None:
262 if f_back:
263 name = ('' if frame.f_back is None else
264 frame.f_back.f_code.co_name)
265 else:
266 name = frame.f_code.co_name
267 return name
268 try:
269 return arg.__qualname__
270 except AttributeError:
271 return arg.__class__.__name__
273 def _choose_mod(self, frame, arg, clean_name, f_back=False):
274 """
275 Returns a string which represents the module of the called function.
276 When the function is built-in, frame contains the function
277 calling the built-in, *arg* is then a pointer on this
278 function. This method returns its module.
279 """
280 if arg is None:
281 if f_back:
282 name = ('' if frame.f_back is None else
283 frame.f_back.f_code.co_filename)
284 else:
285 name = frame.f_code.co_filename
286 return clean_name(name)
287 try:
288 return arg.__module__
289 except AttributeError:
290 return arg.__class__.__module__
292 def retrieve_results(self, empty_cache=True, clean_file_name=None):
293 """
294 Retrieves the raw results. Difficult to interpret.
296 :param empty_cache: retrives the data from the C++ container and
297 stores it in a numpy array before
298 :param clean_file_name: function which cleans the file name
299 :return: numpy array
300 """
301 def _identity_(x):
302 return x
304 if clean_file_name is None:
305 clean_file_name = _identity_
306 rev_event_mapping = {
307 v: k for k, v in EventProfiler._event_mapping.items()}
308 res = self.retrieve_raw_results(empty_cache=empty_cache)
309 df = pandas.DataFrame(
310 res[:, 3:], columns=['time', 'value1', 'value2'])
311 df['event'] = list(map(lambda v: rev_event_mapping[v], res[:, 2]))
312 frames = list(map(lambda v: self._frames[v], res[:, 0]))
313 args = list(map(lambda v: self._args[v], res[:, 1]))
315 df['name'] = list(self._choose(frame, arg)
316 for frame, arg in zip(frames, args))
317 df['mod'] = list(self._choose_mod(frame, arg, clean_file_name)
318 for frame, arg in zip(frames, args))
319 df['lineno'] = list(frame.f_code.co_firstlineno for frame in frames)
320 df['from_name'] = list(
321 self._choose(frame, arg, True) for frame, arg in zip(frames, args))
322 df['from_mod'] = list(
323 self._choose_mod(frame, None, clean_file_name, f_back=True)
324 for frame in frames)
325 df['from_line'] = list(
326 (-1 if frame.f_back is None else frame.f_back.f_lineno)
327 for frame in frames)
328 return df
331class WithEventProfiler:
332 """
333 Enables profiling with the following syntax:
335 ::
337 prof = WithEventProfiler()
338 with prof:
339 # code to profile
340 # may raise an exception
342 The class restores the memory allocator and stops logging any event
343 even if an exception was raised.
345 :param size: size of the buffer to store events
346 :param impl: different implementation of the same
347 function (`'python'`, `'pybind11'`)
348 :param clean_file_name: function uses to clean or shorten the file name
349 saved in the report.
350 """
352 def __init__(self, size=1000000, impl='python', clean_file_name=None):
353 self.size = size
354 self.clean_file_name = clean_file_name
355 self.report_ = None
356 self.prof_ = None
357 self.impl = impl
359 def __enter__(self):
360 self.prof_ = EventProfiler(size=self.size, impl=self.impl)
361 self.prof_.start()
363 def __exit__(self, typ, value, traceback):
364 self.prof_.stop()
365 self.report_ = self.prof_.retrieve_results(
366 clean_file_name=self.clean_file_name)
368 @property
369 def report(self):
370 """Returns the profiling report as a dataframe."""
371 return self.report_
374class EventProfilerDebug(EventProfiler):
375 """
376 One class to measure time wasted by profiling.
377 """
379 def start(self):
380 """
381 Starts the profiling without enabling it.
382 """
383 if self._started:
384 raise RuntimeError( # pragma: no cover
385 "The profiler was already started. It cannot be done again.")
386 self._frames[0] = inspect.currentframe()
387 self._started = True
388 self._copy_cache = False
389 self._buffer.log_event(-1, -1, 100, 0, 0)
391 def stop(self):
392 """
393 Stops the unstarted profiling.
394 """
395 if not self._started:
396 raise RuntimeError( # pragma: no cover
397 "The profiler was not started. It must be done first.")
398 self._buffer.log_event(-1, -1, 101, 0, 0)
399 self._started = False