Coverage for cpyquickhelper/profiling/event_profiler.py: 98%

178 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-06-30 05:30 +0200

1""" 

2@file 

3@brief Profiling class. 

4""" 

5import sys 

6import inspect 

7import numpy 

8import pandas 

9from ._event_profiler import CEventProfiler # pylint: disable=E0611 

10from ._event_profiler_c import ( # pylint: disable=E0611 

11 _profiling_start, _profiling_stop, 

12 _profiling_n_columns, _profiling_log_event, 

13 _profiling_delete, _profiling_get_saved_maps, 

14 _profiling_dump_and_clear, 

15 _profiling_register_pyinstance) 

16 

17 

18class EventProfiler: 

19 """ 

20 This profiler profiles both memory and function calls. 

21 It stores events and produces a timeline. 

22 See :ref:`l-example-event-profling` for an example 

23 of use. 

24 

25 :param size: size of the buffer to store events 

26 :param impl: different implementation of the same 

27 function (`'python'`, `'pybind11'`, `'c'`) 

28 

29 The profiler stores every event about function calls and returns, 

30 and memory allocation. It does not give the time spent in every function 

31 but a timeline with the series of events which occured. 

32 There are two pieces: 

33 

34 * A C++ class which logs every event into a bug buffer, 

35 every event is described by five int64 (id_frame, id_arg, event, 

36 timestamp, value1, value2). 

37 * A python class which wraps the first one. 

38 

39 Function calls are caught by using :func:`sys.setprofile`. 

40 Memory calls are caught by using :epkg:`PyMem_SetAllocator`. 

41 Once the profiled code has ended, results are returned by 

42 @see meth retrieve_results. This function extends the information 

43 stored in the C++ class with file names, function names, line numbers, 

44 memory deallocated. 

45 

46 .. note:: 

47 The program crashes if the profiled code raises an exception. 

48 The default memory allocator is not restored and any allocation 

49 is logged into a buffer which was deleted. The exception must be 

50 caught or class @see cl WithEventProfiler must be used. 

51 

52 About parameter *impl*, the question is related to the implementation 

53 of method :meth:`log_event 

54 <cpyquickhelper.profiling.event_profiler.EventProfiler.log_event>`. 

55 A call to the python implementation takes 2-3 microseconds, a call 

56 to the :epkg:`pybind11` implementation takes 1-2 microseconds. 

57 """ 

58 

59 _event_mapping = { 

60 'call': 1, 

61 'return': 2, 

62 'c_call': 3, 

63 'c_return': 4, 

64 'exception': 5, 

65 'c_exception': 6, 

66 'profiler_call': 10, 

67 'profiler_return': 11, 

68 'profiler_start': 100, 

69 'profiler_sort': 101, 

70 'malloc': 1000, 

71 'calloc': 1001, 

72 'realloc': 1002, 

73 'free': 1003, 

74 'realloc_free': 1004, 

75 } 

76 

77 def __init__(self, size=1000000, impl='python'): 

78 self._started = False 

79 self._prof = None 

80 self._frames = {-1: inspect.currentframe()} 

81 self._args = {-1: None, 0: None, id(None): None} 

82 if impl != 'c': 

83 self._buffer = CEventProfiler(size) 

84 else: 

85 self._buffer = None 

86 self._size = size 

87 self._events = [] 

88 self._impl = impl 

89 self._copy_cache = False 

90 if self._buffer is None: 

91 nc = _profiling_n_columns() 

92 else: 

93 nc = self.n_columns 

94 self._cache_copy = numpy.empty((size, nc), dtype=numpy.int64) 

95 if impl == 'pybind11': 

96 self._buffer.register_pyinstance(self) 

97 

98 @property 

99 def n_columns(self): 

100 """ 

101 Returns the number of stored informations by the profiler 

102 in memory. This corresponds to the number of columns returned 

103 by @see meth retrieve_raw_results. 

104 """ 

105 if self._buffer is not None: 

106 return CEventProfiler.n_columns() 

107 return _profiling_n_columns() 

108 

109 def start(self): 

110 """ 

111 Starts the profiling. 

112 """ 

113 if self._started: 

114 raise RuntimeError( 

115 "The profiler was already started. It cannot be done again.") 

116 self._frames[0] = inspect.currentframe() 

117 self._started = True 

118 self._copy_cache = False 

119 if self._buffer is not None: 

120 self._setup_profiler() 

121 self._buffer.log_event(-1, -1, 100, 0, 0) 

122 self._buffer.start() 

123 else: 

124 _profiling_start(int(self._size), 0) 

125 self._setup_profiler() 

126 _profiling_register_pyinstance(self) 

127 

128 def stop(self): 

129 """ 

130 Stops the profiling. 

131 """ 

132 if not self._started: 

133 raise RuntimeError( 

134 "The profiler was not started. It must be done first.") 

135 self._restore_profiler() 

136 if self._buffer is not None: 

137 self._buffer.stop() 

138 self._buffer.log_event(-1, -1, 101, 0, 0) 

139 if self._impl == 'pybind11': 

140 map_frame, map_arg = self._buffer.get_saved_maps() 

141 self._frames.update(map_frame) 

142 self._args.update(map_arg) 

143 self._buffer.delete() 

144 else: 

145 map_frame, map_arg = _profiling_get_saved_maps() 

146 self._empty_cache() 

147 _profiling_delete() 

148 _profiling_stop() 

149 self._frames.update(map_frame) 

150 self._args.update(map_arg) 

151 self._started = False 

152 

153 def _setup_profiler(self): 

154 """ 

155 This relies on :func:`sys.setprofile` and :func:`sys.getprofile`. 

156 """ 

157 self._prof = sys.getprofile() 

158 if self._impl == 'python': 

159 sys.setprofile(self.log_event) 

160 elif self._impl == 'pybind11': 

161 sys.setprofile(self._buffer.c_log_event) 

162 elif self._impl == 'c': 

163 sys.setprofile(_profiling_log_event) 

164 else: 

165 raise ValueError( # pragma: no cover 

166 f"Unexpected value for impl={self._impl!r}.") 

167 

168 def _restore_profiler(self): 

169 """ 

170 This relies on :func:`sys.setprofile` and :func:`sys.getprofile`. 

171 """ 

172 sys.setprofile(self._prof) 

173 self._prof = None 

174 

175 def log_event(self, frame, event, arg): 

176 """ 

177 Logs an event in the database. 

178 

179 :param frame: (frame), see :mod:`inspect` 

180 :param event: (str) 

181 kind of event 

182 :param arg: None or... 

183 """ 

184 idf = id(frame) 

185 if idf not in self._frames: 

186 self._frames[idf] = frame 

187 if type(arg) == type(open): # pylint: disable=C0123 

188 ida = id(arg) 

189 else: 

190 ida = id(None) 

191 if ida not in self._args: 

192 self._args[ida] = arg 

193 r = self._buffer.log_event( 

194 idf, ida, EventProfiler._event_mapping[event], 0, 0) 

195 if not r: 

196 self._empty_cache() 

197 

198 def _empty_cache(self): 

199 """ 

200 Empties the cache. This function logs a couple of 

201 events. The cache must contains enough place to 

202 log them. 

203 """ 

204 if self._copy_cache: 

205 raise RuntimeError( # pragma: no cover 

206 "Profiling cache being copied. " 

207 "Increase the size of the cache.") 

208 self._copy_cache = True 

209 if self._buffer is None: 

210 ptr = self._cache_copy.__array_interface__[ # pylint: disable=E1101 

211 'data'][0] 

212 # self._cache_copy.__array_struct__ 

213 size = _profiling_dump_and_clear( 

214 ptr, self._cache_copy.size, 1) 

215 else: 

216 size = self._buffer.dump_and_clear(self._cache_copy, True) 

217 # We hope here this function will not be called by another 

218 # thread which could be able to fill 

219 # the cache while it is being copied. 

220 self._events.append(self._cache_copy[:size].copy()) 

221 self._copy_cache = False 

222 return size 

223 

224 def retrieve_raw_results(self, empty_cache=True): 

225 """ 

226 Retrieves the raw results. Difficult to interpret. 

227 

228 :param empty_cache: retrives the data from the C++ container and 

229 stores it in a numpy array before 

230 :return: numpy array 

231 """ 

232 if empty_cache and self._buffer is not None: 

233 self._empty_cache() 

234 res = numpy.vstack(self._events) 

235 # Filling information about the size of freed buffer. 

236 # id_frame, id_arg, event, time, value1, value2 

237 memo = {} 

238 for i in range(res.shape[0]): 

239 event = res[i, 2] 

240 if event < 1000: 

241 continue 

242 ptr = res[i, 4] 

243 if event <= 1002: 

244 memo[ptr] = res[i, 5] 

245 else: 

246 if ptr not in memo: 

247 # The system is freeing a non-existing pointer. 

248 res[i, 5] = 0 

249 else: 

250 res[i, 5] = -memo[ptr] 

251 del memo[ptr] 

252 return res 

253 

254 def _choose(self, frame, arg, f_back=False): 

255 """ 

256 Returns a string which represents the called function. 

257 When the function is built-in, frame contains the function 

258 calling the built-in, *arg* is then a pointer on this 

259 function. This method returns its name. 

260 """ 

261 if arg is None: 

262 if f_back: 

263 name = ('' if frame.f_back is None else 

264 frame.f_back.f_code.co_name) 

265 else: 

266 name = frame.f_code.co_name 

267 return name 

268 try: 

269 return arg.__qualname__ 

270 except AttributeError: 

271 return arg.__class__.__name__ 

272 

273 def _choose_mod(self, frame, arg, clean_name, f_back=False): 

274 """ 

275 Returns a string which represents the module of the called function. 

276 When the function is built-in, frame contains the function 

277 calling the built-in, *arg* is then a pointer on this 

278 function. This method returns its module. 

279 """ 

280 if arg is None: 

281 if f_back: 

282 name = ('' if frame.f_back is None else 

283 frame.f_back.f_code.co_filename) 

284 else: 

285 name = frame.f_code.co_filename 

286 return clean_name(name) 

287 try: 

288 return arg.__module__ 

289 except AttributeError: 

290 return arg.__class__.__module__ 

291 

292 def retrieve_results(self, empty_cache=True, clean_file_name=None): 

293 """ 

294 Retrieves the raw results. Difficult to interpret. 

295 

296 :param empty_cache: retrives the data from the C++ container and 

297 stores it in a numpy array before 

298 :param clean_file_name: function which cleans the file name 

299 :return: numpy array 

300 """ 

301 def _identity_(x): 

302 return x 

303 

304 if clean_file_name is None: 

305 clean_file_name = _identity_ 

306 rev_event_mapping = { 

307 v: k for k, v in EventProfiler._event_mapping.items()} 

308 res = self.retrieve_raw_results(empty_cache=empty_cache) 

309 df = pandas.DataFrame( 

310 res[:, 3:], columns=['time', 'value1', 'value2']) 

311 df['event'] = list(map(lambda v: rev_event_mapping[v], res[:, 2])) 

312 frames = list(map(lambda v: self._frames[v], res[:, 0])) 

313 args = list(map(lambda v: self._args[v], res[:, 1])) 

314 

315 df['name'] = list(self._choose(frame, arg) 

316 for frame, arg in zip(frames, args)) 

317 df['mod'] = list(self._choose_mod(frame, arg, clean_file_name) 

318 for frame, arg in zip(frames, args)) 

319 df['lineno'] = list(frame.f_code.co_firstlineno for frame in frames) 

320 df['from_name'] = list( 

321 self._choose(frame, arg, True) for frame, arg in zip(frames, args)) 

322 df['from_mod'] = list( 

323 self._choose_mod(frame, None, clean_file_name, f_back=True) 

324 for frame in frames) 

325 df['from_line'] = list( 

326 (-1 if frame.f_back is None else frame.f_back.f_lineno) 

327 for frame in frames) 

328 return df 

329 

330 

331class WithEventProfiler: 

332 """ 

333 Enables profiling with the following syntax: 

334 

335 :: 

336 

337 prof = WithEventProfiler() 

338 with prof: 

339 # code to profile 

340 # may raise an exception 

341 

342 The class restores the memory allocator and stops logging any event 

343 even if an exception was raised. 

344 

345 :param size: size of the buffer to store events 

346 :param impl: different implementation of the same 

347 function (`'python'`, `'pybind11'`) 

348 :param clean_file_name: function uses to clean or shorten the file name 

349 saved in the report. 

350 """ 

351 

352 def __init__(self, size=1000000, impl='python', clean_file_name=None): 

353 self.size = size 

354 self.clean_file_name = clean_file_name 

355 self.report_ = None 

356 self.prof_ = None 

357 self.impl = impl 

358 

359 def __enter__(self): 

360 self.prof_ = EventProfiler(size=self.size, impl=self.impl) 

361 self.prof_.start() 

362 

363 def __exit__(self, typ, value, traceback): 

364 self.prof_.stop() 

365 self.report_ = self.prof_.retrieve_results( 

366 clean_file_name=self.clean_file_name) 

367 

368 @property 

369 def report(self): 

370 """Returns the profiling report as a dataframe.""" 

371 return self.report_ 

372 

373 

374class EventProfilerDebug(EventProfiler): 

375 """ 

376 One class to measure time wasted by profiling. 

377 """ 

378 

379 def start(self): 

380 """ 

381 Starts the profiling without enabling it. 

382 """ 

383 if self._started: 

384 raise RuntimeError( # pragma: no cover 

385 "The profiler was already started. It cannot be done again.") 

386 self._frames[0] = inspect.currentframe() 

387 self._started = True 

388 self._copy_cache = False 

389 self._buffer.log_event(-1, -1, 100, 0, 0) 

390 

391 def stop(self): 

392 """ 

393 Stops the unstarted profiling. 

394 """ 

395 if not self._started: 

396 raise RuntimeError( # pragma: no cover 

397 "The profiler was not started. It must be done first.") 

398 self._buffer.log_event(-1, -1, 101, 0, 0) 

399 self._started = False