Coverage for onnxcustom/utils/nvprof2json.py: 99%
455 statements
« prev ^ index » next coverage.py v7.0.5, created at 2023-01-17 01:42 +0100
1"""
2@file
3@brief Converts traces from :epkg:`nvprof`.
4The source comes from `nvprof2json <https://github.com/ezyang/nvprof2json>`_.
5"""
7import sqlite3
8import enum
9import json
10import copy
11import io
12import os
13import zipfile
14import cxxfilt
15import pandas
def convert_trace_to_json(filename, output=None, temporary_file=None,
                          verbose=0, fLOG=None):
    """
    Converts traces produced by :epkg:`nvprof` and saved with
    format *sqlite3* (extension `.sql`). The output format
    follows `Trace Event Format
    <https://docs.google.com/document/d/
    1CvAClvFfyA5R-PhYUmn5OOQtYMH4h6I0nSsKchNAySU/preview>`_.

    :param filename: filename (`.sql` sqlite3 trace, possibly zipped)
    :param output: output file or None
    :param temporary_file: if the file needs to be unzipped,
        this file will be created to be the unzipped file,
        it is not cleaned after the unzipping.
    :param verbose: verbosity
    :param fLOG: logging function
    :return: json (if output is None, the list of events otherwise)

    This file, if not too big, can be viewed with `chrome-tracing`.
    The traces are usually generated by using a command line similar to:

    ::

        nvprof -o gpu_profile.sql python plot_gpu_training.py
    """
    ext = os.path.splitext(filename)[-1]
    if ext == ".zip":
        # The trace was zipped: extract the single archived database into
        # *temporary_file* once, then work on the extracted file.
        if temporary_file in (None, ''):
            temporary_file = filename + ".unzipped"
        if os.path.exists(temporary_file):
            if verbose > 0 and fLOG is not None:
                fLOG(  # pragma: no cover
                    "[convert_trace_to_json] %r already unzipped into %r"
                    "." % (filename, temporary_file))
        else:
            if verbose > 0 and fLOG is not None:
                fLOG(  # pragma: no cover
                    f"[convert_trace_to_json] unzipping to file {temporary_file!r}.")
            # Context managers close both the archive and the inner stream
            # even on error (the original code leaked the inner stream).
            with zipfile.ZipFile(filename) as zipf:
                names = zipf.namelist()
                if len(names) != 1:
                    raise RuntimeError(  # pragma: no cover
                        f"More than one file is stored in zip file {filename!r}.")
                with zipf.open(names[0], "r") as stream, \
                        open(temporary_file, "wb") as f:
                    # Copy in 64 KiB chunks to keep memory usage bounded.
                    while True:
                        data = stream.read(65536)
                        if len(data) == 0:
                            break
                        f.write(data)
        filename = temporary_file

    conn = sqlite3.connect(filename)
    # Row factory gives dict-like access to columns by name.
    conn.row_factory = sqlite3.Row

    # Map string table ids to demangled names (kernel names are mangled C++).
    strings = {}
    for r in conn.execute("SELECT _id_ as id, value FROM StringTable"):
        strings[r["id"]] = _demangle(r["value"])

    traceEvents = []

    # Example row of CUPTI_ACTIVITY_KIND_RUNTIME:
    # _id_: 11625
    # cbid: 17
    # start: 1496933427584362152
    # end: 1496933427584362435
    # processId: 1317533
    # threadId: 1142654784
    # correlationId: 13119
    # returnValue: 0
    if verbose > 0 and fLOG is not None:
        fLOG("[convert_trace_to_json] step 1 begin.")
    for row in conn.execute("SELECT * FROM CUPTI_ACTIVITY_KIND_RUNTIME"):
        try:
            cbid = Cbids(row["cbid"]).name
        except ValueError:  # pragma: no cover
            cbid = str(row["cbid"])
            # Bug fix: the original tested `fLOG is None` and then called
            # fLOG, which raised TypeError whenever the branch was taken.
            if verbose > 0 and fLOG is not None:
                fLOG(f"[convert_trace_to_json] unrecognized cbid {cbid!r}.")
        event = {
            "name": cbid,
            "ph": "X",  # Complete Event (Begin + End event)
            "cat": "cuda",
            "ts": _munge_time(row["start"]),
            "dur": _munge_time(row["end"] - row["start"]),
            "tid": f"Thread {row['threadId']}: Runtime API",
            "pid": f"[{row['processId']}] Process",
            "args": {
                # ...
            },
        }
        traceEvents.append(event)

    # DRIVER?

    # Example row of CUPTI_ACTIVITY_KIND_MARKER:
    # _id_: 1
    # flags: 2
    # timestamp: 1496844806028263989
    # id: 1
    # objectKind: 2
    # objectId: b'\xe5\xc0\x16\x00@\xe7\x10J\x00\x00\x00\x00'
    # name: 3
    # domain: 0
    if verbose > 0 and fLOG is not None:
        fLOG("[convert_trace_to_json] step 2 begin.")
    # Markers come in pairs: a named start row joined with its unnamed end
    # row (name = 0) sharing the same id.  A missing end row means an
    # instant event.
    for row in conn.execute(" ".join([
            "SELECT",
            ",".join([
                "start.name AS name",
                "start.timestamp AS start_time",
                "end.timestamp AS end_time"
            ]),
            "FROM",
            "(SELECT * FROM CUPTI_ACTIVITY_KIND_MARKER WHERE name != 0) "
            "AS start",
            "LEFT JOIN",
            "(SELECT * FROM CUPTI_ACTIVITY_KIND_MARKER WHERE name = 0) "
            "AS end",
            "ON start.id = end.id"])):
        event = {
            "name": strings[row["name"]],
            "cat": "cuda",
            "ts": _munge_time(row["start_time"]),
            # Weirdly, these don't seem to be associated with a
            # CPU/GPU.  I guess there's no CUDA Context available
            # when you run these, so it makes sense.  But nvvp
            # associates these with a GPU strangely enough
            "tid": "Markers and Ranges",
            "pid": "Markers and Ranges",
            # parse objectId?
            "args": {
                # ...
            },
        }
        if row["end_time"] is None:
            event["ph"] = "I"  # Instant event: no matching end marker
        else:
            event["ph"] = "X"
            event["dur"] = _munge_time(row["end_time"] - row["start_time"])
        traceEvents.append(event)

    # Example row of CUPTI_ACTIVITY_KIND_MEMCPY:
    # _id_: 1
    # copyKind: 1
    # srcKind: 1
    # dstKind: 3
    # flags: 0
    # bytes: 7436640
    # start: 1496933426915778221
    # end: 1496933426916558424
    # deviceId: 0
    # contextId: 1
    # streamId: 7
    # correlationId: 809
    # runtimeCorrelationId: 0
    if verbose > 0 and fLOG is not None:
        fLOG("[convert_trace_to_json] step 3 begin.")
    for row in conn.execute("SELECT * FROM CUPTI_ACTIVITY_KIND_MEMCPY"):
        # copyKind:
        #   1 - Memcpy HtoD
        #   2 - Memcpy DtoH
        #   8 - Memcpy DtoD
        # flags: ???
        #   0 - Sync
        #   1 - Async
        # srcKind/dstKind
        #   1 - Pageable
        #   2 - Page-locked ???
        #   3 - Device
        if row["copyKind"] == 1:
            copyKind = "HtoD"
        elif row["copyKind"] == 2:
            copyKind = "DtoH"
        elif row["copyKind"] == 8:
            copyKind = "DtoD"
        else:
            copyKind = str(row["copyKind"])
        if row["flags"] == 0:
            flags = "sync"
        elif row["flags"] == 1:
            flags = "async"
        else:
            flags = str(row["flags"])
        event = {
            "name": f"Memcpy {copyKind} [{flags}]",
            "ph": "X",  # Complete Event (Begin + End event)
            "cat": "cuda",
            "ts": _munge_time(row["start"]),
            "dur": _munge_time(row["end"] - row["start"]),
            "tid": f"MemCpy ({copyKind})",
            # lookup GPU name. This is stored in CUPTI_ACTIVITY_KIND_DEVICE
            "pid": f"[{row['deviceId']}:{row['contextId']}] Overview",
            "args": {
                "Size": _sizeof_fmt(row["bytes"]),
            },
        }
        traceEvents.append(event)

    # name: index into StringTable
    # What is the difference between end and completed?
    # Example row of CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL:
    # _id_: 1
    # cacheConfig: b'\x00'
    # sharedMemoryConfig: 1
    # registersPerThread: 32
    # partitionedGlobalCacheRequested: 2
    # partitionedGlobalCacheExecuted: 2
    # start: 1496844806032514222
    # end: 1496844806032531694
    # completed: 1496844806032531694
    # deviceId: 0
    # contextId: 1
    # streamId: 7
    # gridX: 57 / gridY: 1 / gridZ: 1
    # blockX: 128 / blockY: 1 / blockZ: 1
    # staticSharedMemory: 0
    # dynamicSharedMemory: 0
    # localMemoryPerThread: 0
    # localMemoryTotal: 78643200
    # correlationId: 487
    # gridId: 669
    # name: 5
    if verbose > 0 and fLOG is not None:
        fLOG("[convert_trace_to_json] step 4 begin.")
    for row in conn.execute(
            "SELECT * FROM CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL"):
        event = {
            "name": strings[row["name"]],
            "ph": "X",  # Complete Event (Begin + End event)
            "cat": "cuda",
            "ts": _munge_time(row["start"]),
            "dur": _munge_time(row["end"] - row["start"]),
            "tid": "Compute",
            # lookup GPU name?
            "pid": f"[{row['deviceId']}:{row['contextId']}] Overview",
            "args": {
                "Grid size": f"[ {row['gridX']}, {row['gridY']}, {row['gridZ']} ]",
                "Block size": f"[ {row['blockX']}, {row['blockY']}, {row['blockZ']} ]",
                # ...
            },
        }
        # Each kernel is emitted twice: once on the per-device "Overview"
        # track and once on a per-kernel "Compute" track (like nvvp).
        alt_event = copy.deepcopy(event)
        alt_event["tid"] = alt_event["name"]
        alt_event["pid"] = f"[{row['deviceId']}:{row['contextId']}] Compute"
        traceEvents.append(event)
        traceEvents.append(alt_event)

    # All queries are done: release the database connection.
    conn.close()

    if output not in (None, ''):
        if verbose > 0 and fLOG is not None:
            fLOG(
                f"[convert_trace_to_json] converting into json in {output!r}.")
        with open(output, "w", encoding="utf-8") as f:
            json.dump(traceEvents, f, separators=(',\n', ':'))
            f.write('\n')
        if verbose > 0 and fLOG is not None:
            fLOG("[convert_trace_to_json] done.")
        return traceEvents
    if verbose > 0 and fLOG is not None:
        fLOG(  # pragma: no cover
            "[convert_trace_to_json] converting into json.")
    st = io.StringIO()
    json.dump(traceEvents, st, separators=(',\n', ':'))
    st.write('\n')
    if verbose > 0 and fLOG is not None:
        fLOG("[convert_trace_to_json] done.")  # pragma: no cover
        fLOG(st.getvalue())  # pragma: no cover
    return st.getvalue()
301def _munge_time(t):
302 """Take a time from nvprof and convert it into a chrome://tracing time."""
303 # For strict correctness, divide by 1000, but this reduces accuracy.
304 return t # / 1000.
def _demangle(name):
    """Demangle a C++ identifier using c++filt."""
    try:
        demangled = cxxfilt.demangle(name)
    except cxxfilt.LibraryNotFound:  # pragma: no cover
        # The demangling library is missing: fall back to the raw name.
        return name
    return demangled
class Cbids(enum.IntEnum):
    # CUDA runtime API callback identifiers ("cbid" column of
    # CUPTI_ACTIVITY_KIND_RUNTIME); used to translate a numeric cbid
    # into the corresponding runtime API name in the trace output.
    "List of events."
    INVALID = 0
    cudaDriverGetVersion = 1
    cudaRuntimeGetVersion = 2
    cudaGetDeviceCount = 3
    cudaGetDeviceProperties = 4
    cudaChooseDevice = 5
    cudaGetChannelDesc = 6
    cudaCreateChannelDesc = 7
    cudaConfigureCall = 8
    cudaSetupArgument = 9
    cudaGetLastError = 10
    cudaPeekAtLastError = 11
    cudaGetErrorString = 12
    cudaLaunch = 13
    cudaFuncSetCacheConfig = 14
    cudaFuncGetAttributes = 15
    cudaSetDevice = 16
    cudaGetDevice = 17
    cudaSetValidDevices = 18
    cudaSetDeviceFlags = 19
    cudaMalloc = 20
    cudaMallocPitch = 21
    cudaFree = 22
    cudaMallocArray = 23
    cudaFreeArray = 24
    cudaMallocHost = 25
    cudaFreeHost = 26
    cudaHostAlloc = 27
    cudaHostGetDevicePointer = 28
    cudaHostGetFlags = 29
    cudaMemGetInfo = 30
    cudaMemcpy = 31
    cudaMemcpy2D = 32
    cudaMemcpyToArray = 33
    cudaMemcpy2DToArray = 34
    cudaMemcpyFromArray = 35
    cudaMemcpy2DFromArray = 36
    cudaMemcpyArrayToArray = 37
    cudaMemcpy2DArrayToArray = 38
    cudaMemcpyToSymbol = 39
    cudaMemcpyFromSymbol = 40
    cudaMemcpyAsync = 41
    cudaMemcpyToArrayAsync = 42
    cudaMemcpyFromArrayAsync = 43
    cudaMemcpy2DAsync = 44
    cudaMemcpy2DToArrayAsync = 45
    cudaMemcpy2DFromArrayAsync = 46
    cudaMemcpyToSymbolAsync = 47
    cudaMemcpyFromSymbolAsync = 48
    cudaMemset = 49
    cudaMemset2D = 50
    cudaMemsetAsync = 51
    cudaMemset2DAsync = 52
    cudaGetSymbolAddress = 53
    cudaGetSymbolSize = 54
    cudaBindTexture = 55
    cudaBindTexture2D = 56
    cudaBindTextureToArray = 57
    cudaUnbindTexture = 58
    cudaGetTextureAlignmentOffset = 59
    cudaGetTextureReference = 60
    cudaBindSurfaceToArray = 61
    cudaGetSurfaceReference = 62
    cudaGLSetGLDevice = 63
    cudaGLRegisterBufferObject = 64
    cudaGLMapBufferObject = 65
    cudaGLUnmapBufferObject = 66
    cudaGLUnregisterBufferObject = 67
    cudaGLSetBufferObjectMapFlags = 68
    cudaGLMapBufferObjectAsync = 69
    cudaGLUnmapBufferObjectAsync = 70
    cudaWGLGetDevice = 71
    cudaGraphicsGLRegisterImage = 72
    cudaGraphicsGLRegisterBuffer = 73
    cudaGraphicsUnregisterResource = 74
    cudaGraphicsResourceSetMapFlags = 75
    cudaGraphicsMapResources = 76
    cudaGraphicsUnmapResources = 77
    cudaGraphicsResourceGetMappedPointer = 78
    cudaGraphicsSubResourceGetMappedArray = 79
    cudaVDPAUGetDevice = 80
    cudaVDPAUSetVDPAUDevice = 81
    cudaGraphicsVDPAURegisterVideoSurface = 82
    cudaGraphicsVDPAURegisterOutputSurface = 83
    cudaD3D11GetDevice = 84
    cudaD3D11GetDevices = 85
    cudaD3D11SetDirect3DDevice = 86
    cudaGraphicsD3D11RegisterResource = 87
    cudaD3D10GetDevice = 88
    cudaD3D10GetDevices = 89
    cudaD3D10SetDirect3DDevice = 90
    cudaGraphicsD3D10RegisterResource = 91
    cudaD3D10RegisterResource = 92
    cudaD3D10UnregisterResource = 93
    cudaD3D10MapResources = 94
    cudaD3D10UnmapResources = 95
    cudaD3D10ResourceSetMapFlags = 96
    cudaD3D10ResourceGetSurfaceDimensions = 97
    cudaD3D10ResourceGetMappedArray = 98
    cudaD3D10ResourceGetMappedPointer = 99
    cudaD3D10ResourceGetMappedSize = 100
    cudaD3D10ResourceGetMappedPitch = 101
    cudaD3D9GetDevice = 102
    cudaD3D9GetDevices = 103
    cudaD3D9SetDirect3DDevice = 104
    cudaD3D9GetDirect3DDevice = 105
    cudaGraphicsD3D9RegisterResource = 106
    cudaD3D9RegisterResource = 107
    cudaD3D9UnregisterResource = 108
    cudaD3D9MapResources = 109
    cudaD3D9UnmapResources = 110
    cudaD3D9ResourceSetMapFlags = 111
    cudaD3D9ResourceGetSurfaceDimensions = 112
    cudaD3D9ResourceGetMappedArray = 113
    cudaD3D9ResourceGetMappedPointer = 114
    cudaD3D9ResourceGetMappedSize = 115
    cudaD3D9ResourceGetMappedPitch = 116
    cudaD3D9Begin = 117
    cudaD3D9End = 118
    cudaD3D9RegisterVertexBuffer = 119
    cudaD3D9UnregisterVertexBuffer = 120
    cudaD3D9MapVertexBuffer = 121
    cudaD3D9UnmapVertexBuffer = 122
    cudaThreadExit = 123
    cudaSetDoubleForDevice = 124
    cudaSetDoubleForHost = 125
    cudaThreadSynchronize = 126
    cudaThreadGetLimit = 127
    cudaThreadSetLimit = 128
    cudaStreamCreate = 129
    cudaStreamDestroy = 130
    cudaStreamSynchronize = 131
    cudaStreamQuery = 132
    cudaEventCreate = 133
    cudaEventCreateWithFlags = 134
    cudaEventRecord = 135
    cudaEventDestroy = 136
    cudaEventSynchronize = 137
    cudaEventQuery = 138
    cudaEventElapsedTime = 139
    cudaMalloc3D = 140
    cudaMalloc3DArray = 141
    cudaMemset3D = 142
    cudaMemset3DAsync = 143
    cudaMemcpy3D = 144
    cudaMemcpy3DAsync = 145
    cudaThreadSetCacheConfig = 146
    cudaStreamWaitEvent = 147
    cudaD3D11GetDirect3DDevice = 148
    cudaD3D10GetDirect3DDevice = 149
    cudaThreadGetCacheConfig = 150
    cudaPointerGetAttributes = 151
    cudaHostRegister = 152
    cudaHostUnregister = 153
    cudaDeviceCanAccessPeer = 154
    cudaDeviceEnablePeerAccess = 155
    cudaDeviceDisablePeerAccess = 156
    cudaPeerRegister = 157
    cudaPeerUnregister = 158
    cudaPeerGetDevicePointer = 159
    cudaMemcpyPeer = 160
    cudaMemcpyPeerAsync = 161
    cudaMemcpy3DPeer = 162
    cudaMemcpy3DPeerAsync = 163
    cudaDeviceReset = 164
    cudaDeviceSynchronize = 165
    cudaDeviceGetLimit = 166
    cudaDeviceSetLimit = 167
    cudaDeviceGetCacheConfig = 168
    cudaDeviceSetCacheConfig = 169
    cudaProfilerInitialize = 170
    cudaProfilerStart = 171
    cudaProfilerStop = 172
    cudaDeviceGetByPCIBusId = 173
    cudaDeviceGetPCIBusId = 174
    cudaGLGetDevices = 175
    cudaIpcGetEventHandle = 176
    cudaIpcOpenEventHandle = 177
    cudaIpcGetMemHandle = 178
    cudaIpcOpenMemHandle = 179
    cudaIpcCloseMemHandle = 180
    cudaArrayGetInfo = 181
    cudaFuncSetSharedMemConfig = 182
    cudaDeviceGetSharedMemConfig = 183
    cudaDeviceSetSharedMemConfig = 184
    cudaCreateTextureObject = 185
    cudaDestroyTextureObject = 186
    cudaGetTextureObjectResourceDesc = 187
    cudaGetTextureObjectTextureDesc = 188
    cudaCreateSurfaceObject = 189
    cudaDestroySurfaceObject = 190
    cudaGetSurfaceObjectResourceDesc = 191
    cudaMallocMipmappedArray = 192
    cudaGetMipmappedArrayLevel = 193
    cudaFreeMipmappedArray = 194
    cudaBindTextureToMipmappedArray = 195
    cudaGraphicsResourceGetMappedMipmappedArray = 196
    cudaStreamAddCallback = 197
    cudaStreamCreateWithFlags = 198
    cudaGetTextureObjectResourceViewDesc = 199
    cudaDeviceGetAttribute = 200
    cudaStreamDestroy_v5050 = 201
    cudaStreamCreateWithPriority = 202
    cudaStreamGetPriority = 203
    cudaStreamGetFlags = 204
    cudaDeviceGetStreamPriorityRange = 205
    cudaMallocManaged = 206
    cudaOccupancyMaxActiveBlocksPerMultiprocessor_v6000 = 207
    cudaStreamAttachMemAsync = 208
    cudaGetErrorName = 209
    cudaOccupancyMaxActiveBlocksPerMultiprocessor_v6050 = 210
    cudaLaunchKernel = 211
    cudaGetDeviceFlags = 212
    cudaLaunch_ptsz = 213
    cudaLaunchKernel_ptsz = 214
    cudaMemcpy_ptds = 215
    cudaMemcpy2D_ptds = 216
    cudaMemcpyToArray_ptds = 217
    cudaMemcpy2DToArray_ptds = 218
    cudaMemcpyFromArray_ptds = 219
    cudaMemcpy2DFromArray_ptds = 220
    cudaMemcpyArrayToArray_ptds = 221
    cudaMemcpy2DArrayToArray_ptds = 222
    cudaMemcpyToSymbol_ptds = 223
    cudaMemcpyFromSymbol_ptds = 224
    cudaMemcpyAsync_ptsz = 225
    cudaMemcpyToArrayAsync_ptsz = 226
    cudaMemcpyFromArrayAsync_ptsz = 227
    cudaMemcpy2DAsync_ptsz = 228
    cudaMemcpy2DToArrayAsync_ptsz = 229
    cudaMemcpy2DFromArrayAsync_ptsz = 230
    cudaMemcpyToSymbolAsync_ptsz = 231
    cudaMemcpyFromSymbolAsync_ptsz = 232
    cudaMemset_ptds = 233
    cudaMemset2D_ptds = 234
    cudaMemsetAsync_ptsz = 235
    cudaMemset2DAsync_ptsz = 236
    cudaStreamGetPriority_ptsz = 237
    cudaStreamGetFlags_ptsz = 238
    cudaStreamSynchronize_ptsz = 239
    cudaStreamQuery_ptsz = 240
    cudaStreamAttachMemAsync_ptsz = 241
    cudaEventRecord_ptsz = 242
    cudaMemset3D_ptds = 243
    cudaMemset3DAsync_ptsz = 244
    cudaMemcpy3D_ptds = 245
    cudaMemcpy3DAsync_ptsz = 246
    cudaStreamWaitEvent_ptsz = 247
    cudaStreamAddCallback_ptsz = 248
    cudaMemcpy3DPeer_ptds = 249
    cudaMemcpy3DPeerAsync_ptsz = 250
    cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags = 251
    cudaMemPrefetchAsync = 252
    cudaMemPrefetchAsync_ptsz = 253
    cudaMemAdvise = 254
    cudaDeviceGetP2PAttribute = 255
    cudaGraphicsEGLRegisterImage = 256
    cudaEGLStreamConsumerConnect = 257
    cudaEGLStreamConsumerDisconnect = 258
    cudaEGLStreamConsumerAcquireFrame = 259
    cudaEGLStreamConsumerReleaseFrame = 260
    cudaEGLStreamProducerConnect = 261
    cudaEGLStreamProducerDisconnect = 262
    cudaEGLStreamProducerPresentFrame = 263
    cudaEGLStreamProducerReturnFrame = 264
    cudaGraphicsResourceGetMappedEglFrame = 265
    cudaMemRangeGetAttribute = 266
    cudaMemRangeGetAttributes = 267
    cudaEGLStreamConsumerConnectWithFlags = 268
    cudaLaunchCooperativeKernel = 269
    cudaLaunchCooperativeKernel_ptsz = 270
    cudaEventCreateFromEGLSync = 271
    cudaLaunchCooperativeKernelMultiDevice = 272
    cudaFuncSetAttribute = 273
    cudaImportExternalMemory = 274
    cudaExternalMemoryGetMappedBuffer = 275
    cudaExternalMemoryGetMappedMipmappedArray = 276
    cudaDestroyExternalMemory = 277
    cudaImportExternalSemaphore = 278
    cudaSignalExternalSemaphoresAsync = 279
    cudaSignalExternalSemaphoresAsync_ptsz = 280
    cudaWaitExternalSemaphoresAsync = 281
    cudaWaitExternalSemaphoresAsync_ptsz = 282
    cudaDestroyExternalSemaphore = 283
    cudaLaunchHostFunc = 284
    cudaLaunchHostFunc_ptsz = 285
    cudaGraphCreate = 286
    cudaGraphKernelNodeGetParams = 287
    cudaGraphKernelNodeSetParams = 288
    cudaGraphAddKernelNode = 289
    cudaGraphAddMemcpyNode = 290
    cudaGraphMemcpyNodeGetParams = 291
    cudaGraphMemcpyNodeSetParams = 292
    cudaGraphAddMemsetNode = 293
    cudaGraphMemsetNodeGetParams = 294
    cudaGraphMemsetNodeSetParams = 295
    cudaGraphAddHostNode = 296
    cudaGraphHostNodeGetParams = 297
    cudaGraphAddChildGraphNode = 298
    cudaGraphChildGraphNodeGetGraph = 299
    cudaGraphAddEmptyNode = 300
    cudaGraphClone = 301
    cudaGraphNodeFindInClone = 302
    cudaGraphNodeGetType = 303
    cudaGraphGetRootNodes = 304
    cudaGraphNodeGetDependencies = 305
    cudaGraphNodeGetDependentNodes = 306
    cudaGraphAddDependencies = 307
    cudaGraphRemoveDependencies = 308
    cudaGraphDestroyNode = 309
    cudaGraphInstantiate = 310
    cudaGraphLaunch = 311
    cudaGraphLaunch_ptsz = 312
    cudaGraphExecDestroy = 313
    cudaGraphDestroy = 314
    cudaStreamBeginCapture = 315
    cudaStreamBeginCapture_ptsz = 316
    cudaStreamIsCapturing = 317
    cudaStreamIsCapturing_ptsz = 318
    cudaStreamEndCapture = 319
    cudaStreamEndCapture_ptsz = 320
    cudaGraphHostNodeSetParams = 321
    cudaGraphGetNodes = 322
    cudaGraphGetEdges = 323
    cudaStreamGetCaptureInfo = 324
    cudaStreamGetCaptureInfo_ptsz = 325
    cudaGraphExecKernelNodeSetParams = 326
    cudaThreadExchangeStreamCaptureMode = 327
    cudaDeviceGetNvSciSyncAttributes = 328
    cudaOccupancyAvailableDynamicSMemPerBlock = 329
    cudaStreamSetFlags = 330
    cudaStreamSetFlags_ptsz = 331
    cudaGraphExecMemcpyNodeSetParams = 332
    cudaGraphExecMemsetNodeSetParams = 333
    cudaGraphExecHostNodeSetParams = 334
    cudaGraphExecUpdate = 335
    # Sentinels (not real API calls): SIZE marks the number of known ids,
    # FORCE_INT pins the underlying C enum to 32 bits.
    SIZE = 336
    FORCE_INT = 0x7FFFFFFF
658def _sizeof_fmt(num, suffix='B'):
659 """Format size with metric units (like nvvp)"""
660 for unit in ['', 'K', 'M', 'G', 'T', 'P', 'E', 'Z']:
661 if abs(num) < 1000.0:
662 return f"{num:3.1f}{unit}{suffix}"
663 num /= 1000.0 # pragma: no cover
664 return f"{num:.1f}{'Y'}{suffix}" # pragma: no cover
def json_to_dataframe(js):
    """
    Converts a json dump obtained with function
    @see fn convert_trace_to_json
    to a dataframe.

    :param js: a filename, a json string, a stream containing json
    :return: a dataframe
    """
    is_short_path = isinstance(js, str) and os.path.exists(js)
    if not is_short_path:
        # Stream or json content: let pandas parse it directly.
        df = pandas.read_json(js)
    elif len(js) < 5000:
        # Short string naming an existing file: read from disk.
        df = pandas.read_json(js)
    else:  # pragma: no cover
        df = pandas.read_json(io.StringIO(js))
    # Timestamps are in nanoseconds; expose them in seconds as well.
    df['ts_sec'] = df['ts'].apply(lambda t: t / 1e9)
    return df
def json_to_dataframe_streaming(js, chunksize=100000, flatten=False, **kwargs):
    """
    Converts a big json dump (from @see fn convert_trace_to_json)
    to a dataframe. The function processes the data by streaming to avoid
    loading huge data in memory.
    Returns an iterator on dataframes.
    The function relies on :epkg:`pandas_streaming`.

    :param js: a filename, a json string, a stream containing json
    :param chunksize:
        see :func:`pandas_streaming.df.StreamingDataFrame.read_json`
    :param flatten:
        see :func:`pandas_streaming.df.StreamingDataFrame.read_json`
    :param kwargs:
        see :func:`pandas_streaming.df.StreamingDataFrame.read_json`
    :return: a dataframe
    """
    from pandas_streaming.df import StreamingDataFrame  # pylint: disable=C0415
    if not isinstance(js, str):
        # Stream input: handled natively by pandas_streaming.
        sdf = StreamingDataFrame.read_json(js)
    elif len(js) < 5000 and os.path.exists(js):
        # Short string naming an existing file.
        sdf = StreamingDataFrame.read_json(js)
    else:
        raise RuntimeError(
            "Use a stream or function json_to_dataframe instead of "
            "the streaming version.")
    # Timestamps are in nanoseconds; expose them in seconds as well.
    sdf['ts_sec'] = sdf['ts'].apply(lambda t: t / 1e9)
    return sdf