Coverage for onnxcustom/utils/nvprof2json.py: 99%

455 statements  

« prev     ^ index     » next       coverage.py v7.0.5, created at 2023-01-17 01:42 +0100

1""" 

2@file 

3@brief Converts traces from :epkg:`nvprof`. 

4The source comes from `nvprof2json <https://github.com/ezyang/nvprof2json>`_. 

5""" 

6 

7import sqlite3 

8import enum 

9import json 

10import copy 

11import io 

12import os 

13import zipfile 

14import cxxfilt 

15import pandas 

16 

17 

def convert_trace_to_json(filename, output=None, temporary_file=None,
                          verbose=0, fLOG=None):
    """
    Converts traces produced by :epkg:`nvprof` and saved with
    format *sqlite3* (extension `.sql`). The output format
    follows `Trace Event Format
    <https://docs.google.com/document/d/
    1CvAClvFfyA5R-PhYUmn5OOQtYMH4h6I0nSsKchNAySU/preview>`_.

    :param filename: filename of the sqlite3 trace, or of a zip file
        (extension `.zip`) storing exactly one such trace
    :param output: output file or None
    :param temporary_file: if the file needs to be unzipped,
        this file will be created to be the unzipped file,
        it is not cleaned after the unzipping, it defaults to
        `filename + ".unzipped"` and is reused as is if it already exists
    :param verbose: verbosity
    :param fLOG: logging function
    :return: the json string if *output* is None or empty, otherwise
        the list of events (the json is then written into *output*)
    :raises RuntimeError: if the zip file does not store exactly one file

    This file, if not too big, can be viewed with `chrome-tracing`.
    The traces are usually generated by using a command line similar to:

    ::

        nvprof -o gpu_profile.sql python plot_gpu_training.py
    """
    def log(message):
        # Every message goes through the same guard: a logger is only
        # called when verbosity is on and a logger was actually given.
        if verbose > 0 and fLOG is not None:
            fLOG(message)

    if os.path.splitext(filename)[-1] == ".zip":
        filename = _unzip_trace(filename, temporary_file, log)

    conn = sqlite3.connect(filename)
    try:
        conn.row_factory = sqlite3.Row

        # StringTable maps an integer id to a (possibly mangled) name.
        strings = {}
        for r in conn.execute("SELECT _id_ as id, value FROM StringTable"):
            strings[r["id"]] = _demangle(r["value"])

        traceEvents = []
        log("[convert_trace_to_json] step 1 begin.")
        traceEvents.extend(_runtime_api_events(conn, log))
        log("[convert_trace_to_json] step 2 begin.")
        traceEvents.extend(_marker_events(conn, strings))
        log("[convert_trace_to_json] step 3 begin.")
        traceEvents.extend(_memcpy_events(conn))
        log("[convert_trace_to_json] step 4 begin.")
        traceEvents.extend(_kernel_events(conn, strings))
    finally:
        # The connection is not needed once all tables have been read.
        conn.close()

    if output not in (None, ''):
        log(f"[convert_trace_to_json] converting into json in {output!r}.")
        with open(output, "w", encoding="utf-8") as f:
            json.dump(traceEvents, f, separators=(',\n', ':'))
            f.write('\n')
        log("[convert_trace_to_json] done.")
        return traceEvents

    log("[convert_trace_to_json] converting into json.")
    content = json.dumps(traceEvents, separators=(',\n', ':')) + '\n'
    log("[convert_trace_to_json] done.")
    log(content)
    return content


def _unzip_trace(filename, temporary_file, log):
    """
    Extracts the single file stored in zip file *filename* into
    *temporary_file* unless that file already exists.

    :param filename: zip file
    :param temporary_file: destination, `filename + ".unzipped"`
        if None or empty
    :param log: function receiving log messages
    :return: name of the unzipped file
    """
    if temporary_file in (None, ''):
        temporary_file = filename + ".unzipped"
    if os.path.exists(temporary_file):
        log(f"[convert_trace_to_json] {filename!r} already unzipped "
            f"into {temporary_file!r}.")
        return temporary_file

    log(f"[convert_trace_to_json] unzipping to file {temporary_file!r}.")
    with zipfile.ZipFile(filename) as zipf:
        names = zipf.namelist()
        if len(names) != 1:
            raise RuntimeError(  # pragma: no cover
                f"More than one file is stored in zip file {filename!r}.")
        with zipf.open(names[0], "r") as stream:
            with open(temporary_file, "wb") as f:
                # Copies by chunks to keep memory usage low on big traces.
                for data in iter(lambda: stream.read(65536), b""):
                    f.write(data)
    return temporary_file


def _runtime_api_events(conn, log):
    """
    Builds one complete event per row of table
    ``CUPTI_ACTIVITY_KIND_RUNTIME``. The rows are read through
    columns *cbid*, *start*, *end*, *processId*, *threadId*.
    """
    events = []
    for row in conn.execute("SELECT * FROM CUPTI_ACTIVITY_KIND_RUNTIME"):
        try:
            cbid = Cbids(row["cbid"]).name
        except ValueError:  # pragma: no cover
            # Unknown identifier: the raw number is used as the event name.
            cbid = str(row["cbid"])
            log(f"[convert_trace_to_json] unrecognized cbid {cbid!r}.")
        events.append({
            "name": cbid,
            "ph": "X",  # Complete Event (Begin + End event)
            "cat": "cuda",
            "ts": _munge_time(row["start"]),
            "dur": _munge_time(row["end"] - row["start"]),
            "tid": f"Thread {row['threadId']}: Runtime API",
            "pid": f"[{row['processId']}] Process",
            "args": {
                # ...
            },
        })
    return events


def _marker_events(conn, strings):
    """
    Builds one event per named row of table ``CUPTI_ACTIVITY_KIND_MARKER``.
    A named row (*name != 0*) is joined on column *id* with the unnamed
    row (*name = 0*) giving its end timestamp. Without a matching end row,
    the event is an instant event.
    """
    query = (
        "SELECT start.name AS name,"
        "start.timestamp AS start_time,"
        "end.timestamp AS end_time "
        "FROM (SELECT * FROM CUPTI_ACTIVITY_KIND_MARKER WHERE name != 0) "
        "AS start "
        "LEFT JOIN "
        "(SELECT * FROM CUPTI_ACTIVITY_KIND_MARKER WHERE name = 0) "
        "AS end "
        "ON start.id = end.id")
    events = []
    for row in conn.execute(query):
        event = {
            "name": strings[row["name"]],
            "cat": "cuda",
            "ts": _munge_time(row["start_time"]),
            # Weirdly, these don't seem to be associated with a
            # CPU/GPU. I guess there's no CUDA Context available
            # when you run these, so it makes sense. But nvvp
            # associates these with a GPU strangely enough
            "tid": "Markers and Ranges",
            "pid": "Markers and Ranges",
            # parse objectId?
            "args": {
                # ...
            },
        }
        if row["end_time"] is None:
            event["ph"] = "I"
        else:
            event["ph"] = "X"
            event["dur"] = _munge_time(row["end_time"] - row["start_time"])
        events.append(event)
    return events


def _memcpy_events(conn):
    """
    Builds one complete event per row of table
    ``CUPTI_ACTIVITY_KIND_MEMCPY``. The rows are read through columns
    *copyKind*, *flags*, *bytes*, *start*, *end*, *deviceId*, *contextId*.
    """
    # srcKind/dstKind (not used here): 1 - Pageable,
    # 2 - Page-locked ???, 3 - Device
    copy_kinds = {1: "HtoD", 2: "DtoH", 8: "DtoD"}
    # flags: ???
    flag_names = {0: "sync", 1: "async"}
    events = []
    for row in conn.execute("SELECT * FROM CUPTI_ACTIVITY_KIND_MEMCPY"):
        # Unknown values are kept as their string representation.
        copyKind = copy_kinds.get(row["copyKind"], str(row["copyKind"]))
        flags = flag_names.get(row["flags"], str(row["flags"]))
        events.append({
            "name": f"Memcpy {copyKind} [{flags}]",
            "ph": "X",  # Complete Event (Begin + End event)
            "cat": "cuda",
            "ts": _munge_time(row["start"]),
            "dur": _munge_time(row["end"] - row["start"]),
            "tid": f"MemCpy ({copyKind})",
            # lookup GPU name. This is stored in CUPTI_ACTIVITY_KIND_DEVICE
            "pid": f"[{row['deviceId']}:{row['contextId']}] Overview",
            "args": {
                "Size": _sizeof_fmt(row["bytes"]),
            },
        })
    return events


def _kernel_events(conn, strings):
    """
    Builds two complete events per row of table
    ``CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL``: one in thread *Compute*
    of the *Overview* process, and a copy in a thread named after the
    kernel in the *Compute* process of the same device and context.
    Column *name* is an index into the string table.
    """
    # What is the difference between end and completed?
    events = []
    for row in conn.execute(
            "SELECT * FROM CUPTI_ACTIVITY_KIND_CONCURRENT_KERNEL"):
        event = {
            "name": strings[row["name"]],
            "ph": "X",  # Complete Event (Begin + End event)
            "cat": "cuda",
            "ts": _munge_time(row["start"]),
            "dur": _munge_time(row["end"] - row["start"]),
            "tid": "Compute",
            # lookup GPU name?
            "pid": f"[{row['deviceId']}:{row['contextId']}] Overview",
            "args": {
                "Grid size": f"[ {row['gridX']}, {row['gridY']}, {row['gridZ']} ]",
                "Block size": f"[ {row['blockX']}, {row['blockY']}, {row['blockZ']} ]",
                # ...
            },
        }
        alt_event = copy.deepcopy(event)
        alt_event["tid"] = alt_event["name"]
        alt_event["pid"] = f"[{row['deviceId']}:{row['contextId']}] Compute"
        events.append(event)
        events.append(alt_event)
    return events

299 

300 

301def _munge_time(t): 

302 """Take a time from nvprof and convert it into a chrome://tracing time.""" 

303 # For strict correctness, divide by 1000, but this reduces accuracy. 

304 return t # / 1000. 

305 

306 

def _demangle(name):
    """
    Demangles a C++ identifier with :epkg:`cxxfilt`.
    The name is returned unchanged if *cxxfilt* reports
    that the library it needs is missing.
    """
    try:
        demangled = cxxfilt.demangle(name)
    except cxxfilt.LibraryNotFound:  # pragma: no cover
        # One library is missing.
        demangled = name
    return demangled

314 

315 

class Cbids(enum.IntEnum):
    """
    List of events.

    Maps the integer stored in column *cbid* of table
    ``CUPTI_ACTIVITY_KIND_RUNTIME`` to a name, function
    `convert_trace_to_json` uses that name as the event name.
    """
    INVALID = 0
    cudaDriverGetVersion = 1
    cudaRuntimeGetVersion = 2
    cudaGetDeviceCount = 3
    cudaGetDeviceProperties = 4
    cudaChooseDevice = 5
    cudaGetChannelDesc = 6
    cudaCreateChannelDesc = 7
    cudaConfigureCall = 8
    cudaSetupArgument = 9
    cudaGetLastError = 10
    cudaPeekAtLastError = 11
    cudaGetErrorString = 12
    cudaLaunch = 13
    cudaFuncSetCacheConfig = 14
    cudaFuncGetAttributes = 15
    cudaSetDevice = 16
    cudaGetDevice = 17
    cudaSetValidDevices = 18
    cudaSetDeviceFlags = 19
    cudaMalloc = 20
    cudaMallocPitch = 21
    cudaFree = 22
    cudaMallocArray = 23
    cudaFreeArray = 24
    cudaMallocHost = 25
    cudaFreeHost = 26
    cudaHostAlloc = 27
    cudaHostGetDevicePointer = 28
    cudaHostGetFlags = 29
    cudaMemGetInfo = 30
    cudaMemcpy = 31
    cudaMemcpy2D = 32
    cudaMemcpyToArray = 33
    cudaMemcpy2DToArray = 34
    cudaMemcpyFromArray = 35
    cudaMemcpy2DFromArray = 36
    cudaMemcpyArrayToArray = 37
    cudaMemcpy2DArrayToArray = 38
    cudaMemcpyToSymbol = 39
    cudaMemcpyFromSymbol = 40
    cudaMemcpyAsync = 41
    cudaMemcpyToArrayAsync = 42
    cudaMemcpyFromArrayAsync = 43
    cudaMemcpy2DAsync = 44
    cudaMemcpy2DToArrayAsync = 45
    cudaMemcpy2DFromArrayAsync = 46
    cudaMemcpyToSymbolAsync = 47
    cudaMemcpyFromSymbolAsync = 48
    cudaMemset = 49
    cudaMemset2D = 50
    cudaMemsetAsync = 51
    cudaMemset2DAsync = 52
    cudaGetSymbolAddress = 53
    cudaGetSymbolSize = 54
    cudaBindTexture = 55
    cudaBindTexture2D = 56
    cudaBindTextureToArray = 57
    cudaUnbindTexture = 58
    cudaGetTextureAlignmentOffset = 59
    cudaGetTextureReference = 60
    cudaBindSurfaceToArray = 61
    cudaGetSurfaceReference = 62
    cudaGLSetGLDevice = 63
    cudaGLRegisterBufferObject = 64
    cudaGLMapBufferObject = 65
    cudaGLUnmapBufferObject = 66
    cudaGLUnregisterBufferObject = 67
    cudaGLSetBufferObjectMapFlags = 68
    cudaGLMapBufferObjectAsync = 69
    cudaGLUnmapBufferObjectAsync = 70
    cudaWGLGetDevice = 71
    cudaGraphicsGLRegisterImage = 72
    cudaGraphicsGLRegisterBuffer = 73
    cudaGraphicsUnregisterResource = 74
    cudaGraphicsResourceSetMapFlags = 75
    cudaGraphicsMapResources = 76
    cudaGraphicsUnmapResources = 77
    cudaGraphicsResourceGetMappedPointer = 78
    cudaGraphicsSubResourceGetMappedArray = 79
    cudaVDPAUGetDevice = 80
    cudaVDPAUSetVDPAUDevice = 81
    cudaGraphicsVDPAURegisterVideoSurface = 82
    cudaGraphicsVDPAURegisterOutputSurface = 83
    cudaD3D11GetDevice = 84
    cudaD3D11GetDevices = 85
    cudaD3D11SetDirect3DDevice = 86
    cudaGraphicsD3D11RegisterResource = 87
    cudaD3D10GetDevice = 88
    cudaD3D10GetDevices = 89
    cudaD3D10SetDirect3DDevice = 90
    cudaGraphicsD3D10RegisterResource = 91
    cudaD3D10RegisterResource = 92
    cudaD3D10UnregisterResource = 93
    cudaD3D10MapResources = 94
    cudaD3D10UnmapResources = 95
    cudaD3D10ResourceSetMapFlags = 96
    cudaD3D10ResourceGetSurfaceDimensions = 97
    cudaD3D10ResourceGetMappedArray = 98
    cudaD3D10ResourceGetMappedPointer = 99
    cudaD3D10ResourceGetMappedSize = 100
    cudaD3D10ResourceGetMappedPitch = 101
    cudaD3D9GetDevice = 102
    cudaD3D9GetDevices = 103
    cudaD3D9SetDirect3DDevice = 104
    cudaD3D9GetDirect3DDevice = 105
    cudaGraphicsD3D9RegisterResource = 106
    cudaD3D9RegisterResource = 107
    cudaD3D9UnregisterResource = 108
    cudaD3D9MapResources = 109
    cudaD3D9UnmapResources = 110
    cudaD3D9ResourceSetMapFlags = 111
    cudaD3D9ResourceGetSurfaceDimensions = 112
    cudaD3D9ResourceGetMappedArray = 113
    cudaD3D9ResourceGetMappedPointer = 114
    cudaD3D9ResourceGetMappedSize = 115
    cudaD3D9ResourceGetMappedPitch = 116
    cudaD3D9Begin = 117
    cudaD3D9End = 118
    cudaD3D9RegisterVertexBuffer = 119
    cudaD3D9UnregisterVertexBuffer = 120
    cudaD3D9MapVertexBuffer = 121
    cudaD3D9UnmapVertexBuffer = 122
    cudaThreadExit = 123
    cudaSetDoubleForDevice = 124
    cudaSetDoubleForHost = 125
    cudaThreadSynchronize = 126
    cudaThreadGetLimit = 127
    cudaThreadSetLimit = 128
    cudaStreamCreate = 129
    cudaStreamDestroy = 130
    cudaStreamSynchronize = 131
    cudaStreamQuery = 132
    cudaEventCreate = 133
    cudaEventCreateWithFlags = 134
    cudaEventRecord = 135
    cudaEventDestroy = 136
    cudaEventSynchronize = 137
    cudaEventQuery = 138
    cudaEventElapsedTime = 139
    cudaMalloc3D = 140
    cudaMalloc3DArray = 141
    cudaMemset3D = 142
    cudaMemset3DAsync = 143
    cudaMemcpy3D = 144
    cudaMemcpy3DAsync = 145
    cudaThreadSetCacheConfig = 146
    cudaStreamWaitEvent = 147
    cudaD3D11GetDirect3DDevice = 148
    cudaD3D10GetDirect3DDevice = 149
    cudaThreadGetCacheConfig = 150
    cudaPointerGetAttributes = 151
    cudaHostRegister = 152
    cudaHostUnregister = 153
    cudaDeviceCanAccessPeer = 154
    cudaDeviceEnablePeerAccess = 155
    cudaDeviceDisablePeerAccess = 156
    cudaPeerRegister = 157
    cudaPeerUnregister = 158
    cudaPeerGetDevicePointer = 159
    cudaMemcpyPeer = 160
    cudaMemcpyPeerAsync = 161
    cudaMemcpy3DPeer = 162
    cudaMemcpy3DPeerAsync = 163
    cudaDeviceReset = 164
    cudaDeviceSynchronize = 165
    cudaDeviceGetLimit = 166
    cudaDeviceSetLimit = 167
    cudaDeviceGetCacheConfig = 168
    cudaDeviceSetCacheConfig = 169
    cudaProfilerInitialize = 170
    cudaProfilerStart = 171
    cudaProfilerStop = 172
    cudaDeviceGetByPCIBusId = 173
    cudaDeviceGetPCIBusId = 174
    cudaGLGetDevices = 175
    cudaIpcGetEventHandle = 176
    cudaIpcOpenEventHandle = 177
    cudaIpcGetMemHandle = 178
    cudaIpcOpenMemHandle = 179
    cudaIpcCloseMemHandle = 180
    cudaArrayGetInfo = 181
    cudaFuncSetSharedMemConfig = 182
    cudaDeviceGetSharedMemConfig = 183
    cudaDeviceSetSharedMemConfig = 184
    cudaCreateTextureObject = 185
    cudaDestroyTextureObject = 186
    cudaGetTextureObjectResourceDesc = 187
    cudaGetTextureObjectTextureDesc = 188
    cudaCreateSurfaceObject = 189
    cudaDestroySurfaceObject = 190
    cudaGetSurfaceObjectResourceDesc = 191
    cudaMallocMipmappedArray = 192
    cudaGetMipmappedArrayLevel = 193
    cudaFreeMipmappedArray = 194
    cudaBindTextureToMipmappedArray = 195
    cudaGraphicsResourceGetMappedMipmappedArray = 196
    cudaStreamAddCallback = 197
    cudaStreamCreateWithFlags = 198
    cudaGetTextureObjectResourceViewDesc = 199
    cudaDeviceGetAttribute = 200
    cudaStreamDestroy_v5050 = 201
    cudaStreamCreateWithPriority = 202
    cudaStreamGetPriority = 203
    cudaStreamGetFlags = 204
    cudaDeviceGetStreamPriorityRange = 205
    cudaMallocManaged = 206
    cudaOccupancyMaxActiveBlocksPerMultiprocessor_v6000 = 207
    cudaStreamAttachMemAsync = 208
    cudaGetErrorName = 209
    cudaOccupancyMaxActiveBlocksPerMultiprocessor_v6050 = 210
    cudaLaunchKernel = 211
    cudaGetDeviceFlags = 212
    cudaLaunch_ptsz = 213
    cudaLaunchKernel_ptsz = 214
    cudaMemcpy_ptds = 215
    cudaMemcpy2D_ptds = 216
    cudaMemcpyToArray_ptds = 217
    cudaMemcpy2DToArray_ptds = 218
    cudaMemcpyFromArray_ptds = 219
    cudaMemcpy2DFromArray_ptds = 220
    cudaMemcpyArrayToArray_ptds = 221
    cudaMemcpy2DArrayToArray_ptds = 222
    cudaMemcpyToSymbol_ptds = 223
    cudaMemcpyFromSymbol_ptds = 224
    cudaMemcpyAsync_ptsz = 225
    cudaMemcpyToArrayAsync_ptsz = 226
    cudaMemcpyFromArrayAsync_ptsz = 227
    cudaMemcpy2DAsync_ptsz = 228
    cudaMemcpy2DToArrayAsync_ptsz = 229
    cudaMemcpy2DFromArrayAsync_ptsz = 230
    cudaMemcpyToSymbolAsync_ptsz = 231
    cudaMemcpyFromSymbolAsync_ptsz = 232
    cudaMemset_ptds = 233
    cudaMemset2D_ptds = 234
    cudaMemsetAsync_ptsz = 235
    cudaMemset2DAsync_ptsz = 236
    cudaStreamGetPriority_ptsz = 237
    cudaStreamGetFlags_ptsz = 238
    cudaStreamSynchronize_ptsz = 239
    cudaStreamQuery_ptsz = 240
    cudaStreamAttachMemAsync_ptsz = 241
    cudaEventRecord_ptsz = 242
    cudaMemset3D_ptds = 243
    cudaMemset3DAsync_ptsz = 244
    cudaMemcpy3D_ptds = 245
    cudaMemcpy3DAsync_ptsz = 246
    cudaStreamWaitEvent_ptsz = 247
    cudaStreamAddCallback_ptsz = 248
    cudaMemcpy3DPeer_ptds = 249
    cudaMemcpy3DPeerAsync_ptsz = 250
    cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags = 251
    cudaMemPrefetchAsync = 252
    cudaMemPrefetchAsync_ptsz = 253
    cudaMemAdvise = 254
    cudaDeviceGetP2PAttribute = 255
    cudaGraphicsEGLRegisterImage = 256
    cudaEGLStreamConsumerConnect = 257
    cudaEGLStreamConsumerDisconnect = 258
    cudaEGLStreamConsumerAcquireFrame = 259
    cudaEGLStreamConsumerReleaseFrame = 260
    cudaEGLStreamProducerConnect = 261
    cudaEGLStreamProducerDisconnect = 262
    cudaEGLStreamProducerPresentFrame = 263
    cudaEGLStreamProducerReturnFrame = 264
    cudaGraphicsResourceGetMappedEglFrame = 265
    cudaMemRangeGetAttribute = 266
    cudaMemRangeGetAttributes = 267
    cudaEGLStreamConsumerConnectWithFlags = 268
    cudaLaunchCooperativeKernel = 269
    cudaLaunchCooperativeKernel_ptsz = 270
    cudaEventCreateFromEGLSync = 271
    cudaLaunchCooperativeKernelMultiDevice = 272
    cudaFuncSetAttribute = 273
    cudaImportExternalMemory = 274
    cudaExternalMemoryGetMappedBuffer = 275
    cudaExternalMemoryGetMappedMipmappedArray = 276
    cudaDestroyExternalMemory = 277
    cudaImportExternalSemaphore = 278
    cudaSignalExternalSemaphoresAsync = 279
    cudaSignalExternalSemaphoresAsync_ptsz = 280
    cudaWaitExternalSemaphoresAsync = 281
    cudaWaitExternalSemaphoresAsync_ptsz = 282
    cudaDestroyExternalSemaphore = 283
    cudaLaunchHostFunc = 284
    cudaLaunchHostFunc_ptsz = 285
    cudaGraphCreate = 286
    cudaGraphKernelNodeGetParams = 287
    cudaGraphKernelNodeSetParams = 288
    cudaGraphAddKernelNode = 289
    cudaGraphAddMemcpyNode = 290
    cudaGraphMemcpyNodeGetParams = 291
    cudaGraphMemcpyNodeSetParams = 292
    cudaGraphAddMemsetNode = 293
    cudaGraphMemsetNodeGetParams = 294
    cudaGraphMemsetNodeSetParams = 295
    cudaGraphAddHostNode = 296
    cudaGraphHostNodeGetParams = 297
    cudaGraphAddChildGraphNode = 298
    cudaGraphChildGraphNodeGetGraph = 299
    cudaGraphAddEmptyNode = 300
    cudaGraphClone = 301
    cudaGraphNodeFindInClone = 302
    cudaGraphNodeGetType = 303
    cudaGraphGetRootNodes = 304
    cudaGraphNodeGetDependencies = 305
    cudaGraphNodeGetDependentNodes = 306
    cudaGraphAddDependencies = 307
    cudaGraphRemoveDependencies = 308
    cudaGraphDestroyNode = 309
    cudaGraphInstantiate = 310
    cudaGraphLaunch = 311
    cudaGraphLaunch_ptsz = 312
    cudaGraphExecDestroy = 313
    cudaGraphDestroy = 314
    cudaStreamBeginCapture = 315
    cudaStreamBeginCapture_ptsz = 316
    cudaStreamIsCapturing = 317
    cudaStreamIsCapturing_ptsz = 318
    cudaStreamEndCapture = 319
    cudaStreamEndCapture_ptsz = 320
    cudaGraphHostNodeSetParams = 321
    cudaGraphGetNodes = 322
    cudaGraphGetEdges = 323
    cudaStreamGetCaptureInfo = 324
    cudaStreamGetCaptureInfo_ptsz = 325
    cudaGraphExecKernelNodeSetParams = 326
    cudaThreadExchangeStreamCaptureMode = 327
    cudaDeviceGetNvSciSyncAttributes = 328
    cudaOccupancyAvailableDynamicSMemPerBlock = 329
    cudaStreamSetFlags = 330
    cudaStreamSetFlags_ptsz = 331
    cudaGraphExecMemcpyNodeSetParams = 332
    cudaGraphExecMemsetNodeSetParams = 333
    cudaGraphExecHostNodeSetParams = 334
    cudaGraphExecUpdate = 335
    # One past the last identifier listed above (335).
    SIZE = 336
    # NOTE(review): presumably a sentinel inherited from the original
    # C enum to force an int-sized type, not a real event -- confirm.
    FORCE_INT = 0x7FFFFFFF

656 

657 

658def _sizeof_fmt(num, suffix='B'): 

659 """Format size with metric units (like nvvp)""" 

660 for unit in ['', 'K', 'M', 'G', 'T', 'P', 'E', 'Z']: 

661 if abs(num) < 1000.0: 

662 return f"{num:3.1f}{unit}{suffix}" 

663 num /= 1000.0 # pragma: no cover 

664 return f"{num:.1f}{'Y'}{suffix}" # pragma: no cover 

665 

666 

def json_to_dataframe(js):
    """
    Converts a json dump obtained with function
    @see fn convert_trace_to_json
    to a dataframe.

    :param js: a filename, a json string, a stream containing json
    :return: a dataframe, with an additional column *ts_sec*
        equal to column *ts* divided by 1e9

    A string is considered as a filename if it is shorter than
    5000 characters and points to an existing file. A string
    which looks like json (it starts with ``[`` or ``{``)
    is wrapped into a stream because :epkg:`pandas` deprecated
    literal json strings as an input of `read_json`.
    """
    if isinstance(js, str):
        # The length is checked first so that a huge json string
        # is never sent to the file system.
        is_file = len(js) < 5000 and os.path.exists(js)
        if not is_file and js.lstrip()[:1] in ("[", "{"):
            js = io.StringIO(js)
    # Any other string goes to pandas as is, it raises its usual
    # exception if the file does not exist.
    df = pandas.read_json(js)

    df['ts_sec'] = df['ts'].apply(lambda t: t / 1e9)
    return df

687 

688 

def json_to_dataframe_streaming(js, chunksize=100000, flatten=False, **kwargs):
    """
    Converts a big json dump (from @see fn convert_trace_to_json)
    to a dataframe. The function processes the data by streaming to avoid
    loading huge data in memory.
    Returns an iterator on dataframes.
    The function relies on :epkg:`pandas_streaming`.

    :param js: a filename, a stream containing json, a string is only
        accepted if it is shorter than 5000 characters
        and is the name of an existing file
    :param chunksize:
        see :func:`pandas_streaming.df.StreamingDataFrame.read_json`
    :param flatten:
        see :func:`pandas_streaming.df.StreamingDataFrame.read_json`
    :param kwargs:
        see :func:`pandas_streaming.df.StreamingDataFrame.read_json`
    :return: a *StreamingDataFrame*, with an additional column *ts_sec*
        equal to column *ts* divided by 1e9
    :raises RuntimeError: if *js* is a string which is not
        the name of an existing file
    """
    from pandas_streaming.df import StreamingDataFrame  # pylint: disable=C0415
    if isinstance(js, str) and not (len(js) < 5000 and os.path.exists(js)):
        raise RuntimeError(
            "Use a stream or function json_to_dataframe instead of "
            "the streaming version.")

    # chunksize, flatten and kwargs are forwarded, they were
    # documented but silently ignored before.
    sdf = StreamingDataFrame.read_json(
        js, chunksize=chunksize, flatten=flatten, **kwargs)

    sdf['ts_sec'] = sdf['ts'].apply(lambda t: t / 1e9)
    return sdf