Coverage for src/pyquickhelper/ipythonhelper/notebook

Hot-keys on this page

r m x p toggle line displays

j k next/prev highlighted chunk

0 (zero) top of page

1 (one) first highlighted chunk

1"""

2@file

3@brief Modified version of `runipy.notebook_runner

4<https://github.com/paulgb/runipy/blob/master/runipy/notebook_runner.py>`_.

5"""

7import base64

8import os

9import re

10import time

11import platform

12import warnings

13from queue import Empty

14from time import sleep

15from collections import Counter

16from io import StringIO, BytesIO

17from nbformat import NotebookNode, writes

18from nbformat.reader import reads

19from ..imghelper.svg_helper import svg2img, PYQImageException

20from ..loghelper.flog import noLOG

class NotebookError(Exception):
    """
    Exception raised when the execution fails.
    """

class NotebookKernelError(Exception):
    """
    Exception raised when
    `wait_for_ready <https://github.com/jupyter/jupyter_client/blob/master/
    jupyter_client/blocking/client.py#L84>`_ fails.
    """

39class NotebookRunner(object):

41 """

42 The kernel communicates with mime-types while the notebook

43 uses short labels for different cell types. We'll use this to

44 map from kernel types to notebook format types.

46 This class executes a notebook end to end.

48 .. index:: kernel, notebook

50 The class can use different kernels. The following links give more

51 information on how to create or test a kernel:

53 * `jupyter_kernel_test <https://github.com/jupyter/jupyter_kernel_test>`_

54 * `simple_kernel <https://github.com/dsblank/simple_kernel>`_

56 .. faqref::

57 :title: Do I need to shutdown the kernel after running a notebook?

59 .. index:: travis

61 If the class is instantiated with *kernel=True*, a kernel will

62 be started. It must be shutdown otherwise the program might

63 be waiting for it for ever. That is one of the reasons why the

64 travis build does not complete. The build finished but cannot terminate

65 until all kernels are shutdown.

66 """

68 # . available output types

69 MIME_MAP = {

70 'image/jpeg': 'jpeg',

71 'image/png': 'png',

72 'image/gif': 'gif',

73 'text/plain': 'text',

74 'text/html': 'html',

75 'text/latex': 'latex',

76 'application/javascript': 'html',

77 'image/svg+xml': 'svg',

78 }

def __init__(self, nb, profile_dir=None, working_dir=None,
             comment="", fLOG=noLOG, theNotebook=None, code_init=None,
             kernel_name="python", log_level="30", extended_args=None,
             kernel=False, filename=None, replacements=None, detailed_log=None,
             startup_timeout=300):
    """
    @param      nb              notebook as :epkg:`JSON`
    @param      profile_dir     profile directory
    @param      working_dir     working directory, the kernel is started
                                from this directory
    @param      comment         additional information added to error message
    @param      theNotebook     if not None, populate the variable *theNotebook*
                                with this value in the notebook
    @param      code_init       to initialize the notebook with a python code
                                as if it was a cell
    @param      fLOG            logging function
    @param      log_level       Choices: (0, 10, 20, 30=default, 40, 50,
                                'DEBUG', 'INFO', 'WARN', 'ERROR', 'CRITICAL')
    @param      kernel_name     kernel name, it can be None
    @param      extended_args   others arguments to pass to the command line
                                (`--KernelManager.autorestar=True` for example),
                                see :ref:`l-ipython_notebook_args` for a full list
    @param      kernel          *kernel* is False by default, the notebook can be
                                read but not run, if True, a kernel is started
                                and the notebook can be run
    @param      filename        to add the notebook file if there is one
                                in error messages
    @param      replacements    replacements to make in every cell before
                                running it, dictionary ``{ string: string }``
    @param      detailed_log    to log detailed information when executing the
                                notebook, this should be a function with the
                                same signature as ``print`` or None
    @param      startup_timeout wait for this long for the kernel to be ready,
                                see `wait_for_ready
                                <https://github.com/jupyter/jupyter_client/blob/master/
                                jupyter_client/blocking/client.py#L84>`_

    The constructor raises *SyntaxError* if an option in *extended_args*
    does not look like ``--name=value`` and @see cl NotebookKernelError
    if the kernel cannot be started or is not ready in time.
    """
    # Every attribute is assigned before anything can fail so that a
    # partially built instance (and its __del__) stays consistent.
    self.km = None
    self.kc = None
    self.nb = nb
    self.comment = comment
    self.detailed_log = detailed_log
    self.fLOG = fLOG
    self.theNotebook = theNotebook
    self.code_init = code_init
    self._filename = filename if filename is not None else "memory"
    self.replacements = replacements
    # Arguments are stored as received: method copy builds a new
    # instance from them and must not silently fall back on defaults.
    self.init_args = dict(
        profile_dir=profile_dir, working_dir=working_dir,
        comment=comment, fLOG=fLOG, theNotebook=theNotebook,
        code_init=code_init, kernel_name=kernel_name, log_level=log_level,
        extended_args=extended_args, kernel=kernel, filename=filename,
        replacements=replacements, detailed_log=detailed_log,
        startup_timeout=startup_timeout)

    # Command line given to the kernel, validated even if no kernel starts.
    args = []
    if profile_dir:
        args.append('--profile-dir=%s' % os.path.abspath(profile_dir))
    if log_level:
        args.append('--log-level=%s' % log_level)
    if extended_args:
        for opt in extended_args:
            if not opt.startswith("--"):
                raise SyntaxError(
                    "every option should start with '--': " + opt)
            if "=" not in opt:
                raise SyntaxError(  # pragma: no cover
                    "every option should be assigned a value: " + opt)
            args.append(opt)

    if not kernel:
        return

    try:
        from jupyter_client import KernelManager
    except ImportError:  # pragma: no cover
        from ipykernel import KernelManager

    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=DeprecationWarning)
        if kernel_name is not None:
            self.km = KernelManager(kernel_name=kernel_name)
        else:
            self.km = KernelManager()

    cwd = os.getcwd()
    if working_dir:
        os.chdir(working_dir)
    try:
        try:
            with warnings.catch_warnings():
                warnings.filterwarnings(
                    "ignore", category=ResourceWarning)
                self.km.start_kernel(extra_arguments=args)
        except Exception as e:  # pragma: no cover
            raise NotebookKernelError(
                "Failure with args: {0}\nand error:\n{1}".format(args, str(e))) from e

        if platform.system() == 'Darwin':
            # see http://www.pypedia.com/index.php/notebook_runner
            # There is sometimes a race condition where the first
            # execute command hits the kernel before it's ready.
            # It appears to happen only on Darwin (Mac OS) and an
            # easy (but clumsy) way to mitigate it is to sleep
            # for a second.
            sleep(1)  # pragma: no cover
    finally:
        # The current directory is restored even if the kernel failed.
        if working_dir:
            os.chdir(cwd)

    self.kc = self.km.client()
    self.kc.start_channels(stdin=False)
    try:
        self.kc.wait_for_ready(timeout=startup_timeout)
    except RuntimeError as e:  # pragma: no cover
        # NOTE(review): this waits *startup_timeout* seconds a second time
        # (the former comment mentioned one second) - confirm the
        # intended delay before changing it.
        sleep(startup_timeout)
        self.kc.stop_channels()
        self.km.shutdown_kernel()
        self.km = None
        self.kc = None
        raise NotebookKernelError(
            "Wait_for_ready fails (timeout={0}).".format(startup_timeout)) from e

198

def __del__(self):
    """
    Drops the references to the kernel manager (*km*) and the kernel
    client (*kc*) held by the instance.

    The method only deletes the attributes, it does not call
    @see me shutdown_kernel. It tolerates instances whose constructor
    failed before the attributes were assigned.
    """
    for name in ("km", "kc"):
        # getattr with a default: the attribute may not exist at all
        # if __init__ raised an exception early.
        if getattr(self, name, None) is not None:
            delattr(self, name)

207

def to_json(self, filename=None, encoding="utf8"):
    """
    Serializes the notebook into :epkg:`JSON`.

    @param      filename    filename (string), a stream with a method
                            *write* or None
    @param      encoding    encoding used when *filename* is a file name
    @return                 Json string if filename is None, None otherwise
    """
    if filename is None:
        # No destination: the content is returned as a string.
        buffer = StringIO()
        buffer.write(writes(self.nb))
        return buffer.getvalue()

    if isinstance(filename, str):
        # A file name: the file is opened and the method called again
        # with the opened stream.
        with open(filename, "w", encoding=encoding) as stream:
            self.to_json(stream)
    else:
        # Anything else is assumed to be a stream.
        filename.write(writes(self.nb))
    return None

228

def copy(self):
    """
    Duplicates the notebook (just the content): the notebook is
    serialized to :epkg:`JSON`, read back and wrapped in a new instance
    built with the stored constructor arguments, *theNotebook* and
    *filename* excepted.

    @return         instance of @see cl NotebookRunner
    """
    buffer = StringIO()
    self.to_json(buffer)
    kwargs = self.init_args.copy()
    kwargs.pop("theNotebook", None)
    kwargs.pop("filename", None)
    duplicate = reads(buffer.getvalue())
    return NotebookRunner(duplicate, **kwargs)

243

def __add__(self, nb):
    """
    Merges two notebooks together, returns a new one:
    this notebook is copied and *nb* is appended to the copy.

    @param      nb      notebook
    @return             new notebook
    """
    merged = self.copy()
    merged.merge_notebook(nb)
    return merged

254

def shutdown_kernel(self):
    """
    Stops the channels of the kernel client and shuts down the kernel
    immediately (``now=True``).

    The method raises *ValueError* if no kernel client is available.
    """
    self.fLOG('-- shutdown kernel')
    client = self.kc
    if client is None:
        raise ValueError(  # pragma: no cover
            "No kernel was started, specify kernel=True when initializing the instance.")
    client.stop_channels()
    self.km.shutdown_kernel(now=True)

265

def clean_code(self, code):
    """
    Cleans the code before running it, the function comments out
    instructions such as ``show()`` and, when the code refers to
    *bokeh*, ``output_notebook()``. It then applies the replacements
    stored in *self.replacements* to every line.

    @param      code        code (string) or None
    @return                 cleaned code, None if *code* is None

    Trailing spaces and tabulations are removed from every line.
    If the last processed instruction is a commented ``show()``,
    the line ``"nothing to show"`` is appended so that the cell
    still ends with an expression.
    """
    # The guard comes first: searching a substring in None fails.
    if code is None:
        return code

    has_bokeh = any(tag in code
                    for tag in ("bokeh.", "from bokeh", "import bokeh"))
    replacements = self.replacements if self.replacements is not None else {}

    res = []
    show_is_last = False
    for raw in code.split("\n"):
        line = raw.strip("\n\r").rstrip(" \t")
        compact = line.replace(" ", "")
        if compact == "show()":
            line = line.replace("show", "#show")
            show_is_last = True
        elif has_bokeh and compact == "output_notebook()":
            # show_is_last is left untouched on purpose, as before.
            line = line.replace("output_notebook", "#output_notebook")
        else:
            show_is_last = False
        for k, v in replacements.items():
            line = line.replace(k, v)
        res.append(line)
    if show_is_last:
        res.append('"nothing to show"')
    return "\n".join(res)

296

@staticmethod
def get_cell_code(cell):
    """
    Extracts the code of a cell.

    @param      cell        a cell or a string
    @return                 boolean (=iscell), string

    A string is returned unchanged with *iscell=False*. For a cell,
    the code is read from attribute *source* or, if it is missing,
    from attribute *input*.
    """
    if isinstance(cell, str):
        return False, cell

    try:
        code = cell.source
    except AttributeError:  # pragma: no cover
        code = cell.input
    return True, code

314

def run_cell(self, index_cell, cell, clean_function=None, max_nbissue=15):
    '''
    Runs a notebook cell and update the output of that cell inplace.

    :param index_cell: index of the cell, only used in logs and error messages
    :param cell: cell to execute, a cell or directly a string of code
    :param clean_function: cleaning function to apply to the code before running it
        (called after :meth:`clean_code`)
    :param max_nbissue: number of times an issue can be raised before stopping,
        an issue is an empty message queue (exception *Empty*)
    :return: output of the cell as a list of output nodes,
        an empty string if the cleaned code is empty or if
        the queue was found empty more than *max_nbissue* times
    :raises ValueError: if no kernel client is available
    :raises NotImplementedError: if the kernel sends an unexpected message type
    '''
    if self.detailed_log:
        self.detailed_log("[run_cell] index_cell={0} clean_function={1}".format(
            index_cell, clean_function))
    iscell, codei = NotebookRunner.get_cell_code(cell)

    self.fLOG('-- running cell:\n%s\n' % codei)
    if self.detailed_log:
        self.detailed_log(
            '[run_cell] code=\n {0}'.format(
                "\n ".join(codei.split("\n"))))

    # the code is cleaned first by the class, then by the user function
    code = self.clean_code(codei)
    if clean_function is not None:
        code = clean_function(code)
    if self.detailed_log:
        self.detailed_log(
            ' cleaned code=\n {0}'.format(
                "\n ".join(code.split("\n"))))
    if len(code) == 0:
        # nothing to run
        return ""
    if self.kc is None:
        raise ValueError(  # pragma: no cover
            "No kernel was started, specify kernel=True when initializing the instance.")
    self.kc.execute(code)

    # status of the execution as reported on the shell channel
    reply = self.kc.get_shell_msg()
    reason = None
    try:
        status = reply['content']['status']
    except KeyError:  # pragma: no cover
        status = 'error'
        reason = "no status key in reply['content']"

    if status == 'error':
        # ANSI color sequences are removed from the traceback
        ansi_escape = re.compile(r'\x1b[^m]*m')
        try:
            tr = [ansi_escape.sub('', _)
                  for _ in reply['content']['traceback']]
        except KeyError:  # pragma: no cover
            tr = (["No traceback, available keys in reply['content']"] +
                  list(reply['content']))
        traceback_text = '\n'.join(tr)
        self.fLOG("[nberror]\n", traceback_text)
        if self.detailed_log:
            self.detailed_log(  # pragma: no cover
                '[run_cell] ERROR=\n {0}'.format(
                    "\n ".join(traceback_text.split("\n"))))
    else:
        traceback_text = ''
        self.fLOG('-- cell returned')

    # the iopub channel is read until the kernel says it is idle,
    # every message is converted into an output node
    outs = list()
    nbissue = 0
    statuses = [status]
    while True:
        try:
            msg = self.kc.get_iopub_msg(timeout=1)
            if msg['msg_type'] == 'status':
                if msg['content']['execution_state'] == 'idle':
                    # NOTE(review): this overwrites the status read from
                    # the shell reply, including 'error' - confirm it is
                    # intended.
                    status = 'ok'
                    statuses.append(status)
                    break
            statuses.append(status)
        except Empty as e:  # pragma: no cover
            # execution state should return to idle before
            # the queue becomes empty,
            # if it doesn't, something bad has happened
            status = "error"
            statuses.append(status)
            reason = "exception Empty was raised (%r)" % e
            nbissue += 1
            if nbissue > max_nbissue:
                # the notebook is empty
                return ""
            else:
                continue

        content = msg['content']
        msg_type = msg['msg_type']
        if self.detailed_log:
            self.detailed_log(' msg_type={0}'.format(msg_type))

        out = NotebookNode(output_type=msg_type, metadata=dict())

        if 'execution_count' in content:
            if iscell:
                cell['execution_count'] = content['execution_count']
            out.execution_count = content['execution_count']

        # these messages do not produce any output
        if msg_type in ('status', 'pyin', 'execute_input'):
            continue

        if msg_type == 'stream':
            out.name = content['name']
            # in msgspec 5, this is name, text
            # in msgspec 4, this is name, data
            if 'text' in content:
                out.text = content['text']
            else:
                out.data = content['data']

        elif msg_type in ('display_data', 'pyout', 'execute_result'):
            out.data = content['data']

        elif msg_type in ('pyerr', 'error'):
            out.ename = content['ename']
            out.evalue = content['evalue']
            out.traceback = content['traceback']
            out.name = 'stderr'

        elif msg_type == 'clear_output':
            # outputs collected so far are dropped
            outs = list()
            continue

        elif msg_type in ('comm_open', 'comm_msg', 'comm_close'):
            # widgets in a notebook
            out.data = content["data"]
            out.comm_id = content["comm_id"]

        else:
            dcontent = "\n".join("{0}={1}".format(k, v)
                                 for k, v in sorted(content.items()))
            raise NotImplementedError(  # pragma: no cover
                "Unhandled iopub message: '{0}'\n--CONTENT--\n{1}".format(msg_type, dcontent))

        outs.append(out)
        if self.detailed_log:
            self.detailed_log(' out={0}'.format(type(out)))
            if hasattr(out, "data"):
                self.detailed_log(' out={0}'.format(out.data))

    # the cell is only updated if a cell was given (not a string)
    if iscell:
        cell['outputs'] = outs

    # text outputs are gathered to be logged
    raw = []
    for _ in outs:
        try:
            t = _.data
        except AttributeError:
            continue

        # see MIMEMAP to see the available output type
        for k, v in t.items():
            if k.startswith("text"):
                raw.append(v)

    sraw = "\n".join(raw)
    self.fLOG(sraw)
    if self.detailed_log:
        self.detailed_log(' sraw=\n {0}'.format(
            "\n ".join(sraw.split("\n"))))

    def reply2string(reply):
        # converts the reply into text, one line per key,
        # dictionaries are expanded one level
        sreply = []
        for k, v in sorted(reply.items()):
            if isinstance(v, dict):
                temp = []
                for _, __ in sorted(v.items()):
                    temp.append(" [{0}]={1}".format(_, str(__)))
                v_ = "\n".join(temp)
                sreply.append("reply['{0}']=dict\n{1}".format(k, v_))
            else:
                sreply.append("reply['{0}']={1}".format(k, str(v)))
        sreply = "\n".join(sreply)
        return sreply

    # NOTE(review): the loop above is only left through ``break`` right
    # after ``status = 'ok'`` (or through ``return``), this branch looks
    # unreachable - confirm whether a failing cell is expected to raise.
    if status == 'error':  # pragma: no cover
        sreply = reply2string(reply)
        if len(code) < 5:
            scode = [code]
        else:
            scode = ""
        mes = ("FILENAME\n{10}:1:1 - cell:{11}\n{7}\nCELL status={8}, reason='{9}' -- {4} "
               "length={5} -- {6}:\n-----------------\n"
               "content={12}\nmsg_type: {13} nbissue={14}"
               "\nstatuses={15}"
               "\n-----------------\n{0}"
               "\n-----------------\nTRACE:\n{1}\nRAW:\n{2}REPLY:\n{3}")
        raise NotebookError(mes.format(
            code, traceback_text, sraw, sreply, index_cell,  # 0-4
            len(code), scode, self.comment, status, reason,  # 5-9
            self._filename, index_cell, content, msg_type, nbissue,  # 10-14
            statuses))  # 15
    if self.detailed_log:
        self.detailed_log('[run_cell] status={0}'.format(status))
    return outs

511

def to_python(self):
    """
    Converts the notebook into python: code cells are copied,
    markdown and raw cells become comments, any other cell is replaced
    by a comment giving its type. An empty line follows every cell.

    @return         string
    """
    rows = []
    for cell in self.iter_cells():
        kind = cell.cell_type
        if kind == "code":
            rows.append(NotebookRunner.get_cell_code(cell)[1])
        elif kind in ("markdown", "raw"):
            for line in cell.source.split("\n"):
                if line.startswith("#"):
                    # a title is kept as is, a marker is inserted before
                    rows.extend(("###", line))
                else:
                    rows.append("# " + line)
        else:
            # No text, no code.
            rows.append("# cell.type = {0}".format(kind))
        rows.append("")
    return "\n".join(rows)

537

def iter_code_cells(self):
    '''
    Iterates over the notebook cells whose type is ``'code'``.
    '''
    yield from (candidate for candidate in self.iter_cells()
                if candidate.cell_type == 'code')

545

def iter_cells(self):
    '''
    Iterates over the notebook cells. If the notebook has an attribute
    *worksheets*, the cells of every worksheet are enumerated,
    otherwise the cells of the notebook itself.
    '''
    notebook = self.nb
    if hasattr(notebook, "worksheets"):  # pragma: no cover
        for sheet in notebook.worksheets:
            yield from sheet.cells
    else:
        yield from notebook.cells

557

def first_cell(self):
    """
    Returns the first cell produced by @see me iter_cells,
    None if there is no cell.
    """
    return next(iter(self.iter_cells()), None)

564

def _cell_container(self):
    """
    Returns the container new cells can be added to,
    it may change according to the format: the cells of the last
    worksheet if the notebook has worksheets, the cells of
    the notebook otherwise.

    @return     cell container

    The method raises @see cl NotebookError if there is no worksheet.
    """
    notebook = self.nb
    if not hasattr(notebook, "worksheets"):
        return notebook.cells

    sheet = None  # pragma: no cover
    for sheet in notebook.worksheets:
        # only the last worksheet matters
        pass
    if sheet is None:
        raise NotebookError("no cell container")  # pragma: no cover
    return sheet.cells

579

def __len__(self):
    """
    Returns the number of cells, it iterates on cells
    to get this information, nothing is cached.

    @return         int
    """
    count = 0
    for _ in self.iter_cells():
        count += 1
    return count

588

def cell_type(self, cell):
    """
    Gives the type of a cell (attribute *cell_type*).

    @param      cell        from @see me iter_cells
    @return                 type
    """
    kind = cell.cell_type
    return kind

597

def cell_metadata(self, cell):
    """
    Gives the metadata of a cell (attribute *metadata*).

    @param      cell        cell
    @return                 metadata
    """
    meta = cell.metadata
    return meta

606

def _check_thumbnail_tuple(self, b):
    """
    Verifies the types of a thumbnail.

    @param      b       tuple   image, format
    @return             b

    The function raises *TypeError* if *b* is not a tuple of
    two elements, if an *svg* image is not a string or if any other
    image is not bytes. Formats produced by plotly or bokeh
    are not checked.
    """
    if not isinstance(b, tuple):
        raise TypeError(  # pragma: no cover
            "tuple expected, not {0}".format(type(b)))
    if len(b) != 2:
        raise TypeError(  # pragma: no cover
            "tuple expected of lengh 2, not {0}".format(len(b)))

    content, fmt = b
    # Don't know how to extract a snippet out of these formats.
    unchecked = ("vnd.plotly.v1+html", "vnd.bokehjs_exec.v0+json",
                 "vnd.bokehjs_load.v0+json", 'vnd.plotly.v1+json')
    if fmt == "svg":
        if not isinstance(content, str):
            raise TypeError(
                "str expected for svg, not {0}".format(type(content)))
    elif fmt not in unchecked and not isinstance(content, bytes):
        raise TypeError(
            "bytes expected for images, not {0}-'{1}'\n{2}".format(type(content), fmt, b))
    return b

635

def create_picture_from(self, text, format, asbytes=True, context=None):
    """
    Creates a picture from text.

    @param      text        the text, bytes are decoded (utf-8), any other
                            type is converted with *str*
    @param      format      text, json, ... (not used to draw the picture)
    @param      asbytes     results as bytes or as an image
    @param      context     (str) indication on the content of text
                            (error, ...), not used to draw the picture
    @return                 tuple (picture, format) or PIL.Image (if asbytes is False)

    The picture will be bytes, the format png.
    The size of the picture will depend on the text.
    The longer, the bigger. A text without any end of line is split
    into rows of 20 characters, the text is truncated
    after 200 characters. The method relies on matplotlib
    and then converts the image into a PIL image.

    HTML could be rendered with QWebPage from PyQt (not implemented).
    """
    if isinstance(text, bytes):
        # searching a str into bytes raises an exception
        text = text.decode("utf-8", errors="replace")
    elif not isinstance(text, str):
        text = str(text)
    if "\n" not in text:
        # a single long line is wrapped into rows of 20 characters
        text = "\n".join(text[i:i + 20] for i in range(0, len(text), 20))
    if len(text) > 200:
        text = text[:200]
    size = len(text) // 10
    figsize = (3 + size, 3 + size)
    lines = text.replace("\t", " ").replace("\r", "").split("\n")

    import matplotlib.pyplot as plt
    from matplotlib.textpath import TextPath
    from matplotlib.font_manager import FontProperties
    fig = plt.figure(figsize=figsize)
    try:
        ax = fig.add_subplot(111)
        fp = FontProperties(size=200)

        # lines are stacked from top to bottom, dx, dy measure the
        # width and the height of the text
        dx = 0
        dy = 0
        for line in lines:
            if len(line.strip()) > 0:
                ax.text(0, -dy, line, fontproperties=fp, va='top')
                tp = TextPath((0, -dy), line, prop=fp)
                bb = tp.get_extents()
                dy += bb.height
                dx = max(dx, bb.width)

        ratio = abs(dx) / max(abs(dy), 1)
        ratio = max(min(ratio, 3), 1)
        fig.set_size_inches(int((1 + size) * ratio), 1 + size)
        ax.set_xlim([0, dx])
        ax.set_ylim([-dy, 0])
        ax.set_axis_off()
        sio = BytesIO()
        fig.savefig(sio, format="png")
    finally:
        # only this figure is closed, even if the drawing failed
        plt.close(fig)

    if asbytes:
        b = sio.getvalue(), "png"
        self._check_thumbnail_tuple(b)
        return b
    try:
        from PIL import Image
    except ImportError:  # pragma: no cover
        import Image
    sio.seek(0)
    img = Image.open(sio)
    return img

704

def cell_image(self, cell, image_from_text=False):
    """
    Extracts the image of a cell, returns None if there is none.

    @param      cell            cell to examine
    @param      image_from_text produce an image even if it is not one
    @return                     None for no image or the result of
                                *_merge_images* applied to the list of
                                tuples (image, extension) collected from the
                                outputs of the cell

    Only code cells are considered. Textual outputs (text, html,
    latex, json, javascript, streams, errors) are converted into
    images with @see me create_picture_from only if
    *image_from_text* is True.
    """
    kind = self.cell_type(cell)
    if kind != "code":
        return None

    # mime types rendered through create_picture_from and the format
    # given to that method (text/plain is handled apart)
    text_labels = {
        "application/javascript": "js",
        "application/json": "json",
        "text/html": "html",
        "text/latex": "latex",
    }
    raster = {"image/png", "image/jpg", "image/jpeg", "image/gif"}
    passthrough = ("text/vnd.plotly.v1+html", "application/vnd.plotly.v1+json",
                   "application/vnd.bokehjs_exec.v0+json",
                   "application/vnd.bokehjs_load.v0+json")

    results = []
    for output in cell.outputs:
        out_type = output["output_type"]
        if out_type in {"execute_result", "display_data"}:
            for mime, value in output["data"].items():
                if mime == "text/plain":
                    if image_from_text:
                        results.append(self.create_picture_from(
                            value, "text", context=out_type))
                elif mime in text_labels:
                    if image_from_text:
                        results.append(self.create_picture_from(
                            value, text_labels[mime]))
                elif mime == "image/svg+xml":
                    if not isinstance(value, str):
                        raise TypeError(
                            "This should be str not '{0}' (=SVG).".format(type(value)))
                    results.append((value, "svg"))
                elif mime == "application/vnd.jupyter.widget-view+json":
                    # see http://ipywidgets.readthedocs.io/en/latest/embedding.html
                    if "model_id" not in value:
                        raise KeyError(  # pragma: no cover
                            "model_id is missing from {0}".format(value))
                    self.fLOG(
                        "[application/vnd.jupyter.widget-view+json] not rendered",
                        value["model_id"])
                elif mime in raster:
                    if not isinstance(value, bytes):
                        value = base64.b64decode(value)
                        if not isinstance(value, bytes):
                            raise TypeError(  # pragma: no cover
                                "This should be bytes not '{0}' (=IMG:{1}).".format(type(value), mime))
                    results.append((value, mime.split("/")[-1]))
                elif mime in passthrough:
                    results.append((value, mime.split("/")[-1]))
                else:
                    raise NotImplementedError(  # pragma: no cover
                        "cell type: {0}\nk={1}\nv={2}\nCELL:\n{3}".format(
                            kind, mime, value, cell))
        elif out_type == "error":
            trace = output["traceback"]
            if image_from_text:
                for piece in trace:
                    results.append(self.create_picture_from(
                        piece, "text", context="error"))
        elif out_type == "stream":
            content = output["text"]
            if image_from_text:
                results.append(self.create_picture_from(content, "text"))
        else:
            raise NotImplementedError(  # pragma: no cover
                "cell type: {0}\noutput type: {1}\nOUT:\n{2}\nCELL:\n{3}"
                "".format(kind, output["output_type"], output, cell))

    if len(results) == 0:
        return None
    merged = self._merge_images(results)
    if merged[0] is None:
        return None
    self._check_thumbnail_tuple(merged)
    return merged

794

def cell_height(self, cell):
    """
    Estimates the height of a cell from the number of lines it contains.

    @param      cell        cell
    @return                 number of lines (approximation)

    For markdown and raw cells, a line counts for one plus one for every
    80 characters. For code cells, the lines of code are added
    to a rough estimation of the height of every output.
    The method raises *NotImplementedError* for an unexpected type of
    cell, output or data.
    """
    kind = self.cell_type(cell)
    if kind in ("markdown", "raw"):
        return sum(1 + len(line) // 80 for line in cell.source.split("\n"))
    if kind != "code":
        raise NotImplementedError(  # pragma: no cover
            "cell type: {0}\nCELL:\n{1}".format(kind, cell))

    images = {"image/png", "image/jpg", "image/jpeg", "image/gif"}
    interactive = ("text/vnd.plotly.v1+html",
                   "application/vnd.plotly.v1+json",
                   "application/vnd.bokehjs_load.v0+json",
                   "application/vnd.bokehjs_exec.v0+json")

    total = len(cell.source.split("\n"))
    for output in cell.outputs:
        out_type = output["output_type"]
        if out_type in ("execute_result", "display_data"):
            for mime, value in output["data"].items():
                if mime in ("text/plain", "text/html"):
                    total += len(value.split("\n"))
                elif mime == "application/javascript":
                    # rough estimation
                    total += len(value.split("\n")) // 2
                elif mime == "application/json":
                    # rough estimation
                    try:
                        total += len(value.split("{"))
                    except AttributeError:  # pragma: no cover
                        total += len(value) // 5 + 1
                elif mime == "image/svg+xml":
                    total += len(value) // 5
                elif mime == "text/latex":
                    total += len(value.split("\\\\")) * 2
                elif mime in images:
                    total += len(value) // 50
                elif mime == "application/vnd.jupyter.widget-view+json":
                    total += 5
                elif mime in interactive:
                    total += 10
                else:
                    fmt = "Unable to guess heigth for cell type: '{0}'\nk='{1}'\nv='{2}'\nCELL:\n{3}"
                    raise NotImplementedError(
                        fmt.format(kind, mime, value, cell))
        elif out_type == "stream":
            total += len(output["text"].split("\n"))
        elif out_type == "error":
            total += len(output["traceback"])
        else:
            raise NotImplementedError(  # pragma: no cover
                "cell type: {0}\noutput type: {1}\nOUT:\n{2}\nCELL:\n{3}"
                .format(kind, output["output_type"], output, cell))

    return total

868

def add_tag_slide(self, max_nb_cell=4, max_nb_line=25):
    """
    Adds tags for a slide show where they are missing.

    @param      max_nb_cell     maximum number of cells within a slide
    @param      max_nb_line     maximum number of lines within a slide
    @return                     list of modified cells { #slide: (kind, reason, cell) }

    A markdown cell without any tag and starting with a title
    (``#``, ``##``, ``###``) starts a new slide. Any other cell without
    a tag starts a new subslide if the current slide would exceed
    *max_nb_cell* cells or *max_nb_line* lines (see @see me cell_height).
    The metadata of the cells are modified inplace.
    """
    modified = {}
    lines_in_slide = 0
    cells_in_slide = 0
    for index, cell in enumerate(self.iter_cells()):
        meta = cell.metadata
        if "slideshow" in meta:
            # an existing tag resets the counters if it starts a slide
            if meta["slideshow"]["slide_type"] in ["slide", "subslide"]:
                lines_in_slide = 0
                cells_in_slide = 0
        elif cell.cell_type == "markdown":
            if cell.source.startswith(("# ", "## ", "### ")):
                meta["slideshow"] = {'slide_type': 'slide'}
                lines_in_slide = 0
                cells_in_slide = 0
                modified[index] = ("slide", "section", cell)

        height = self.cell_height(cell)
        step = 1
        too_many_cells = step + cells_in_slide > max_nb_cell
        too_many_lines = lines_in_slide + height > max_nb_line
        if "slideshow" not in meta and (too_many_cells or too_many_lines):
            modified[index] = (
                "subslide",
                "{0}-{1} <-> {2}-{3}".format(
                    cells_in_slide, lines_in_slide, step, height),
                cell)
            lines_in_slide = 0
            cells_in_slide = 0
            meta["slideshow"] = {'slide_type': 'subslide'}

        lines_in_slide += height
        cells_in_slide += step

    return modified

915

def run_notebook(self, skip_exceptions=False, progress_callback=None,
                 additional_path=None, valid=None, clean_function=None,
                 context=None):
    '''
    Runs all the code cells of a notebook in order and update
    the outputs in-place.

    If ``skip_exceptions`` is set, then if exceptions occur in a cell, the
    subsequent cells are run (by default, the notebook execution stops).

    @param      skip_exceptions     skip exception @see cl NotebookError
                                    raised by a cell and keep running
    @param      progress_callback   call back function, called with the
                                    index of the cell after it was run
    @param      additional_path     additional paths (as a list or None if none),
                                    they are appended to ``sys.path`` in the kernel
    @param      valid               if not None, valid is a function which
                                    receives the code of a cell and returns
                                    whether or not the cell should be executed,
                                    if the function returns None, the execution
                                    of the notebook stops before this cell
    @param      clean_function      function which cleans a cell's code before
                                    executing it (None for None)
    @param      context             not used by this method
    @return                         dictionary with statistics
                                    (*nbcell*, *nbrun*, *nbvalid*, *time*)

    If *theNotebook* was given to the constructor, the function defines
    the variable ``theNotebook`` in the kernel with this value.
    If *code_init* was given, it is executed before the first cell.
    The method raises *TypeError* if *additional_path* is not a list,
    *RuntimeError* if the kernel message queue is found empty.
    '''
    if self.detailed_log:
        self.detailed_log(
            "[run_notebook] Starting execution of '{0}'".format(self._filename))
    # additional path
    if additional_path is not None:
        if not isinstance(additional_path, list):
            raise TypeError(  # pragma: no cover
                "Additional_path should be a list not: " + str(additional_path))
        code = ["import sys"]
        code.extend("sys.path.append(r'{0}')".format(p)
                    for p in additional_path)
        self.run_cell(-1, "\n".join(code))

    # we add local variable theNotebook
    if self.theNotebook is not None:
        self.run_cell(-1, "theNotebook = r'''{0}'''".format(self.theNotebook))

    # initialisation with a code not inside the notebook
    if self.code_init is not None:
        self.run_cell(-1, self.code_init)

    # execution of the notebook
    nbcell = 0
    nbrun = 0
    nbvalid = 0
    cl = time.perf_counter()
    for i, cell in enumerate(self.iter_code_cells()):
        nbcell += 1
        codei = NotebookRunner.get_cell_code(cell)[1]
        if valid is not None:
            r = valid(codei)
            if r is None:
                break
            if not r:
                continue
        nbrun += 1
        try:
            self.run_cell(i, cell, clean_function=clean_function)
        except Empty as er:
            raise RuntimeError(  # pragma: no cover
                "{0}\nissue when executing:\n{1}".format(self.comment, codei)) from er
        except NotebookError:  # pragma: no cover
            if not skip_exceptions:
                raise
            # the failure is skipped as documented, the next cells are run
            self.fLOG("[run_notebook] cell {0} failed, it is skipped".format(i))
        else:
            # only cells which ran without any exception are counted
            nbvalid += 1
        if progress_callback:
            progress_callback(i)
    etime = time.perf_counter() - cl
    res = dict(nbcell=nbcell, nbrun=nbrun, nbvalid=nbvalid, time=etime)
    if self.detailed_log:
        self.detailed_log(
            "[run_notebook] end execution of '{0}'".format(self._filename))
        self.detailed_log(
            "[run_notebook] execution time: {0}".format(etime))
        self.detailed_log("[run_notebook] statistics : {0}".format(res))
    return res

1002

def count_code_cells(self):
    '''
    Counts the code cells of the notebook
    (see @see me iter_code_cells).
    '''
    return len(list(self.iter_code_cells()))

1008

def merge_notebook(self, nb):
    """
    Appends the cells of notebook *nb* to this one.

    @param      nb      notebook or list of notebook (@see cl NotebookRunner)
    @return             number of added cells

    .. faqref::
        :title: How to merge notebook?

        The following code merges two notebooks into the first one
        and stores the result unto a file.

        ::

            from pyquickhelper.ipythonhelper import read_nb
            nb1 = read_nb("<file1>", kernel=False)
            nb2 = read_nb("<file2>", kernel=False)
            nb1.merge_notebook(nb2)
            nb1.to_json(outfile)
    """
    if isinstance(nb, list):
        # every notebook of the list is merged in order
        added = 0
        for one in nb:
            added += self.merge_notebook(one)
        return added

    container = self._cell_container()
    added = 0
    for added, cell in enumerate(nb.iter_cells(), start=1):
        container.append(cell)
    return added

1042

def get_description(self):
    """
    Gets summary and description of this notebook.
    We expect the first cell to contain a title and a description
    of its content. The title is either an ATX header (``# Title``)
    or a setext header (a line underlined or surrounded by ``===``
    or ``---``). If the first cell holds no description, the first
    paragraph of the second cell is used when that cell is markdown.
    Markdown links ``[text](url)`` are replaced by their text and
    double quotes are removed from the description.

    @return             header, description

    The method raises *ValueError* if the first cell is not a markdown
    cell or if a URL remains in the description after links were replaced.
    """
    def split_header(s, get_header=True):
        # Splits a markdown source into (header, first paragraph).
        # With get_header=False the header stays empty and the first
        # paragraph of the whole source is returned as the description.
        parts = s.strip().splitlines()
        if not parts:
            # empty cell: nothing to extract
            return '', ''

        header = ''
        if parts[0].startswith('#'):
            if get_header:
                # Only the leading marker and the optional closing
                # sequence are removed, a '#' inside the title is kept.
                header = re.sub(r'^#+\s*|\s+#+\s*$', '', parts.pop(0))
        elif get_header:
            if parts[0].startswith(('=', '-')):
                parts = parts[1:]
            if parts:
                header = parts.pop(0)
            if parts and parts[0].startswith(('=', '-')):
                parts.pop(0)

        if not parts:
            return header, ''
        # description = first paragraph written on a single line
        rest = '\n'.join(parts).lstrip().split('\n\n')
        desc = rest[0].replace('\n', ' ')
        return header, desc

    first_cell = self.first_cell()

    if first_cell['cell_type'] != 'markdown':
        raise ValueError(  # pragma: no cover
            "The first cell is not in markdown but '{0}' filename='{1}'.".format(
                first_cell['cell_type'], self._filename))

    header, desc = split_header(first_cell['source'])
    if not desc and len(self.nb['cells']) > 1:
        second_cell = self.nb['cells'][1]
        if second_cell['cell_type'] == 'markdown':
            _, desc = split_header(second_cell['source'], False)

    # replaces every markdown link by its text
    reg_link = "(\\[(.*?)\\]\\(([^ ]*)\\))"
    reg = re.compile(reg_link)
    new_desc = reg.sub("\\2", desc)
    if "http://" in new_desc or "https://" in new_desc:
        raise ValueError(  # pragma: no cover
            "Wrong regular expression in '{2}':\n{0}\nMODIFIED:\n{1}".format(
                desc, new_desc, self._filename))
    return header, new_desc.replace('"', "")

1100

def get_thumbnail(self, max_width=200, max_height=200, use_default=False):
    """
    Processes the notebook and creates one picture based on the outputs
    to illustrate a notebook. Cells are scanned from the last one to the
    first one and the first image found that way is the one selected.

    @param      max_width       maximum size of the thumbnail
    @param      max_height      maximum size of the thumbnail
    @param      use_default     force using a default image even if an image is present
    @return                     result of ``_scale_image`` for the selected image,
                                or None if the selected output is a javascript
                                output (plotly, bokeh) or a SVG which could not
                                be converted
    """
    js_formats = ("vnd.plotly.v1+html", "vnd.bokehjs_exec.v0+json",
                  "vnd.bokehjs_load.v0+json")

    def has_content(candidate):
        # a candidate is a non empty tuple whose first element is not empty
        return (candidate is not None and len(candidate) > 0 and
                len(candidate[0]) > 0)

    backwards = list(self.iter_cells())[::-1]

    # first pass: regular images, javascript outputs are ignored
    candidates = []
    for cell in backwards:
        found = self.cell_image(cell, False)
        if has_content(found) and found[1] not in js_formats:
            self._check_thumbnail_tuple(found)
            candidates.append(found)

    # second pass: the second argument of cell_image is switched on,
    # the scan stops at the first output of at least 1000 characters
    if not use_default and not candidates:
        for cell in backwards:
            found = self.cell_image(cell, True)
            if not has_content(found):
                continue
            self._check_thumbnail_tuple(found)
            candidates.append(found)
            if len(found[0]) >= 1000:
                break

    if use_default:
        candidates = []

    if not candidates:
        # no image, we need to consider the default one
        default_path = os.path.join(
            os.path.dirname(__file__), 'no_image_nb.png')
        with open(default_path, "rb") as f:
            fallback = (f.read(), "png")
            self._check_thumbnail_tuple(fallback)
            candidates.append(fallback)

    if not candidates:
        raise ValueError(  # pragma: no cover
            "There should be at least one image.")

    # The first candidate comes from the cell closest to the end of
    # the notebook. A different logic may be implemented later.
    chosen = candidates[0]
    fmt = chosen[1]
    content = chosen[0]

    if fmt in js_formats:
        return None
    if fmt == 'svg':
        try:
            content = svg2img(content)
        except PYQImageException:  # pragma: no cover
            # Unable to convert SVG.
            return None
    # zoom
    return self._scale_image(
        content, fmt, max_width=max_width, max_height=max_height)

1164

def _scale_image(self, in_bytes, format=None, max_width=200, max_height=200):
    """
    Scales an image with the same aspect ratio centered in an
    image with a given max_width and max_height.
    An image which already fits in the box is returned unchanged.

    @param      in_bytes        image as bytes, a PIL image, or a tuple
                                whose first element is one of them
    @param      format          indication of the format (can be empty),
                                only used in the error message
    @param      max_width       maximum size of the thumbnail
    @param      max_height      maximum size of the thumbnail
    @return                     Image (PIL)

    The method raises *TypeError* if *in_bytes* is neither bytes
    nor a PIL image.
    """
    # local import to avoid testing dependency on PIL:
    try:
        from PIL import Image
    except ImportError:  # pragma: no cover
        import Image

    if isinstance(in_bytes, tuple):
        in_bytes = in_bytes[0]
    if isinstance(in_bytes, bytes):
        img = Image.open(BytesIO(in_bytes))
    elif isinstance(in_bytes, Image.Image):
        img = in_bytes
    else:
        raise TypeError(  # pragma: no cover
            "bytes expected, not {0} - format={1}".format(
                type(in_bytes), format))
    width_in, height_in = img.size
    scale_w = max_width / float(width_in)
    scale_h = max_height / float(height_in)

    # picks the ratio which makes the image fit in both dimensions
    if height_in * scale_w <= max_height:
        scale = scale_w
    else:
        scale = scale_h

    if scale >= 1.0:
        # the image is already small enough
        return img

    width_sc = int(round(scale * width_in))
    height_sc = int(round(scale * height_in))

    # Image.ANTIALIAS was removed in Pillow 10, LANCZOS is the same filter.
    # It is looked up in Image.Resampling first (Pillow >= 9.1), then in
    # Image, ANTIALIAS is only used by versions which know nothing else.
    resample = getattr(getattr(Image, "Resampling", Image), "LANCZOS", None)
    if resample is None:  # pragma: no cover
        resample = Image.ANTIALIAS

    # resize the image and center it on a white background
    img.thumbnail((width_sc, height_sc), resample)
    thumb = Image.new('RGB', (max_width, max_height), (255, 255, 255))
    pos_insert = ((max_width - width_sc) // 2,
                  (max_height - height_sc) // 2)
    thumb.paste(img, pos_insert)
    return thumb

1214

1215 def _merge_images(self, results):

1216 """

1217 Merges images defined by (buffer, format).

1218 The method uses PIL to merge images when possible.

1219

1220 @return ``[ (image, format) ]``

1221 """

1222 if len(results) == 1:

1223 results = results[0]

1224 self._check_thumbnail_tuple(results)

1225 return results

1226 if len(results) == 0:

1227 return None

1228

1229 formats_counts = Counter(_[1] for _ in results)

1230 if len(formats_counts) == 1:

1231 format = results[0][1]

1232 else:

1233 items = sorted(((v, k)

1234 for k, v in formats_counts.items()), reverse=False)

1235 for it in items:

1236 format = it

1237 break

1238

1239 results = [_ for _ in results if _[1] == format]

1240 if format == "svg":

1241 return ("\n".join(_[0] for _ in results), format)

1242

1243 # local import to avoid testing dependency on PIL:

1244 try:

1245 from PIL import Image

1246 except ImportError: # pragma: no cover

1247 import Image

1248

1249 dx = 0.

1250 dy = 0.

1251 over = 0.7

1252 imgs = []

1253 for in_bytes, _ in results:

1254 img = Image.open(BytesIO(in_bytes))

1255 imgs.append(img)

1256 dx = max(dx, img.size[0])

1257 dy += img.size[1] * over

1258

1259 new_im = Image.new('RGB', (int(dx), int(dy)), (220, 220, 220))

1260 for img in imgs:

1261 dy -= img.size[1] * over

1262 new_im.paste(img, (0, max(int(dy), 0)))

1263

1264 if max(dx, dy) > 0:

1265 image_buffer = BytesIO()

1266 new_im.save(image_buffer, "PNG")

1267 b = image_buffer.getvalue(), "png"

1268 return b

1269 b = None, "png"

1270 return b

Coverage for src/pyquickhelper/ipythonhelper/notebook_runner.py : 93%

660 statements

Coverage for src/pyquickhelper/ipythonhelper/notebook_runner.py : 93%

660 statements 615 run 45 missing 82 excluded

660 statements