Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""
2@file
3@brief Modified version of `runipy.notebook_runner
4<https://github.com/paulgb/runipy/blob/master/runipy/notebook_runner.py>`_.
5"""
7import base64
8import os
9import re
10import time
11import platform
12import warnings
13from queue import Empty
14from time import sleep
15from collections import Counter
16from io import StringIO, BytesIO
17from nbformat import NotebookNode, writes
18from nbformat.reader import reads
19from ..imghelper.svg_helper import svg2img, PYQImageException
20from ..loghelper.flog import noLOG
class NotebookError(Exception):
    """
    Exception raised when the execution of a notebook fails.
    """
class NotebookKernelError(Exception):
    """
    Exception raised when
    `wait_for_ready <https://github.com/jupyter/jupyter_client/blob/master/
    jupyter_client/blocking/client.py#L84>`_ fails.
    """
39class NotebookRunner(object):
41 """
42 The kernel communicates with mime-types while the notebook
43 uses short labels for different cell types. We'll use this to
44 map from kernel types to notebook format types.
46 This classes executes a notebook end to end.
48 .. index:: kernel, notebook
50 The class can use different kernels. The next links gives more
51 information on how to create or test a kernel:
53 * `jupyter_kernel_test <https://github.com/jupyter/jupyter_kernel_test>`_
54 * `simple_kernel <https://github.com/dsblank/simple_kernel>`_
56 .. faqref::
57 :title: Do I need to shutdown the kernel after running a notebook?
59 .. index:: travis
61 If the class is instantiated with *kernel=True*, a kernel will
62 be started. It must be shutdown otherwise the program might
63 be waiting for it for ever. That is one of the reasons why the
64 travis build does not complete. The build finished but cannot terminate
65 until all kernels are shutdown.
66 """
68 # . available output types
69 MIME_MAP = {
70 'image/jpeg': 'jpeg',
71 'image/png': 'png',
72 'image/gif': 'gif',
73 'text/plain': 'text',
74 'text/html': 'html',
75 'text/latex': 'latex',
76 'application/javascript': 'html',
77 'image/svg+xml': 'svg',
78 }
def __init__(self, nb, profile_dir=None, working_dir=None,
             comment="", fLOG=noLOG, theNotebook=None, code_init=None,
             kernel_name="python", log_level="30", extended_args=None,
             kernel=False, filename=None, replacements=None, detailed_log=None,
             startup_timeout=300):
    """
    @param      nb              notebook as :epkg:`JSON`
    @param      profile_dir     profile directory
    @param      working_dir     working directory, the kernel is started from it
    @param      comment         additional information added to error message
    @param      theNotebook     if not None, populate the variable *theNotebook* with this value in the notebook
    @param      code_init       to initialize the notebook with a python code as if it was a cell
    @param      fLOG            logging function
    @param      log_level       Choices: (0, 10, 20, 30=default, 40, 50, 'DEBUG', 'INFO', 'WARN', 'ERROR', 'CRITICAL')
    @param      kernel_name     kernel name, it can be None
    @param      extended_args   others arguments to pass to the command line
                                (`--KernelManager.autorestart=True` for example),
                                see :ref:`l-ipython_notebook_args` for a full list
    @param      kernel          *kernel* is False by default, the notebook can be read but not run,
                                if True, a kernel is started and the notebook can be run
    @param      filename        to add the notebook file if there is one in error messages
    @param      replacements    replacements to make in every cell before running it,
                                dictionary ``{ string: string }``
    @param      detailed_log    to log detailed information when executing the notebook, this should be a function
                                with the same signature as ``print`` or None
    @param      startup_timeout wait for this long for the kernel to be ready,
                                see `wait_for_ready
                                <https://github.com/jupyter/jupyter_client/blob/master/
                                jupyter_client/blocking/client.py#L84>`_

    Raises *SyntaxError* if an option of *extended_args* does not look like
    ``--name=value`` and @see cl NotebookKernelError if the kernel cannot
    be started or is not ready after *startup_timeout* seconds.
    """
    # Every attribute read by the other methods (including __del__) is
    # defined first: the instance stays consistent even when the kernel
    # fails to start or an option is rejected below.
    self.km = None
    self.kc = None
    self.nb = nb
    self.comment = comment
    self.detailed_log = detailed_log
    self.fLOG = fLOG
    self.theNotebook = theNotebook
    self.code_init = code_init
    self._filename = filename if filename is not None else "memory"
    self.replacements = replacements
    # init_args is reused by method copy: it must hold the values
    # received by the constructor, not the default ones.
    self.init_args = dict(
        profile_dir=profile_dir, working_dir=working_dir,
        comment=comment, fLOG=fLOG, theNotebook=theNotebook, code_init=code_init,
        kernel_name=kernel_name, log_level=log_level, extended_args=extended_args,
        kernel=kernel, filename=filename, replacements=replacements,
        detailed_log=detailed_log, startup_timeout=startup_timeout)

    # command line given to the kernel
    args = []
    if profile_dir:
        args.append('--profile-dir=%s' % os.path.abspath(profile_dir))
    if log_level:
        args.append('--log-level=%s' % log_level)
    if extended_args:
        for opt in extended_args:
            if not opt.startswith("--"):
                raise SyntaxError(
                    "every option should start with '--': " + opt)
            if "=" not in opt:
                raise SyntaxError(  # pragma: no cover
                    "every option should be assigned a value: " + opt)
            args.append(opt)

    if not kernel:
        # read-only notebook, nothing to start
        return

    try:
        from jupyter_client import KernelManager
    except ImportError:  # pragma: no cover
        from ipykernel import KernelManager

    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=DeprecationWarning)
        self.km = KernelManager(
            kernel_name=kernel_name) if kernel_name is not None else KernelManager()

    # The kernel is started from working_dir, the current directory is
    # restored whatever happens.
    cwd = os.getcwd()
    if working_dir:
        os.chdir(working_dir)
    try:
        with warnings.catch_warnings():
            warnings.filterwarnings(
                "ignore", category=ResourceWarning)
            self.km.start_kernel(extra_arguments=args)
    except Exception as e:  # pragma: no cover
        raise NotebookKernelError(
            "Failure with args: {0}\nand error:\n{1}".format(args, str(e))) from e
    else:
        if platform.system() == 'Darwin':
            # see http://www.pypedia.com/index.php/notebook_runner
            # There is sometimes a race condition where the first
            # execute command hits the kernel before it's ready.
            # It appears to happen only on Darwin (Mac OS) and an
            # easy (but clumsy) way to mitigate it is to sleep
            # for a second.
            sleep(1)  # pragma: no cover
    finally:
        if working_dir:
            os.chdir(cwd)

    self.kc = self.km.client()
    self.kc.start_channels(stdin=False)
    try:
        self.kc.wait_for_ready(timeout=startup_timeout)
    except RuntimeError as e:  # pragma: no cover
        # We wait for one second (wait_for_ready already waited
        # startup_timeout seconds) before releasing the kernel.
        sleep(1)
        self.kc.stop_channels()
        self.km.shutdown_kernel()
        self.km = None
        self.kc = None
        raise NotebookKernelError(
            "Wait_for_ready fails (timeout={0}).".format(startup_timeout)) from e
def __del__(self):
    """
    Drops the references to the kernel manager (*km*) and
    the kernel client (*kc*) when they are not None.

    The method does not call @see me shutdown_kernel.
    It tolerates an instance whose constructor failed before
    these attributes were created.
    """
    for name in ("km", "kc"):
        # getattr with a default: __del__ is also called on instances
        # whose __init__ raised before setting the attribute
        if getattr(self, name, None) is not None:
            delattr(self, name)
def to_json(self, filename=None, encoding="utf8"):
    """
    Serializes the notebook into :epkg:`JSON`.

    @param      filename    None, a filename (str) or a stream with a method *write*
    @param      encoding    encoding, only used when *filename* is a string
    @return                 Json string if filename is None, None otherwise
    """
    if filename is None:
        # no destination: the serialized notebook is returned
        buffer = StringIO()
        buffer.write(writes(self.nb))
        return buffer.getvalue()

    if isinstance(filename, str):
        # a filename: the method calls itself with the opened stream
        with open(filename, "w", encoding=encoding) as payload:
            self.to_json(payload)
        return None

    # anything else is considered as a stream
    filename.write(writes(self.nb))
    return None
def copy(self):
    """
    Copies the notebook (just the content), the notebook is serialized
    and read again, the new instance receives the constructor arguments
    stored in *init_args* except *theNotebook* and *filename*.

    @return         instance of @see cl NotebookRunner
    """
    buffer = StringIO()
    self.to_json(buffer)
    kwargs = {name: value for name, value in self.init_args.items()
              if name not in ("theNotebook", "filename")}
    content = reads(buffer.getvalue())
    return NotebookRunner(content, **kwargs)
def __add__(self, nb):
    """
    Merges two notebooks together, returns a new one,
    the current notebook is copied first and *nb* is appended to the copy.

    @param      nb      notebook
    @return             new notebook
    """
    merged = self.copy()
    merged.merge_notebook(nb)
    return merged
def shutdown_kernel(self):
    """
    Stops the channels of the kernel client, then shuts down
    the kernel immediately (``now=True``).

    The method raises *ValueError* if no kernel client is available.
    """
    self.fLOG('-- shutdown kernel')
    client = self.kc
    if client is None:
        raise ValueError(  # pragma: no cover
            "No kernel was started, specify kernel=True when initializing the instance.")
    client.stop_channels()
    self.km.shutdown_kernel(now=True)
def clean_code(self, code):
    """
    Cleans the code before running it, the function comments out
    instructions such as ``show()`` and, when the code uses *bokeh*,
    ``output_notebook()``. It also removes trailing spaces and applies
    the replacements stored in ``self.replacements`` to every line.

    @param      code        code (string), None is returned unchanged
    @return                 cleaned code

    If the last processed line is a commented ``show()``, the string
    ``"nothing to show"`` is appended as a last expression.
    """
    # None must be tested before any operator *in* is applied to the code
    if code is None:
        return code

    has_bokeh = any(pattern in code
                    for pattern in ("bokeh.", "from bokeh", "import bokeh"))

    res = []
    show_is_last = False
    for raw in code.split("\n"):
        line = raw.strip("\n\r").rstrip(" \t")
        compact = line.replace(" ", "")
        if compact == "show()":
            line = line.replace("show", "#show")
            show_is_last = True
        elif has_bokeh and compact == "output_notebook()":
            # show_is_last is left unchanged on purpose in this case
            line = line.replace("output_notebook", "#output_notebook")
        else:
            show_is_last = False
        if self.replacements is not None:
            for k, v in self.replacements.items():
                line = line.replace(k, v)
        res.append(line)
    if show_is_last:
        res.append('"nothing to show"')
    return "\n".join(res)
@staticmethod
def get_cell_code(cell):
    """
    Extracts the code of a cell.

    @param      cell        a cell or a string
    @return                 boolean (=iscell), string

    A string is returned as is with ``iscell=False``. For a cell,
    the code is read from attribute *source* or, if it does not exist,
    from attribute *input*.
    """
    if isinstance(cell, str):
        return False, cell

    try:
        code = cell.source
    except AttributeError:  # pragma: no cover
        code = cell.input
    return True, code
def run_cell(self, index_cell, cell, clean_function=None, max_nbissue=15):
    '''
    Runs a notebook cell and updates the output of that cell inplace.

    :param index_cell: index of the cell (used in logs and error messages)
    :param cell: cell to execute, a notebook cell or a string
    :param clean_function: cleaning function to apply to the code before running it
    :param max_nbissue: number of times an issue can be raised before stopping,
        an issue is a timeout (one second) while waiting for a message from the kernel
    :return: output of the cell (a list), or an empty string if the cleaned
        code is empty or if too many issues were raised
    :raises ValueError: if no kernel was started
    :raises NotebookError: if the reply of the kernel to the execution request
        has status ``'error'`` (or no status at all)
    :raises NotImplementedError: if the kernel sends an unexpected type of message
    '''
    if self.detailed_log:
        self.detailed_log("[run_cell] index_cell={0} clean_function={1}".format(
            index_cell, clean_function))
    iscell, codei = NotebookRunner.get_cell_code(cell)

    self.fLOG('-- running cell:\n%s\n' % codei)
    if self.detailed_log:
        self.detailed_log(
            '[run_cell] code=\n {0}'.format(
                "\n ".join(codei.split("\n"))))

    code = self.clean_code(codei)
    if clean_function is not None:
        code = clean_function(code)
    if self.detailed_log:
        self.detailed_log(
            ' cleaned code=\n {0}'.format(
                "\n ".join(code.split("\n"))))
    if len(code) == 0:
        return ""
    if self.kc is None:
        raise ValueError(  # pragma: no cover
            "No kernel was started, specify kernel=True when initializing the instance.")
    self.kc.execute(code)

    reply = self.kc.get_shell_msg()
    reason = None
    try:
        status = reply['content']['status']
    except KeyError:  # pragma: no cover
        status = 'error'
        reason = "no status key in reply['content']"

    # The outcome reported on the shell channel is kept apart:
    # the loop below temporarily changes *status* when a timeout occurs
    # and must not hide an error raised by the cell.
    shell_status = status
    shell_reason = reason

    if status == 'error':
        ansi_escape = re.compile(r'\x1b[^m]*m')
        try:
            tr = [ansi_escape.sub('', _)
                  for _ in reply['content']['traceback']]
        except KeyError:  # pragma: no cover
            tr = (["No traceback, available keys in reply['content']"] +
                  list(reply['content']))
        traceback_text = '\n'.join(tr)
        self.fLOG("[nberror]\n", traceback_text)
        if self.detailed_log:
            self.detailed_log(  # pragma: no cover
                '[run_cell] ERROR=\n {0}'.format(
                    "\n ".join(traceback_text.split("\n"))))
    else:
        traceback_text = ''
        self.fLOG('-- cell returned')

    outs = list()
    nbissue = 0
    statuses = [status]
    # defined here because the error message below displays them
    # even if no message was processed
    content = None
    msg_type = None
    while True:
        try:
            msg = self.kc.get_iopub_msg(timeout=1)
            if msg['msg_type'] == 'status':
                if msg['content']['execution_state'] == 'idle':
                    # the kernel is done, timeouts are forgotten but
                    # the status of the execution reply is restored
                    status = shell_status
                    reason = shell_reason
                    statuses.append(status)
                    break
            statuses.append(status)
        except Empty as e:  # pragma: no cover
            # execution state should return to idle before
            # the queue becomes empty,
            # if it doesn't, something bad has happened
            status = "error"
            statuses.append(status)
            reason = "exception Empty was raised (%r)" % e
            nbissue += 1
            if nbissue > max_nbissue:
                # the notebook is empty
                return ""
            else:
                continue

        content = msg['content']
        msg_type = msg['msg_type']
        if self.detailed_log:
            self.detailed_log(' msg_type={0}'.format(msg_type))

        out = NotebookNode(output_type=msg_type, metadata=dict())

        if 'execution_count' in content:
            if iscell:
                cell['execution_count'] = content['execution_count']
            out.execution_count = content['execution_count']

        if msg_type in ('status', 'pyin', 'execute_input'):
            continue

        if msg_type == 'stream':
            out.name = content['name']
            # in msgspec 5, this is name, text
            # in msgspec 4, this is name, data
            if 'text' in content:
                out.text = content['text']
            else:
                out.data = content['data']

        elif msg_type in ('display_data', 'pyout', 'execute_result'):
            out.data = content['data']

        elif msg_type in ('pyerr', 'error'):
            out.ename = content['ename']
            out.evalue = content['evalue']
            out.traceback = content['traceback']
            out.name = 'stderr'

        elif msg_type == 'clear_output':
            # every output received so far is dropped
            outs = list()
            continue

        elif msg_type in ('comm_open', 'comm_msg', 'comm_close'):
            # widgets in a notebook
            out.data = content["data"]
            out.comm_id = content["comm_id"]

        else:
            dcontent = "\n".join("{0}={1}".format(k, v)
                                 for k, v in sorted(content.items()))
            raise NotImplementedError(  # pragma: no cover
                "Unhandled iopub message: '{0}'\n--CONTENT--\n{1}".format(msg_type, dcontent))

        outs.append(out)
        if self.detailed_log:
            self.detailed_log(' out={0}'.format(type(out)))
            if hasattr(out, "data"):
                self.detailed_log(' out={0}'.format(out.data))

    # the outputs are stored even if the cell failed
    if iscell:
        cell['outputs'] = outs

    # text outputs are gathered for the logs and the error message
    raw = []
    for _ in outs:
        try:
            t = _.data
        except AttributeError:
            continue

        # see MIMEMAP to see the available output type
        for k, v in t.items():
            if k.startswith("text"):
                raw.append(v)

    sraw = "\n".join(raw)
    self.fLOG(sraw)
    if self.detailed_log:
        self.detailed_log(' sraw=\n {0}'.format(
            "\n ".join(sraw.split("\n"))))

    def reply2string(reply):
        # converts the reply of the kernel into text, one line per key
        sreply = []
        for k, v in sorted(reply.items()):
            if isinstance(v, dict):
                temp = []
                for _, __ in sorted(v.items()):
                    temp.append(" [{0}]={1}".format(_, str(__)))
                v_ = "\n".join(temp)
                sreply.append("reply['{0}']=dict\n{1}".format(k, v_))
            else:
                sreply.append("reply['{0}']={1}".format(k, str(v)))
        sreply = "\n".join(sreply)
        return sreply

    if status == 'error':
        sreply = reply2string(reply)
        if len(code) < 5:
            scode = [code]
        else:
            scode = ""
        mes = ("FILENAME\n{10}:1:1 - cell:{11}\n{7}\nCELL status={8}, reason='{9}' -- {4} "
               "length={5} -- {6}:\n-----------------\n"
               "content={12}\nmsg_type: {13} nbissue={14}"
               "\nstatuses={15}"
               "\n-----------------\n{0}"
               "\n-----------------\nTRACE:\n{1}\nRAW:\n{2}REPLY:\n{3}")
        raise NotebookError(mes.format(
            code, traceback_text, sraw, sreply, index_cell,  # 0-4
            len(code), scode, self.comment, status, reason,  # 5-9
            self._filename, index_cell, content, msg_type, nbissue,  # 10-14
            statuses))  # 15
    if self.detailed_log:
        self.detailed_log('[run_cell] status={0}'.format(status))
    return outs
def to_python(self):
    """
    Converts the notebook into python.

    Code cells are copied as they are, every line of a markdown
    or raw cell becomes a comment (a line ``###`` is inserted before
    a line which already starts with ``#``), any other cell is replaced
    by a comment giving its type. An empty line follows each cell.

    @return         string
    """
    rows = []
    for cell in self.iter_cells():
        kind = cell.cell_type
        if kind == "code":
            rows.append(NotebookRunner.get_cell_code(cell)[1])
        elif kind in ("markdown", "raw"):
            for line in cell.source.split("\n"):
                if line.startswith("#"):
                    rows.extend(("###", line))
                else:
                    rows.append("# " + line)
        else:
            # No text, no code.
            rows.append("# cell.type = {0}".format(kind))
        rows.append("")
    return "\n".join(rows)
def iter_code_cells(self):
    '''
    Enumerates the cells of the notebook whose type is ``'code'``.
    '''
    for candidate in self.iter_cells():
        if candidate.cell_type != 'code':
            continue
        yield candidate
def iter_cells(self):
    '''
    Enumerates all the cells of the notebook. If the notebook
    has an attribute *worksheets*, the cells of every worksheet
    are enumerated, otherwise the cells of the notebook itself.
    '''
    if hasattr(self.nb, "worksheets"):  # pragma: no cover
        for sheet in self.nb.worksheets:
            yield from sheet.cells
    else:
        yield from self.nb.cells
def first_cell(self):
    """
    Returns the first cell of the notebook,
    None if the notebook has no cell.
    """
    return next(iter(self.iter_cells()), None)
def _cell_container(self):
    """
    Returns the container of the cells, it depends on the format:
    the cells of the last worksheet if the notebook has an attribute
    *worksheets*, the cells of the notebook otherwise.

    @return     cell container
    """
    if not hasattr(self.nb, "worksheets"):
        return self.nb.cells

    # after the loop, *last* is the last worksheet if there is one
    last = None
    for last in self.nb.worksheets:  # pragma: no cover
        pass
    if last is None:
        raise NotebookError("no cell container")  # pragma: no cover
    return last.cells
def __len__(self):
    """
    Returns the number of cells, the method iterates on the cells
    every time it is called, the result is not cached.

    @return         int
    """
    count = 0
    for _ in self.iter_cells():
        count += 1
    return count
def cell_type(self, cell):
    """
    Gives the type of a cell (its attribute *cell_type*).

    @param      cell        from @see me iter_cells
    @return                 type
    """
    kind = cell.cell_type
    return kind
def cell_metadata(self, cell):
    """
    Gives the metadata of a cell (its attribute *metadata*).

    @param      cell        cell
    @return                 metadata
    """
    meta = cell.metadata
    return meta
def _check_thumbnail_tuple(self, b):
    """
    Checks the types of a thumbnail.

    @param      b       tuple   image, format
    @return             b

    The function raises *TypeError* if *b* is not a tuple of two
    elements, if a *svg* image is not a string or if any other image
    is not bytes. The content is not checked for the plotly
    and bokeh formats.
    """
    if not isinstance(b, tuple):
        raise TypeError(  # pragma: no cover
            "tuple expected, not {0}".format(type(b)))
    if len(b) != 2:
        raise TypeError(  # pragma: no cover
            "tuple expected of lengh 2, not {0}".format(len(b)))

    content, fmt = b
    if fmt == "svg":
        if not isinstance(content, str):
            raise TypeError(
                "str expected for svg, not {0}".format(type(content)))
        return b

    if fmt in ("vnd.plotly.v1+html", "vnd.bokehjs_exec.v0+json",
               "vnd.bokehjs_load.v0+json", 'vnd.plotly.v1+json'):
        # Don't know how to extract a snippet out of this.
        return b

    if not isinstance(content, bytes):
        raise TypeError(
            "bytes expected for images, not {0}-'{1}'\n{2}".format(type(content), fmt, b))
    return b
def create_picture_from(self, text, format, asbytes=True, context=None):
    """
    Creates a picture from text.

    @param      text        the text, bytes are decoded (utf-8), any other
                            type is converted into a string
    @param      format      text, json, ... (not used by the current implementation)
    @param      context     (str) indication on the content of text (error, ...),
                            (not used by the current implementation)
    @param      asbytes     results as bytes or as an image
    @return                 tuple (picture, format) or PIL.Image (if asbytes is False)

    The picture will be bytes, the format png, bmp...
    The size of the picture will depend on the text.
    The longer, the bigger. The method relies on matplotlib
    and then convert the image into a PIL image.
    A text without any end of line is split into rows of 20 characters,
    the text is truncated to 200 characters.

    HTML could be rendered with QWebPage from PyQt (not implemented).
    """
    if isinstance(text, bytes):
        # operator *in* and replace below only work with strings
        text = text.decode("utf-8", errors="replace")
    elif not isinstance(text, str):
        text = str(text)
    if "\n" not in text:
        # a long single line is wrapped into rows of 20 characters
        text = "\n".join(text[i:i + 20] for i in range(0, len(text), 20))
    if len(text) > 200:
        text = text[:200]
    size = len(text) // 10
    figsize = (3 + size, 3 + size)
    lines = text.replace("\t", " ").replace("\r", "").split("\n")

    import matplotlib.pyplot as plt
    from matplotlib.textpath import TextPath
    from matplotlib.font_manager import FontProperties
    fig = plt.figure(figsize=figsize)
    ax = fig.add_subplot(111)
    fp = FontProperties(size=200)

    # dx, dy: width and height of the text, used to set the axes limits
    dx = 0
    dy = 0
    for line in lines:
        if len(line.strip()) > 0:
            ax.text(0, -dy, line, fontproperties=fp, va='top')
            tp = TextPath((0, -dy), line, prop=fp)
            bb = tp.get_extents()
            dy += bb.height
            dx = max(dx, bb.width)

    ratio = abs(dx) / max(abs(dy), 1)
    ratio = max(min(ratio, 3), 1)
    fig.set_size_inches(int((1 + size) * ratio), 1 + size)
    ax.set_xlim([0, dx])
    ax.set_ylim([-dy, 0])
    ax.set_axis_off()
    sio = BytesIO()
    fig.savefig(sio, format="png")
    # closes this figure and not whichever figure is the current one
    plt.close(fig)

    if asbytes:
        b = sio.getvalue(), "png"
        self._check_thumbnail_tuple(b)
        return b
    try:
        from PIL import Image
    except ImportError:  # pragma: no cover
        import Image
    # the image is read from the beginning of the buffer
    sio.seek(0)
    img = Image.open(sio)
    return img
def cell_image(self, cell, image_from_text=False):
    """
    Returns the cell image or None if not found.

    @param      cell            cell to examine
    @param      image_from_text produce an image even if it is not one
    @return                     None for no image or the result of method
                                *_merge_images* applied on the list of tuples
                                (image, extension) found in the outputs of the cell

    Only code cells are considered. Text outputs (plain text, javascript,
    json, html, latex, streams, tracebacks) are converted into images with
    @see me create_picture_from only if *image_from_text* is True.
    """
    kind = self.cell_type(cell)
    if kind != "code":
        return None

    # text mime types (except text/plain) and the label given to create_picture_from
    text_labels = {
        "application/javascript": "js",
        "application/json": "json",
        "text/html": "html",
        "text/latex": "latex",
    }
    raster_types = {"image/png", "image/jpg", "image/jpeg", "image/gif"}
    opaque_types = ("text/vnd.plotly.v1+html", "application/vnd.plotly.v1+json",
                    "application/vnd.bokehjs_exec.v0+json",
                    "application/vnd.bokehjs_load.v0+json")

    results = []
    for output in cell.outputs:
        out_type = output["output_type"]
        if out_type in {"execute_result", "display_data"}:
            for k, v in output["data"].items():
                if k == "text/plain":
                    if image_from_text:
                        results.append(self.create_picture_from(
                            v, "text", context=out_type))
                elif k in text_labels:
                    if image_from_text:
                        results.append(
                            self.create_picture_from(v, text_labels[k]))
                elif k == "image/svg+xml":
                    if not isinstance(v, str):
                        raise TypeError(
                            "This should be str not '{0}' (=SVG).".format(type(v)))
                    results.append((v, "svg"))
                elif k == "application/vnd.jupyter.widget-view+json":
                    # see http://ipywidgets.readthedocs.io/en/latest/embedding.html
                    if "model_id" not in v:
                        raise KeyError(  # pragma: no cover
                            "model_id is missing from {0}".format(v))
                    model_id = v["model_id"]
                    self.fLOG(
                        "[application/vnd.jupyter.widget-view+json] not rendered", model_id)
                elif k in raster_types:
                    # images are usually stored as base64 strings
                    if not isinstance(v, bytes):
                        v = base64.b64decode(v)
                        if not isinstance(v, bytes):
                            raise TypeError(  # pragma: no cover
                                "This should be bytes not '{0}' (=IMG:{1}).".format(type(v), k))
                    results.append((v, k.split("/")[-1]))
                elif k in opaque_types:
                    results.append((v, k.split("/")[-1]))
                else:
                    raise NotImplementedError(  # pragma: no cover
                        "cell type: {0}\nk={1}\nv={2}\nCELL:\n{3}".format(
                            kind, k, v, cell))
        elif out_type == "error":
            vl = output["traceback"]
            if image_from_text:
                for v in vl:
                    results.append(self.create_picture_from(
                        v, "text", context="error"))
        elif out_type == "stream":
            v = output["text"]
            if image_from_text:
                results.append(self.create_picture_from(v, "text"))
        else:
            raise NotImplementedError(  # pragma: no cover
                "cell type: {0}\noutput type: {1}\nOUT:\n{2}\nCELL:\n{3}"
                "".format(kind, output["output_type"], output, cell))

    if len(results) == 0:
        return None
    res = self._merge_images(results)
    if res[0] is None:
        return None
    self._check_thumbnail_tuple(res)
    return res
def cell_height(self, cell):
    """
    Approximates the height of a cell by the number of lines it contains.

    @param      cell        cell
    @return                 number of lines

    For a markdown or raw cell, a line counts for one plus one
    for every 80 characters. For a code cell, the lines of code are
    added to a rough estimation of the height of every output.
    The method raises *NotImplementedError* for an unknown type of cell,
    of output or an unknown mime type.
    """
    kind = self.cell_type(cell)
    if kind in ("markdown", "raw"):
        return sum(1 + len(line) // 80 for line in cell.source.split("\n"))

    if kind != "code":
        raise NotImplementedError(  # pragma: no cover
            "cell type: {0}\nCELL:\n{1}".format(kind, cell))

    nbl = len(cell.source.split("\n"))
    for output in cell.outputs:
        out_type = output["output_type"]
        if out_type in ("execute_result", "display_data"):
            for k, v in output["data"].items():
                if k in ("text/plain", "text/html"):
                    nbl += len(v.split("\n"))
                elif k == "application/javascript":
                    # rough estimation
                    nbl += len(v.split("\n")) // 2
                elif k == "application/json":
                    # rough estimation
                    try:
                        nbl += len(v.split("{"))
                    except AttributeError:  # pragma: no cover
                        nbl += len(v) // 5 + 1
                elif k == "image/svg+xml":
                    nbl += len(v) // 5
                elif k == "text/latex":
                    nbl += len(v.split("\\\\")) * 2
                elif k in {"image/png", "image/jpg", "image/jpeg", "image/gif"}:
                    nbl += len(v) // 50
                elif k == "application/vnd.jupyter.widget-view+json":
                    nbl += 5
                elif k in ("text/vnd.plotly.v1+html",
                           "application/vnd.plotly.v1+json",
                           "application/vnd.bokehjs_load.v0+json",
                           "application/vnd.bokehjs_exec.v0+json"):
                    nbl += 10
                else:
                    fmt = "Unable to guess heigth for cell type: '{0}'\nk='{1}'\nv='{2}'\nCELL:\n{3}"
                    raise NotImplementedError(
                        fmt.format(kind, k, v, cell))
        elif out_type == "stream":
            nbl += len(output["text"].split("\n"))
        elif out_type == "error":
            # one line per element of the traceback
            nbl += len(output["traceback"])
        else:
            raise NotImplementedError(  # pragma: no cover
                "cell type: {0}\noutput type: {1}\nOUT:\n{2}\nCELL:\n{3}"
                .format(kind, output["output_type"], output, cell))

    return nbl
def add_tag_slide(self, max_nb_cell=4, max_nb_line=25):
    """
    Tries to add tags for a slide show when they are too few.
    The metadata of the cells are modified inplace.

    @param      max_nb_cell     maximum number of cells within a slide
    @param      max_nb_line     maximum number of lines within a slide
    @return                     list of modified cells { #slide: (kind, reason, cell) }

    A markdown cell without any tag which starts with a title
    (``#``, ``##``, ``###``) begins a new slide, a cell without any tag
    begins a new subslide when the current slide would contain too many
    cells or too many lines (see @see me cell_height).
    """
    modified = {}
    lines_in_slide = 0
    cells_in_slide = 0
    for i, cell in enumerate(self.iter_cells()):
        meta = cell.metadata
        if "slideshow" in meta:
            # existing tag: only slides and subslides reset the counters
            if meta["slideshow"]["slide_type"] in ("slide", "subslide"):
                lines_in_slide = 0
                cells_in_slide = 0
        elif cell.cell_type == "markdown" and \
                cell.source.startswith(("# ", "## ", "### ")):
            meta["slideshow"] = {'slide_type': 'slide'}
            lines_in_slide = 0
            cells_in_slide = 0
            modified[i] = ("slide", "section", cell)

        height = self.cell_height(cell)
        step = 1
        too_big = (cells_in_slide + step > max_nb_cell or
                   lines_in_slide + height > max_nb_line)
        if "slideshow" not in meta and too_big:
            modified[i] = (
                "subslide", "{0}-{1} <-> {2}-{3}".format(
                    cells_in_slide, lines_in_slide, step, height), cell)
            lines_in_slide = 0
            cells_in_slide = 0
            meta["slideshow"] = {'slide_type': 'subslide'}

        lines_in_slide += height
        cells_in_slide += step

    return modified
def run_notebook(self, skip_exceptions=False, progress_callback=None,
                 additional_path=None, valid=None, clean_function=None,
                 context=None):
    '''
    Runs all the cells of a notebook in order and update
    the outputs in-place.

    If ``skip_exceptions`` is set, then if exceptions occur in a cell, the
    subsequent cells are run (by default, the notebook execution stops).

    @param      skip_exceptions     skip exception, a cell raising @see cl NotebookError
                                    is logged and the execution goes on with the next one
    @param      progress_callback   call back function, called with the index of
                                    every processed code cell
    @param      additional_path     additional paths (as a list or None if none)
    @param      valid               if not None, valid is a function which returns whether
                                    or not the cell should be executed or not, if the function
                                    returns None, the execution of the notebooks and skip
                                    the execution of the other cells
    @param      clean_function      function which cleans a cell's code before executing
                                    it (None for None)
    @param      context             not used by this method
    @return                         dictionary with statistics: *nbcell* (visited code cells),
                                    *nbrun* (executed cells), *nbvalid* (cells executed
                                    without any error), *time* (execution time)

    The function adds the local variable ``theNotebook`` with
    the absolute file name of the notebook.
    Function *valid* can return *None* to stop the execution of the notebook
    before this cell.
    The method raises *TypeError* if *additional_path* is not a list and
    *RuntimeError* if the kernel stops answering.
    '''
    if self.detailed_log:
        self.detailed_log(
            "[run_notebook] Starting execution of '{0}'".format(self._filename))
    # additional path
    if additional_path is not None:
        if not isinstance(additional_path, list):
            raise TypeError(  # pragma: no cover
                "Additional_path should be a list not: " + str(additional_path))
        code = ["import sys"]
        # repr builds a valid python literal whatever the path contains
        # (quotes, trailing backslash)
        code.extend("sys.path.append({0!r})".format(str(p))
                    for p in additional_path)
        self.run_cell(-1, "\n".join(code))

    # we add local variable theNotebook
    if self.theNotebook is not None:
        self.run_cell(
            -1, "theNotebook = {0!r}".format(str(self.theNotebook)))

    # initialisation with a code not inside the notebook
    if self.code_init is not None:
        self.run_cell(-1, self.code_init)

    # execution of the notebook
    nbcell = 0
    nbrun = 0
    nbvalid = 0
    cl = time.perf_counter()
    for i, cell in enumerate(self.iter_code_cells()):
        nbcell += 1
        codei = self.get_cell_code(cell)[1]
        if valid is not None:
            r = valid(codei)
            if r is None:
                break
            if not r:
                continue
        nbrun += 1
        try:
            self.run_cell(i, cell, clean_function=clean_function)
        except Empty as er:
            raise RuntimeError(  # pragma: no cover
                "{0}\nissue when executing:\n{1}".format(self.comment, codei)) from er
        except NotebookError:  # pragma: no cover
            if not skip_exceptions:
                raise
            # the failure is logged and the next cells are executed
            self.fLOG(
                "[run_notebook] cell {0} failed, skip_exceptions=True, "
                "the execution goes on".format(i))
        else:
            nbvalid += 1
        if progress_callback:
            progress_callback(i)
    etime = time.perf_counter() - cl
    res = dict(nbcell=nbcell, nbrun=nbrun, nbvalid=nbvalid, time=etime)
    if self.detailed_log:
        self.detailed_log(
            "[run_notebook] end execution of '{0}'".format(self._filename))
        self.detailed_log(
            "[run_notebook] execution time: {0}".format(etime))
        self.detailed_log("[run_notebook] statistics : {0}".format(res))
    return res
def count_code_cells(self):
    '''
    Counts the code cells of the notebook
    (the cells enumerated by @see me iter_code_cells).
    '''
    total = 0
    for _ in self.iter_code_cells():
        total += 1
    return total
def merge_notebook(self, nb):
    """
    Appends the cells of notebook *nb* to this one.

    @param      nb      notebook or list of notebook (@see cl NotebookRunner)
    @return             number of added cells

    .. faqref::
        :title: How to merge notebook?

        The following code merges two notebooks into the first one
        and stores the result unto a file.

        ::

            from pyquickhelper.ipythonhelper import read_nb
            nb1 = read_nb("<file1>", kernel=False)
            nb2 = read_nb("<file2>", kernel=False)
            nb1.merge_notebook(nb2)
            nb1.to_json(outfile)
    """
    if isinstance(nb, list):
        # every notebook of the list is merged one after the other
        return sum(self.merge_notebook(n) for n in nb)

    container = self._cell_container()
    added = 0
    for added, cell in enumerate(nb.iter_cells(), 1):
        container.append(cell)
    return added
def get_description(self):
    """
    Gets summary and description of this notebook.
    We expect the first cell to contain a title and a description
    of its content. If the first cell has no description, it is taken
    from the second cell when this one is a markdown cell.

    @return             header, description

    Markdown links ``[text](url)`` are replaced by their text and double
    quotes are removed from the description. The method raises *ValueError*
    if the notebook is empty, if the first cell is not a markdown cell
    or if a url remains in the description.
    """
    def split_header(s, get_header=True):
        # splits a markdown text into its title and its first paragraph
        parts = s.strip().splitlines()
        if not parts:
            # empty cell, no title, no description
            return '', ''
        header = ''
        if parts[0].startswith('#'):
            if get_header:
                # only the leading '#' are removed, not the ones in the title
                header = re.sub(r'^#+\s*', '', parts.pop(0))
        elif get_header:
            # title underlined (and maybe overlined) with '=' or '-'
            if parts[0].startswith(('=', '-')):
                parts = parts[1:]
            if not parts:
                return '', ''
            header = parts.pop(0)
            if parts and parts[0].startswith(('=', '-')):
                parts.pop(0)
        if not parts:
            return header, ''
        rest = '\n'.join(parts).lstrip().split('\n\n')
        desc = rest[0].replace('\n', ' ')
        return header, desc

    first_cell = self.first_cell()
    if first_cell is None:
        raise ValueError(
            "The notebook has no cell, filename='{0}'.".format(self._filename))

    if not first_cell['cell_type'] == 'markdown':
        raise ValueError(  # pragma: no cover
            "The first cell is not in markdown but '{0}' filename='{1}'.".format(
                first_cell['cell_type'], self._filename))

    header, desc = split_header(first_cell['source'])
    if not desc and len(self.nb['cells']) > 1:
        second_cell = self.nb['cells'][1]
        if second_cell['cell_type'] == 'markdown':
            _, desc = split_header(second_cell['source'], False)

    # markdown links are replaced by their text
    reg_link = "(\\[(.*?)\\]\\(([^ ]*)\\))"
    reg = re.compile(reg_link)
    new_desc = reg.sub("\\2", desc)
    if "http://" in new_desc or "https://" in new_desc:
        raise ValueError(  # pragma: no cover
            "Wrong regular expression in '{2}':\n{0}\nMODIFIED:\n{1}".format(
                desc, new_desc, self._filename))
    return header, new_desc.replace('"', "")
def get_thumbnail(self, max_width=200, max_height=200, use_default=False):
    """
    Processes the notebook and creates one picture based on the outputs
    to illustrate a notebook. Cells are scanned from the last one to
    the first one and the image found closest to the end of the
    notebook is used. If no image is found, or if *use_default* is True,
    the file ``no_image_nb.png`` stored next to this module is used.

    @param      max_width       maximum width of the thumbnail
    @param      max_height      maximum height of the thumbnail
    @param      use_default     force using the default image even if
                                an image is present (cells are then
                                not scanned at all)
    @return                     Image (:epkg:`PIL`) as returned by
                                *_scale_image*, or None if the selected
                                output is a plotly/bokeh payload or a
                                :epkg:`SVG` which cannot be converted
    """
    # outputs which cannot be converted into a picture
    not_pictures = ("vnd.plotly.v1+html", "vnd.bokehjs_exec.v0+json",
                    "vnd.bokehjs_load.v0+json")

    images = []
    if not use_default:
        cells = list(self.iter_cells())
        cells.reverse()

        # first pass
        for cell in cells:
            c = self.cell_image(cell, False)
            if (c is not None and len(c) > 0 and len(c[0]) > 0 and
                    c[1] not in not_pictures):
                self._check_thumbnail_tuple(c)
                images.append(c)

        if not images:
            # second pass, same cells with the second argument of
            # cell_image switched on
            # NOTE(review): presumably this accepts HTML outputs too,
            # confirm against cell_image
            for cell in cells:
                c = self.cell_image(cell, True)
                if c is not None and len(c) > 0 and len(c[0]) > 0:
                    self._check_thumbnail_tuple(c)
                    images.append(c)
                    if len(c[0]) >= 1000:
                        # big enough, no need to look further
                        break

    if not images:
        # no image, we need to consider the default one
        no_image = os.path.join(
            os.path.dirname(__file__), 'no_image_nb.png')
        with open(no_image, "rb") as f:
            c = (f.read(), "png")
        self._check_thumbnail_tuple(c)
        images.append(c)

    # select the image: cells were reversed, the first collected
    # image is the last one of the notebook
    # (maybe later we'll implement a different logic)
    image = images[0]

    # zoom
    if image[1] in not_pictures:
        return None
    if image[1] == 'svg':
        try:
            img = svg2img(image[0])
        except PYQImageException:  # pragma: no cover
            # Unable to convert SVG.
            return None
        return self._scale_image(
            img, image[1], max_width=max_width, max_height=max_height)
    return self._scale_image(
        image[0], image[1], max_width=max_width, max_height=max_height)
def _scale_image(self, in_bytes, format=None, max_width=200, max_height=200):
    """
    Scales an image with the same aspect ratio centered in an
    image with a given max_width and max_height.
    An image which already fits in the requested box is returned
    unchanged (it is not centered on a white canvas).

    @param      in_bytes    image as bytes, as a PIL image, or a tuple
                            whose first element is one of those
    @param      format      indication of the format (can be empty),
                            only used in the error message
    @param      max_width   maximum width of the thumbnail
    @param      max_height  maximum height of the thumbnail
    @return                 Image (PIL)
    @raises TypeError       if *in_bytes* is neither bytes nor a PIL image
    """
    # local import to avoid testing dependency on PIL:
    try:
        from PIL import Image
    except ImportError:  # pragma: no cover
        import Image

    if isinstance(in_bytes, tuple):
        in_bytes = in_bytes[0]
    if isinstance(in_bytes, bytes):
        img = Image.open(BytesIO(in_bytes))
    elif isinstance(in_bytes, Image.Image):
        img = in_bytes
    else:
        raise TypeError(  # pragma: no cover
            "bytes expected, not {0} - format={1}".format(
                type(in_bytes), format))

    width_in, height_in = img.size
    scale_w = max_width / float(width_in)
    scale_h = max_height / float(height_in)

    # picks the ratio which makes the image fit in both directions
    if height_in * scale_w <= max_height:
        scale = scale_w
    else:
        scale = scale_h

    if scale >= 1.0:
        # the image is already small enough
        return img

    width_sc = int(round(scale * width_in))
    height_sc = int(round(scale * height_in))

    # Image.ANTIALIAS was removed in Pillow 10, LANCZOS is the same
    # filter; the lookup below works with old and recent versions.
    resample = getattr(getattr(Image, "Resampling", Image), "LANCZOS", None)
    if resample is None:  # pragma: no cover
        resample = Image.ANTIALIAS

    # resize the image (in place) and center
    img.thumbnail((width_sc, height_sc), resample)
    thumb = Image.new('RGB', (max_width, max_height), (255, 255, 255))
    pos_insert = ((max_width - width_sc) // 2,
                  (max_height - height_sc) // 2)
    thumb.paste(img, pos_insert)
    return thumb
1215 def _merge_images(self, results):
1216 """
1217 Merges images defined by (buffer, format).
1218 The method uses PIL to merge images when possible.
1220 @return ``[ (image, format) ]``
1221 """
1222 if len(results) == 1:
1223 results = results[0]
1224 self._check_thumbnail_tuple(results)
1225 return results
1226 if len(results) == 0:
1227 return None
1229 formats_counts = Counter(_[1] for _ in results)
1230 if len(formats_counts) == 1:
1231 format = results[0][1]
1232 else:
1233 items = sorted(((v, k)
1234 for k, v in formats_counts.items()), reverse=False)
1235 for it in items:
1236 format = it
1237 break
1239 results = [_ for _ in results if _[1] == format]
1240 if format == "svg":
1241 return ("\n".join(_[0] for _ in results), format)
1243 # local import to avoid testing dependency on PIL:
1244 try:
1245 from PIL import Image
1246 except ImportError: # pragma: no cover
1247 import Image
1249 dx = 0.
1250 dy = 0.
1251 over = 0.7
1252 imgs = []
1253 for in_bytes, _ in results:
1254 img = Image.open(BytesIO(in_bytes))
1255 imgs.append(img)
1256 dx = max(dx, img.size[0])
1257 dy += img.size[1] * over
1259 new_im = Image.new('RGB', (int(dx), int(dy)), (220, 220, 220))
1260 for img in imgs:
1261 dy -= img.size[1] * over
1262 new_im.paste(img, (0, max(int(dy), 0)))
1264 if max(dx, dy) > 0:
1265 image_buffer = BytesIO()
1266 new_im.save(image_buffer, "PNG")
1267 b = image_buffer.getvalue(), "png"
1268 return b
1269 b = None, "png"
1270 return b