Source code for pyquickhelper.ipythonhelper.notebook_runner

"""
Modified version of `runipy.notebook_runner
<https://github.com/paulgb/runipy/blob/master/runipy/notebook_runner.py>`_.


:githublink:`%|py|6`
"""

import base64
import os
import re
import time
import platform
import warnings
from queue import Empty
from time import sleep
from collections import Counter
from io import StringIO, BytesIO
from nbformat import NotebookNode, writes
from nbformat.reader import reads
from ..imghelper.svg_helper import svg2img, PYQImageException
from ..loghelper.flog import noLOG


class NotebookError(Exception):
    """
    Exception raised when the execution of a notebook fails.
    """
class NotebookKernelError(Exception):
    """
    Exception raised when the kernel cannot be started or when
    `wait_for_ready <https://github.com/jupyter/jupyter_client/blob/master/
    jupyter_client/blocking/client.py#L84>`_ fails.
    """
class NotebookRunner:
    """
    Executes a notebook end to end.

    The kernel communicates with mime-types while the notebook
    uses short labels for the different output types.
    *MIME_MAP* maps the kernel mime-types to the notebook labels.

    .. index:: kernel, notebook

    The class can use different kernels. The following links give
    more information on how to create or test a kernel:

    * `jupyter_kernel_test <https://github.com/jupyter/jupyter_kernel_test>`_
    * `simple_kernel <https://github.com/dsblank/simple_kernel>`_

    .. faqref::
        :title: Do I need to shutdown the kernel after running a notebook?

        .. index:: travis

        If the class is instantiated with *kernel=True*, a kernel is started.
        It must be shut down, otherwise the program might wait for it
        forever. That is one of the reasons why a travis build may not
        complete: the build is finished but cannot terminate until
        all kernels are shut down.
    """

    # . available output types
    MIME_MAP = {
        'image/jpeg': 'jpeg',
        'image/png': 'png',
        'image/gif': 'gif',
        'text/plain': 'text',
        'text/html': 'html',
        'text/latex': 'latex',
        'application/javascript': 'html',
        'image/svg+xml': 'svg',
    }
def __init__(self, nb, profile_dir=None, working_dir=None,
             comment="", fLOG=noLOG, theNotebook=None, code_init=None,
             kernel_name="python", log_level="30", extended_args=None,
             kernel=False, filename=None, replacements=None,
             detailed_log=None, startup_timeout=300):
    """
    :param nb: notebook as :epkg:`JSON`
    :param profile_dir: profile directory
    :param working_dir: working directory, the kernel is started from it
    :param comment: additional information added to error message
    :param theNotebook: if not None, populate the variable *theNotebook* with this value in the notebook
    :param code_init: to initialize the notebook with a python code as if it was a cell
    :param fLOG: logging function
    :param log_level: Choices: (0, 10, 20, 30=default, 40, 50, 'DEBUG', 'INFO', 'WARN', 'ERROR', 'CRITICAL')
    :param kernel_name: kernel name, it can be None
    :param extended_args: others arguments to pass to the command line
        (`--KernelManager.autorestart=True` for example),
        see :ref:`l-ipython_notebook_args` for a full list,
        every option must start with ``--`` and contain ``=``
    :param kernel: *kernel* is False by default, the notebook can be read but not run,
        if True, a kernel is started and the notebook can be run
    :param filename: to add the notebook file if there is one in error messages
    :param replacements: replacements to make in every cell before running it,
        dictionary ``{ string: string }``
    :param detailed_log: to log detailed information when executing the notebook,
        this should be a function with the same signature as ``print`` or None
    :param startup_timeout: wait for this long for the kernel to be ready,
        see `wait_for_ready
        <https://github.com/jupyter/jupyter_client/blob/master/
        jupyter_client/blocking/client.py#L84>`_
    :raises SyntaxError: if an option of *extended_args* is malformed
    :raises NotebookKernelError: if the kernel cannot be started or is not ready in time

    .. versionchanged:: 1.8
        Parameter *startup_timeout* was added.
    """
    # Attributes read by __del__ and the other methods are set first so
    # that they exist even if the kernel fails to start below.
    self.km = None
    self.kc = None
    self.nb = nb
    self.comment = comment
    self.detailed_log = detailed_log
    self.fLOG = fLOG
    self.theNotebook = theNotebook
    self.code_init = code_init
    self._filename = filename if filename is not None else "memory"
    self.replacements = replacements
    # The real values are stored (not the defaults) so that copy()
    # builds an instance configured like this one.
    self.init_args = dict(
        profile_dir=profile_dir, working_dir=working_dir,
        comment=comment, fLOG=fLOG, theNotebook=theNotebook, code_init=code_init,
        kernel_name=kernel_name, log_level=log_level, extended_args=extended_args,
        kernel=kernel, filename=filename, replacements=replacements,
        detailed_log=detailed_log, startup_timeout=startup_timeout)

    if kernel:
        try:
            from jupyter_client import KernelManager
        except ImportError:  # pragma: no cover
            from ipykernel import KernelManager

        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", category=DeprecationWarning)
            if kernel_name is not None:
                self.km = KernelManager(kernel_name=kernel_name)
            else:
                self.km = KernelManager()

    # command line arguments given to the kernel
    args = []
    if profile_dir:
        args.append('--profile-dir=%s' % os.path.abspath(profile_dir))
    if log_level:
        args.append('--log-level=%s' % log_level)
    for opt in extended_args or []:
        if not opt.startswith("--"):
            raise SyntaxError(
                "every option should start with '--': " + opt)
        if "=" not in opt:
            raise SyntaxError(  # pragma: no cover
                "every option should be assigned a value: " + opt)
        args.append(opt)

    if not kernel:
        return

    cwd = os.getcwd()
    if working_dir:
        os.chdir(working_dir)
    try:
        try:
            with warnings.catch_warnings():
                warnings.filterwarnings(
                    "ignore", category=ResourceWarning)
                self.km.start_kernel(extra_arguments=args)
        except Exception as e:  # pragma: no cover
            raise NotebookKernelError(
                "Failure with args: {0}\nand error:\n{1}".format(args, str(e))) from e

        if platform.system() == 'Darwin':
            # see http://www.pypedia.com/index.php/notebook_runner
            # There is sometimes a race condition where the first
            # execute command hits the kernel before it's ready.
            # It appears to happen only on Darwin (Mac OS) and an
            # easy (but clumsy) way to mitigate it is to sleep
            # for a second.
            sleep(1)
    finally:
        # the current directory is restored even if the kernel did not start
        if working_dir:
            os.chdir(cwd)

    self.kc = self.km.client()
    self.kc.start_channels(stdin=False)
    try:
        self.kc.wait_for_ready(timeout=startup_timeout)
    except RuntimeError as e:  # pragma: no cover
        # NOTE(review): the original comment says "We wait for one second."
        # but the code sleeps for *startup_timeout* seconds, confirm
        # which one is intended.
        sleep(startup_timeout)
        self.kc.stop_channels()
        self.km.shutdown_kernel()
        self.km = None
        self.kc = None
        raise NotebookKernelError(
            "Wait_for_ready fails (timeout={0}).".format(startup_timeout)) from e
def __del__(self):
    """
    Drops the references to the kernel manager and the kernel client.

    The attributes are looked up with a default value because
    ``__del__`` is also called on instances whose constructor
    failed before they were assigned.
    """
    for name in ("km", "kc"):
        if getattr(self, name, None) is not None:
            delattr(self, name)
def to_json(self, filename=None, encoding="utf8"):
    """
    Serializes the notebook into :epkg:`JSON`.

    :param filename: file name (str), a stream with a method *write*,
        or None to get the result as a string
    :param encoding: encoding used when *filename* is a file name
    :return: Json string if filename is None, None otherwise
    """
    if filename is None:
        return writes(self.nb)
    if isinstance(filename, str):
        with open(filename, "w", encoding=encoding) as stream:
            self.to_json(stream)
    else:
        filename.write(writes(self.nb))
    return None
def copy(self):
    """
    Copies the notebook (just the content).

    The notebook is serialized and read again, the new instance
    is built with the arguments stored in *init_args* except
    *theNotebook* and *filename*.

    :return: instance of :class:`NotebookRunner
        <pyquickhelper.ipythonhelper.notebook_runner.NotebookRunner>`
    """
    buffer = StringIO()
    self.to_json(buffer)
    kwargs = {key: value for key, value in self.init_args.items()
              if key not in ("theNotebook", "filename")}
    return NotebookRunner(reads(buffer.getvalue()), **kwargs)
def __add__(self, nb):
    """
    Merges two notebooks together, returns a new one,
    this notebook is left unchanged.

    :param nb: notebook
    :return: new notebook
    """
    merged = self.copy()
    merged.merge_notebook(nb)
    return merged
def shutdown_kernel(self):
    """
    Stops the client channels and shuts down the kernel immediately.

    :raises ValueError: if no kernel was started
    """
    self.fLOG('-- shutdown kernel')
    client = self.kc
    if client is None:
        raise ValueError(  # pragma: no cover
            "No kernel was started, specify kernel=True when initializing the instance.")
    client.stop_channels()
    self.km.shutdown_kernel(now=True)
def clean_code(self, code):
    """
    Cleans the code before running it, the function comments out
    instructions such as ``show()`` and, when the code uses *bokeh*,
    ``output_notebook()``. It also applies the replacements
    stored in *self.replacements* to every line.

    :param code: code (string) or None
    :return: cleaned code, None if *code* is None
    """
    # The test must come first, the search for bokeh fails on None.
    if code is None:
        return code

    has_bokeh = any(marker in code
                    for marker in ("bokeh.", "from bokeh", "import bokeh"))
    lines = [_.strip("\n\r").rstrip(" \t") for _ in code.split("\n")]
    res = []
    show_is_last = False
    for line in lines:
        compact = line.replace(" ", "")
        if compact == "show()":
            line = line.replace("show", "#show")
            show_is_last = True
        elif has_bokeh and compact == "output_notebook()":
            line = line.replace("output_notebook", "#output_notebook")
        else:
            show_is_last = False
        if self.replacements is not None:
            for k, v in self.replacements.items():
                line = line.replace(k, v)
        res.append(line)
    if show_is_last:
        # the cell must still end with an expression
        res.append('"nothing to show"')
    return "\n".join(res)
[docs] @staticmethod def get_cell_code(cell): """ Returns the code of a cell. :param cell: a cell or a string :return: boolean (=iscell), string :githublink:`%|py|307` """ if isinstance(cell, str): iscell = False return iscell, cell iscell = True try: return iscell, cell.source except AttributeError: # pragma: no cover return iscell, cell.input
def run_cell(self, index_cell, cell, clean_function=None, max_nbissue=15):
    """
    Runs a notebook cell and updates the output of that cell inplace.

    :param index_cell: index of the cell, only used in logs and error messages
    :param cell: cell to execute, a cell or a string of code
    :param clean_function: cleaning function to apply to the code
        after :meth:`clean_code` and before running it
    :param max_nbissue: number of times exception *Empty* can be raised
        while waiting for kernel messages before stopping
    :return: list of outputs of the cell, or an empty string if the
        cleaned code is empty or if too many *Empty* exceptions were raised
    :raises ValueError: if no kernel was started
    :raises NotImplementedError: if the kernel sends an unexpected message type
    :raises NotebookError: if the final status is ``'error'``
    """
    if self.detailed_log:
        self.detailed_log("[run_cell] index_cell={0} clean_function={1}".format(
            index_cell, clean_function))
    iscell, codei = NotebookRunner.get_cell_code(cell)

    self.fLOG('-- running cell:\n%s\n' % codei)
    if self.detailed_log:
        self.detailed_log(
            '[run_cell] code=\n {0}'.format(
                "\n ".join(codei.split("\n"))))

    code = self.clean_code(codei)
    if clean_function is not None:
        code = clean_function(code)
    if self.detailed_log:
        self.detailed_log(
            ' cleaned code=\n {0}'.format(
                "\n ".join(code.split("\n"))))
    # nothing to send to the kernel
    if len(code) == 0:
        return ""
    if self.kc is None:
        raise ValueError(  # pragma: no cover
            "No kernel was started, specify kernel=True when initializing the instance.")
    self.kc.execute(code)

    # reply of the kernel on the shell channel
    reply = self.kc.get_shell_msg()
    reason = None
    try:
        status = reply['content']['status']
    except KeyError:  # pragma: no cover
        status = 'error'
        reason = "no status key in reply['content']"

    if status == 'error':
        # removes the ANSI color sequences from the traceback
        ansi_escape = re.compile(r'\x1b[^m]*m')
        try:
            tr = [ansi_escape.sub('', _)
                  for _ in reply['content']['traceback']]
        except KeyError:  # pragma: no cover
            tr = (["No traceback, available keys in reply['content']"] +
                  list(reply['content']))
        traceback_text = '\n'.join(tr)
        self.fLOG("[nberror]\n", traceback_text)
        if self.detailed_log:
            self.detailed_log('[run_cell] ERROR=\n {0}'.format(
                "\n ".join(traceback_text.split("\n"))))
    else:
        traceback_text = ''
        self.fLOG('-- cell returned')

    # reads the messages of the iopub channel until the kernel is idle
    outs = list()
    nbissue = 0
    statuses = [status]
    while True:
        try:
            msg = self.kc.get_iopub_msg(timeout=1)
            if msg['msg_type'] == 'status':
                if msg['content']['execution_state'] == 'idle':
                    # NOTE(review): this overwrites a status 'error' read
                    # from the shell reply, confirm it is intended.
                    status = 'ok'
                    statuses.append(status)
                    break
            statuses.append(status)
        except Empty as e:  # pragma: no cover
            # execution state should return to idle before
            # the queue becomes empty,
            # if it doesn't, something bad has happened
            status = "error"
            statuses.append(status)
            reason = "exception Empty was raised (%r)" % e
            nbissue += 1
            if nbissue > max_nbissue:
                # the notebook is empty
                return ""
            else:
                continue

        content = msg['content']
        msg_type = msg['msg_type']
        if self.detailed_log:
            self.detailed_log(' msg_type={0}'.format(msg_type))

        out = NotebookNode(output_type=msg_type, metadata=dict())

        if 'execution_count' in content:
            if iscell:
                cell['execution_count'] = content['execution_count']
            out.execution_count = content['execution_count']

        # these messages do not produce any output
        if msg_type in ('status', 'pyin', 'execute_input'):
            continue

        if msg_type == 'stream':
            out.name = content['name']
            # in msgspec 5, this is name, text
            # in msgspec 4, this is name, data
            if 'text' in content:
                out.text = content['text']
            else:
                out.data = content['data']

        elif msg_type in ('display_data', 'pyout', 'execute_result'):
            out.data = content['data']

        elif msg_type in ('pyerr', 'error'):
            out.ename = content['ename']
            out.evalue = content['evalue']
            out.traceback = content['traceback']
            out.name = 'stderr'

        elif msg_type == 'clear_output':
            # the outputs collected so far are dropped
            outs = list()
            continue

        elif msg_type in ('comm_open', 'comm_msg', 'comm_close'):
            # widgets in a notebook
            out.data = content["data"]
            out.comm_id = content["comm_id"]

        else:
            dcontent = "\n".join("{0}={1}".format(k, v)
                                 for k, v in sorted(content.items()))
            raise NotImplementedError(  # pragma: no cover
                "Unhandled iopub message: '{0}'\n--CONTENT--\n{1}".format(msg_type, dcontent))

        outs.append(out)
        if self.detailed_log:
            self.detailed_log(' out={0}'.format(type(out)))
            if hasattr(out, "data"):
                self.detailed_log(' out={0}'.format(out.data))

    if iscell:
        cell['outputs'] = outs

    # gathers the text outputs to log them
    raw = []
    for _ in outs:
        try:
            t = _.data
        except AttributeError:
            continue

        # see MIMEMAP to see the available output type
        for k, v in t.items():
            if k.startswith("text"):
                raw.append(v)

    sraw = "\n".join(raw)
    self.fLOG(sraw)
    if self.detailed_log:
        self.detailed_log(' sraw=\n {0}'.format(
            "\n ".join(sraw.split("\n"))))

    def reply2string(reply):
        # converts the reply of the kernel into a readable string,
        # one line per key, dictionaries are expanded one level
        sreply = []
        for k, v in sorted(reply.items()):
            if isinstance(v, dict):
                temp = []
                for _, __ in sorted(v.items()):
                    temp.append(" [{0}]={1}".format(_, str(__)))
                v_ = "\n".join(temp)
                sreply.append("reply['{0}']=dict\n{1}".format(k, v_))
            else:
                sreply.append("reply['{0}']={1}".format(k, str(v)))
        sreply = "\n".join(sreply)
        return sreply

    # NOTE(review): every exit of the loop above either returns or sets
    # status to 'ok', so this branch looks unreachable, and content and
    # msg_type may be unbound here if no message went past the status
    # test, to be confirmed.
    if status == 'error':
        sreply = reply2string(reply)
        if len(code) < 5:
            scode = [code]
        else:
            scode = ""
        mes = ("FILENAME\n{10}:1:1 - cell:{11}\n{7}\nCELL status={8}, reason='{9}' -- {4} "
               "length={5} -- {6}:\n-----------------\n"
               "content={12}\nmsg_type: {13} nbissue={14}"
               "\nstatuses={15}"
               "\n-----------------\n{0}"
               "\n-----------------\nTRACE:\n{1}\nRAW:\n{2}REPLY:\n{3}")
        raise NotebookError(mes.format(
            code, traceback_text, sraw, sreply, index_cell,  # 0-4
            len(code), scode, self.comment, status, reason,  # 5-9
            self._filename, index_cell, content, msg_type, nbissue,  # 10-14
            statuses))  # 15
    if self.detailed_log:
        self.detailed_log('[run_cell] status={0}'.format(status))
    return outs
def to_python(self):
    """
    Converts the notebook into python.

    Code cells are copied as is, every line of a markdown or raw
    cell becomes a comment, the other cells are replaced by a
    comment which indicates their type. An empty line follows
    every cell.

    :return: string
    """
    script = []
    for cell in self.iter_cells():
        kind = cell.cell_type
        if kind == "code":
            script.append(NotebookRunner.get_cell_code(cell)[1])
        elif kind in ("markdown", "raw"):
            for row in cell.source.split("\n"):
                if row.startswith("#"):
                    # a title is already a comment, it is only highlighted
                    script.extend(["###", row])
                else:
                    script.append("# " + row)
        else:
            # No text, no code.
            script.append("# cell.type = {0}".format(kind))
        script.append("")
    return "\n".join(script)
def iter_code_cells(self):
    """
    Iterates over the cells of the notebook whose type is ``'code'``.
    """
    yield from (cell for cell in self.iter_cells()
                if cell.cell_type == 'code')
def iter_cells(self):
    """
    Iterates over the notebook cells.

    If the notebook has an attribute *worksheets*, the cells of every
    worksheet are returned in order, otherwise the method iterates
    on ``self.nb.cells``.
    """
    if not hasattr(self.nb, "worksheets"):
        yield from self.nb.cells
        return
    for sheet in self.nb.worksheets:
        yield from sheet.cells
def first_cell(self):
    """
    Returns the first cell or None if the notebook has no cell.
    """
    return next(iter(self.iter_cells()), None)
def _cell_container(self):
    """
    Returns the container new cells should be appended to,
    it depends on the format of the notebook.

    :return: cells of the last worksheet if the notebook has
        worksheets, ``self.nb.cells`` otherwise
    :raises NotebookError: if the notebook has an empty list of worksheets
    """
    if not hasattr(self.nb, "worksheets"):
        return self.nb.cells
    last_sheet = None
    for last_sheet in self.nb.worksheets:
        # the loop only moves to the last worksheet
        pass
    if last_sheet is None:
        raise NotebookError("no cell container")  # pragma: no cover
    return last_sheet.cells
def __len__(self):
    """
    Returns the number of cells, it iterates on the cells to
    get this information and does not cache it.

    :return: int
    """
    return len(list(self.iter_cells()))
def cell_type(self, cell):
    """
    Returns the type of a cell (attribute *cell_type*).

    :param cell: from :meth:`iter_cells
        <pyquickhelper.ipythonhelper.notebook_runner.NotebookRunner.iter_cells>`
    :return: type
    """
    kind = cell.cell_type
    return kind
def cell_metadata(self, cell):
    """
    Returns the metadata of a cell (attribute *metadata*).

    :param cell: cell
    :return: metadata
    """
    metadata = cell.metadata
    return metadata
def _check_thumbnail_tuple(self, b):
    """
    Checks the types of a thumbnail.

    :param b: tuple image, format
    :return: b
    :raises TypeError: if *b* is not a tuple of two elements,
        if a svg image is not a string or if another image is not bytes,
        nothing is checked for the plotly and bokeh formats
    """
    if not isinstance(b, tuple):
        raise TypeError(  # pragma: no cover
            "tuple expected, not {0}".format(type(b)))
    if len(b) != 2:
        raise TypeError(  # pragma: no cover
            "tuple expected of lengh 2, not {0}".format(len(b)))

    content, kind = b[0], b[1]
    if kind == "svg":
        if isinstance(content, str):
            return b
        raise TypeError(
            "str expected for svg, not {0}".format(type(content)))
    if kind in ("vnd.plotly.v1+html", "vnd.bokehjs_exec.v0+json",
                "vnd.bokehjs_load.v0+json", 'vnd.plotly.v1+json'):
        # Don't know how to extract a snippet out of this.
        return b
    if isinstance(content, bytes):
        return b
    raise TypeError(
        "bytes expected for images, not {0}-'{1}'\n{2}".format(type(content), kind, b))
def create_picture_from(self, text, format, asbytes=True, context=None):
    """
    Creates a picture from text.

    :param text: the text, bytes are decoded as utf-8,
        any other type is converted with ``str``
    :param format: text, json, ... (not used to draw the picture)
    :param context: (str) indication on the content of text (error, ...),
        (not used to draw the picture)
    :param asbytes: results as bytes or as an image
    :return: tuple (picture, format) or PIL.Image (if asbytes is False)

    The picture will be bytes, the format png, bmp...
    The size of the picture will depend on the text.
    The longer, the bigger. A text without any end of line is
    split into rows of 20 characters and only the first 200
    characters are drawn. The method relies on matplotlib
    and then converts the image into a PIL image.

    HTML could be rendered with QWebPage from PyQt (not implemented).
    """
    if isinstance(text, bytes):
        # the tests below compare the text with strings
        text = text.decode("utf-8", errors="replace")
    elif not isinstance(text, str):
        text = str(text)
    if "\n" not in text:
        # a single long line is wrapped into rows of 20 characters
        text = "\n".join(text[start:start + 20]
                         for start in range(0, len(text), 20))
    if len(text) > 200:
        text = text[:200]
    size = len(text) // 10
    lines = text.replace("\t", " ").replace("\r", "").split("\n")

    import matplotlib.pyplot as plt
    from matplotlib.textpath import TextPath
    from matplotlib.font_manager import FontProperties
    fig = plt.figure(figsize=(3 + size, 3 + size))
    try:
        ax = fig.add_subplot(111)
        fp = FontProperties(size=200)

        # draws the lines one below the other and measures the text
        dx = 0
        dy = 0
        for line in lines:
            if len(line.strip()) > 0:
                ax.text(0, -dy, line, fontproperties=fp, va='top')
                tp = TextPath((0, -dy), line, prop=fp)
                bb = tp.get_extents()
                dy += bb.height
                dx = max(dx, bb.width)

        ratio = abs(dx) / max(abs(dy), 1)
        ratio = max(min(ratio, 3), 1)
        fig.set_size_inches(int((1 + size) * ratio), 1 + size)
        ax.set_xlim([0, dx])
        ax.set_ylim([-dy, 0])
        ax.set_axis_off()
        sio = BytesIO()
        fig.savefig(sio, format="png")
    finally:
        # closes this figure and not whatever the current figure is
        plt.close(fig)

    if asbytes:
        b = sio.getvalue(), "png"
        self._check_thumbnail_tuple(b)
        return b
    try:
        from PIL import Image
    except ImportError:  # pragma: no cover
        import Image
    sio.seek(0)
    img = Image.open(sio)
    return img
def cell_image(self, cell, image_from_text=False):
    """
    Returns the cell image or None if not found.

    :param cell: cell to examine
    :param image_from_text: produce an image even if it is not one,
        text outputs are then drawn with :meth:`create_picture_from`
    :return: None for no image (or if the cell is not a code cell),
        otherwise the tuple (image, extension) built by merging the
        images of all outputs of the cell
    :raises NotImplementedError: for an unknown mime type or output type
    """
    kind = self.cell_type(cell)
    if kind != "code":
        return None

    # mime types drawn as text, the value is the format
    # given to create_picture_from
    drawn_as_text = {
        "application/javascript": "js",
        "application/json": "json",
        "text/html": "html",
        "text/latex": "latex",
    }
    bitmaps = {"image/png", "image/jpg", "image/jpeg", "image/gif"}
    kept_as_is = ("text/vnd.plotly.v1+html", "application/vnd.plotly.v1+json",
                  "application/vnd.bokehjs_exec.v0+json",
                  "application/vnd.bokehjs_load.v0+json")

    results = []
    for output in cell.outputs:
        out_type = output["output_type"]

        if out_type == "error":
            traceback_lines = output["traceback"]
            if image_from_text:
                for row in traceback_lines:
                    results.append(self.create_picture_from(
                        row, "text", context="error"))
            continue

        if out_type == "stream":
            stream_text = output["text"]
            if image_from_text:
                results.append(self.create_picture_from(stream_text, "text"))
            continue

        if out_type not in {"execute_result", "display_data"}:
            raise NotImplementedError(  # pragma: no cover
                "cell type: {0}\noutput type: {1}\nOUT:\n{2}\nCELL:\n{3}"
                "".format(kind, output["output_type"], output, cell))

        for k, v in output["data"].items():
            if k == "text/plain":
                if image_from_text:
                    results.append(self.create_picture_from(
                        v, "text", context=out_type))
            elif k in drawn_as_text:
                if image_from_text:
                    results.append(
                        self.create_picture_from(v, drawn_as_text[k]))
            elif k == "image/svg+xml":
                if not isinstance(v, str):
                    raise TypeError(
                        "This should be str not '{0}' (=SVG).".format(type(v)))
                results.append((v, "svg"))
            elif k in bitmaps:
                # images are stored as base64 strings in a notebook
                if not isinstance(v, bytes):
                    v = base64.b64decode(v)
                if not isinstance(v, bytes):
                    raise TypeError(  # pragma: no cover
                        "This should be bytes not '{0}' (=IMG:{1}).".format(type(v), k))
                results.append((v, k.split("/")[-1]))
            elif k in kept_as_is:
                results.append((v, k.split("/")[-1]))
            elif k == "application/vnd.jupyter.widget-view+json":
                # see http://ipywidgets.readthedocs.io/en/latest/embedding.html
                if "model_id" not in v:
                    raise KeyError(  # pragma: no cover
                        "model_id is missing from {0}".format(v))
                self.fLOG(
                    "[application/vnd.jupyter.widget-view+json] not rendered",
                    v["model_id"])
            else:
                raise NotImplementedError(  # pragma: no cover
                    "cell type: {0}\nk={1}\nv={2}\nCELL:\n{3}".format(
                        kind, k, v, cell))

    if len(results) == 0:
        return None
    res = self._merge_images(results)
    if res[0] is None:
        return None
    self._check_thumbnail_tuple(res)
    return res
def cell_height(self, cell):
    """
    Approximates the height of a cell by the number of lines it contains.

    :param cell: cell
    :return: estimated number of lines
    :raises NotImplementedError: for an unknown cell type, output type or mime type

    A markdown or raw cell counts one line per row plus one line
    for every 80 characters of a row. A code cell counts the lines
    of its code plus an estimation for every output.
    """
    kind = self.cell_type(cell)
    if kind in ("markdown", "raw"):
        return sum(1 + len(row) // 80 for row in cell.source.split("\n"))
    if kind != "code":
        raise NotImplementedError(  # pragma: no cover
            "cell type: {0}\nCELL:\n{1}".format(kind, cell))

    height = len(cell.source.split("\n"))
    for output in cell.outputs:
        out_type = output["output_type"]
        if out_type == "stream":
            height += len(output["text"].split("\n"))
            continue
        if out_type == "error":
            height += len(output["traceback"])
            continue
        if out_type not in ("execute_result", "display_data"):
            raise NotImplementedError(  # pragma: no cover
                "cell type: {0}\noutput type: {1}\nOUT:\n{2}\nCELL:\n{3}"
                .format(kind, output["output_type"], output, cell))

        for k, v in output["data"].items():
            if k in ("text/plain", "text/html"):
                height += len(v.split("\n"))
            elif k == "application/javascript":
                # rough estimation
                height += len(v.split("\n")) // 2
            elif k == "application/json":
                # rough estimation
                try:
                    height += len(v.split("{"))
                except AttributeError:
                    height += len(v) // 5 + 1
            elif k == "image/svg+xml":
                height += len(v) // 5
            elif k == "text/latex":
                height += len(v.split("\\\\")) * 2
            elif k in {"image/png", "image/jpg", "image/jpeg", "image/gif"}:
                height += len(v) // 50
            elif k == "application/vnd.jupyter.widget-view+json":
                height += 5
            elif k in ("text/vnd.plotly.v1+html",
                       "application/vnd.plotly.v1+json",
                       "application/vnd.bokehjs_load.v0+json",
                       "application/vnd.bokehjs_exec.v0+json"):
                height += 10
            else:
                fmt = "Unable to guess heigth for cell type: '{0}'\nk='{1}'\nv='{2}'\nCELL:\n{3}"
                raise NotImplementedError(
                    fmt.format(kind, k, v, cell))
    return height
def add_tag_slide(self, max_nb_cell=4, max_nb_line=25):
    """
    Tries to add tags for a slide show when they are too few.

    :param max_nb_cell: maximum number of cells within a slide
    :param max_nb_line: maximum number of lines within a slide
    :return: list of modified cells { #slide: (kind, reason, cell) }

    A markdown cell without any tag which starts with a title
    (``#``, ``##``, ``###``) begins a new slide. A cell without
    any tag begins a new subslide if the current slide would
    otherwise exceed *max_nb_cell* cells or *max_nb_line* lines.
    The metadata of the cells is modified inplace.
    """
    modified = {}
    lines_in_slide = 0
    cells_in_slide = 0
    for position, cell in enumerate(self.iter_cells()):
        meta = cell.metadata
        if "slideshow" in meta:
            # an existing tag slide or subslide resets the counters
            if meta["slideshow"]["slide_type"] in ["slide", "subslide"]:
                lines_in_slide = cells_in_slide = 0
        elif cell.cell_type == "markdown" and \
                cell.source.startswith(("# ", "## ", "### ")):
            meta["slideshow"] = {'slide_type': 'slide'}
            lines_in_slide = cells_in_slide = 0
            modified[position] = ("slide", "section", cell)

        height = self.cell_height(cell)
        too_big = (cells_in_slide + 1 > max_nb_cell or
                   lines_in_slide + height > max_nb_line)
        if "slideshow" not in meta and too_big:
            modified[position] = (
                "subslide",
                "{0}-{1} <-> {2}-{3}".format(
                    cells_in_slide, lines_in_slide, 1, height),
                cell)
            lines_in_slide = cells_in_slide = 0
            meta["slideshow"] = {'slide_type': 'subslide'}

        lines_in_slide += height
        cells_in_slide += 1
    return modified
def run_notebook(self, skip_exceptions=False, progress_callback=None,
                 additional_path=None, valid=None, clean_function=None,
                 context=None):
    """
    Runs all the cells of a notebook in order and updates
    the outputs in-place.

    If ``skip_exceptions`` is set, then if exceptions occur in a cell, the
    subsequent cells are run (by default, the notebook execution stops).

    :param skip_exceptions: if True, a cell which raises *NotebookError*
        is logged and the execution continues with the next cell
    :param progress_callback: call back function, it receives the
        index of the code cell which was just processed
    :param additional_path: additional paths (as a list or None if none)
    :param valid: if not None, valid is a function which receives the code
        of a cell and returns whether or not the cell should be executed,
        if the function returns None, the execution of the notebook
        stops before this cell
    :param clean_function: function which cleans a cell's code before executing
        it (None for None)
    :param context: not used by this method
    :return: dictionary with statistics: *nbcell* (cells seen),
        *nbrun* (cells run), *nbvalid* (cells run without error),
        *time* (execution time)
    :raises TypeError: if *additional_path* is not a list
    :raises RuntimeError: if exception *Empty* is raised while running a cell
    :raises NotebookError: if a cell fails and *skip_exceptions* is False

    The function adds the local variable ``theNotebook`` with
    the absolute file name of the notebook.
    """
    if self.detailed_log:
        self.detailed_log(
            "[run_notebook] Starting execution of '{0}'".format(self._filename))

    # additional path
    if additional_path is not None:
        if not isinstance(additional_path, list):
            raise TypeError(  # pragma: no cover
                "Additional_path should be a list not: " + str(additional_path))
        code = ["import sys"]
        code.extend("sys.path.append(r'{0}')".format(p)
                    for p in additional_path)
        self.run_cell(-1, "\n".join(code))

    # we add local variable theNotebook
    if self.theNotebook is not None:
        cell = "theNotebook = r'''{0}'''".format(self.theNotebook)
        self.run_cell(-1, cell)

    # initialisation with a code not inside the notebook
    if self.code_init is not None:
        self.run_cell(-1, self.code_init)

    # execution of the notebook
    nbcell = 0
    nbrun = 0
    nbnerr = 0
    cl = time.perf_counter()
    for i, cell in enumerate(self.iter_code_cells()):
        nbcell += 1
        codei = self.get_cell_code(cell)[1]
        if valid is not None:
            r = valid(codei)
            if r is None:
                break
            if not r:
                continue
        nbrun += 1
        try:
            self.run_cell(i, cell, clean_function=clean_function)
        except Empty as er:
            raise RuntimeError(  # pragma: no cover
                "{0}\nissue when executing:\n{1}".format(self.comment, codei)) from er
        except NotebookError as e:
            if not skip_exceptions:
                raise
            # the failure is logged and the next cells are run
            self.fLOG(
                "[run_notebook] cell {0} failed, execution continues".format(i))
            if self.detailed_log:
                self.detailed_log(
                    "[run_notebook] cell {0} failed: {1}".format(i, e))
        else:
            nbnerr += 1
        if progress_callback:
            progress_callback(i)
    etime = time.perf_counter() - cl
    res = dict(nbcell=nbcell, nbrun=nbrun, nbvalid=nbnerr, time=etime)
    if self.detailed_log:
        self.detailed_log(
            "[run_notebook] end execution of '{0}'".format(self._filename))
        self.detailed_log(
            "[run_notebook] execution time: {0}".format(etime))
        self.detailed_log("[run_notebook] statistics : {0}".format(res))
    return res
def count_code_cells(self):
    """
    Returns the number of code cells in the notebook,
    the cells are counted at every call.
    """
    total = 0
    for _ in self.iter_code_cells():
        total += 1
    return total
def merge_notebook(self, nb):
    """
    Appends notebook *nb* to this one.

    :param nb: notebook or list of notebook (:class:`NotebookRunner
        <pyquickhelper.ipythonhelper.notebook_runner.NotebookRunner>`)
    :return: number of added cells

    .. faqref::
        :title: How to merge notebook?

        The following code merges two notebooks into the first one
        and stores the result unto a file.

        ::

            from pyquickhelper.ipythonhelper import read_nb
            nb1 = read_nb("<file1>", kernel=False)
            nb2 = read_nb("<file2>", kernel=False)
            nb1.merge_notebook(nb2)
            nb1.to_json(outfile)
    """
    if isinstance(nb, list):
        return sum(self.merge_notebook(n) for n in nb)

    container = self._cell_container()
    # The cells are collected before being added: when a notebook is
    # merged with itself, appending while iterating would never end.
    new_cells = list(nb.iter_cells())
    for cell in new_cells:
        container.append(cell)
    return len(new_cells)
def get_description(self):
    """
    Gets summary and description of this notebook.
    We expect the first cell to contain a title and a description
    of its content. If the first cell has no description, it is
    taken from the second cell when it is a markdown cell.

    :return: header, description, links ``[text](url)`` are replaced
        by their text and double quotes are removed from the description
    :raises ValueError: if the notebook is empty, if the first cell is
        not a markdown cell or if a url remains in the description
    """

    def split_header(s, get_header=True):
        # Splits a markdown text into (title, first paragraph),
        # the title is empty if get_header is False.
        parts = s.strip().splitlines()
        if not parts:
            # empty cell
            return '', ''
        if parts[0].startswith('#'):
            if get_header:
                header = re.sub('#+\\s*', '', parts.pop(0))
                if not parts:
                    return header, ''
            else:
                header = ''
            rest = '\n'.join(parts).lstrip().split('\n\n')
            desc = rest[0].replace('\n', ' ')
            return header, desc

        if get_header:
            # title underlined (or surrounded) by === or ---
            if parts[0].startswith(('=', '-')):
                parts = parts[1:]
            if not parts:
                # the cell only contains a line of = or -
                return '', ''
            header = parts.pop(0)
            if parts and parts[0].startswith(('=', '-')):
                parts.pop(0)
            if not parts:
                return header, ''
        else:
            header = ''

        rest = '\n'.join(parts).lstrip().split('\n\n')
        desc = rest[0].replace('\n', ' ')
        return header, desc

    first_cell = self.first_cell()
    if first_cell is None:
        raise ValueError(  # pragma: no cover
            "The notebook is empty, filename='{0}'.".format(self._filename))

    if not first_cell['cell_type'] == 'markdown':
        raise ValueError(  # pragma: no cover
            "The first cell is not in markdown but '{0}' filename='{1}'.".format(
                first_cell['cell_type'], self._filename))

    header, desc = split_header(first_cell['source'])
    if not desc:
        # The second cell is retrieved with iter_cells to support
        # the same notebook layouts as first_cell.
        second_cell = None
        for position, cell in enumerate(self.iter_cells()):
            if position == 1:
                second_cell = cell
                break
        if second_cell is not None and second_cell['cell_type'] == 'markdown':
            _, desc = split_header(second_cell['source'], False)

    # links [text](url) are replaced by text
    reg_link = "(\\[(.*?)\\]\\(([^ ]*)\\))"
    reg = re.compile(reg_link)
    new_desc = reg.sub("\\2", desc)
    if "http://" in new_desc or "https://" in new_desc:
        raise ValueError(  # pragma: no cover
            "Wrong regular expression in '{2}':\n{0}\nMODIFIED:\n{1}".format(
                desc, new_desc, self._filename))
    return header, new_desc.replace('"', "")
def get_thumbnail(self, max_width=200, max_height=200, use_default=False):
    """
    Processes the notebook and creates one picture based on the outputs
    to illustrate a notebook.

    :param max_width: maximum size of the thumbnail
    :param max_height: maximum size of the thumbnail
    :param use_default: force using a default image even if an image is present
    :return: Image (:epkg:`PIL`), or None if the selected output
        cannot be converted into an image

    The method looks for images starting from the last cell.
    If there is none, it draws the text outputs. If there is
    still nothing, or if *use_default* is True, the default
    image ``no_image_nb.png`` stored next to this file is used.
    """
    # formats the method does not know how to convert into an image,
    # same list as in _check_thumbnail_tuple
    not_renderable = ("vnd.plotly.v1+html", "vnd.plotly.v1+json",
                      "vnd.bokehjs_exec.v0+json", "vnd.bokehjs_load.v0+json")

    images = []
    if not use_default:
        # the cells are not scanned if the default image is requested
        cells = list(self.iter_cells())
        cells.reverse()
        for cell in cells:
            c = self.cell_image(cell, False)
            if c is not None and len(c) > 0 and len(c[0]) > 0 and \
                    c[1] not in not_renderable:
                self._check_thumbnail_tuple(c)
                images.append(c)
        if len(images) == 0:
            # no image, pictures are created from the text outputs
            for cell in cells:
                c = self.cell_image(cell, True)
                if c is not None and len(c) > 0 and len(c[0]) > 0:
                    self._check_thumbnail_tuple(c)
                    images.append(c)
                    if len(c[0]) >= 1000:
                        break

    if len(images) == 0:
        # no image, we need to consider the default one
        no_image = os.path.join(
            os.path.dirname(__file__), 'no_image_nb.png')
        with open(no_image, "rb") as f:
            c = (f.read(), "png")
        self._check_thumbnail_tuple(c)
        images.append(c)

    # select the image
    # maybe later we'll implement a different logic,
    # cells were reversed: the first image comes from the last cell
    image = images[0]

    # zoom
    if image[1] in not_renderable:
        return None
    if image[1] == 'svg':
        try:
            img = svg2img(image[0])
        except PYQImageException:  # pragma: no cover
            # Unable to convert SVG.
            return None
        return self._scale_image(img, image[1], max_width=max_width, max_height=max_height)
    img = self._scale_image(
        image[0], image[1], max_width=max_width, max_height=max_height)
    return img
def _scale_image(self, in_bytes, format=None, max_width=200, max_height=200):
    """
    Scales an image with the same aspect ratio centered in an
    image with a given max_width and max_height.

    :param in_bytes: image as bytes, as a PIL image or
        as a tuple whose first element is one of them
    :param format: indication of the format (can be empty),
        only used in the error message
    :param max_width: maximum size of the thumbnail
    :param max_height: maximum size of the thumbnail
    :return: Image (PIL), the image itself if it is already
        smaller than the requested size
    :raises TypeError: if *in_bytes* is neither bytes nor a PIL image
    """
    # local import to avoid testing dependency on PIL:
    try:
        from PIL import Image
    except ImportError:  # pragma: no cover
        import Image

    if isinstance(in_bytes, tuple):
        in_bytes = in_bytes[0]
    if isinstance(in_bytes, bytes):
        img = Image.open(BytesIO(in_bytes))
    elif isinstance(in_bytes, Image.Image):
        img = in_bytes
    else:
        raise TypeError(  # pragma: no cover
            "bytes expected, not {0} - format={1}".format(
                type(in_bytes), format))

    width_in, height_in = img.size
    scale_w = max_width / float(width_in)
    scale_h = max_height / float(height_in)

    # the smallest scale is used to fit in both dimensions
    if height_in * scale_w <= max_height:
        scale = scale_w
    else:
        scale = scale_h

    if scale >= 1.0:
        return img

    width_sc = int(round(scale * width_in))
    height_sc = int(round(scale * height_in))

    # Image.ANTIALIAS was an alias of Image.LANCZOS and was
    # removed in Pillow 10.
    resample = getattr(Image, "LANCZOS", None)
    if resample is None:  # pragma: no cover
        resample = Image.ANTIALIAS

    # resize the image and center
    # NOTE(review): thumbnail modifies the image inplace, including an
    # image given by the caller.
    img.thumbnail((width_sc, height_sc), resample)
    thumb = Image.new('RGB', (max_width, max_height), (255, 255, 255))
    pos_insert = ((max_width - width_sc) // 2,
                  (max_height - height_sc) // 2)
    thumb.paste(img, pos_insert)
    return thumb
def _merge_images(self, results):
    """
    Merges images defined by (buffer, format).
    The method uses PIL to merge images when possible.

    :param results: list of tuples (buffer, format)
    :return: a single tuple ``(image, format)``, None if *results* is
        empty, ``(None, "png")`` if the merged image is empty

    If the images have different formats, only the images of one
    format are kept: the least frequent one, the alphabetical order
    breaks the ties. SVG images are concatenated, the other images
    are stacked vertically with an overlap in a PNG image.
    """
    if len(results) == 0:
        return None
    if len(results) == 1:
        single = results[0]
        self._check_thumbnail_tuple(single)
        return single

    formats_counts = Counter(_[1] for _ in results)
    if len(formats_counts) == 1:
        fmt = results[0][1]
    else:
        # Pairs (count, name) in ascending order: the name must be
        # extracted from the first pair, not the pair itself.
        fmt = min((count, name)
                  for name, count in formats_counts.items())[1]

    results = [_ for _ in results if _[1] == fmt]
    if fmt == "svg":
        return ("\n".join(_[0] for _ in results), fmt)
    if not all(isinstance(_[0], bytes) for _ in results):
        # formats such as plotly or bokeh payloads cannot be opened
        # as images, the first one is returned unchanged
        return results[0]

    # local import to avoid testing dependency on PIL:
    try:
        from PIL import Image
    except ImportError:  # pragma: no cover
        import Image

    # images are stacked vertically, every image covers
    # 30% of the previous one
    dx = 0.
    dy = 0.
    over = 0.7
    imgs = []
    for in_bytes, _ in results:
        img = Image.open(BytesIO(in_bytes))
        imgs.append(img)
        dx = max(dx, img.size[0])
        dy += img.size[1] * over

    new_im = Image.new('RGB', (int(dx), int(dy)), (220, 220, 220))
    for img in imgs:
        dy -= img.size[1] * over
        new_im.paste(img, (0, max(int(dy), 0)))

    if max(dx, dy) > 0:
        image_buffer = BytesIO()
        new_im.save(image_buffer, "PNG")
        b = image_buffer.getvalue(), "png"
        return b
    b = None, "png"
    return b