# Source code for pyquickhelper.helpgen.process_notebooks

# -*- coding: utf-8 -*-
"""
Contains the main function to generate the documentation
for a module designed the same way as this one, :func:`generate_help_sphinx <pyquickhelper.helpgen.sphinx_main.generate_help_sphinx>`.



:githublink:`%|py|8`
"""
import datetime
import json
import os
import sys
import shutil
import warnings
from io import StringIO
from nbconvert.exporters.base import ExporterNameError

from .utils_sphinx_doc_helpers import HelpGenException
from .conf_path_tools import find_latex_path, find_pandoc_path
from .post_process import (
    post_process_latex_output, post_process_latex_output_any,
    post_process_rst_output, post_process_html_output,
    post_process_slides_output, post_process_python_output)
from .helpgen_exceptions import NotebookConvertError
from .install_js_dep import install_javascript_tools
from .style_css_template import THUMBNAIL_TEMPLATE, THUMBNAIL_TEMPLATE_TABLE
from .process_notebook_api import nb2rst
from ..loghelper.flog import run_cmd, fLOG, noLOG
from ..pandashelper import df2rst
from ..filehelper.synchelper import has_been_updated, explore_folder


# reStructuredText skeleton holding two section titles, an empty ``toctree``
# and an ``autosummary`` directive. It contains a single ``%s`` placeholder
# (the ``:toctree:`` folder) to be filled with the ``%`` operator.
# NOTE(review): not referenced in the visible part of this module,
# presumably used elsewhere -- confirm before removing.
template_examples = """

List of programs
++++++++++++++++

.. toctree::
   :maxdepth: 2

.. autosummary:: __init__.py
   :toctree: %s/
   :template: modules.rst

Another list
++++++++++++

"""


def find_pdflatex(latex_path):
    """
    Returns the executable used to compile latex,
    *xelatex* is preferred over *pdflatex*.

    :param latex_path: folder to look into (only used on Windows)
    :return: executable

    On Windows, the function looks for ``xelatex.exe`` then
    ``pdflatex.exe`` in *latex_path* and raises *FileNotFoundError*
    if none of them exists. On other platforms, it runs
    ``xelatex --help`` and falls back to *pdflatex* if the command
    fails or if the second value returned by *run_cmd* is not empty.

    .. versionadded:: 1.7

    :githublink:`%|py|58`
    """
    platform = sys.platform
    if platform.startswith("win"):  # pragma: no cover
        for exe_name in ("xelatex.exe", "pdflatex.exe"):
            candidate = os.path.join(latex_path, exe_name)
            if os.path.exists(candidate):
                return candidate
        raise FileNotFoundError(
            "Unable to find pdflatex or xelatex in '{0}'".format(latex_path))

    if platform.startswith("darwin"):  # pragma: no cover
        preferred = "/Library/TeX/texbin/xelatex"
        fallback = "/Library/TeX/texbin/pdflatex"
    else:
        preferred = "xelatex"
        fallback = "pdflatex"

    try:
        err = run_cmd(preferred + " --help", wait=True)[1]
        if len(err) != 0:
            # The exception is caught just below: it only triggers
            # the fallback to pdflatex.
            raise FileNotFoundError(
                "Unable to run xelatex\n{0}".format(err))
        return preferred
    except Exception:  # pragma: no cover
        return fallback
def process_notebooks(notebooks, outfold, build, latex_path=None, pandoc_path=None,
                      formats="ipynb,html,python,rst,slides,pdf,github",
                      fLOG=fLOG, exc=True, remove_unicode_latex=False, nblinks=None,
                      notebook_replacements=None):
    """
    Converts notebooks into :epkg:`html`, :epkg:`rst`, :epkg:`latex`,
    :epkg:`pdf`, :epkg:`python`, :epkg:`docx` using
    :epkg:`nbconvert`.

    :param notebooks: list of notebooks or comma separated values
    :param outfold: folder which will contain the outputs
    :param build: temporary folder which contains all produced files,
        it cannot be None
    :param pandoc_path: path to pandoc
    :param formats: list of formats to convert into (pdf format means latex
        then compilation), or comma separated values
    :param latex_path: path to the latex compiler
    :param fLOG: logging function
    :param exc: raises an exception (True) or a warning (False) if an error happens
    :param nblinks: dictionary ``{ref: url}`` or a string in :epkg:`json`
        format
    :param remove_unicode_latex: remove unicode characters for latex (to avoid failing)
    :param notebook_replacements: string replacement in a notebook before conversion
        or a string in :epkg:`json` format
    :return: list of tuple *[(file, created or skipped)]*
    :raises ValueError: if *build* is None

    This function relies on :epkg:`pandoc`.
    It also needs modules :epkg:`pywin32`,
    :epkg:`pygments`.

    :epkg:`pywin32` might have some issues
    to find its DLL, look :func:`import_pywin32 <pyquickhelper.helpgen.utils_pywin32.import_pywin32>`.

    The latex compilation uses :epkg:`MiKTeX`.
    The conversion into Word document directly uses pandoc.
    It still has an issue with table.

    Some latex templates (for nbconvert) uses ``[commandchars=\\\\\\{\\}]{\\|}`` which allows commands ``\\\\`` and it does not compile.
    The one used here is ``report``.
    Some others bugs can be found at: `schlichtanders/latex_test.html <https://gist.github.com/schlichtanders/e108ed0be80108178af2>`_.
    For example, you must not let spaces between symbol ``$`` and the
    formulas it indicates.

    If *pandoc_path* is None, uses :func:`find_pandoc_path <pyquickhelper.helpgen.conf_path_tools.find_pandoc_path>` to guess it.
    If *latex_path* is None, uses :func:`find_latex_path <pyquickhelper.helpgen.conf_path_tools.find_latex_path>` to guess it.

    .. exref::
        :title: Convert a notebook into multiple formats

        ::

            from pyquickhelper.ipythonhelper import process_notebooks
            process_notebooks("td1a_correction_session7.ipynb",
                              "dest_folder", "dest_folder",
                              formats=("ipynb", "html", "python", "rst", "slides", "pdf",
                                       "docx", "github"))

    For latex and pdf, a custom processor was added to handle raw data
    and add ``\\begin{verbatim}`` and ``\\end{verbatim}``.
    Format *github* adds a link to file on :epkg:`github`.

    .. todoext::
        :title: check differences between _process_notebooks_in_private and _process_notebooks_in_private_cmd
        :tag: bug

        For :epkg:`latex` and :epkg:`pdf`,
        the custom preprocessor is not taken into account
        by function :func:`_process_notebooks_in_private <pyquickhelper.helpgen.process_notebooks._process_notebooks_in_private>`.

    :githublink:`%|py|155`
    """
    def _split_if_str(value):
        # comma separated values are accepted instead of a sequence
        return value.split(',') if isinstance(value, str) else value

    def _loads_if_str(value):
        # json strings are accepted instead of a dictionary
        return json.loads(value) if isinstance(value, str) else value

    notebooks = _split_if_str(notebooks)
    formats = _split_if_str(formats)
    notebook_replacements = _loads_if_str(notebook_replacements)
    nblinks = _loads_if_str(nblinks)

    if build is None:
        raise ValueError("build cannot be None")

    converted = _process_notebooks_in(
        notebooks=notebooks, outfold=outfold, build=build,
        latex_path=latex_path, pandoc_path=pandoc_path,
        formats=formats, fLOG=fLOG, exc=exc, nblinks=nblinks,
        remove_unicode_latex=remove_unicode_latex,
        notebook_replacements=notebook_replacements)

    if "slides" in formats:
        # we copy javascript dependencies, reveal.js,
        # in both folders unless they already are there
        for folder in (outfold, build):
            if not os.path.exists(os.path.join(folder, "reveal.js")):
                install_javascript_tools(None, dest=folder)

    return converted
[docs]def _process_notebooks_in_private(fnbcexe, list_args, options_args): """ This function fails in nbconvert 6.0 when the conversion is called more than once. The conversion probably changes the initial state. :githublink:`%|py|188` """ out = StringIO() err = StringIO() memo_out = sys.stdout memo_err = sys.stderr sys.stdout = out sys.stderr = err try: if list_args: fnbcexe(argv=list_args, **options_args) else: fnbcexe(**options_args) exc = None except SystemExit as e: # pragma: no cover exc = e except IndentationError as e: # pragma: no cover # This is change in IPython 6.0.0. # The conversion fails on IndentationError. # We switch to another one. from ..ipythonhelper import read_nb i = list_args.index("--template") format = list_args[i + 1] if format == "python": i = list_args.index("--output") dest = list_args[i + 1] if not dest.endswith(".py"): dest += ".py" src = list_args[-1] nb = read_nb(src) code = nb.to_python() with open(dest, "w", encoding="utf-8") as f: f.write(code) exc = None else: # We do nothing in this case. exc = e except (AttributeError, FileNotFoundError, ValueError) as e: exc = e except ExporterNameError as e: # pragma: no cover exc = e sys.stdout = memo_out sys.stderr = memo_err out = out.getvalue() err = err.getvalue() if exc: if "Unsupported mimetype 'text/html'" in str(exc): from nbconvert.nbconvertapp import main main(argv=list_args, **options_args) return "", "" env = "\n".join("{0}={1}".format(k, v) for k, v in sorted(os.environ.items())) raise RuntimeError( # pragma: no cover "Notebook conversion failed.\nfnbcexe\n{}\noptions_args\n{}" "\n--ARGS--\n{}\n--OUT--\n{}\n--ERR--\n{}\n--ENVIRON--\n{}" "".format(fnbcexe, options_args, list_args, out, err, env)) from exc return out, err
def _process_notebooks_in_private_cmd(fnbcexe, list_args, options_args, fLOG):
    """
    Runs script ``process_notebooks_cmd.py``, located in the same folder
    as this file, in a separate process with *run_cmd*.

    :param fnbcexe: unused
    :param list_args: command line arguments given to the script,
        an argument containing a space is surrounded by double quotes
        unless it already starts or ends with one
    :param options_args: unused
    :param fLOG: logging function, also given to *run_cmd*
    :return: what *run_cmd* returns
    """
    folder = os.path.dirname(os.path.abspath(__file__))
    script = os.path.join(folder, "process_notebooks_cmd.py")

    def _quote(arg):
        keep = arg[0] == '"' or arg[-1] == '"' or ' ' not in arg
        return arg if keep else '"{0}"'.format(arg)

    sargs = " ".join(_quote(arg) for arg in list_args)
    # 'w.exe' is replaced by '.exe' in the interpreter path
    # (pythonw.exe becomes python.exe)
    interpreter = sys.executable.replace("w.exe", ".exe")
    cmd = '"{0}" "{1}" {2}'.format(interpreter, script, sargs)
    fLOG("[_process_notebooks_in_private_cmd]", cmd)
    return run_cmd(cmd, wait=True, fLOG=fLOG)
[docs]def _preprocess_notebook(notebook_content): """ Preprocesses the content of a notebook. :param notebook_content: notebook content :return: modified content :githublink:`%|py|269` """ def walk_through(field): if isinstance(field, list): for f in field: walk_through(f) elif isinstance(field, dict): if (field.get('version_major', -1) == 2 and field.get('version_minor', -1) == 0): field['version_minor'] = 2 elif (field.get('nbformat', -1) == 4 and field.get('nbformat_minor', -1) in (0, 1)): field['nbformat_minor'] = 2 for _, v in field.items(): walk_through(v) content = json.loads(notebook_content) walk_through(content) new_content = json.dumps(content) return new_content
def _process_notebooks_in(notebooks, outfold, build, latex_path=None, pandoc_path=None,
                          formats=("ipynb", "html", "python", "rst",
                                   "slides", "pdf", "github"),
                          fLOG=fLOG, exc=True, nblinks=None, remove_unicode_latex=False,
                          notebook_replacements=None):
    """
    Converts every notebook into every requested format in folder
    *build*, then copies the results into *outfold*.

    :param notebooks: list of notebooks or a single file name
    :param outfold: folder which receives the final files
    :param build: temporary folder which contains all produced files
    :param latex_path: path to the latex compiler, guessed with
        *find_latex_path* if None
    :param pandoc_path: path to pandoc, guessed with
        *find_pandoc_path* if None
    :param formats: formats to convert into, *pdf* means latex
        then compilation, *github* is skipped by the conversion itself
    :param fLOG: logging function
    :param exc: given to *add_link_to_notebook*
    :param nblinks: given to the post processing functions
    :param remove_unicode_latex: remove unicode characters for latex
    :param notebook_replacements: dictionary indexed by format,
        the value is given to the post processing functions
    :return: list of tuple *[(file, True)]* for copied files followed by
        *[(file, False)]* for skipped conversions

    The notebook conversion does not handle images from url
    for :epkg:`pdf` and :epkg:`docx`. They could be downloaded first
    and replaced by local files.

    .. note::

        :epkg:`nbconvert` introduced a commit which breaks
        the conversion of notebooks in latex if they have
        a cell outputting *svg*
        (see `PR 910 <https://github.com/jupyter/nbconvert/pull/910>`_).
        Use `xelatex <https://doc.ubuntu-fr.org/xelatex>`_ if possible.

    :githublink:`%|py|308`
    """
    from nbconvert.nbconvertapp import main as nbconvert_main
    if pandoc_path is None:
        pandoc_path = find_pandoc_path()

    if latex_path is None:
        latex_path = find_latex_path()

    if isinstance(notebooks, str):
        notebooks = [notebooks]

    if "PANDOCPY" in os.environ and sys.platform.startswith("win"):  # pragma: no cover
        exe = os.environ["PANDOCPY"]
        exe = exe.rstrip("\\/")
        if exe.endswith("\\Scripts"):
            exe = exe[:len(exe) - len("Scripts") - 1]
        if not os.path.exists(exe):
            raise FileNotFoundError(exe)
        fLOG("[_process_notebooks_in] ** using PANDOCPY", exe)
    else:
        if sys.platform.startswith("win"):  # pragma: no cover
            from .utils_pywin32 import import_pywin32
            try:
                import_pywin32()
            except ModuleNotFoundError as e:
                warnings.warn(e)
        exe = os.path.split(sys.executable)[0]

    extensions = {"ipynb": ".ipynb", "latex": ".tex", "elatex": ".tex", "pdf": ".pdf",
                  "html": ".html", "rst": ".rst", "python": ".py", "docx": ".docx",
                  "word": ".docx", "slides": ".slides.html"}

    files = []
    skipped = []

    # main(argv=None, **kwargs)
    fnbc = nbconvert_main

    if "slides" in formats:
        build_slide = os.path.join(build, "bslides")
        if not os.path.exists(build_slide):
            os.mkdir(build_slide)

    copied_images = dict()

    for notebook_in in notebooks:
        thisfiles = []

        # we copy available images (only notebook folder)
        # in case they are used in latex
        currentdir = os.path.abspath(os.path.dirname(notebook_in))
        for curfile in os.listdir(currentdir):
            ext = os.path.splitext(curfile)[1]
            if ext in {'.png', '.jpg', '.bmp', '.gif', '.jpeg', '.svg', '.mp4'}:
                src = os.path.join(currentdir, curfile)
                if src not in copied_images:
                    dest = os.path.join(build, curfile)
                    shutil.copy(src, build)
                    fLOG("[_process_notebooks_in] copy '{}' to '{}'.".format(
                        src, build))
                    copied_images[src] = dest

        # copy of the notebook into the build folder
        # and changes the source
        _name = os.path.splitext(os.path.split(notebook_in)[-1])[0]
        _name += '.ipynb'
        notebook = os.path.join(build, _name)
        fLOG("[_process_notebooks_in] -- copy notebook '{}' to '{}'.".format(
            notebook_in, notebook))
        with open(notebook_in, "r", encoding="utf-8") as _f:
            content = _f.read()
        content = _preprocess_notebook(content)
        with open(notebook, "w", encoding="utf-8") as _f:
            _f.write(content)

        # next
        nbout = os.path.split(notebook)[-1]
        if " " in nbout:
            raise HelpGenException(
                "spaces are not allowed in notebooks file names: "
                "{0}".format(notebook))
        nbout = os.path.splitext(nbout)[0]
        # NOTE(review): *format* shadows the builtin and is rebound several
        # times in the loop (requested format, then nbconvert format,
        # then the extension of the produced file). *format_* keeps the
        # requested one.
        for format in formats:

            if format == "github":
                # we add a link on the rst page in that case
                continue

            if format not in extensions:
                raise NotebookConvertError(  # pragma: no cover
                    "Unable to find format: '{}' in {}".format(
                        format, ", ".join(extensions.keys())))

            # output
            format_ = format
            outputfile_noext = os.path.join(build, nbout)
            if format == 'html':
                outputfile = outputfile_noext + '2html' + extensions[format]
                outputfile_noext_fixed = outputfile_noext + '2html'
            else:
                outputfile = outputfile_noext + extensions[format]
                outputfile_noext_fixed = outputfile_noext
            trueoutputfile = outputfile
            pandoco = "docx" if format in ("word", "docx") else None

            # The function checks it was not done before.
            if os.path.exists(trueoutputfile):
                dto = os.stat(trueoutputfile).st_mtime
                dtnb = os.stat(notebook).st_mtime
                if dtnb < dto:  # pragma: no cover
                    fLOG("[_process_notebooks_in] -- skipping notebook", format,
                         notebook, "(", trueoutputfile, ")")
                    if trueoutputfile not in thisfiles:
                        thisfiles.append(trueoutputfile)
                    if pandoco is None:
                        skipped.append(trueoutputfile)
                        continue
                    out2 = os.path.splitext(
                        trueoutputfile)[0] + "." + pandoco
                    if os.path.exists(out2):
                        skipped.append(trueoutputfile)
                        continue

            # if the format is slides, we update the metadata
            options_args = {}
            if format == "slides":
                nb_slide = add_tag_for_slideshow(notebook, build_slide)
                fnbcexe = fnbc
            else:
                nb_slide = None
                fnbcexe = fnbc

            # compilation
            list_args = []
            custom_config = os.path.join(os.path.abspath(
                os.path.dirname(__file__)), "_nbconvert_config.py")
            if format == "pdf":
                if not os.path.exists(custom_config):
                    raise FileNotFoundError(custom_config)
                list_args.extend(['--config', '"%s"' % custom_config])
                format = "latex"
                compilation = True
                thisfiles.append(os.path.splitext(outputfile)[0] + ".tex")
            elif format in ("latex", "elatex"):
                if not os.path.exists(custom_config):
                    raise FileNotFoundError(custom_config)
                list_args.extend(['--config', '"%s"' % custom_config])
                compilation = False
                format = "latex"
            elif format in ("word", "docx"):
                # html first, pandoc converts it into docx later
                format = "html"
                compilation = False
            elif format in ("slides", ):
                list_args.extend(["--reveal-prefix", "reveal.js"])
                compilation = False
            else:
                compilation = False

            # output
            # set templates to None to avoid error
            # No template sub-directory with name 'article' found in the following paths:
            templ = {'html': None, 'latex': None,
                     'elatex': None}.get(format, format)
            fLOG("[_process_notebooks_in] ### convert into '{}' (done: {}): '{}' -> '{}'".format(
                format_, os.path.exists(outputfile), notebook, outputfile))

            list_args.extend(["--output", outputfile_noext_fixed])
            if templ is not None and format != "slides":
                list_args.extend(["--template", templ])

            # execution
            if format not in ("ipynb", ):
                # nbconvert is messing up with static variables in sphinx or
                # docutils if format is slides, not sure about the others
                if format in ('rst', ):
                    fLOG("[_process_notebooks_in] NBcn:", format, options_args)
                    nb2rst(notebook, outputfile, post_process=False)
                    err = ""
                    c = ""
                elif nbconvert_main != fnbcexe or format not in (
                        "slides", "elatex", "latex", "pdf", "html"):
                    if options_args:
                        fLOG("[_process_notebooks_in] NBp*:",
                             format, options_args)
                    else:
                        list_args.extend(["--to", format,
                                          notebook if nb_slide is None else nb_slide])
                        fLOG(
                            "[_process_notebooks_in] NBc* format='{}' args={}".format(format, list_args))
                        fLOG("[_process_notebooks_in] cwd='{}'".format(
                            os.getcwd()))

                    c = " ".join(list_args)
                    out, err = _process_notebooks_in_private(
                        fnbcexe, list_args, options_args)
                else:
                    # conversion into slides alter Jinja2 environment
                    # jinja2.exceptions.TemplateNotFound: rst
                    if options_args:
                        fLOG("[_process_notebooks_in] NBp+:",
                             format, options_args)
                    else:
                        list_args.extend(["--to", format,
                                          notebook if nb_slide is None else nb_slide])
                        fLOG("[_process_notebooks_in] NBc+:",
                             format, list_args)
                        fLOG("[_process_notebooks_in]", os.getcwd())

                    c = " ".join(list_args)
                    out, err = _process_notebooks_in_private_cmd(
                        fnbcexe, list_args, options_args, fLOG)

                if "raise ImportError" in err or "Unknown exporter" in err:
                    raise ImportError(
                        "cmd: {0} {1}\n--ERR--\n{2}".format(fnbcexe, list_args, err))
                if len(err) > 0:
                    if format in ("elatex", "latex"):
                        # There might be some errors because the latex script needs to be post-processed
                        # sometimes (wrong characters such as " or formulas not
                        # captured as formulas).
                        if err and "usage: process_notebooks_cmd.py" in err:
                            raise RuntimeError(
                                "Unable to convert a notebook\n----\n{}----\n{}\n"
                                "---ERR---\n{}\n---OUT---\n{}".format(
                                    fnbcexe, list_args, err, out))
                        fLOG("[_process_notebooks_in] LATEX --ERR--\n" + err)
                        fLOG("[_process_notebooks_in] LATEX --OUT--\n" + out)
                    else:
                        err = err.lower()
                        if "critical" in err or "bad config" in err:
                            raise HelpGenException(
                                "CMD:\n{0}\n[nberror]\n{1}".format(list_args, err))
            else:
                # format ipynb
                # we do nothing
                pass

            format = extensions[format].strip(".")

            # we add the file to the list of generated files
            if outputfile not in thisfiles:
                thisfiles.append(outputfile)
            fLOG("[_process_notebooks_in] -",
                 format, compilation, outputfile)

            if compilation:
                # compilation latex
                if not sys.platform.startswith("win") or os.path.exists(latex_path):
                    lat = find_pdflatex(latex_path)

                    tex = set(_ for _ in thisfiles if os.path.splitext(
                        _)[-1] == ".tex")
                    if len(tex) != 1:
                        raise FileNotFoundError(
                            "No latex file was generated or more than one (={0}), nb={1}\nthisfile=\n{2}".format(
                                len(tex), notebook, "\n".join(thisfiles)))
                    tex = list(tex)[0]
                    try:
                        post_process_latex_output_any(
                            tex, custom_latex_processing=None, nblinks=nblinks,
                            remove_unicode=remove_unicode_latex, fLOG=fLOG)
                    except FileNotFoundError as e:
                        mes = ("[_process_notebooks_in-ERROR] Unable to convert into latex "
                               "notebook %r due to %r.") % (tex, e)
                        warnings.warn(mes, RuntimeWarning)
                        fLOG(mes)
                        continue
                    # -interaction=batchmode
                    c = '"{0}" "{1}" -max-print-line=900 -output-directory="{2}"'.format(
                        lat, tex, os.path.split(tex)[0])
                    fLOG("[_process_notebooks_in] ** LATEX compilation (b)", c)
                    if not sys.platform.startswith("win"):
                        c = c.replace('"', '')
                    if sys.platform.startswith("win"):
                        change_path = None
                    else:
                        # On Linux the parameter --output-directory is sometimes ignored.
                        # And it only works from the current directory.
                        change_path = os.path.split(tex)[0]
                    out, err = run_cmd(
                        c, wait=True, log_error=False, shell=sys.platform.startswith("win"),
                        catch_exit=True, prefix_log="[latex] ", change_path=change_path)
                    if out is not None and ("Output written" in out or 'bytes written' in out):
                        # The output was produced. We ignore the return code.
                        fLOG("[_process_notebooks_in] WARNINGS: "
                             "Latex compilation had warnings:", c)
                        out += "\n--ERR--\n" + err
                        err = ""
                    if len(err) > 0:
                        raise HelpGenException(
                            "CMD:\n{0}\n[nberror]\n{1}\nOUT:\n{2}------".format(c, err, out))
                    f = os.path.join(build, nbout + ".pdf")
                    if not os.path.exists(f):  # pragma: no cover
                        # On Linux the parameter --output-directory is sometimes ignored.
                        # And it only works from the current directory.
                        # We check again.
                        loc = os.path.split(f)[-1]
                        if os.path.exists(loc):
                            # We move the file.
                            moved = True
                            shutil.move(loc, f)
                        else:
                            moved = False
                        if not os.path.exists(f):
                            # a distinct name avoids overwriting list *files*
                            listing = "\n".join(os.listdir(build))
                            msg = "Content of '{0}':\n{1}\n----\n'{2}' moved? {3}\nCMD:\n{4}".format(
                                build, listing, loc, moved, c)
                            raise HelpGenException(
                                "Missing file: '{0}'\nCMD\n{4}\nOUT:\n{2}\n[nberror]\n{1}\n-----\n{3}".format(
                                    f, err, out, msg, c))
                    thisfiles.append(f)
                else:
                    fLOG("[_process_notebooks_in] unable to find latex in", latex_path)

            elif pandoco is not None:  # pragma: no cover
                # compilation pandoc
                fLOG("[_process_notebooks_in] ** pandoc compilation (b)", pandoco)
                inputfile = os.path.splitext(outputfile)[0] + ".html"
                outfilep = os.path.splitext(outputfile)[0] + "." + pandoco

                # for some files, the following error might appear:
                # Stack space overflow: current size 33692 bytes.
                # Use `+RTS -Ksize -RTS' to increase it.
                # it usually means there is something wrong (circular
                # reference, ...)
                if sys.platform.startswith("win"):
                    c = '"{0}\\pandoc.exe" +RTS -K32m -RTS -f html -t {1} "{2}" -o "{3}"'.format(
                        pandoc_path, pandoco, inputfile, outfilep)
                else:
                    # pandoc reads the html file produced by nbconvert,
                    # *outputfile* is the docx file it has to create
                    c = 'pandoc +RTS -K32m -RTS -f html -t {0} "{1}" -o "{2}"'.format(
                        pandoco, inputfile, outfilep)

                if not sys.platform.startswith("win"):
                    c = c.replace('"', '')

                out, err = run_cmd(
                    c, wait=True, log_error=False, shell=sys.platform.startswith("win"))
                if len(err) > 0:
                    lines = err.strip("\r\n").split("\n")
                    # we filter out the message
                    # pandoc.exe: Could not find image `https://
                    left = [
                        _ for _ in lines if _ and "Could not find image `http" not in _]
                    if len(left) > 0:
                        raise HelpGenException(
                            "issue with cmd: %s\n[nberror]\n%s" % (c, err))
                    for _ in lines:
                        fLOG("[_process_notebooks_in] w, pandoc issue: {0}".format(
                            _.strip("\n\r")))

                outputfile = outfilep
                format = "docx"

            nb_replacements = notebook_replacements.get(
                format, None) if notebook_replacements else None

            if format == "html":
                # we add a link to the notebook
                if not os.path.exists(outputfile):
                    raise FileNotFoundError(  # pragma: no cover
                        outputfile + "\nCONTENT in " + os.path.dirname(outputfile) + ":\n" + "\n".join(
                            os.listdir(os.path.dirname(outputfile))) + "\n[nberror]\n" + err + "\nOUT:\n" + out + "\nCMD:\n" + c)
                thisfiles += add_link_to_notebook(outputfile, notebook, "pdf" in formats, False,
                                                  "python" in formats, "slides" in formats,
                                                  exc=exc, nblinks=nblinks, fLOG=fLOG,
                                                  notebook_replacements=nb_replacements)

            elif format == "slides.html":
                # we add a link to the notebook
                if not os.path.exists(outputfile):
                    raise FileNotFoundError(  # pragma: no cover
                        outputfile + "\nCONTENT in " + os.path.dirname(outputfile) + ":\n" + "\n".join(
                            os.listdir(os.path.dirname(outputfile))) + "\n[nberror]\n" + err + "\nOUT:\n" + out + "\nCMD:\n" + str(list_args))
                thisfiles += add_link_to_notebook(outputfile, notebook, "pdf" in formats, False,
                                                  "python" in formats, "slides" in formats,
                                                  exc=exc, nblinks=nblinks, fLOG=fLOG,
                                                  notebook_replacements=nb_replacements)

            elif format == "ipynb":
                # we just copy the notebook
                thisfiles += add_link_to_notebook(outputfile, notebook, "ipynb" in formats, False,
                                                  "python" in formats, "slides" in formats,
                                                  exc=exc, nblinks=nblinks, fLOG=fLOG,
                                                  notebook_replacements=nb_replacements)

            elif format == "rst":
                # It adds a link to the notebook.
                thisfiles += add_link_to_notebook(
                    outputfile, notebook, "pdf" in formats, "html" in formats, "python" in formats,
                    "slides" in formats, exc=exc, github="github" in formats,
                    notebook=notebook, nblinks=nblinks, fLOG=fLOG)

            elif format in ("tex", "elatex", "latex", "pdf"):
                thisfiles += add_link_to_notebook(outputfile, notebook, False, False,
                                                  False, False, exc=exc, nblinks=nblinks,
                                                  fLOG=fLOG, notebook_replacements=nb_replacements)

            elif format in ("py", "python"):
                post_process_python_output(
                    outputfile, True, nblinks=nblinks, fLOG=fLOG,
                    notebook_replacements=nb_replacements)

            elif format in ["docx", "word"]:
                pass

            else:
                raise HelpGenException("unexpected format " + format)

        files.extend(thisfiles)
        fLOG("[_process_notebooks_in] ### conversion into '{}' done into '{}'.".format(
            format_, outputfile))

    copy = []
    for f in files:
        dest = os.path.join(outfold, os.path.split(f)[-1])
        if not f.endswith(".tex"):
            # latex files stay in the build folder
            try:
                shutil.copy(f, outfold)
                fLOG("[_process_notebooks_in] copy ",
                     f, " to ", outfold, "[", dest, "]")
            except shutil.SameFileError:
                fLOG("[_process_notebooks_in] w,file ",
                     dest, "already exists")

            if not os.path.exists(dest):
                raise FileNotFoundError(dest)
        copy.append((dest, True))

    # image
    copied_extensions = (".png", ".html", ".pdf", ".svg",
                         ".jpg", ".gif", ".xml", ".jpeg")
    for image in os.listdir(build):
        if image.endswith(copied_extensions):
            image = os.path.join(build, image)
            dest = os.path.join(outfold, os.path.split(image)[-1])

            try:
                shutil.copy(image, outfold)
                fLOG("[_process_notebooks_in] copy ",
                     image, " to ", outfold, "[", dest, "]")
            except shutil.SameFileError:
                fLOG("[_process_notebooks_in] w,file ",
                     dest, "already exists")

            if not os.path.exists(dest):
                raise FileNotFoundError(dest)  # pragma: no cover
            copy.append((dest, True))

    return copy + [(_, False) for _ in skipped]
def add_tag_for_slideshow(ipy, folder, encoding="utf8"):
    """
    Modifies a notebook to add tag for a slideshow.
    The modified notebook is saved in *folder* under the same
    file name, the original file is only read.

    :param ipy: notebook file
    :param folder: where to write the new notebook
    :param encoding: encoding used to read the notebook
    :return: written file

    :githublink:`%|py|938`
    """
    from ..ipythonhelper import read_nb
    output = os.path.join(folder, os.path.basename(ipy))
    notebook = read_nb(ipy, encoding=encoding, kernel=False)
    notebook.add_tag_slide()
    notebook.to_json(output)
    return output
def build_all_notebooks_coverage(nbs, fileout, module_name, dump=None, badge=True, too_old=30, fLOG=noLOG):
    """
    Creates a :epkg:`rst` page (gallery) with links to all notebooks and
    information about coverage.
    It relies on function :func:`notebook_coverage <pyquickhelper.ipythonhelper.run_notebook.notebook_coverage>`.

    :param nbs: list of notebooks to consider or tuple(full path, rst),
    :param fileout: file to create
    :param module_name: module name
    :param dump: dump containing information about notebook execution
        (or None for the default one, ``_notebook_dumps/notebook.<module_name>.txt``
        four levels above the folder of *fileout*)
    :param badge: builds an image with the notebook coverage
    :param too_old: drop executions older than *too_old* days from now
    :param fLOG: logging function
    :return: dataframe which contains the data (a copy made before
        the values are formatted for the page) or None if the dump
        does not exist, nothing is written in that case

    :githublink:`%|py|1226`
    """
    from ..ipythonhelper import read_nb, notebook_coverage
    out_folder = os.path.dirname(fileout)
    if dump is None:
        dump = os.path.normpath(os.path.join(
            out_folder, "..", "..", "..", "..", "_notebook_dumps",
            "notebook.{0}.txt".format(module_name)))
    if not os.path.exists(dump):
        fLOG("[notebooks-coverage] No execution report about "
             "notebook at '{0}' (fileout='{1}')".format(dump, out_folder))
        return None
    report0 = notebook_coverage(nbs, dump, too_old=too_old)
    fLOG("[notebooks-coverage] report shape", report0.shape)

    from numpy import isnan

    # Fill nan values: a missing number of cells is obtained
    # by loading the notebook.
    for idx in report0.index:
        if not isnan(report0.loc[idx, "nbcell"]):
            continue
        loaded = read_nb(report0.loc[idx, "notebooks"])
        report0.loc[idx, "nbcell"] = len(loaded)
        report0.loc[idx, "nbrun"] = 0

    # Add links.
    def two_last_parts(path):
        # keeps '<folder>/<file>' with '/' as a separator
        if not isinstance(path, str):
            return path
        pieces = os.path.normpath(path).replace("\\", "/").split("/")
        return "/".join(pieces[-2:])

    def last_part(path):
        return os.path.split(path)[-1] if isinstance(path, str) else path

    def clean_link(link):
        if not isinstance(link, str):
            return link
        return link.replace("_", "").replace(".ipynb", ".rst").replace(".", "")

    def ref_with_label(row):
        return ':ref:`{0} <{1}>`'.format(
            row["notebooks"], clean_link(row["last_name"]))

    def ref_only(name):
        return ':ref:`{0}`'.format(clean_link(name))

    cols = ['notebooks', 'date', 'etime',
            'nbcell', 'nbrun', 'nbvalid', 'success', 'time']
    report = report0[cols].copy()
    report["notebooks"] = report["notebooks"].apply(two_last_parts)
    report["last_name"] = report["notebooks"].apply(last_part)
    # returned value, saved before the columns become rst text
    report1 = report.copy()

    report["notebooks"] = report.apply(ref_with_label, axis=1)
    report["title"] = report["last_name"].apply(ref_only)
    rows = ["", ".. _l-notebooks-coverage:", "", "", "Notebooks Coverage",
            "==================", "", "Report on last executions.", ""]

    # Badge
    if badge:
        from ..ipythonhelper import badge_notebook_coverage
        img = os.path.join(out_folder, "nbcov.png")
        cov = badge_notebook_coverage(report0, img)
        now = datetime.datetime.now()
        sdate = "%04d-%02d-%02d" % (now.year, now.month, now.day)
        # dated copy of the badge
        cpy = os.path.join(out_folder, "nbcov-%s.png" % sdate)
        shutil.copy(img, cpy)
        top_badge = ["{0:0.00f}% {1}".format(cov, sdate), "",
                     ".. image:: {0}".format(os.path.split(cpy)[-1]), ""]
        bottom_badge = ["", ".. image:: {0}".format(os.path.split(img)[-1]), ""]
    else:
        top_badge = []
        bottom_badge = []
    rows.extend(top_badge)

    # Formatting
    def first_word(value):
        return value.split()[0] if isinstance(value, str) else value

    def three_decimals(value):
        return "%1.3f" % value if isinstance(value, float) else value

    def int2str(x):
        return "" if isnan(x) else int(x)

    def percentage(value):
        return "{0}%".format(int(value * 100)) if isinstance(value, float) else ""

    report["date"] = report["date"].apply(first_word)
    for col in ("etime", "time"):
        report[col] = report[col].apply(three_decimals)

    # the ratio is computed before the counts are converted
    report["coverage"] = report["nbrun"] / report["nbcell"]
    for col in ("nbcell", "nbrun", "nbvalid"):
        report[col] = report[col].apply(int2str)
    report["coverage"] = report["coverage"].apply(percentage)

    report = report[['notebooks', 'title', 'date', 'success', 'etime',
                     'nbcell', 'nbrun', 'nbvalid', 'time', 'coverage']].copy()
    report.columns = ['name', 'title', 'last execution', 'success', 'time',
                      'nb cells', 'nb runs', 'nb valid', 'exe time', 'coverage']
    report = report[['coverage', 'exe time', 'last execution', 'name', 'title',
                     'success', 'time', 'nb cells', 'nb runs', 'nb valid']]

    # Add results.
    ordered = report.sort_values("name").reset_index(drop=True)
    rows.append(df2rst(ordered, index=True, list_table=True))
    rows.extend(bottom_badge)

    fLOG("[notebooks-coverage] writing", fileout)
    with open(fileout, "w", encoding="utf-8") as f:
        f.write("\n".join(rows))
    return report1