Source code for pyquickhelper.helpgen.rst_converters

"""
Helpers to convert docstring to various format.


:githublink:`%|py|5`
"""
import re
import textwrap
import os
from io import StringIO
from docutils import core, languages
from docutils.io import StringInput, StringOutput
from .utils_sphinx_doc import migrating_doxygen_doc
from .helpgen_exceptions import HelpGenConvertError
from ..texthelper.texts_language import TITLES
from ..loghelper.flog import noLOG


[docs]def default_sphinx_options(fLOG=noLOG, **options): """ Defines or overrides default options for :epkg:`Sphinx`, listed below. .. runpython:: from pyquickhelper.helpgen.rst_converters import default_sphinx_options options = default_sphinx_options() for k, v in sorted(options.items()): print("{0} = {1}".format(k, v)) .. versionchanged:: 1.8 Disables :epkg:`latex` if not available on :epkg:`Windows`. :githublink:`%|py|30` """ # delayed import to speed up time from .conf_path_tools import find_graphviz_dot, find_dvipng_path res = { # 'output_encoding': options.get('output_encoding', 'unicode'), # 'doctitle_xform': options.get('doctitle_xform', True), # 'initial_header_level': options.get('initial_header_level', 2), # 'input_encoding': options.get('input_encoding', 'utf-8-sig'), 'blog_background': options.get('blog_background', False), 'sharepost': options.get('sharepost', None), 'todoext_link_only': options.get('todoext_link_only', False), 'mathdef_link_only': options.get('mathdef_link_only', True), 'blocref_link_only': options.get('blocref_link_only', False), 'faqref_link_only': options.get('faqref_link_only', False), 'nbref_link_only': options.get('nbref_link_only', False), 'todo_link_only': options.get('todo_link_only', False), 'language': options.get('language', 'en'), # 'outdir': options.get('outdir', '.'), # 'imagedir': options.get('imagedir', '.'), # 'confdir': options.get('confdir', '.'), # 'doctreedir': options.get('doctreedir', '.'), 'math_number_all': options.get('math_number_all', False), # graphviz 'graphviz_output_format': options.get('graphviz_output_format', 'png'), 'graphviz_dot': options.get('graphviz_dot', find_graphviz_dot(exc=False)), # latex 'imgmath_image_format': options.get('imgmath_image_format', 'png'), # containers 'out_blogpostlist': [], 'out_runpythonlist': [], # 'warning_stream': StringIO(), } if res['imgmath_image_format'] == 'png': try: imgmath_latex, imgmath_dvipng, imgmath_dvisvgm = find_dvipng_path( exc=False) has_latex = True except FileNotFoundError: # miktex is not available, has_latex = False if has_latex: res['imgmath_latex'] = imgmath_latex res['imgmath_dvipng'] = imgmath_dvipng res['imgmath_dvisvgm'] = imgmath_dvisvgm for k, v in options.items(): if k not in res: res[k] = v return res
[docs]def rst2html(s, fLOG=noLOG, writer="html", keep_warnings=False, directives=None, language="en", layout='docutils', document_name="<<string>>", external_docnames=None, filter_nodes=None, new_extensions=None, update_builder=None, ret_doctree=False, load_bokeh=False, destination=None, destination_path=None, **options): """ Converts a string from :epkg:`RST` into :epkg:`HTML` format or transformed :epkg:`RST`. :param s: string to convert :param fLOG: logging function (warnings will be logged) :param writer: ``'html'`` for :epkg:`HTML` format, ``'rst'`` for :epkg:`RST` format, ``'md'`` for :epkg:`MD` format, ``'elatex'`` for :epkg:`latex` format, ``'doctree'`` to get the doctree, *writer* can also be a tuple for custom formats and must be like ``('buider_name', builder_class)``. :param keep_warnings: keep_warnings in the final HTML :param directives: new directives to add (see below) :param language: language :param layout: ``'docutils'``, ``'sphinx'``, ``'sphinx_body'``, see below. :param document_name: document name, not really important since the input is a string :param external_docnames: if the string to parse makes references to other documents, if one is missing, an exception is raised. :param filter_nodes: transforms the doctree before writing the results (layout must be 'sphinx'), the function takes a doctree as a single parameter :param new_extensions: additional extension to setup :param update_builder: update the builder after it is instantiated :param ret_doctree: returns the doctree :param load_bokeh: load :epkg:`bokeh` extensions, disabled by default as it takes a few seconds :param destination: set a destination (requires for some extension) :param destination_path: set a destination path (requires for some extension) :param options: :epkg:`Sphinx` options see `Render math as images <http://www.sphinx-doc.org/en/stable/ext/math.html#module-sphinx.ext.imgmath>`_, a subset of options is used, see :func:`default_sphinx_options <pyquickhelper.helpgen.rst_converters.default_sphinx_options>`. By default, the theme (option *html_theme*) will ``'basic'``. :return: HTML format *directives* is None or a list of 2 or 5-uple: * a directive name (mandatory) * a directive class: see `Sphinx Directive <http://sphinx-doc.org/extdev/tutorial.html>`_, see also :class:`RunPythonDirective <pyquickhelper.sphinxext.sphinx_runpython_extension.RunPythonDirective>` as an example (mandatory) * a docutils node: see :class:`runpython_node <pyquickhelper.sphinxext.sphinx_runpython_extension.runpython_node>` as an example * two functions: see :func:`visit_runpython_node <pyquickhelper.sphinxext.sphinx_runpython_extension.visit_runpython_node>`, :func:`depart_runpython_node` as an example The parameter *layout* specify the kind of HTML you need. * ``'docutils'``: very simple :epkg:`HTML`, style is not included, recursive directives are not processed (recursive means they modify the doctree). The produced :epkg:`HTML` only includes the body (no :epkg:`HTML` header). * ``'sphinx'``: in memory :epkg:`sphinx`, the produced :epkg:`HTML` includes the header, it is also recursive as directives can modify the doctree. * ``'sphinx_body'``: same as ``'sphinx'`` but only the body is returned. If the writer is a tuple, it must be a 2-uple ``(builder_name, builder_class)``. However, the builder class must contain an attribute ``_writer_class`` with the associated writer. The builcer class must also implement a method ``iter_pages`` which enumerates all written pages: ``def iter_pages(self) -> Dict[str,str]`` where the key is the document name and the value is its content. .. exref:: :title: How to test a Sphinx directive? The following code defines a simple directive definedbased on an existing one. It also defined what to do if a new node is inserted in the documentation. :: from docutils import nodes from pyquickhelper.helpgen import rst2html class runpythonthis_node(nodes.Structural, nodes.Element): pass class RunPythonThisDirective (RunPythonDirective): runpython_class = runpythonthis_node def visit_node(self, node): self.body.append("<p><b>visit_node</b></p>") def depart_node(self, node): self.body.append("<p><b>depart_node</b></p>") content = ''' test a directive ================ .. runpythonthis:: print("this code shoud appear" + "___") '''.replace(" ", "") # to remove spaces at the beginning of the line tives = [ ("runpythonthis", RunPythonThisDirective, runpythonthis_node, visit_node, depart_node) ] html = rst2html(content, writer="html", keep_warnings=True, directives=tives) Unfortunately, this functionality is only tested on :epkg:`Python` 3. It might not work on :epkg:`Python` 2.7. The function produces files if the document contains latex converted into image. .. faqref:: :title: How to get more about latex errors? :index: latex :epkg:`Sphinx` is not easy to use when it comes to debug latex expressions. I did not find an easy way to read the error returned by latex about a missing bracket or an unknown command. I finally added a short piece of code in ``sphinx.ext.imgmath.py`` just after the call to the executable indicated by *imgmath_latex* :: if b'...' in stdout or b'LaTeX Error' in stdout: print(self.builder.config.imgmath_latex_preamble) print(p.returncode) print("################") print(latex) print("..........") print(stdout.decode("ascii").replace("\\r", "")) print("-----") print(stderr) It displays the output if an error happened. .. faqref:: :title: How to hide command line window while compiling latex? :lid: command line window :epkg:`Sphinx` calls :epkg:`latex` through command line. On :epkg:`Windows`, a command line window can annoyingly show up anytime a formula is compiled. The following can be added to hide it: :: startupinfo = STARTUPINFO() startupinfo.dwFlags |= STARTF_USESHOWWINDOW And ``, startupinfo=startupinfo`` must be added to lines ``p = Popen(...``. By default, the function now interprets :epkg:`Sphinx` directives and not only *docutils* ones. Parameter *directives* adds a directive before parsing the :epkg:`RST`. The function is more consistent. Format ``rst`` is available as well as custom builders. .. versionchanged:: 1.8 New nodes are now optional in *directives*. Markdown format was added. Parameters *ret_doctree*, *load_bokeh* were added. :githublink:`%|py|247` """ # delayed import to speed up time def _get_MockSphinxApp(): from .sphinxm_mock_app import MockSphinxApp return MockSphinxApp MockSphinxApp = _get_MockSphinxApp() if 'html_theme' not in options: options['html_theme'] = 'basic' defopt = default_sphinx_options(**options) if "master_doc" not in defopt: defopt["master_doc"] = document_name if writer in ('latex', 'elatex') and 'latex_documents' not in defopt: latex_documents = [(document_name, ) * 5] defopt['latex_documents'] = latex_documents if writer in ["custom", "sphinx", "HTMLWriterWithCustomDirectives", "html"]: mockapp, writer, title_names = MockSphinxApp.create( "sphinx", directives, confoverrides=defopt, new_extensions=new_extensions, load_bokeh=load_bokeh, fLOG=fLOG, destination_path=destination_path) writer_name = "HTMLWriterWithCustomDirectives" elif writer in ("rst", "md", "latex", "elatex", 'text', 'doctree'): writer_name = writer mockapp, writer, title_names = MockSphinxApp.create( writer, directives, confoverrides=defopt, new_extensions=new_extensions, load_bokeh=load_bokeh, fLOG=fLOG, destination_path=destination_path) elif isinstance(writer, tuple): # We extect something like ("builder_name", builder_class) writer_name = writer mockapp, writer, title_names = MockSphinxApp.create( writer, directives, confoverrides=defopt, new_extensions=new_extensions, load_bokeh=load_bokeh, fLOG=fLOG, destination_path=destination_path) else: raise ValueError( "Unexpected writer '{0}', should be 'rst' or 'html' or 'md' or 'elatex' or 'text'.".format(writer)) if writer is None and directives is not None and len(directives) > 0: raise NotImplementedError( "The writer must not be null if custom directives will be added, check the documentation of the fucntion.") # delayed import to speed up time from sphinx.environment import default_settings settings_overrides = default_settings.copy() settings_overrides["warning_stream"] = StringIO() settings_overrides["master_doc"] = document_name settings_overrides["source"] = document_name settings_overrides["contentsname"] = document_name settings_overrides.update({k: v[0] for k, v in mockapp.new_options.items()}) # next settings_overrides.update(defopt) config = mockapp.config config.blog_background = True config.blog_background_page = False config.sharepost = None if hasattr(writer, "add_configuration_options"): writer.add_configuration_options(mockapp.new_options) for k in {'outdir', 'imagedir', 'confdir', 'doctreedir'}: setattr(writer.builder, k, settings_overrides.get(k, '')) if destination_path is not None: writer.builder.outdir = destination_path if update_builder: update_builder(writer.builder) env = mockapp.env if env is None: raise ValueError("No environment was built.") env.temp_data["docname"] = document_name env.temp_data["source"] = document_name mockapp.builder.env.temp_data["docname"] = document_name mockapp.builder.env.temp_data["source"] = document_name settings_overrides["env"] = env lang = languages.get_language(language) for name in title_names: if name not in lang.labels: lang.labels[name] = TITLES[language][name] for k, v in sorted(settings_overrides.items()): fLOG("[rst2html] {0}={1}{2}".format( k, v, " --- added" if hasattr(config, k) else "")) for k, v in sorted(settings_overrides.items()): if hasattr(writer.builder.config, k) and writer.builder.config[k] != v: writer.builder.config[k] = v _, pub = core.publish_programmatically( source=s, source_path=None, destination_path=destination_path, writer=writer, writer_name=writer_name, settings_overrides=settings_overrides, source_class=StringInput, destination_class=StringOutput, destination=destination, reader=None, reader_name='standalone', parser=None, parser_name='restructuredtext', settings=None, settings_spec=None, config_section=None, enable_exit_status=False) doctree = pub.document if filter_nodes is not None: if layout == "docutils" and writer != "doctree": raise ValueError( "filter_nodes is not None, layout must not be 'docutils'") filter_nodes(doctree) mockapp.finalize(doctree, external_docnames=external_docnames) parts = pub.writer.parts if not keep_warnings: if isinstance(parts["whole"], list): # Not html. exp = "".join(parts["whole"]) else: exp = re.sub( '(<div class="system-message">(.|\\n)*?</div>)', "", parts["whole"]) else: if isinstance(parts["whole"], list): exp = "".join(parts["whole"]) else: exp = parts["whole"] if ret_doctree: return doctree if layout == "docutils": return exp else: page = None pages = [] main = ("/{0}.m.html".format(document_name), "/{0}.m.{1}".format(document_name, writer_name), document_name) if not hasattr(writer.builder, "iter_pages"): raise AttributeError( "Class '{0}' must have a method 'iter_pages' which returns a dictionary.".format(writer.builder)) contents = [] for k, v in writer.builder.iter_pages(): pages.append(k) contents.append(v) if k in main: page = v break if page is None and len(contents) == 1: page = contents[0] if page is None: raise ValueError( "No page contents was produced, only '{0}'.".format(pages)) if layout == "sphinx": if isinstance(page, str): return page else: return "\n".join(page) elif layout == "sphinx_body": lines = page.replace('</head>', '</head>\n').split("\n") keep = [] begin = False for line in lines: s = line.strip(" \n\r") if s == "</body>": begin = False if begin: keep.append(line) if s == "<body>": begin = True res = "\n".join(keep) return res else: raise ValueError( "Unexpected value for layout '{0}'".format(layout))
[docs]def correct_indentation(text): """ Tries to improve the indentation before running :epkg:`docutils`. :param text: text to correct :return: corrected text :githublink:`%|py|429` """ title = {} rows = text.split("\n") for row in rows: row = row.replace("\t", " ") cr = row.lstrip() ind = len(row) - len(cr) tit = cr.strip("\r\n\t ") if len(tit) > 0 and tit[0] in "-+=*^" and tit == tit[0] * len(tit): title[ind] = title.get(ind, 0) + 1 if len(title) > 0: mint = min(title.keys()) else: mint = 0 if mint > 0: newrows = [] for row in rows: i = 0 while i < len(row) and row[i] == ' ': i += 1 rem = min(i, mint) if rem > 0: newrows.append(row[rem:]) else: newrows.append(row) return "\n".join(newrows) else: return text
[docs]def docstring2html(function_or_string, format="html", fLOG=noLOG, writer="html", keep_warnings=False, directives=None, language="en", layout='docutils', document_name="<<string>>", filter_nodes=None, **options): """ Converts a docstring into a :epkg:`HTML` format. :param function_or_string: function, class, method or doctring :param format: output format (``'html'`` or '``rawhtml``') :param fLOG: logging function :param writer: ``'html'`` for :epkg:`HTML` format, ``'rst'`` for :epkg:`RST` format, ``'md'`` for :epkg:`MD` format :param keep_warnings: keep_warnings in the final :epkg:`HTML` :param directives: new directives to add (see below) :param language: language :param layout: ``'docutils'``, ``'sphinx'``, ``'sphinx_body'``, see below. :param document_name: document_name for this string :param filter_nodes: transform the doctree before writing the results (layout must be 'sphinx') :param options: Sphinx options see `Render math as images <http://www.sphinx-doc.org/en/stable/ext/math.html#module-sphinx.ext.imgmath>`_, a subset of options is used, see :func:`default_sphinx_options <pyquickhelper.helpgen.rst_converters.default_sphinx_options>`. By default, the theme (option *html_theme*) will ``'basic'``. :return: (str) :epkg:`HTML` format or (IPython.core.display.HTML) .. exref:: :title: Produce HTML documentation for a function or class The following code can display the dosstring in :epkg:`HTML` format to display it in a :epkg:`notebook`. :: from pyquickhelper.helpgen import docstring2html import sklearn.linear_model docstring2html(sklearn.linear_model.LogisticRegression) The output format is defined by: * ``'html'``: IPython :epkg:`HTML` object * ``'rawhtml'``: :epkg:`HTML` as text + style * ``'rst'``: :epkg:`rst` * ``'text'``: raw text .. versionchanged:: 1.8 Markdown format was added. :githublink:`%|py|510` """ if not isinstance(function_or_string, str): doc = function_or_string.__doc__ else: doc = function_or_string if format == "text": return doc if doc is None: return "" javadoc = migrating_doxygen_doc(doc, "None", log=False)[1] rows = javadoc.split("\n") from .utils_sphinx_doc import _private_migrating_doxygen_doc rst = _private_migrating_doxygen_doc( rows, index_first_line=0, filename="None") rst = "\n".join(rst) ded = textwrap.dedent(rst) try: html = rst2html(ded, fLOG=fLOG, writer=writer, keep_warnings=keep_warnings, directives=directives, language=language, filter_nodes=filter_nodes, document_name=document_name, layout=layout, **options) except Exception: # we check the indentation ded = correct_indentation(ded) try: html = rst2html(ded, fLOG=fLOG, writer=writer, keep_warnings=keep_warnings, directives=directives, language=language, filter_nodes=filter_nodes, document_name=document_name, layout=layout, **options) except Exception as e: lines = ded.split("\n") lines = ["%04d %s" % (i + 1, _.strip("\n\r")) for i, _ in enumerate(lines)] raise HelpGenConvertError( "Unable to process:\n{0}".format("\n".join(lines))) from e ret_doctree = writer == "doctree" if ret_doctree: writer = "doctree" if writer in ('doctree', 'rst', 'md'): return html if format == "html": from IPython.core.display import HTML return HTML(html) elif format in ("rawhtml", 'rst', 'md', 'doctree'): return html else: raise ValueError( "Unexpected format: '" + format + "', should be html, rawhtml, text, rst, md, doctree.")
[docs]def rst2rst_folder(rststring, folder, document_name="index", **options): """ Converts a :epkg:`RST` string into simplified :epkg:`RST`. :param rststring: :epkg:`rst` string :param folder: the builder needs to write the resuts in a folder defined by this parameter :param document_name: main document :param options: additional options (same as *conf.py*) :return: converted string :githublink:`%|py|579` """ if not os.path.exists(folder): raise FileNotFoundError(folder) new_options = {} new_options.update(options) def update_builder(builder): builder.outdir = folder rst = rst2html(rststring, writer="rst", document_name="example", update_builder=update_builder, layout="sphinx", **new_options) return rst