Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""
2@file
@brief Helpers to convert docstrings to various formats.
4"""
5import re
6import textwrap
7import os
8from io import StringIO
9from docutils import core, languages
10from docutils.io import StringInput, StringOutput
11from .utils_sphinx_doc import migrating_doxygen_doc
12from .helpgen_exceptions import HelpGenConvertError
13from ..texthelper.texts_language import TITLES
14from ..loghelper.flog import noLOG
def default_sphinx_options(fLOG=noLOG, **options):
    """
    Defines or overrides default options for :epkg:`Sphinx`, listed below.

    .. runpython::

        from pyquickhelper.helpgen.rst_converters import default_sphinx_options
        options = default_sphinx_options()
        for k, v in sorted(options.items()):
            print("{0} = {1}".format(k, v))

    :epkg:`latex` is not available on :epkg:`Windows`.

    @param      fLOG        logging function, accepted for signature
                            consistency, not used by this function
    @param      options     values overriding the defaults, any key which is
                            not one of the defaults is copied as is
                            into the result
    @return                 dictionary ``{option name: value}``

    When *imgmath_image_format* is ``'png'``, the function looks for
    the :epkg:`latex` executables with ``find_dvipng_path``. If they are
    found, options *imgmath_latex*, *imgmath_dvipng*, *imgmath_dvisvgm*
    are set to the discovered paths, these take precedence over values
    given in *options* for the same three keys.
    The executable ``dot`` is only searched for if the caller did not
    specify *graphviz_dot*.
    """
    # delayed import to speed up time
    from .conf_path_tools import find_graphviz_dot, find_dvipng_path

    # The lookup is only done when the caller did not give a path:
    # ``options.get(key, find_graphviz_dot(...))`` would run it every time.
    if 'graphviz_dot' in options:
        graphviz_dot = options['graphviz_dot']
    else:
        graphviz_dot = find_graphviz_dot(exc=False)

    res = {  # 'output_encoding': options.get('output_encoding', 'unicode'),
        # 'doctitle_xform': options.get('doctitle_xform', True),
        # 'initial_header_level': options.get('initial_header_level', 2),
        # 'input_encoding': options.get('input_encoding', 'utf-8-sig'),
        'blog_background': options.get('blog_background', False),
        'sharepost': options.get('sharepost', None),
        'todoext_link_only': options.get('todoext_link_only', False),
        'mathdef_link_only': options.get('mathdef_link_only', True),
        'blocref_link_only': options.get('blocref_link_only', False),
        'faqref_link_only': options.get('faqref_link_only', False),
        'nbref_link_only': options.get('nbref_link_only', False),
        'todo_link_only': options.get('todo_link_only', False),
        'language': options.get('language', 'en'),
        # 'outdir': options.get('outdir', '.'),
        # 'imagedir': options.get('imagedir', '.'),
        # 'confdir': options.get('confdir', '.'),
        # 'doctreedir': options.get('doctreedir', '.'),
        'math_number_all': options.get('math_number_all', False),
        # graphviz
        'graphviz_output_format': options.get('graphviz_output_format', 'png'),
        'graphviz_dot': graphviz_dot,
        # latex
        'imgmath_image_format': options.get('imgmath_image_format', 'png'),
        # containers
        'out_blogpostlist': [],
        'out_runpythonlist': [],
        # 'warning_stream': StringIO(),
    }

    if res['imgmath_image_format'] == 'png':
        try:
            imgmath_latex, imgmath_dvipng, imgmath_dvisvgm = find_dvipng_path(
                exc=False)
        except FileNotFoundError:
            # miktex is not available, the imgmath_* paths are left undefined
            pass
        else:
            res['imgmath_latex'] = imgmath_latex
            res['imgmath_dvipng'] = imgmath_dvipng
            res['imgmath_dvisvgm'] = imgmath_dvisvgm

    # Every remaining user option is forwarded without overwriting
    # what was computed above.
    for k, v in options.items():
        res.setdefault(k, v)

    return res
def rst2html(s, fLOG=noLOG, writer="html", keep_warnings=False,
             directives=None, language="en",
             layout='docutils', document_name="<<string>>",
             external_docnames=None, filter_nodes=None,
             new_extensions=None, update_builder=None,
             ret_doctree=False, load_bokeh=False,
             destination=None, destination_path=None,
             **options):
    """
    Converts a string from :epkg:`RST`
    into :epkg:`HTML` format or transformed :epkg:`RST`.

    @param      s                   string to convert
    @param      fLOG                logging function (warnings will be logged)
    @param      writer              ``'html'`` for :epkg:`HTML` format,
                                    ``'rst'`` for :epkg:`RST` format,
                                    ``'md'`` for :epkg:`MD` format,
                                    ``'elatex'`` for :epkg:`latex` format,
                                    ``'doctree'`` to get the doctree, *writer* can also be a tuple
                                    for custom formats and must be like ``('builder_name', builder_class)``.
    @param      keep_warnings       keep_warnings in the final HTML
    @param      directives          new directives to add (see below)
    @param      language            language
    @param      layout              ``'docutils'``, ``'sphinx'``, ``'sphinx_body'``, see below.
    @param      document_name       document name, not really important since the input is a string
    @param      external_docnames   if the string to parse makes references to other documents,
                                    if one is missing, an exception is raised.
    @param      filter_nodes        transforms the doctree before writing the results,
                                    the function takes a doctree as a single parameter,
                                    it cannot be used with layout ``'docutils'``
                                    unless *writer* is ``'doctree'``
    @param      new_extensions      additional extension to setup
    @param      update_builder      update the builder after it is instantiated
    @param      ret_doctree         returns the doctree
    @param      load_bokeh          load :epkg:`bokeh` extensions,
                                    disabled by default as it takes a few seconds
    @param      destination         set a destination (requires for some extension)
    @param      destination_path    set a destination path (requires for some extension)
    @param      options             :epkg:`Sphinx` options see
                                    `Render math as images <https://www.sphinx-doc.org/en/master/ext/math.html#module-sphinx.ext.imgmath>`_,
                                    a subset of options is used, see @see fn default_sphinx_options.
                                    By default, the theme (option *html_theme*) will ``'basic'``.
    @return                         HTML format, or the doctree if *ret_doctree* is True

    The function raises ``ValueError`` if *writer* or *layout* is not
    one of the expected values, if no environment was built, if no page
    was produced or if *filter_nodes* is used with an incompatible layout.
    It raises ``AttributeError`` if the builder has no method ``iter_pages``
    while the layout is not ``'docutils'``.

    *directives* is None or a list of 2 or 5-uple:

    * a directive name (mandatory)
    * a directive class: see `Sphinx Directive
      <https://www.sphinx-doc.org/en/master/development/tutorials/helloworld.html>`_,
      see also @see cl RunPythonDirective as an example (mandatory)
    * a docutils node: see @see cl runpython_node as an example
    * two functions: see @see fn visit_runpython_node, @see fn depart_runpython_node as an example

    The parameter *layout* specify the kind of HTML you need.

    * ``'docutils'``: very simple :epkg:`HTML`, style is not included, recursive
      directives are not processed (recursive means they modify the doctree).
      The produced :epkg:`HTML` only includes the body (no :epkg:`HTML` header).
    * ``'sphinx'``: in memory :epkg:`sphinx`, the produced :epkg:`HTML` includes the header, it is also recursive
      as directives can modify the doctree.
    * ``'sphinx_body'``: same as ``'sphinx'`` but only the body is returned.

    If the writer is a tuple, it must be a 2-uple ``(builder_name, builder_class)``.
    However, the builder class must contain an attribute ``_writer_class`` with
    the associated writer. The builder class must also implement a method
    ``iter_pages`` which enumerates all written pages:
    ``def iter_pages(self) -> Dict[str,str]`` where the key is the document name
    and the value is its content.

    .. exref::
        :title: How to test a Sphinx directive?

        The following code defines a simple directive
        defined based on an existing one.
        It also defined what to do if a new node
        is inserted in the documentation.

        ::

            from docutils import nodes
            from pyquickhelper.helpgen import rst2html

            class runpythonthis_node(nodes.Structural, nodes.Element):
                pass

            class RunPythonThisDirective (RunPythonDirective):
                runpython_class = runpythonthis_node

            def visit_node(self, node):
                self.body.append("<p><b>visit_node</b></p>")
            def depart_node(self, node):
                self.body.append("<p><b>depart_node</b></p>")

            content = '''
                        test a directive
                        ================

                        .. runpythonthis::

                            print("this code shoud appear" + "___")
                        '''.replace("                    ", "")
                        # to remove spaces at the beginning of the line

            tives = [ ("runpythonthis", RunPythonThisDirective,
                       runpythonthis_node, visit_node, depart_node) ]

            html = rst2html(content, writer="html", keep_warnings=True,
                            directives=tives)

        Unfortunately, this functionality is only tested on :epkg:`Python` 3.
        It might not work on :epkg:`Python` 2.7.
        The function produces files if the document contains latex
        converted into image.

    .. faqref::
       :title: How to get more about latex errors?
       :index: latex

        :epkg:`Sphinx` is not easy to use when it comes to debug latex expressions.
        I did not find an easy way to read the error returned by latex about
        a missing bracket or an unknown command. I finally added a short piece
        of code in ``sphinx.ext.imgmath.py`` just after the call to
        the executable indicated by *imgmath_latex*

        ::

            if b'...' in stdout or b'LaTeX Error' in stdout:
                print(self.builder.config.imgmath_latex_preamble)
                print(p.returncode)
                print("################")
                print(latex)
                print("..........")
                print(stdout.decode("ascii").replace("\\r", ""))
                print("-----")
                print(stderr)

        It displays the output if an error happened.

    .. faqref::
        :title: How to hide command line window while compiling latex?
        :lid: command line window

        :epkg:`Sphinx` calls :epkg:`latex` through command line.
        On :epkg:`Windows`, a command line window
        can annoyingly show up anytime a formula is compiled.
        The following can be added to hide it:

        ::

            startupinfo = STARTUPINFO()
            startupinfo.dwFlags |= STARTF_USESHOWWINDOW

        And ``, startupinfo=startupinfo`` must be added to lines ``p = Popen(...``.

    By default, the function now interprets :epkg:`Sphinx`
    directives and not only *docutils* ones.
    Parameter *directives* adds a directive
    before parsing the :epkg:`RST`.
    The function is more consistent.
    Format ``rst`` is available as well as
    custom builders.
    New nodes are optional in *directives*.
    Markdown format was added.
    """
    # delayed import to speed up time
    def _get_MockSphinxApp():
        from .sphinxm_mock_app import MockSphinxApp
        return MockSphinxApp
    MockSphinxApp = _get_MockSphinxApp()

    if 'html_theme' not in options:
        options['html_theme'] = 'basic'
    defopt = default_sphinx_options(**options)
    if "master_doc" not in defopt:
        defopt["master_doc"] = document_name
    if writer in ('latex', 'elatex') and 'latex_documents' not in defopt:
        latex_documents = [(document_name, ) * 5]
        defopt['latex_documents'] = latex_documents

    # From here, *writer* is rebound to the writer object returned by
    # MockSphinxApp.create, *writer_name* keeps what the caller requested.
    if writer in ["custom", "sphinx", "HTMLWriterWithCustomDirectives", "html"]:
        mockapp, writer, title_names = MockSphinxApp.create(
            "sphinx", directives, confoverrides=defopt,
            new_extensions=new_extensions,
            load_bokeh=load_bokeh, fLOG=fLOG,
            destination_path=destination_path)
        writer_name = "HTMLWriterWithCustomDirectives"
    elif writer in ("rst", "md", "latex", "elatex", 'text', 'doctree'):
        writer_name = writer
        mockapp, writer, title_names = MockSphinxApp.create(
            writer, directives, confoverrides=defopt,
            new_extensions=new_extensions,
            load_bokeh=load_bokeh, fLOG=fLOG,
            destination_path=destination_path)
    elif isinstance(writer, tuple):
        # We expect something like ("builder_name", builder_class)
        writer_name = writer
        mockapp, writer, title_names = MockSphinxApp.create(
            writer, directives, confoverrides=defopt,
            new_extensions=new_extensions,
            load_bokeh=load_bokeh, fLOG=fLOG,
            destination_path=destination_path)
    else:
        raise ValueError(
            "Unexpected writer '{0}', should be 'rst' or 'html' or 'md' or 'elatex' or 'text'.".format(writer))

    if writer is None and directives is not None and len(directives) > 0:
        raise NotImplementedError(
            "The writer must not be null if custom directives will be added, check the documentation of the fucntion.")

    # delayed import to speed up time
    from sphinx.environment import default_settings
    settings_overrides = default_settings.copy()
    settings_overrides["warning_stream"] = StringIO()
    settings_overrides["master_doc"] = document_name
    settings_overrides["source"] = document_name
    settings_overrides["contentsname"] = document_name
    # only the first item of every registered option is used as its value
    settings_overrides.update({k: v[0]
                               for k, v in mockapp.new_options.items()})

    # next, user options take precedence over everything set above
    settings_overrides.update(defopt)
    config = mockapp.config
    config.blog_background = True
    config.blog_background_page = False
    config.sharepost = None

    if hasattr(writer, "add_configuration_options"):
        writer.add_configuration_options(mockapp.new_options)
    for k in {'outdir', 'imagedir', 'confdir', 'doctreedir'}:
        setattr(writer.builder, k, settings_overrides.get(k, ''))
    if destination_path is not None:
        writer.builder.outdir = destination_path
    if update_builder:
        update_builder(writer.builder)

    env = mockapp.env
    if env is None:
        raise ValueError("No environment was built.")

    env.temp_data["docname"] = document_name
    env.temp_data["source"] = document_name
    mockapp.builder.env.temp_data["docname"] = document_name
    mockapp.builder.env.temp_data["source"] = document_name
    settings_overrides["env"] = env

    # registers the titles the extensions need and docutils does not know
    lang = languages.get_language(language)
    for name in title_names:
        if name not in lang.labels:
            lang.labels[name] = TITLES[language][name]

    for k, v in sorted(settings_overrides.items()):
        fLOG("[rst2html] {0}={1}{2}".format(
            k, v, " --- added" if hasattr(config, k) else ""))
    for k, v in sorted(settings_overrides.items()):
        if hasattr(writer.builder.config, k) and writer.builder.config[k] != v:
            writer.builder.config[k] = v

    _, pub = core.publish_programmatically(
        source=s, source_path=None, destination_path=destination_path, writer=writer,
        writer_name=writer_name, settings_overrides=settings_overrides,
        source_class=StringInput, destination_class=StringOutput,
        destination=destination, reader=None, reader_name='standalone',
        parser=None, parser_name='restructuredtext', settings=None,
        settings_spec=None, config_section=None, enable_exit_status=False)

    doctree = pub.document

    if filter_nodes is not None:
        # *writer* is the writer object at this point, the name requested
        # by the caller is in *writer_name*: comparing *writer* to a string
        # would always reject layout 'docutils', even for a doctree.
        if layout == "docutils" and writer_name != "doctree":
            raise ValueError(
                "filter_nodes is not None, layout must not be 'docutils'")
        filter_nodes(doctree)

    mockapp.finalize(doctree, external_docnames=external_docnames)
    parts = pub.writer.parts

    whole = parts["whole"]
    if isinstance(whole, list):
        # Not html.
        exp = "".join(whole)
    elif keep_warnings:
        exp = whole
    else:
        # removes the system messages docutils inserts in the HTML
        exp = re.sub(
            '(<div class="system-message">(.|\\n)*?</div>)', "", whole)

    if ret_doctree:
        return doctree

    if layout == "docutils":
        return exp

    page = None
    pages = []
    # names the main page may be registered under
    main = ("/{0}.m.html".format(document_name),
            "/{0}.m.{1}".format(document_name, writer_name),
            document_name)
    if not hasattr(writer.builder, "iter_pages"):
        raise AttributeError(
            "Class '{0}' must have a method 'iter_pages' which returns a dictionary.".format(writer.builder))
    contents = []
    for k, v in writer.builder.iter_pages():
        pages.append(k)
        contents.append(v)
        if k in main:
            page = v
            break
    if page is None and len(contents) == 1:
        # a single page was produced, it can only be the expected one
        page = contents[0]
    if page is None:
        raise ValueError(
            "No page contents was produced, only '{0}'.".format(pages))

    if layout == "sphinx":
        if isinstance(page, str):
            return page
        return "\n".join(page)

    if layout == "sphinx_body":
        # a page may be a list of lines, same convention as layout 'sphinx'
        text = page if isinstance(page, str) else "\n".join(page)
        lines = text.replace('</head>', '</head>\n').split("\n")
        keep = []
        begin = False
        for line in lines:
            stripped = line.strip(" \n\r")
            if stripped == "</body>":
                begin = False
            if begin:
                keep.append(line)
            if stripped == "<body>":
                begin = True
        return "\n".join(keep)

    raise ValueError(
        "Unexpected value for layout '{0}'".format(layout))
def correct_indentation(text):
    """
    Tries to improve the indentation before running :epkg:`docutils`.

    @param      text        text to correct
    @return                 corrected text

    The function looks for lines only made of one repeated character
    among ``-+=*^`` (title underlines) and records their indentation.
    If the smallest recorded indentation is positive, at most that many
    leading spaces are removed from every line. Otherwise,
    the text is returned unchanged.
    """
    lines = text.split("\n")

    # indentation of every line which looks like a title underline
    underline_indents = set()
    for line in lines:
        # NOTE(review): the listing this code was recovered from collapses
        # whitespace, confirm the width of the tab replacement below.
        expanded = line.replace("\t", " ")
        stripped = expanded.lstrip()
        marker = stripped.strip("\r\n\t ")
        if not marker:
            continue
        if marker[0] in "-+=*^" and marker == marker[0] * len(marker):
            underline_indents.add(len(expanded) - len(stripped))

    shift = min(underline_indents) if underline_indents else 0
    if shift <= 0:
        return text

    def _dedent(line):
        # only leading spaces are counted, never more than *shift* removed
        leading = len(line) - len(line.lstrip(" "))
        return line[min(leading, shift):]

    return "\n".join(_dedent(line) for line in lines)
def docstring2html(function_or_string, format="html", fLOG=noLOG, writer="html",
                   keep_warnings=False, directives=None, language="en",
                   layout='docutils', document_name="<<string>>",
                   filter_nodes=None, **options):
    """
    Converts a docstring into a :epkg:`HTML` format.

    @param      function_or_string      function, class, method or docstring
    @param      format                  output format (``'html'``, ``'rawhtml'``,
                                        ``'rst'``, ``'text'``), see below
    @param      fLOG                    logging function
    @param      writer                  ``'html'`` for :epkg:`HTML` format,
                                        ``'rst'`` for :epkg:`RST` format,
                                        ``'md'`` for :epkg:`MD` format
    @param      keep_warnings           keep_warnings in the final :epkg:`HTML`
    @param      directives              new directives to add (see below)
    @param      language                language
    @param      layout                  ``'docutils'``, ``'sphinx'``, ``'sphinx_body'``, see below.
    @param      document_name           document_name for this string
    @param      filter_nodes            transform the doctree before writing the results
                                        (layout must be 'sphinx')
    @param      options                 Sphinx options see `Render math as images
                                        <https://www.sphinx-doc.org/en/master/ext/math.html#module-sphinx.ext.imgmath>`_,
                                        a subset of options is used, see @see fn default_sphinx_options.
                                        By default, the theme (option *html_theme*) will ``'basic'``.
    @return                             (str) :epkg:`HTML` format or (IPython.core.display.HTML)

    .. exref::
        :title: Produce HTML documentation for a function or class

        The following code can display the docstring in :epkg:`HTML` format
        to display it in a :epkg:`notebook`.

        ::

            from pyquickhelper.helpgen import docstring2html
            import sklearn.linear_model
            docstring2html(sklearn.linear_model.LogisticRegression)

    The output format is defined by:

    * ``'html'``: IPython :epkg:`HTML` object
    * ``'rawhtml'``: :epkg:`HTML` as text + style
    * ``'rst'``: :epkg:`rst`
    * ``'text'``: raw text

    An empty string is returned if the object has no docstring,
    whatever the format is. The conversion is tried a second time with
    a corrected indentation if the first attempt fails,
    ``HelpGenConvertError`` is raised if the second attempt fails too.
    ``ValueError`` is raised if *format* is unknown and *writer* is not one of
    ``'doctree'``, ``'rst'``, ``'md'``.
    """
    if not isinstance(function_or_string, str):
        doc = function_or_string.__doc__
    else:
        doc = function_or_string

    # Checked first so that format 'text' returns a string as well
    # when the object has no docstring.
    if doc is None:
        return ""

    if format == "text":
        return doc

    from .utils_sphinx_doc import _private_migrating_doxygen_doc
    javadoc = migrating_doxygen_doc(doc, "None", log=False)[1]
    rst_rows = _private_migrating_doxygen_doc(
        javadoc.split("\n"), index_first_line=0, filename="None")
    ded = textwrap.dedent("\n".join(rst_rows))

    def _convert(text):
        # one single place for both attempts below
        return rst2html(text, fLOG=fLOG, writer=writer,
                        keep_warnings=keep_warnings, directives=directives,
                        language=language, filter_nodes=filter_nodes,
                        document_name=document_name,
                        layout=layout, **options)

    try:
        html = _convert(ded)
    except Exception:
        # Any failure triggers a second attempt with a corrected
        # indentation, the second failure is the one reported.
        ded = correct_indentation(ded)
        try:
            html = _convert(ded)
        except Exception as e:
            numbered = ["%04d %s" % (i + 1, line.strip("\n\r"))
                        for i, line in enumerate(ded.split("\n"))]
            raise HelpGenConvertError(
                "Unable to process:\n{0}".format("\n".join(numbered))) from e

    # these writers do not produce HTML, the result is returned as is
    if writer in ('doctree', 'rst', 'md'):
        return html

    if format == "html":
        from IPython.core.display import HTML
        return HTML(html)
    if format in ("rawhtml", 'rst', 'md', 'doctree'):
        return html
    raise ValueError(
        "Unexpected format: '{}', should be html, rawhtml, text, rst, "
        "md, doctree.".format(format))
def rst2rst_folder(rststring, folder, document_name="index", **options):
    """
    Converts a :epkg:`RST` string into simplified :epkg:`RST`.

    @param      rststring       :epkg:`rst` string
    @param      folder          the builder needs to write the results in a
                                folder defined by this parameter,
                                it must exist
    @param      document_name   main document
    @param      options         additional options (same as *conf.py*)
    @return                     converted string

    The function raises ``FileNotFoundError`` if *folder* does not exist.
    """
    if not os.path.exists(folder):
        raise FileNotFoundError(folder)

    def update_builder(builder):
        # the builder writes its results into the requested folder
        builder.outdir = folder

    # *document_name* is forwarded, it used to be ignored and replaced
    # by a hard-coded name.
    return rst2html(rststring, writer="rst", document_name=document_name,
                    update_builder=update_builder, layout="sphinx",
                    **options)