Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1""" 

2@file 

3@brief Helpers to convert docstring to various format. 

4""" 

5import re 

6import textwrap 

7import os 

8from io import StringIO 

9from docutils import core, languages 

10from docutils.io import StringInput, StringOutput 

11from .utils_sphinx_doc import migrating_doxygen_doc 

12from .helpgen_exceptions import HelpGenConvertError 

13from ..texthelper.texts_language import TITLES 

14from ..loghelper.flog import noLOG 

15 

16 

def default_sphinx_options(fLOG=noLOG, **options):
    """
    Builds the dictionary of :epkg:`Sphinx` options used by the converters.
    Every value can be overridden through *options*; the example below
    prints the resulting defaults.

    .. runpython::

        from pyquickhelper.helpgen.rst_converters import default_sphinx_options
        options = default_sphinx_options()
        for k, v in sorted(options.items()):
            print("{0} = {1}".format(k, v))

    @param      fLOG        logging function (not used by this function)
    @param      options     values replacing the defaults, any key which is
                            not a known default is copied as is
    @return                 dictionary of options

    When *imgmath_image_format* is ``'png'``, the function looks for the
    :epkg:`latex` tools and, if they are found, stores them in
    *imgmath_latex*, *imgmath_dvipng*, *imgmath_dvisvgm*. These detected
    values are set before the remaining *options* are merged, so a value
    given by the caller for one of these three keys does not replace them.
    :epkg:`latex` is not available on :epkg:`Windows`.
    """
    # delayed import to speed up time
    from .conf_path_tools import find_graphviz_dot, find_dvipng_path

    # (name, default value) pairs, the order is the order of the keys
    # in the returned dictionary
    defaults = (
        ('blog_background', False),
        ('sharepost', None),
        ('todoext_link_only', False),
        ('mathdef_link_only', True),
        ('blocref_link_only', False),
        ('faqref_link_only', False),
        ('nbref_link_only', False),
        ('todo_link_only', False),
        ('language', 'en'),
        ('math_number_all', False),
        # graphviz
        ('graphviz_output_format', 'png'),
        ('graphviz_dot', find_graphviz_dot(exc=False)),
        # latex
        ('imgmath_image_format', 'png'),
    )
    settings = {name: options.get(name, value) for name, value in defaults}

    # containers, always created empty whatever the caller gives
    settings['out_blogpostlist'] = []
    settings['out_runpythonlist'] = []

    if settings['imgmath_image_format'] == 'png':
        try:
            latex_exe, dvipng_exe, dvisvgm_exe = find_dvipng_path(exc=False)
        except FileNotFoundError:
            # miktex is not available, the imgmath_* paths are left undefined
            pass
        else:
            settings['imgmath_latex'] = latex_exe
            settings['imgmath_dvipng'] = dvipng_exe
            settings['imgmath_dvisvgm'] = dvisvgm_exe

    # every other option is forwarded without overwriting what was computed
    for key, value in options.items():
        settings.setdefault(key, value)

    return settings

81 

82 

def rst2html(s, fLOG=noLOG, writer="html", keep_warnings=False,
             directives=None, language="en",
             layout='docutils', document_name="<<string>>",
             external_docnames=None, filter_nodes=None,
             new_extensions=None, update_builder=None,
             ret_doctree=False, load_bokeh=False,
             destination=None, destination_path=None,
             **options):
    """
    Converts a string from :epkg:`RST`
    into :epkg:`HTML` format or transformed :epkg:`RST`.

    @param      s                   string to convert
    @param      fLOG                logging function (the settings given to
                                    :epkg:`docutils` are logged)
    @param      writer              ``'html'`` for :epkg:`HTML` format,
                                    ``'rst'`` for :epkg:`RST` format,
                                    ``'md'`` for :epkg:`MD` format,
                                    ``'elatex'`` for :epkg:`latex` format,
                                    ``'doctree'`` to get the doctree, *writer* can also be a tuple
                                    for custom formats and must be like ``('builder_name', builder_class)``.
    @param      keep_warnings       keeps the system messages (warnings) in the final HTML
    @param      directives          new directives to add (see below)
    @param      language            language
    @param      layout              ``'docutils'``, ``'sphinx'``, ``'sphinx_body'``, see below.
    @param      document_name       document name, not really important since the input is a string
    @param      external_docnames   if the string to parse makes references to other documents,
                                    if one is missing, an exception is raised.
    @param      filter_nodes        transforms the doctree before writing the results
                                    (layout must not be ``'docutils'`` unless the writer is ``'doctree'``),
                                    the function takes a doctree as a single parameter
    @param      new_extensions      additional extension to setup
    @param      update_builder      update the builder after it is instantiated
    @param      ret_doctree         returns the doctree
    @param      load_bokeh          load :epkg:`bokeh` extensions,
                                    disabled by default as it takes a few seconds
    @param      destination         set a destination (required for some extensions)
    @param      destination_path    set a destination path (required for some extensions)
    @param      options             :epkg:`Sphinx` options see
                                    `Render math as images <https://www.sphinx-doc.org/en/master/ext/math.html#module-sphinx.ext.imgmath>`_,
                                    a subset of options is used, see @see fn default_sphinx_options.
                                    By default, the theme (option *html_theme*) will be ``'basic'``.
    @return                         converted document as a string,
                                    or the doctree if *ret_doctree* is True

    The function raises:

    * ``ValueError`` if *writer* or *layout* is not supported, if no
      environment was built, if no page was produced or if *filter_nodes*
      is used with layout ``'docutils'`` and a writer different from ``'doctree'``
    * ``NotImplementedError`` if no writer was created whereas custom
      directives were given
    * ``AttributeError`` if the builder has no method ``iter_pages``
      and the layout is not ``'docutils'``

    *directives* is None or a list of 2 or 5-uple:

    * a directive name (mandatory)
    * a directive class: see `Sphinx Directive
      <https://www.sphinx-doc.org/en/master/development/tutorials/helloworld.html>`_,
      see also @see cl RunPythonDirective as an example (mandatory)
    * a docutils node: see @see cl runpython_node as an example
    * two functions: see @see fn visit_runpython_node, @see fn depart_runpython_node as an example

    The parameter *layout* specifies the kind of HTML you need.

    * ``'docutils'``: very simple :epkg:`HTML`, style is not included, recursive
      directives are not processed (recursive means they modify the doctree).
      The produced :epkg:`HTML` only includes the body (no :epkg:`HTML` header).
    * ``'sphinx'``: in memory :epkg:`sphinx`, the produced :epkg:`HTML` includes the header, it is also recursive
      as directives can modify the doctree.
    * ``'sphinx_body'``: same as ``'sphinx'`` but only the body is returned.

    If the writer is a tuple, it must be a 2-uple ``(builder_name, builder_class)``.
    However, the builder class must contain an attribute ``_writer_class`` with
    the associated writer. The builder class must also implement a method
    ``iter_pages`` which enumerates all written pages:
    ``def iter_pages(self) -> Dict[str,str]`` where the key is the document name
    and the value is its content.

    .. exref::
        :title: How to test a Sphinx directive?

        The following code defines a simple directive
        based on an existing one.
        It also defines what to do if a new node
        is inserted in the documentation.

        ::

            from docutils import nodes
            from pyquickhelper.helpgen import rst2html

            class runpythonthis_node(nodes.Structural, nodes.Element):
                pass

            class RunPythonThisDirective (RunPythonDirective):
                runpython_class = runpythonthis_node

            def visit_node(self, node):
                self.body.append("<p><b>visit_node</b></p>")
            def depart_node(self, node):
                self.body.append("<p><b>depart_node</b></p>")

            content = '''
                                test a directive
                                ================

                                .. runpythonthis::

                                    print("this code should appear" + "___")
                                '''.replace("                    ", "")
                                # to remove spaces at the beginning of the line

            tives = [ ("runpythonthis", RunPythonThisDirective,
                       runpythonthis_node, visit_node, depart_node) ]

            html = rst2html(content, writer="html", keep_warnings=True,
                            directives=tives)

        Unfortunately, this functionality is only tested on :epkg:`Python` 3.
        It might not work on :epkg:`Python` 2.7.
        The function produces files if the document contains latex
        converted into image.

    .. faqref::
       :title: How to get more about latex errors?
       :index: latex

        :epkg:`Sphinx` is not easy to use when it comes to debug latex expressions.
        I did not find an easy way to read the error returned by latex about
        a missing bracket or an unknown command. I finally added a short piece
        of code in ``sphinx.ext.imgmath.py`` just after the call to
        the executable indicated by *imgmath_latex*

        ::

            if b'...' in stdout or b'LaTeX Error' in stdout:
                print(self.builder.config.imgmath_latex_preamble)
                print(p.returncode)
                print("################")
                print(latex)
                print("..........")
                print(stdout.decode("ascii").replace("\\r", ""))
                print("-----")
                print(stderr)

        It displays the output if an error happened.

    .. faqref::
        :title: How to hide command line window while compiling latex?
        :lid: command line window

        :epkg:`Sphinx` calls :epkg:`latex` through command line.
        On :epkg:`Windows`, a command line window
        can annoyingly show up anytime a formula is compiled.
        The following can be added to hide it:

        ::

            startupinfo = STARTUPINFO()
            startupinfo.dwFlags |= STARTF_USESHOWWINDOW

        And ``, startupinfo=startupinfo`` must be added to lines ``p = Popen(...``.

    By default, the function now interprets :epkg:`Sphinx`
    directives and not only *docutils* ones.
    Parameter *directives* adds a directive
    before parsing the :epkg:`RST`.
    The function is more consistent.
    Format ``rst`` is available as well as
    custom builders.
    New nodes are optional in *directives*.
    Markdown format was added.
    """
    # delayed import to speed up time
    def _get_MockSphinxApp():
        from .sphinxm_mock_app import MockSphinxApp
        return MockSphinxApp
    MockSphinxApp = _get_MockSphinxApp()

    if 'html_theme' not in options:
        options['html_theme'] = 'basic'
    defopt = default_sphinx_options(**options)
    if "master_doc" not in defopt:
        defopt["master_doc"] = document_name
    if writer in ('latex', 'elatex') and 'latex_documents' not in defopt:
        latex_documents = [(document_name, ) * 5]
        defopt['latex_documents'] = latex_documents

    # From here, *writer* is rebound to the writer object created by the
    # mock application; *writer_name* keeps what the caller asked for.
    if writer in ["custom", "sphinx", "HTMLWriterWithCustomDirectives", "html"]:
        mockapp, writer, title_names = MockSphinxApp.create(
            "sphinx", directives, confoverrides=defopt,
            new_extensions=new_extensions,
            load_bokeh=load_bokeh, fLOG=fLOG,
            destination_path=destination_path)
        writer_name = "HTMLWriterWithCustomDirectives"
    elif (writer in ("rst", "md", "latex", "elatex", 'text', 'doctree') or
            isinstance(writer, tuple)):
        # A tuple is expected to look like ("builder_name", builder_class),
        # it is handled the same way as a builtin format.
        writer_name = writer
        mockapp, writer, title_names = MockSphinxApp.create(
            writer, directives, confoverrides=defopt,
            new_extensions=new_extensions,
            load_bokeh=load_bokeh, fLOG=fLOG,
            destination_path=destination_path)
    else:
        raise ValueError(
            "Unexpected writer '{0}', should be 'rst' or 'html' or 'md' or 'elatex' or 'text'.".format(writer))

    if writer is None and directives is not None and len(directives) > 0:
        raise NotImplementedError(
            "The writer must not be null if custom directives will be added, check the documentation of the fucntion.")

    # delayed import to speed up time
    from sphinx.environment import default_settings
    settings_overrides = default_settings.copy()
    # warnings are captured in memory
    settings_overrides["warning_stream"] = StringIO()
    settings_overrides["master_doc"] = document_name
    settings_overrides["source"] = document_name
    settings_overrides["contentsname"] = document_name
    settings_overrides.update({k: v[0]
                               for k, v in mockapp.new_options.items()})

    # the options given by the caller have the last word
    settings_overrides.update(defopt)
    config = mockapp.config
    config.blog_background = True
    config.blog_background_page = False
    config.sharepost = None

    if hasattr(writer, "add_configuration_options"):
        writer.add_configuration_options(mockapp.new_options)
    for k in {'outdir', 'imagedir', 'confdir', 'doctreedir'}:
        setattr(writer.builder, k, settings_overrides.get(k, ''))
    if destination_path is not None:
        writer.builder.outdir = destination_path
    if update_builder:
        update_builder(writer.builder)

    env = mockapp.env
    if env is None:
        raise ValueError("No environment was built.")

    env.temp_data["docname"] = document_name
    env.temp_data["source"] = document_name
    mockapp.builder.env.temp_data["docname"] = document_name
    mockapp.builder.env.temp_data["source"] = document_name
    settings_overrides["env"] = env

    # adds the titles of the new directives to the language labels
    lang = languages.get_language(language)
    for name in title_names:
        if name not in lang.labels:
            lang.labels[name] = TITLES[language][name]

    for k, v in sorted(settings_overrides.items()):
        fLOG("[rst2html] {0}={1}{2}".format(
            k, v, " --- added" if hasattr(config, k) else ""))
    for k, v in sorted(settings_overrides.items()):
        if hasattr(writer.builder.config, k) and writer.builder.config[k] != v:
            writer.builder.config[k] = v

    _, pub = core.publish_programmatically(
        source=s, source_path=None, destination_path=destination_path, writer=writer,
        writer_name=writer_name, settings_overrides=settings_overrides,
        source_class=StringInput, destination_class=StringOutput,
        destination=destination, reader=None, reader_name='standalone',
        parser=None, parser_name='restructuredtext', settings=None,
        settings_spec=None, config_section=None, enable_exit_status=False)

    doctree = pub.document

    if filter_nodes is not None:
        # The requested format is in writer_name: writer now holds the
        # writer object, comparing it to 'doctree' would always differ.
        if layout == "docutils" and writer_name != "doctree":
            raise ValueError(
                "filter_nodes is not None, layout must not be 'docutils'")
        filter_nodes(doctree)

    mockapp.finalize(doctree, external_docnames=external_docnames)
    parts = pub.writer.parts

    whole = parts["whole"]
    if isinstance(whole, list):
        # Not html.
        exp = "".join(whole)
    elif keep_warnings:
        exp = whole
    else:
        # removes the system messages from the HTML
        exp = re.sub(
            '(<div class="system-message">(.|\\n)*?</div>)', "", whole)

    if ret_doctree:
        return doctree

    if layout == "docutils":
        return exp

    # Other layouts read the page written by the builder.
    page = None
    pages = []
    main = ("/{0}.m.html".format(document_name),
            "/{0}.m.{1}".format(document_name, writer_name),
            document_name)
    if not hasattr(writer.builder, "iter_pages"):
        raise AttributeError(
            "Class '{0}' must have a method 'iter_pages' which returns a dictionary.".format(writer.builder))
    contents = []
    for k, v in writer.builder.iter_pages():
        pages.append(k)
        contents.append(v)
        if k in main:
            page = v
            break
    if page is None and len(contents) == 1:
        # only one page was produced, it has to be the expected one
        page = contents[0]
    if page is None:
        raise ValueError(
            "No page contents was produced, only '{0}'.".format(pages))

    if layout == "sphinx":
        if isinstance(page, str):
            return page
        return "\n".join(page)

    if layout == "sphinx_body":
        # keeps the lines between <body> and </body> (both excluded)
        lines = page.replace('</head>', '</head>\n').split("\n")
        keep = []
        begin = False
        for line in lines:
            stripped = line.strip(" \n\r")
            if stripped == "</body>":
                begin = False
            if begin:
                keep.append(line)
            if stripped == "<body>":
                begin = True
        return "\n".join(keep)

    raise ValueError(
        "Unexpected value for layout '{0}'".format(layout))

418 

419 

def correct_indentation(text):
    """
    Removes the extra indentation of a text before it is given
    to :epkg:`docutils`.

    The function looks for title underlines (a line only made of one
    repeated character among ``-+=*^``) and takes the smallest
    indentation of these lines. If it is positive, at most that many
    leading spaces are removed from every line.

    @param      text        text to correct
    @return                 corrected text, *text* itself if no
                            indented underline was found
    """
    lines = text.split("\n")

    # indentations of all the lines which look like a title underline
    indents = set()
    for line in lines:
        expanded = line.replace("\t", " ")
        without_left = expanded.lstrip()
        marker = without_left.strip("\r\n\t ")
        if (marker and marker[0] in "-+=*^" and
                marker.count(marker[0]) == len(marker)):
            indents.add(len(expanded) - len(without_left))

    shift = min(indents) if indents else 0
    if shift <= 0:
        return text

    fixed = []
    for line in lines:
        # number of leading spaces (tabs are not counted here)
        leading = len(line) - len(line.lstrip(' '))
        fixed.append(line[min(leading, shift):])
    return "\n".join(fixed)

458 

459 

def docstring2html(function_or_string, format="html", fLOG=noLOG, writer="html",
                   keep_warnings=False, directives=None, language="en",
                   layout='docutils', document_name="<<string>>",
                   filter_nodes=None, **options):
    """
    Converts a docstring into a :epkg:`HTML` format.

    @param      function_or_string      function, class, method or docstring
    @param      format                  output format (``'html'`` or '``rawhtml``'),
                                        see below
    @param      fLOG                    logging function
    @param      writer                  ``'html'`` for :epkg:`HTML` format,
                                        ``'rst'`` for :epkg:`RST` format,
                                        ``'md'`` for :epkg:`MD` format
    @param      keep_warnings           keep_warnings in the final :epkg:`HTML`
    @param      directives              new directives to add, see @see fn rst2html
    @param      language                language
    @param      layout                  ``'docutils'``, ``'sphinx'``, ``'sphinx_body'``,
                                        see @see fn rst2html
    @param      document_name           document_name for this string
    @param      filter_nodes            transform the doctree before writing the results
                                        (layout must be 'sphinx')
    @param      options                 Sphinx options see `Render math as images
                                        <https://www.sphinx-doc.org/en/master/ext/math.html#module-sphinx.ext.imgmath>`_,
                                        a subset of options is used, see @see fn default_sphinx_options.
                                        By default, the theme (option *html_theme*) will be ``'basic'``.
    @return                             (str) :epkg:`HTML` format or (IPython.core.display.HTML)

    .. exref::
        :title: Produce HTML documentation for a function or class

        The following code can display the docstring in :epkg:`HTML` format
        to display it in a :epkg:`notebook`.

        ::

            from pyquickhelper.helpgen import docstring2html
            import sklearn.linear_model
            docstring2html(sklearn.linear_model.LogisticRegression)

    The output format is defined by:

    * ``'html'``: IPython :epkg:`HTML` object
    * ``'rawhtml'``: :epkg:`HTML` as text + style
    * ``'rst'``: :epkg:`rst`
    * ``'text'``: raw text, the docstring is returned without any conversion

    If *writer* is ``'doctree'``, ``'rst'`` or ``'md'``, the output of
    @see fn rst2html is returned as is whatever *format* is.
    The conversion is tried a second time after calling
    @see fn correct_indentation if the first attempt fails,
    ``HelpGenConvertError`` is raised if the second one fails too.
    """
    if isinstance(function_or_string, str):
        doc = function_or_string
    else:
        doc = function_or_string.__doc__

    if format == "text":
        return doc
    if doc is None:
        return ""

    # doxygen-like docstring --> RST
    javadoc = migrating_doxygen_doc(doc, "None", log=False)[1]
    from .utils_sphinx_doc import _private_migrating_doxygen_doc
    rst_rows = _private_migrating_doxygen_doc(
        javadoc.split("\n"), index_first_line=0, filename="None")
    ded = textwrap.dedent("\n".join(rst_rows))

    # same arguments for both attempts
    conversion = dict(fLOG=fLOG, writer=writer,
                      keep_warnings=keep_warnings, directives=directives,
                      language=language, filter_nodes=filter_nodes,
                      document_name=document_name,
                      layout=layout, **options)

    try:
        html = rst2html(ded, **conversion)
    except Exception:
        # we check the indentation
        ded = correct_indentation(ded)
        try:
            html = rst2html(ded, **conversion)
        except Exception as e:
            numbered = ["%04d %s" % (i + 1, _.strip("\n\r"))
                        for i, _ in enumerate(ded.split("\n"))]
            raise HelpGenConvertError(
                "Unable to process:\n{0}".format("\n".join(numbered))) from e

    if writer in ('doctree', 'rst', 'md'):
        return html

    if format == "html":
        from IPython.core.display import HTML
        return HTML(html)
    if format in ("rawhtml", 'rst', 'md', 'doctree'):
        return html
    raise ValueError(
        "Unexpected format: '{}', should be html, rawhtml, text, rst, "
        "md, doctree.".format(format))

561 

562 

def rst2rst_folder(rststring, folder, document_name="index", **options):
    """
    Converts a :epkg:`RST` string into simplified :epkg:`RST`.

    @param      rststring       :epkg:`rst` string
    @param      folder          existing folder, the builder writes the results
                                in this folder
    @param      document_name   main document
    @param      options         additional options (same as *conf.py*)
    @return                     converted string

    ``FileNotFoundError`` is raised if *folder* does not exist.
    NOTE(review): *document_name* is currently not forwarded,
    @see fn rst2html is always called with ``document_name="example"``.
    """
    if not os.path.exists(folder):
        raise FileNotFoundError(folder)

    def _write_into_folder(builder):
        # the builder output goes to the folder chosen by the caller
        builder.outdir = folder

    sphinx_options = dict(options)
    return rst2html(rststring, writer="rst", document_name="example",
                    update_builder=_write_into_folder, layout="sphinx",
                    **sphinx_options)