Coverage for src/pyquickhelper/ipythonhelper/run_notebook.py

Hot-keys on this page

r m x p toggle line displays

j k next/prev highlighted chunk

0 (zero) top of page

1 (one) first highlighted chunk

1"""

2@file

3@brief Functions to run notebooks.

4"""

5import time

6import os

7import warnings

8import re

9from io import StringIO

10import urllib.request as urllib_request

11from datetime import datetime, timedelta

13from ..loghelper.flog import noLOG

14from ..filehelper import explore_folder

15from .notebook_runner import NotebookRunner, NotebookKernelError

16from .notebook_exception import NotebookException

17from .notebook_helper import writes

20try:

21 from nbformat.reader import reads

22 from nbformat.reader import NotJSONError

23except ImportError: # pragma: no cover

24 from IPython.nbformat.reader import reads

25 from IPython.nbformat.reader import NotJSONError

def _cache_url_to_file(cache_urls, folder, fLOG=noLOG):
    """
    Downloads the files corresponding to the urls stored in *cache_urls*.

    @param      cache_urls      list of urls (or None)
    @param      folder          where to store the cached files
    @param      fLOG            logging function
    @return                     dictionary ``{ url: file }``,
                                None if *cache_urls* is None
    @raises     FileNotFoundError   if *folder* is None

    The function detects if the file was already downloaded.
    In that case, it does not do it a second time.
    The content is first written into a temporary ``.part`` file which is
    renamed once the download is complete: an interrupted or failed
    download never leaves a file a later call would consider as cached.
    """
    if cache_urls is None:
        return None
    if folder is None:
        raise FileNotFoundError(  # pragma: no cover
            "folder cannot be None")
    res = {}
    for url in cache_urls:
        # The local name is the last part of the url without
        # the characters ':' and '%'.
        local_file = "__cached__" + url.split("/")[-1]
        local_file = local_file.replace(":", "_").replace("%", "_")
        local_file = os.path.abspath(os.path.join(folder, local_file))
        if not os.path.exists(local_file):
            fLOG("download", url, "to", local_file)
            partial = local_file + ".part"
            try:
                # The url is opened first: nothing is created on disk
                # if the url cannot be reached.
                with urllib_request.urlopen(url) as remote, open(partial, "wb") as f:
                    chunk = remote.read(2 ** 21)
                    while len(chunk) > 0:
                        f.write(chunk)
                        chunk = remote.read(2 ** 21)
                os.replace(partial, local_file)
            except BaseException:
                # Removes the incomplete file before propagating the error.
                if os.path.exists(partial):
                    os.remove(partial)
                raise

        # to avoid having backslashes inside strings
        res[url] = "file:///" + local_file.replace("\\", "/")
    return res

def run_notebook(filename, profile_dir=None, working_dir=None, skip_exceptions=False,
                 outfilename=None, encoding="utf8", additional_path=None,
                 valid=None, clean_function=None, code_init=None,
                 fLOG=noLOG, kernel_name="python", log_level="30",
                 extended_args=None, cache_urls=None, replacements=None,
                 detailed_log=None, startup_timeout=300):
    """
    Runs a notebook end to end,
    it is inspired from module `runipy <https://github.com/paulgb/runipy/>`_.

    @param      filename            notebook filename
    @param      profile_dir         profile directory
    @param      working_dir         working directory
    @param      skip_exceptions     skip exceptions
    @param      outfilename         if not None, saves the output in this notebook
    @param      encoding            encoding for the notebooks
    @param      additional_path     additional paths for import
    @param      valid               if not None, valid is a function which returns whether
                                    or not the cell should be executed, if the function
                                    returns None, the execution of the notebook stops and
                                    the remaining cells are skipped
    @param      clean_function      function which cleans a cell's code before executing it (None for None)
    @param      code_init           code to run before the execution of the notebook as if it was a cell
    @param      fLOG                logging function
    @param      kernel_name         kernel name, it can be None
    @param      log_level           Choices: (0, 10, 20, 30=default, 40, 50, 'DEBUG', 'INFO', 'WARN', 'ERROR', 'CRITICAL')
    @param      extended_args       others arguments to pass to the command line ('--KernelManager.autorestart=True' for example),
                                    see :ref:`l-ipython_notebook_args` for a full list
    @param      cache_urls          list of urls to cache
    @param      replacements        list of additional replacements, list of tuple
    @param      detailed_log        a second function to log more information when executing the notebook,
                                    this should be a function with the same signature as ``print`` or None
    @param      startup_timeout     wait for this long for the kernel to be ready,
                                    see `wait_for_ready
                                    <https://github.com/jupyter/jupyter_client/blob/master/jupyter_client/blocking/client.py#L84>`_
    @return                         tuple (statistics, output)

    @warning The function calls `basicConfig
    <https://docs.python.org/3/library/logging.html#logging.basicConfig>`_.

    .. exref::
        :title: Run a notebook end to end

        ::

            from pyquickhelper.ipythonhelper import run_notebook
            run_notebook("source.ipynb", working_dir="temp",
                        outfilename="modified.ipynb",
                        additional_path=["custom_path"] )

    The function adds the local variable ``theNotebook`` with
    the absolute file name of the notebook.
    The execution of a notebook might fail because it relies on remote data
    specified by url. The function downloads the data first and stores it in
    folder *working_dir* (must not be None). The url string is replaced by
    the absolute path to the file.
    Everything which is logged during the execution is also gathered
    into a string returned as *output*.
    """
    # Replacements given by the user are merged with (and take precedence
    # over) the ones coming from the cached urls.
    cached_rep = _cache_url_to_file(cache_urls, working_dir, fLOG=fLOG)
    if replacements is not None:
        if cached_rep is None:
            cached_rep = replacements
        else:
            cached_rep.update(replacements)

    with open(filename, "r", encoding=encoding) as payload:
        try:
            nbc = payload.read()
        except UnicodeDecodeError as e:  # pragma: no cover
            raise NotebookException(
                "(2) Unable to read file '{0}' encoding='{1}'.".format(filename, encoding)) from e
    try:
        nb = reads(nbc)
    except NotJSONError as e:  # pragma: no cover
        raise NotebookException(
            "(1) Unable to read file '{0}' encoding='{1}'.".format(filename, encoding)) from e

    out = StringIO()

    def flogging(*args, **kwargs):
        # Keeps a copy of everything which is logged.
        # Arguments are converted into strings: a logging function
        # receives any kind of objects, not only strings.
        if len(args) > 0:
            out.write(" ".join(str(a) for a in args))
        if len(kwargs) > 0:
            out.write(str(kwargs))
        out.write("\n")
        fLOG(*args, **kwargs)

    def build_runner():
        return NotebookRunner(nb, profile_dir, working_dir, fLOG=flogging, filename=filename,
                              theNotebook=os.path.abspath(filename),
                              code_init=code_init, log_level=log_level,
                              extended_args=extended_args, kernel_name=kernel_name,
                              replacements=cached_rep, kernel=True, detailed_log=detailed_log,
                              startup_timeout=startup_timeout)

    try:
        nb_runner = build_runner()
    except NotebookKernelError:  # pragma: no cover
        # It fails. We try again once.
        nb_runner = build_runner()

    try:
        stat = nb_runner.run_notebook(skip_exceptions=skip_exceptions, additional_path=additional_path,
                                      valid=valid, clean_function=clean_function)

        if outfilename is not None:
            # The notebook is serialized before the file is opened:
            # a failure does not leave an empty output file.
            try:
                s = writes(nb_runner.nb)
            except NotebookException as e:  # pragma: no cover
                raise NotebookException(
                    "issue with notebook: '{}'".format(filename)) from e
            if isinstance(s, bytes):
                s = s.decode('utf8')
            with open(outfilename, 'w', encoding=encoding) as f:
                f.write(s)

    finally:
        # The kernel is stopped whatever happens.
        nb_runner.shutdown_kernel()

    return stat, out.getvalue()

188

189

def execute_notebook_list(folder, notebooks, clean_function=None, valid=None, fLOG=noLOG,
                          additional_path=None, deepfLOG=noLOG, kernel_name="python",
                          log_level="30", extended_args=None, cache_urls=None,
                          replacements=None, detailed_log=None, startup_timeout=300):
    """
    Executes a list of notebooks, one after the other.

    @param      folder              folder (where to execute the notebook, current folder for the notebook),
                                    the executed notebooks are saved in this folder with prefix ``out_``
    @param      notebooks           list of notebooks to execute (or a list of tuple(notebook, code which initializes the notebook))
    @param      clean_function      function which transforms the code before running it
    @param      valid               if not None, valid is a function which returns whether
                                    or not the cell should be executed, if the function
                                    returns None, the execution of the notebook stops and
                                    the remaining cells are skipped
    @param      fLOG                logging function
    @param      additional_path     path to add to *sys.path* before running the notebook
    @param      deepfLOG            logging function used to run the notebook
    @param      kernel_name         kernel name, it can be None
    @param      log_level           Choices: (0, 10, 20, 30=default, 40, 50, 'DEBUG', 'INFO', 'WARN', 'ERROR', 'CRITICAL')
    @param      extended_args       others arguments to pass to the command line ('--KernelManager.autorestart=True' for example),
                                    see :ref:`l-ipython_notebook_args` for a full list
    @param      cache_urls          list of urls to cache
    @param      replacements        additional replacements
    @param      detailed_log        detailed log
    @param      startup_timeout     wait for this long for the kernel to be ready,
                                    see `wait_for_ready
                                    <https://github.com/jupyter/jupyter_client/blob/master/jupyter_client/blocking/client.py#L84>`_
    @return                         dictionary of dictionaries ``{ notebook_name: { } }``

    Every notebook is associated to a dictionary. If the execution succeeded,
    it contains *success* (True), *output*, *name*, *etime* (execution time),
    *date* and the statistics returned by @see fn run_notebook.
    If it failed, it contains *success* (False), *etime*, *name*, *date*
    and *error*, the exception raised during the execution.
    The function does not raise this exception, it only stores it.

    The signature of function ``valid_cell`` is::

        def valid_cell(cell):
            return True or False or None to stop execution of the notebook before this cell

    The signature of function ``clean_function`` is::

        def clean_function(cell):
            return new_cell_content

    The execution of a notebook might fail because it relies on remote data
    specified by url. The function downloads the data first and stores it in
    folder *folder* (must not be None if *cache_urls* is specified).
    The url string is replaced by the absolute path to the file.
    """
    if additional_path is None:
        additional_path = []

    # Urls are downloaded once, before going through the notebooks.
    _cache_url_to_file(cache_urls, folder, fLOG=fLOG)

    results = {}
    for position, entry in enumerate(notebooks, start=1):
        if isinstance(entry, tuple):
            note, code_init = entry
        else:
            note, code_init = entry, None
        short_name = os.path.split(note)[-1]
        fLOG("[execute_notebook_list] {0}/{1} - {2}".format(
            position, len(notebooks), short_name))
        outfile = os.path.join(folder, "out_" + short_name)
        begin = time.perf_counter()
        try:
            stat, out = run_notebook(
                note, working_dir=folder, outfilename=outfile,
                additional_path=additional_path, valid=valid,
                clean_function=clean_function, fLOG=deepfLOG,
                code_init=code_init, kernel_name=kernel_name,
                log_level=log_level, extended_args=extended_args,
                cache_urls=cache_urls, replacements=replacements,
                detailed_log=detailed_log, startup_timeout=startup_timeout)
            if not os.path.exists(outfile):
                raise FileNotFoundError(outfile)  # pragma: no cover
            record = {
                "success": True,
                "output": out,
                "name": note,
                "etime": time.perf_counter() - begin,
                "date": datetime.now(),
            }
            record.update(stat)
            results[note] = record
        except Exception as e:
            # The failure is stored, the next notebooks are still executed.
            results[note] = {
                "success": False,
                "etime": time.perf_counter() - begin,
                "error": e,
                "name": note,
                "date": datetime.now(),
            }
    return results

272

273

def _get_dump_default_path(dump):
    """
    Proposes a default location to dump results about notebooks execution.

    @param      dump    location of the dump or module
    @return             location of the dump

    The result is equal to the input if *dump* is not a module
    (it has no attributes ``__file__`` and ``__name__``), it is
    considered as a path (or None). If *dump* is a module, the function
    returns None on :epkg:`travis` or :epkg:`appveyor`, otherwise
    a file ``notebook.<module name>.txt`` in a folder ``_notebook_dumps``
    created two or three levels above the module location.
    """
    if not (hasattr(dump, '__file__') and hasattr(dump, '__name__')):
        # Not a module: already a path (or None).
        return dump

    # Default value. We check it is not travis or appveyor.
    from ..pycode import is_travis_or_appveyor
    if is_travis_or_appveyor():
        return None

    # We guess the package name.
    name = dump.__name__.split('.')[-1]
    loc = os.path.dirname(dump.__file__)
    parent, last = os.path.split(loc)
    if last == 'src' or os.path.split(parent)[-1] == 'src':
        # The module is stored in a subfolder 'src',
        # the dump goes one level higher.
        fold = os.path.join(loc, "..", "..", "..", "_notebook_dumps")
    else:
        # This should be a parameter.
        fold = os.path.join(loc, "..", "..", "_notebook_dumps")
    try:
        # Several processes may create the folder at the same time,
        # checking the existence before creating it is not safe.
        os.mkdir(fold)
    except FileExistsError:
        pass
    return os.path.join(fold, "notebook.{0}.txt".format(name))

310

311

def _existing_dump(dump):
    """
    Loads an existing dump.

    @param      dump    filename (tab separated values, utf-8)
    @return             :epkg:`pandas:DataFrame` or None if the file does not exist
    @raises     RuntimeError    if the file exists but cannot be read

    Lines which cannot be parsed are skipped with a warning.
    """
    import pandas
    from pandas.errors import ParserError

    def read_file(path):
        try:
            return pandas.read_csv(path, sep="\t", encoding="utf-8")
        except ParserError:  # pragma: no cover
            # Second try, bad lines are skipped.
            try:
                # pandas >= 1.3
                return pandas.read_csv(
                    path, sep="\t", encoding="utf-8", on_bad_lines="warn")
            except TypeError:
                # Older versions of pandas do not know *on_bad_lines*.
                return pandas.read_csv(
                    path, sep="\t", encoding="utf-8",
                    error_bad_lines=False, warn_bad_lines=True)

    if not os.path.exists(dump):
        return None

    # There might be some risk here to see another process writing the
    # file at the same time.
    try:
        df = read_file(dump)
    except PermissionError:  # pragma: no cover
        # We try again once.
        time.sleep(10)
        try:
            df = read_file(dump)
        except Exception as e:
            raise RuntimeError(
                "Unable to read '{0}' due to '{1}'".format(dump, e)) from e
    except Exception as e:  # pragma: no cover
        raise RuntimeError(
            "Unable to read '{0}' due to '{1}'".format(dump, e)) from e

    return df

350

351

def execute_notebook_list_finalize_ut(res, dump=None, fLOG=noLOG):
    """
    Checks the list of results and raises an exception if one failed.
    This is meant to be used in unit tests.

    @param      res     output of @see fn execute_notebook_list
    @param      dump    if not None, dump the results of the execution in a flat file
    @param      fLOG    logging function
    @raises     RuntimeError    if *res* is empty

    If one notebook failed, the function raises the exception stored
    for the first failing notebook (notebooks being sorted by name)
    and nothing is dumped.

    The dump relies on :epkg:`pandas` and appends the results to a previous dump.
    If *dump* is a module, the function stores the output of the execution in a default
    location only if the process does not run on :epkg:`travis` or :epkg:`appveyor`.
    The default location is something like:

    .. runpython::

        from pyquickhelper.ipythonhelper.run_notebook import _get_dump_default_path
        import pyquickhelper
        print(_get_dump_default_path(pyquickhelper))
    """
    if len(res) == 0:
        raise RuntimeError("No notebook was run.")  # pragma: no cover

    ordered = sorted(res.items())
    fails = [(os.path.split(k)[-1], v) for k, v in ordered if "error" in v]
    for f in fails:
        fLOG(f)
    for k, v in ordered:
        name = os.path.split(k)[-1]
        fLOG(name, v.get("success", None), v.get("etime", None))
    if len(fails) > 0:
        raise fails[0][1]["error"]

    dump = _get_dump_default_path(dump)
    if dump is None:
        return

    import pandas
    df = _existing_dump(dump)
    new_df = pandas.DataFrame(data=list(res.values()))

    # We replace every EOL, the dump stores one execution per line.
    # Columns of type object may contain something else than strings
    # (None, numbers), these values are left unchanged.
    def eol_replace(t):
        if isinstance(t, str):
            return t.replace("\r", "").replace("\n", "\\n")
        return t

    for c in new_df.select_dtypes(include=['object']).columns:
        new_df[c] = new_df[c].map(eol_replace)

    if df is None:
        df = new_df
    else:
        df = pandas.concat([df, new_df]).copy()

    # There could be a conflict while several
    # processes in parallel could overwrite the same file.
    if not os.path.exists(dump):
        df.to_csv(dump, sep="\t", encoding="utf-8", index=False)
    else:
        # There might be some risk here to see another process
        # writing or reading the file at the same time.
        # Module filelock does not work in this case.
        # locket (https://github.com/mwilliamson/locket.py) was not tried.
        try:
            df.to_csv(dump, sep="\t", encoding="utf-8",  # pylint: disable=E1101
                      index=False)
        except PermissionError:  # pragma: no cover
            # We try again once.
            time.sleep(7)
            df.to_csv(dump, sep="\t", encoding="utf-8",  # pylint: disable=E1101
                      index=False)

422

423

def notebook_coverage(module_or_path, dump=None, too_old=30):
    """
    Extracts a list of notebooks and merges with a list of runs dumped by
    function @see fn execute_notebook_list_finalize_ut.

    @param      module_or_path      a module, a path or a list of notebooks
    @param      dump                dump (or None to get the location by default)
    @param      too_old             drop executions older than *too_old* days from now
    @return                         dataframe, one row per notebook
    @raises     FileNotFoundError   if the notebooks of a module cannot be found
    @raises     ValueError          if two notebooks share the same key or name

    If *module_or_path* is a module, the function will get a list notebooks
    assuming it follows the same design as :epkg:`pyquickhelper`:
    notebooks are stored in ``_doc/notebooks``, two levels above the module.
    Notebooks and executions are merged on the lower-cased last three
    parts of their path, only the most recent execution is kept.
    """
    dump = _get_dump_default_path(module_or_path if dump is None else dump)

    # Create the list of existing notebooks.
    if isinstance(module_or_path, list):
        # NOTE(review): the second item of a tuple is taken as the notebook,
        # confirm this matches what callers put in these tuples.
        nbs = [_[1] if isinstance(_, tuple) else _ for _ in module_or_path]
    elif hasattr(module_or_path, '__file__') and hasattr(module_or_path, '__name__'):
        fold = os.path.dirname(module_or_path.__file__)
        _doc = os.path.join(fold, "..", "..", "_doc")
        if not os.path.exists(_doc):
            raise FileNotFoundError(  # pragma: no cover
                "Unable to find path '{0}' for module '{1}'".format(
                    _doc, module_or_path))
        nbpath = os.path.join(_doc, "notebooks")
        if not os.path.exists(nbpath):
            raise FileNotFoundError(  # pragma: no cover
                "Unable to find path '{0}' for module '{1}'".format(
                    nbpath, module_or_path))
        nbs = explore_folder(nbpath, ".*[.]ipynb$")[1]
    else:
        nbpath = module_or_path
        nbs = explore_folder(nbpath, ".*[.]ipynb$")[1]

    def merge_key(path):
        # Folders might be different so the key only uses
        # the last three parts of the path, lower case.
        if not isinstance(path, str):
            return path
        parts = os.path.normpath(path).replace("\\", "/").split("/")
        return "/".join(parts[-3:]).lower()

    import pandas
    dfnb = pandas.DataFrame(data=dict(notebooks=nbs))
    dfnb["notebooks"] = dfnb["notebooks"].apply(os.path.normpath)
    # Plain substring search: the dot must not be interpreted
    # as a regular expression wildcard.
    dfnb = dfnb[~dfnb.notebooks.str.contains(
        ".ipynb_checkpoints", regex=False)].copy()
    dfnb["key"] = dfnb["notebooks"].apply(merge_key)

    # There might be some risk here to see another process writing the
    # file at the same time.
    try:
        dfall = pandas.read_csv(dump, sep="\t", encoding="utf-8")
    except PermissionError:  # pragma: no cover
        # We try again once.
        time.sleep(10)
        dfall = pandas.read_csv(dump, sep="\t", encoding="utf-8")

    # We drop too old execution.
    # Dates are compared as strings, this presumably relies on the dump
    # storing them with the year first (TODO confirm).
    old = datetime.now() - timedelta(too_old)
    old = "%04d-%02d-%02d" % (old.year, old.month, old.day)
    dfall = dfall[dfall.date > old].copy()

    # We add a key to merge.
    dfall["name"] = dfall["name"].apply(os.path.normpath)
    dfall["key"] = dfall["name"].apply(merge_key)

    # We keep the last execution.
    gr = dfall.sort_values("date", ascending=False).groupby(
        "key", as_index=False).first().reset_index(drop=True).copy()
    gr = gr.drop("name", axis=1)

    # Folders might be different so we merge on the last part of the path.
    merged = dfnb.merge(gr, left_on="key", right_on="key", how="outer")
    # Executions of notebooks which do not exist anymore are dropped.
    merged = merged[merged.notebooks.notnull()]
    merged = merged.sort_values("key").reset_index(drop=True).copy()

    if "last_name" not in merged.columns:
        merged["last_name"] = merged["key"].apply(
            lambda x: os.path.split(x)[-1])

    # We check there is no duplicates in merged.
    for c in ["key", "last_name"]:
        names = [_ for _ in merged[c] if isinstance(_, str)]
        if len(names) > len(set(names)):
            raise ValueError(  # pragma: no cover
                "Unexpected duplicated names in column '{1}'\n{0}".format(
                    "\n".join(sorted(names)), c))

    return merged

515

516

def badge_notebook_coverage(df, image_name):
    """
    Draws a small badge (70x20 pixels) which summarizes the notebook coverage,
    the proportion of run cells.

    @param      df          output of @see fn notebook_coverage, the function
                            sums columns *nbcell*, *nbrun*, *nbvalid*
    @param      image_name  image to produce
    @return                 coverage estimation (at most 100)

    The badge displays the proportion of run cells and, if different,
    the proportion of valid cells. The background color depends on
    the proportion of run cells.
    The function relies on module :epkg:`Pillow`.
    """
    nb_cells = df["nbcell"].sum()
    nb_run = df["nbrun"].sum()
    nb_valid = df["nbvalid"].sum()
    if nb_cells > 0:
        cov = nb_run * 100.0 / nb_cells
    else:
        cov = 1.0
    dcov = min(100., cov)
    if nb_cells > 0:
        val = nb_valid * 100.0 / nb_cells
    else:
        val = 1.0

    with warnings.catch_warnings():
        warnings.simplefilter("ignore", ImportWarning)
        from PIL import Image, ImageFont, ImageDraw

    # Upper bound of every coverage range and its color,
    # the last color is used above 95%.
    thresholds = [
        (60, (200, 87, 51)),
        (70, (200, 156, 18)),
        (75, (140, 140, 140)),
        (80, (88, 171, 171)),
        (85, (88, 140, 86)),
        (90, (80, 155, 86)),
        (95, (80, 190, 73)),
    ]
    color = (20, 190, 50)
    for bound, rgb in thresholds:
        if cov <= bound:
            color = rgb
            break

    img = Image.new(mode='RGB', size=(70, 20), color=color)
    im = ImageDraw.Draw(img)
    font = ImageFont.load_default()

    def as_percent(value):
        # Integer not greater than 100 or "?" if the conversion fails.
        try:
            return min(int(value), 100)
        except ValueError:  # pragma: no cover
            return "?"

    cov = as_percent(cov)
    val = as_percent(val)
    if cov == val:
        label = "NB: {0}% ".format(cov)
    else:
        label = "NB:{0}%-{1}% ".format(cov, val)
    im.text((3, 4), label, (255, 255, 255), font=font)
    img.save(image_name)
    return dcov

574

575

def get_additional_paths(modules):
    """
    Builds the list of paths to add before running the notebooks,
    one for every module of the list.

    @param      modules     list of modules (they must define ``__file__``)
    @return                 list of paths, the parent folder of the folder
                            which contains every module file
    """
    return [
        os.path.normpath(os.path.join(os.path.dirname(mod.__file__), ".."))
        for mod in modules
    ]

586

587

def retrieve_notebooks_in_folder(folder, posreg=".*[.]ipynb$", negreg=None):
    """
    Retrieves notebooks in a test folder.

    @param      folder      folder, subfolders are not explored
    @param      posreg      regular expression, a name is kept if the expression
                            is found in it
    @param      negreg      regular expression, a name is discarded if the expression
                            is found in it, None to discard nothing
    @return                 list of found notebooks (*folder* joined with the name),
                            sorted by name
    @raises     FileNotFoundError   if no name was selected

    Names are sorted: the result does not depend on the order
    the file system returns them.
    """
    pos = re.compile(posreg)
    neg = re.compile(negreg) if negreg is not None else None
    res = [os.path.join(folder, name)
           for name in sorted(os.listdir(folder))
           if pos.search(name) and (neg is None or not neg.search(name))]
    if len(res) == 0:
        raise FileNotFoundError(  # pragma: no cover
            "No notebook found in '{0}'.".format(folder))
    return res

Coverage for src/pyquickhelper/ipythonhelper/run_notebook.py : 96%

245 statements

Coverage for src/pyquickhelper/ipythonhelper/run_notebook.py : 96%

245 statements 236 run 9 missing 38 excluded

245 statements