Coverage for pyquickhelper/ipythonhelper/run_notebook.py

1"""

2@file

3@brief Functions to run notebooks.

4"""

5import time

6import os

7import warnings

8import re

9from io import StringIO

10import urllib.request as urllib_request

11from datetime import datetime, timedelta

13from ..loghelper.flog import noLOG

14from ..filehelper import explore_folder

15from .notebook_runner import NotebookRunner, NotebookKernelError

16from .notebook_exception import NotebookException

17from .notebook_helper import writes

20try:

21 from nbformat.reader import reads

22 from nbformat.reader import NotJSONError

23except ImportError: # pragma: no cover

24 from IPython.nbformat.reader import reads

25 from IPython.nbformat.reader import NotJSONError

def _cache_url_to_file(cache_urls, folder, fLOG=noLOG):
    """
    Downloads file corresponding to url stored in *cache_urls*.

    @param      cache_urls      list of urls (None to do nothing)
    @param      folder          where to store the cached files
    @param      fLOG            logging function
    @return                     dictionary ``{ url: file }`` where *file* is
                                a ``file:///`` url pointing to the local copy,
                                None if *cache_urls* is None
    @raise      FileNotFoundError if *folder* is None

    The function detects if the file was already downloaded.
    In that case, it does not do it a second time.
    The content is first written into a temporary file (suffix ``.part``)
    which is renamed only once the download is complete. An interrupted
    download never leaves a truncated file which would be considered
    as a valid cached copy by the next call.
    """
    if cache_urls is None:
        return None
    if folder is None:
        raise FileNotFoundError(  # pragma: no cover
            "folder cannot be None")
    res = {}
    for url in cache_urls:
        local_file = "__cached__" + url.split("/")[-1]
        local_file = local_file.replace(":", "_").replace("%", "_")
        local_file = os.path.abspath(os.path.join(folder, local_file))
        if not os.path.exists(local_file):
            fLOG("download", url, "to", local_file)
            partial = local_file + ".part"
            try:
                # The url is opened first: nothing is created on disk
                # if the server cannot be reached.
                with urllib_request.urlopen(url) as fu:
                    with open(partial, "wb") as f:
                        for chunk in iter(lambda: fu.read(2 ** 21), b""):
                            f.write(chunk)
                os.replace(partial, local_file)
            finally:
                # Only true if the download or the renaming failed.
                if os.path.exists(partial):
                    os.remove(partial)

        # to avoid having backslashes inside strings
        res[url] = "file:///" + local_file.replace("\\", "/")
    return res

def run_notebook(filename, profile_dir=None, working_dir=None, skip_exceptions=False,
                 outfilename=None, encoding="utf8", additional_path=None,
                 valid=None, clean_function=None, code_init=None,
                 fLOG=noLOG, kernel_name="python", log_level="30",
                 extended_args=None, cache_urls=None, replacements=None,
                 detailed_log=None, startup_timeout=30, raise_exception=False):
    """
    Runs a notebook end to end,
    it is inspired from module `runipy <https://github.com/paulgb/runipy/>`_.

    @param      filename            notebook filename
    @param      profile_dir         profile directory
    @param      working_dir         working directory
    @param      skip_exceptions     skip exceptions
    @param      outfilename         if not None, saves the output in this notebook
    @param      encoding            encoding for the notebooks
    @param      additional_path     additional paths for import
    @param      valid               if not None, valid is a function which returns whether
                                    or not the cell should be executed, if the function
                                    returns None, the execution of the notebook stops
                                    and the remaining cells are skipped
    @param      clean_function      function which cleans a cell's code before executing it (None for None)
    @param      code_init           code to run before the execution of the notebook as if it was a cell
    @param      fLOG                logging function
    @param      kernel_name         kernel name, it can be None
    @param      log_level           Choices: (0, 10, 20, 30=default, 40, 50, 'DEBUG', 'INFO', 'WARN', 'ERROR', 'CRITICAL')
    @param      extended_args       others arguments to pass to the command line ('--KernelManager.autorestart=True' for example),
                                    see :ref:`l-ipython_notebook_args` for a full list
    @param      cache_urls          list of urls to cache
    @param      replacements        list of additional replacements, list of tuple
    @param      detailed_log        a second function to log more information when executing the notebook,
                                    this should be a function with the same signature as ``print`` or None
    @param      startup_timeout     wait for this long for the kernel to be ready,
                                    see `wait_for_ready
                                    <https://github.com/jupyter/jupyter_client/blob/master/jupyter_client/blocking/client.py#L84>`_
    @param      raise_exception     raise an exception if a cell raises one
    @return                         tuple (statistics, output), *output* is the text
                                    sent to the logging function during the execution
    @raise      NotebookException   if the notebook cannot be decoded, parsed or serialized

    @warning The function calls `basicConfig
    <https://docs.python.org/3/library/logging.html#logging.basicConfig>`_.

    .. exref::
        :title: Run a notebook end to end

        ::

            from pyquickhelper.ipythonhelper import run_notebook
            run_notebook("source.ipynb", working_dir="temp",
                        outfilename="modified.ipynb",
                        additional_path=["custom_path"] )

    The function adds the local variable ``theNotebook`` with
    the absolute file name of the notebook.
    The execution of a notebook might fail because it relies on remote data
    specified by url. The function downloads the data first and stores it in
    folder *working_dir* (must not be None). The url string is replaced by
    the absolute path to the file.
    """
    # Replacements given by the user are merged with the ones
    # coming from the cached urls (user values win on conflicts).
    cached_rep = _cache_url_to_file(cache_urls, working_dir, fLOG=fLOG)
    if cached_rep is None:
        cached_rep = replacements
    elif replacements is not None:
        cached_rep.update(replacements)

    with open(filename, "r", encoding=encoding) as payload:
        try:
            nbc = payload.read()
        except UnicodeDecodeError as e:  # pragma: no cover
            raise NotebookException(
                f"(2) Unable to read file '{filename}' encoding='{encoding}'.") from e
    try:
        nb = reads(nbc)
    except NotJSONError as e:  # pragma: no cover
        raise NotebookException(
            f"(1) Unable to read file '{filename}' encoding='{encoding}'.") from e

    out = StringIO()

    def flogging(*args, **kwargs):
        # Keeps a copy of everything which is logged. Arguments are
        # converted into strings first: a logging function follows the
        # signature of print and may receive numbers or any other object.
        if len(args) > 0:
            out.write(" ".join(str(a) for a in args))
        if len(kwargs) > 0:
            out.write(str(kwargs))
        out.write("\n")
        fLOG(*args, **kwargs)

    def create_runner():
        return NotebookRunner(
            nb, profile_dir, working_dir, fLOG=flogging, filename=filename,
            theNotebook=os.path.abspath(filename),
            code_init=code_init, log_level=log_level,
            extended_args=extended_args, kernel_name=kernel_name,
            replacements=cached_rep, kernel=True, detailed_log=detailed_log,
            startup_timeout=startup_timeout, raise_exception=raise_exception)

    try:
        nb_runner = create_runner()
    except NotebookKernelError:  # pragma: no cover
        # It fails. We try again once.
        nb_runner = create_runner()

    try:
        stat = nb_runner.run_notebook(
            skip_exceptions=skip_exceptions, additional_path=additional_path,
            valid=valid, clean_function=clean_function)

        if outfilename is not None:
            # The notebook is serialized before the file is opened:
            # a failure does not leave an empty output file.
            try:
                s = writes(nb_runner.nb)
            except NotebookException as e:  # pragma: no cover
                raise NotebookException(
                    f"issue with notebook: '{filename}'") from e
            if isinstance(s, bytes):
                s = s.decode('utf8')
            with open(outfilename, 'w', encoding=encoding) as f:
                f.write(s)

    finally:
        nb_runner.shutdown_kernel()

    return stat, out.getvalue()

192

193

def execute_notebook_list(folder, notebooks, clean_function=None, valid=None, fLOG=noLOG,
                          additional_path=None, deepfLOG=noLOG, kernel_name="python",
                          log_level="30", extended_args=None, cache_urls=None,
                          replacements=None, detailed_log=None, startup_timeout=300):
    """
    Executes a list of notebooks.

    @param      folder              folder (where to execute the notebook, current folder for the notebook)
    @param      notebooks           list of notebooks to execute (or a list of tuple(notebook, code which initializes the notebook))
    @param      clean_function      function which transform the code before running it
    @param      valid               if not None, valid is a function which returns whether
                                    or not the cell should be executed, if the function
                                    returns None, the execution of the notebook stops
                                    and the remaining cells are skipped
    @param      fLOG                logging function
    @param      deepfLOG            logging function used to run the notebook
    @param      additional_path     path to add to *sys.path* before running the notebook
    @param      kernel_name         kernel name, it can be None
    @param      log_level           Choices: (0, 10, 20, 30=default, 40, 50, 'DEBUG', 'INFO', 'WARN', 'ERROR', 'CRITICAL')
    @param      extended_args       others arguments to pass to the command line ('--KernelManager.autorestart=True' for example),
                                    see :ref:`l-ipython_notebook_args` for a full list
    @param      cache_urls          list of urls to cache
    @param      replacements        additional replacements
    @param      detailed_log        detailed log
    @param      startup_timeout     wait for this long for the kernel to be ready,
                                    see `wait_for_ready
                                    <https://github.com/jupyter/jupyter_client/blob/master/jupyter_client/blocking/client.py#L84>`_
    @return                         dictionary of dictionaries ``{ notebook_name: { } }``

    Every notebook is associated to a dictionary with keys
    *success*, *etime* (execution time), *name*, *date*.
    If *success* is True, the dictionary also contains *output* and
    the statistics returned by @see fn run_notebook. If *success* is False,
    key *error* holds the exception raised during the execution.

    The signature of function ``valid_cell`` is::

        def valid_cell(cell):
            return True or False or None to stop execution of the notebook before this cell

    The signature of function ``clean_function`` is::

        def clean_function(cell):
            return new_cell_content

    The execution of a notebook might fail because it relies on remote data
    specified by url. The function downloads the data first and stores it in
    folder *folder* (must not be None). The url string is replaced by
    the absolute path to the file.
    """
    if additional_path is None:
        additional_path = []

    # urls are cached once, before going through the list of notebooks
    _cache_url_to_file(cache_urls, folder, fLOG=fLOG)

    results = {}
    for rank, entry in enumerate(notebooks, start=1):
        # an entry is either a notebook or a pair (notebook, initialization code)
        note, code_init = entry if isinstance(entry, tuple) else (entry, None)
        short_name = os.path.split(note)[-1]
        fLOG(f"[execute_notebook_list] {rank}/{len(notebooks)} - {short_name}")
        outfile = os.path.join(folder, "out_" + short_name)
        begin = time.perf_counter()
        try:
            stat, out = run_notebook(
                note, working_dir=folder, outfilename=outfile,
                additional_path=additional_path, valid=valid,
                clean_function=clean_function, fLOG=deepfLOG,
                code_init=code_init, kernel_name=kernel_name,
                log_level=log_level, extended_args=extended_args,
                cache_urls=cache_urls, replacements=replacements,
                detailed_log=detailed_log, startup_timeout=startup_timeout)
            if not os.path.exists(outfile):
                raise FileNotFoundError(outfile)  # pragma: no cover
            record = dict(success=True, output=out, name=note,
                          etime=time.perf_counter() - begin,
                          date=datetime.now())
            record.update(stat)
            results[note] = record
        except Exception as e:
            # A failing notebook must not prevent the others from running,
            # the exception is stored and checked later.
            results[note] = dict(success=False,
                                 etime=time.perf_counter() - begin,
                                 error=e, name=note, date=datetime.now())
    return results

276

277

278def _get_dump_default_path(dump):

279 """

280 Proposes a default location to dump results about notebooks execution.

281

282 @param dump location of the dump or module.

283 @return location of the dump

284

285 The result might be equal to the input if *dump* is already path.

286 """

287 if hasattr(dump, '__file__') and hasattr(dump, '__name__'):

288 # Default value. We check it is none travis or appveyor.

289 from ..pycode import is_travis_or_appveyor

290 if is_travis_or_appveyor():

291 dump = None

292 if dump is not None:

293 # We guess the package name.

294 name = dump.__name__.split('.')[-1]

295 loc = os.path.dirname(dump.__file__)

296 src_loc = os.path.split(loc)

297 if src_loc[-1] == 'src':

298 # We choose a path for the dumps in a way

299 fold = os.path.join(loc, "..", "..", "..", "_notebook_dumps")

300 else:

301 src_loc_loc = os.path.split(src_loc[0])

302 if src_loc_loc[-1] == 'src':

303 # We choose a path for the dumps in a way

304 fold = os.path.join(

305 loc, "..", "..", "..", "_notebook_dumps")

306 else:

307 # This should be a parameter.

308 fold = os.path.join(loc, "..", "..", "_notebook_dumps")

309 if not os.path.exists(fold):

310 os.mkdir(fold)

311 dump = os.path.join(fold, f"notebook.{name}.txt")

312 return dump

313 return dump

314

315

316def _existing_dump(dump):

317 """

318 Loads an existing dump.

319

320 @param dump filename

321 @return :epkg:`pandas:DataFrame`

322 """

323 import pandas

324 from pandas.errors import ParserError

325

326 def read_file(dump):

327 try:

328 df = pandas.read_csv(dump, sep="\t", encoding="utf-8")

329 except ParserError: # pragma: no cover

330 df = pandas.read_csv(

331 dump, sep="\t", encoding="utf-8", error_bad_lines=False, warn_bad_lines=True)

332 return df

333

334 if os.path.exists(dump):

335 # There might be some risk here to see another process writing the

336 # file at the same time.

337 try:

338 df = read_file(dump)

339 except PermissionError: # pragma: no cover

340 # We try again once.

341 time.sleep(10)

342 try:

343 df = read_file(dump)

344 except Exception as e:

345 raise RuntimeError(

346 f"Unable to read '{dump}' due to '{e}'") from e

347 except Exception as e: # pragma: no cover

348 raise RuntimeError(

349 f"Unable to read '{dump}' due to '{e}'") from e

350 else:

351 df = None

352

353 return df

354

355

def execute_notebook_list_finalize_ut(res, dump=None, fLOG=noLOG):
    """
    Checks the list of results and raises an exception if one failed.
    This is meant to be used in unit tests.

    @param      res     output of @see fn execute_notebook_list
    @param      dump    if not None, dump the results of the execution in a flat file
    @param      fLOG    logging function
    @raise      RuntimeError if *res* is empty, otherwise the exception
                        stored for the first failing notebook (sorted by name)

    The dump relies on :epkg:`pandas` and append the results a previous dump.
    Nothing is dumped if one notebook failed.
    If *dump* is a module, the function stores the output of the execution in a default
    location only if the process does not run on :epkg:`travis` or :epkg:`appveyor`.
    The default location is something like:

    .. runpython::

        from pyquickhelper.ipythonhelper.run_notebook import _get_dump_default_path
        import pyquickhelper
        print(_get_dump_default_path(pyquickhelper))
    """
    if len(res) == 0:
        raise RuntimeError("No notebook was run.")  # pragma: no cover

    ordered = sorted(res.items())
    # a failing notebook is a result holding key 'error'
    fails = [(os.path.split(k)[-1], v) for k, v in ordered if "error" in v]
    for f in fails:
        fLOG(f)
    for k, v in ordered:
        name = os.path.split(k)[-1]
        fLOG(name, v.get("success", None), v.get("etime", None))
    if len(fails) > 0:
        raise fails[0][1]["error"]

    dump = _get_dump_default_path(dump)
    if dump is None:
        return

    import pandas
    df = _existing_dump(dump)
    new_df = pandas.DataFrame(data=list(res.values()))

    # We replace every EOL to keep one line per notebook in the flat file.
    # A column of type object may hold values which are not strings
    # (None, missing values, ...), they are left unchanged.
    def eol_replace(t):
        if isinstance(t, str):
            return t.replace("\r", "").replace("\n", "\\n")
        return t

    for c in new_df.select_dtypes(include=['object']).columns:
        new_df[c] = new_df[c].apply(eol_replace)

    if df is None:
        df = new_df
    else:
        df = pandas.concat([df, new_df]).copy()

    # There could be a conflict while several
    # processes in parallel could overwrite the same file.
    if not os.path.exists(dump):
        df.to_csv(dump, sep="\t", encoding="utf-8", index=False)
    else:
        # There might be some risk here to see another process
        # writing or reading the file at the same time.
        # Module filelock does not work in this case.
        # locket (https://github.com/mwilliamson/locket.py) was not tried.
        try:
            df.to_csv(dump, sep="\t", encoding="utf-8",  # pylint: disable=E1101
                      index=False)
        except PermissionError:  # pragma: no cover
            # We try again once.
            time.sleep(7)
            df.to_csv(dump, sep="\t", encoding="utf-8",  # pylint: disable=E1101
                      index=False)

426

427

def notebook_coverage(module_or_path, dump=None, too_old=30):
    """
    Extracts a list of notebooks and merges with a list of runs dumped by
    function @see fn execute_notebook_list_finalize_ut.

    @param      module_or_path      a module, a path or a list of notebooks
    @param      dump                dump (or None to get the location by default)
    @param      too_old             drop executions older than *too_old* days from now
    @return                         dataframe, one row per notebook merged with
                                    its most recent execution if there is one
    @raise      FileNotFoundError   if the notebooks of a module cannot be found
    @raise      ValueError          if two notebooks share the same key or the same name

    If *module_or_path* is a module, the function will get a list notebooks
    assuming it follows the same design as :epkg:`pyquickhelper`.
    """
    dump = _get_dump_default_path(module_or_path if dump is None else dump)

    def merge_key(path):
        # Folders might differ from one machine to another: the key only
        # keeps the last three parts of the path, lower case.
        # Anything which is not a string is left unchanged.
        if not isinstance(path, str):
            return path
        pieces = os.path.normpath(path).replace("\\", "/").split("/")
        return "/".join(pieces[-3:]).lower()

    # Create the list of existing notebooks.
    if isinstance(module_or_path, list):
        nbs = [item[1] if isinstance(item, tuple) else item
               for item in module_or_path]
    else:
        if hasattr(module_or_path, '__file__') and hasattr(module_or_path, '__name__'):
            fold = os.path.dirname(module_or_path.__file__)
            _doc = os.path.join(fold, "..", "..", "_doc")
            if not os.path.exists(_doc):
                raise FileNotFoundError(  # pragma: no cover
                    f"Unable to find path '{_doc}' for module '{module_or_path}'")
            nbpath = os.path.join(_doc, "notebooks")
            if not os.path.exists(nbpath):
                raise FileNotFoundError(  # pragma: no cover
                    f"Unable to find path '{nbpath}' for module '{module_or_path}'")
        else:
            nbpath = module_or_path
        nbs = explore_folder(nbpath, ".*[.]ipynb$")[1]

    import pandas
    dfnb = pandas.DataFrame(data=dict(notebooks=nbs))
    dfnb["notebooks"] = dfnb["notebooks"].apply(os.path.normpath)
    is_checkpoint = dfnb.notebooks.str.contains(".ipynb_checkpoints")
    dfnb = dfnb[~is_checkpoint].copy()
    dfnb["key"] = dfnb["notebooks"].apply(merge_key)

    # There might be some risk here to see another process writing the
    # file at the same time.
    try:
        dfall = pandas.read_csv(dump, sep="\t", encoding="utf-8")
    except PermissionError:  # pragma: no cover
        # We try again once.
        time.sleep(10)
        dfall = pandas.read_csv(dump, sep="\t", encoding="utf-8")

    # We drop too old execution (dates are compared as strings).
    limit = datetime.now() - timedelta(too_old)
    limit = f"{limit.year:04d}-{limit.month:02d}-{limit.day:02d}"
    dfall = dfall[dfall.date > limit].copy()

    # We add a key to merge.
    dfall["name"] = dfall["name"].apply(os.path.normpath)
    dfall["key"] = dfall["name"].apply(merge_key)

    # We keep the last execution.
    recent_first = dfall.sort_values("date", ascending=False)
    gr = recent_first.groupby("key", as_index=False).first()
    gr = gr.reset_index(drop=True).copy()
    gr = gr.drop("name", axis=1)

    # Folders might be different so we merge on the last part of the path.
    merged = dfnb.merge(gr, on="key", how="outer")
    merged = merged[merged.notebooks.notnull()]
    merged = merged.sort_values("key").reset_index(drop=True).copy()

    if "last_name" not in merged.columns:
        merged["last_name"] = merged["key"].apply(
            lambda x: os.path.split(x)[-1])

    # We check there is no duplicates in merged.
    for c in ["key", "last_name"]:
        names = [value for value in merged[c] if isinstance(value, str)]
        if len(set(names)) < len(names):
            raise ValueError(  # pragma: no cover
                "Unexpected duplicated names in column '{1}'\n{0}".format(
                    "\n".join(sorted(names)), c))

    return merged

517

518

def badge_notebook_coverage(df, image_name):
    """
    Builds a badge reporting on the notebook coverage.
    It gives the proportion of run cells.

    @param      df          output of @see fn notebook_coverage,
                            columns *nbcell*, *nbrun*, *nbvalid* are summed
    @param      image_name  image to produce
    @return                 coverage estimation (percentage of run cells,
                            not greater than 100)

    The badge displays one percentage if the proportion of run cells
    and the proportion of valid cells are the same once truncated,
    both of them otherwise.
    The function relies on module :epkg:`Pillow`.
    """
    nb_cells = df["nbcell"].sum()
    nb_run = df["nbrun"].sum()
    nb_valid = df["nbvalid"].sum()
    if nb_cells > 0:
        cov = nb_run * 100.0 / nb_cells
        val = nb_valid * 100.0 / nb_cells
    else:
        cov = 1.0
        val = 1.0
    dcov = min(100., cov)

    with warnings.catch_warnings():
        warnings.simplefilter("ignore", ImportWarning)
        from PIL import Image, ImageFont, ImageDraw

    # The background color is the one of the first upper bound the coverage
    # does not exceed, the last color is used above every bound.
    palette = [
        (60, (200, 87, 51)),
        (70, (200, 156, 18)),
        (75, (140, 140, 140)),
        (80, (88, 171, 171)),
        (85, (88, 140, 86)),
        (90, (80, 155, 86)),
        (95, (80, 190, 73)),
    ]
    color = next((rgb for bound, rgb in palette if cov <= bound),
                 (20, 190, 50))

    def as_percent(value):
        # integer not greater than 100, '?' if the value cannot be converted
        try:
            return min(int(value), 100)
        except ValueError:  # pragma: no cover
            return "?"

    img = Image.new(mode='RGB', size=(70, 20), color=color)
    im = ImageDraw.Draw(img)
    font = ImageFont.load_default()
    cov = as_percent(cov)
    val = as_percent(val)
    if cov == val:
        label = f"NB: {cov}% "
    else:
        label = f"NB:{cov}%-{val}% "
    im.text((3, 4), label, (255, 255, 255), font=font)
    img.save(image_name)
    return dcov

575

576

def get_additional_paths(modules):
    """
    Returns a list of paths to add before running the notebooks
    for a given a list of modules.

    @param      modules     list of modules, each of them must
                            define attribute ``__file__``
    @return                 list of paths, for every module, the parent
                            of the folder which contains its file
    """
    folders = (os.path.dirname(mod.__file__) for mod in modules)
    return [os.path.normpath(os.path.join(folder, "..")) for folder in folders]

587

588

def retrieve_notebooks_in_folder(folder, posreg=".*[.]ipynb$", negreg=None):
    """
    Retrieves notebooks in a test folder.

    @param      folder      folder, subfolders are not explored
    @param      posreg      regular expression, a file is kept
                            if the expression is found in its name
    @param      negreg      regular expression, a file is dropped
                            if the expression is found in its name,
                            None to keep everything *posreg* selects
    @return                 list of found notebooks (*folder* joined
                            with the name), sorted by name
    @raise      FileNotFoundError if no notebook was found
    """
    pos = re.compile(posreg)
    neg = re.compile(negreg) if negreg is not None else None
    # os.listdir returns the names in arbitrary order,
    # they are sorted to get the same result on every machine.
    res = [os.path.join(folder, name)
           for name in sorted(os.listdir(folder))
           if pos.search(name) and (neg is None or not neg.search(name))]
    if not res:
        raise FileNotFoundError(  # pragma: no cover
            f"No notebook found in '{folder}'.")
    return res

Coverage for pyquickhelper/ipythonhelper/run_notebook.py: 89%

245 statements