Completion profiling


Profiling the completion algorithm with cProfile, memory_profiler, line_profiler, pyinstrument, snakeviz.

%matplotlib inline
import matplotlib.pyplot as plt
plt.style.use('ggplot')
from jyquickhelper import add_notebook_menu
add_notebook_menu()

Setup

Function to profile

from mlstatpy.nlp.completion import CompletionTrieNode

def gain_dynamique_moyen_par_mot(queries, weights):
    per = list(zip(weights, queries))
    total = sum(weights) * 1.0
    res = []
    trie = CompletionTrieNode.build([(None, q) for _, q in per])
    trie.precompute_stat()
    trie.update_stat_dynamic()
    wks = [(w, p, len(w) - trie.min_keystroke0(w)[0]) for p, w in per]
    wks_dyn = [(w, p, len(w) - trie.min_dynamic_keystroke(w)[0])
               for p, w in per]
    wks_dyn2 = [(w, p, len(w) - trie.min_dynamic_keystroke2(w)[0])
                for p, w in per]
    gain = sum(g * p / total for w, p, g in wks)
    gain_dyn = sum(g * p / total for w, p, g in wks_dyn)
    gain_dyn2 = sum(g * p / total for w, p, g in wks_dyn2)
    ave_length = sum(len(w) * p / total for p, w in per)
    return gain, gain_dyn, gain_dyn2, ave_length
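
A quick sanity check before profiling: the function can be called directly on a handful of queries with uniform weights. The three queries below are made up for illustration.

queries = ["machine", "machine learning", "machinerie"]
weights = [1.0] * len(queries)
gain, gain_dyn, gain_dyn2, ave_length = gain_dynamique_moyen_par_mot(queries, weights)
print(gain, gain_dyn, gain_dyn2, ave_length)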

Data

from mlstatpy.data.wikipedia import download_titles
file_titles = download_titles(country='fr')
len(file_titles)
33
from mlstatpy.data.wikipedia import enumerate_titles
list_titles = list(sorted(set(_ for _ in enumerate_titles(file_titles) if 'A' <= _[0] <= 'Z')))
import random
sample1000 = random.sample(list_titles, 1000)
with open("sample1000.txt", "w", encoding="utf-8") as f:
    f.write("\n".join(sample1000))
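
The sample depends on the random state; to draw the same 1000 titles at every run, the seed can be fixed before calling random.sample, for instance:

random.seed(0)  # optional, only for reproducibility
sample1000 = random.sample(list_titles, 1000)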

Standard modules

cProfile

import cProfile, io, pstats, os

def toprofile0(lines):
    gain_dynamique_moyen_par_mot(lines, [1.0] * len(lines))

def doprofile(lines, filename):
    pr = cProfile.Profile()
    pr.enable()
    toprofile0(lines)
    pr.disable()
    s = io.StringIO()
    ps = pstats.Stats(pr, stream=s).sort_stats('cumulative')
    ps.print_stats()
    rem = os.path.normpath(os.path.join(os.getcwd(), "..", "..", ".."))
    res = s.getvalue().replace(rem, "")
    ps.dump_stats(filename)
    return res
r = doprofile(sample1000, "completion.prof")
print(r)
      1225828 function calls in 1.261 seconds
Ordered by: cumulative time
ncalls  tottime  percall  cumtime  percall filename:lineno(function)
     1    0.000    0.000    1.261    1.261 <ipython-input-8-d2b6d910fdd7>:3(toprofile0)
     1    0.000    0.000    1.261    1.261 <ipython-input-3-684f3e860cf5>:3(gain_dynamique_moyen_par_mot)
     1    0.129    0.129    0.916    0.916 src/mlstatpy/nlp/completion.py:416(precompute_stat)
 15148    0.260    0.000    0.662    0.000 src/mlstatpy/nlp/completion.py:504(merge_completions)
 15148    0.277    0.000    0.281    0.000 {built-in method builtins.__build_class__}
     1    0.051    0.051    0.216    0.216 src/mlstatpy/nlp/completion.py:451(update_stat_dynamic)
     1    0.087    0.087    0.107    0.107 src/mlstatpy/nlp/completion.py:203(build)
 16147    0.048    0.000    0.090    0.000 src/mlstatpy/nlp/completion.py:556(update_dynamic_minimum_keystroke)
 34403    0.061    0.000    0.070    0.000 src/mlstatpy/nlp/completion.py:524(<listcomp>)
 34127    0.019    0.000    0.037    0.000 {built-in method builtins.all}
 16147    0.026    0.000    0.037    0.000 src/mlstatpy/nlp/completion.py:625(init_dynamic_minimum_keystroke)
 16147    0.029    0.000    0.034    0.000 src/mlstatpy/nlp/completion.py:589(second_step)
298714    0.030    0.000    0.030    0.000 {built-in method builtins.len}
 15148    0.023    0.000    0.029    0.000 src/mlstatpy/nlp/completion.py:543(update_minimum_keystroke)
105060    0.028    0.000    0.028    0.000 {built-in method builtins.hasattr}
 15149    0.003    0.000    0.027    0.000 {method 'extend' of 'collections.deque' objects}
 16148    0.018    0.000    0.027    0.000 src/mlstatpy/nlp/completion.py:97(unsorted_iter)
  1001    0.016    0.000    0.023    0.000 src/mlstatpy/nlp/completion.py:132(leaves)
 15148    0.018    0.000    0.023    0.000 {built-in method builtins.sorted}
 93541    0.022    0.000    0.022    0.000 src/mlstatpy/nlp/completion.py:436(<genexpr>)
  3000    0.013    0.000    0.014    0.000 src/mlstatpy/nlp/completion.py:258(find)
 16147    0.011    0.000    0.013    0.000 src/mlstatpy/nlp/completion.py:20(__init__)
109867    0.013    0.000    0.013    0.000 {method 'values' of 'dict' objects}
 22498    0.009    0.000    0.009    0.000 {built-in method builtins.min}
 45444    0.009    0.000    0.009    0.000 {method 'extend' of 'list' objects}
     1    0.001    0.001    0.009    0.009 <ipython-input-3-684f3e860cf5>:13(<listcomp>)
  1000    0.001    0.000    0.008    0.000 src/mlstatpy/nlp/completion.py:383(min_dynamic_keystroke2)
 48441    0.007    0.000    0.007    0.000 {method 'pop' of 'list' objects}
 49552    0.007    0.000    0.007    0.000 {method 'append' of 'list' objects}
 19255    0.007    0.000    0.007    0.000 {built-in method builtins.max}
 52271    0.006    0.000    0.006    0.000 {method 'popleft' of 'collections.deque' objects}
     1    0.001    0.001    0.006    0.006 <ipython-input-3-684f3e860cf5>:10(<listcomp>)
 35125    0.005    0.000    0.005    0.000 {method 'append' of 'collections.deque' objects}
     1    0.001    0.001    0.005    0.005 <ipython-input-3-684f3e860cf5>:11(<listcomp>)
 16146    0.005    0.000    0.005    0.000 src/mlstatpy/nlp/completion.py:54(_add)
  1000    0.001    0.000    0.005    0.000 src/mlstatpy/nlp/completion.py:322(min_keystroke0)
  1000    0.001    0.000    0.005    0.000 src/mlstatpy/nlp/completion.py:353(min_dynamic_keystroke)
 16148    0.005    0.000    0.005    0.000 src/mlstatpy/nlp/completion.py:518(<genexpr>)
 15148    0.004    0.000    0.004    0.000 src/mlstatpy/nlp/completion.py:509(Fake)
 15148    0.004    0.000    0.004    0.000 src/mlstatpy/nlp/completion.py:512(<listcomp>)
 30296    0.003    0.000    0.003    0.000 {method 'items' of 'dict' objects}
 17147    0.002    0.000    0.002    0.000 {built-in method builtins.isinstance}
     5    0.000    0.000    0.001    0.000 {built-in method builtins.sum}
     1    0.000    0.000    0.000    0.000 <ipython-input-3-684f3e860cf5>:7(<listcomp>)
  1001    0.000    0.000    0.000    0.000 <ipython-input-3-684f3e860cf5>:18(<genexpr>)
  1001    0.000    0.000    0.000    0.000 <ipython-input-3-684f3e860cf5>:15(<genexpr>)
  1001    0.000    0.000    0.000    0.000 <ipython-input-3-684f3e860cf5>:16(<genexpr>)
  1001    0.000    0.000    0.000    0.000 <ipython-input-3-684f3e860cf5>:17(<genexpr>)
     1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}
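
The dump written by doprofile into completion.prof can be reloaded later with pstats and looked at from another angle, for instance sorted by internal time and limited to the costliest functions. A minimal sketch:

import pstats
stats = pstats.Stats("completion.prof")
# shorten the paths, sort by time spent inside each function, keep the top 10
stats.strip_dirs().sort_stats("tottime").print_stats(10)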

Other information while profiling

memory_profiler

See memory_profiler.

from memory_profiler import profile
%load_ext memory_profiler
%memit toprofile0(sample1000)
peak memory: 412.05 MiB, increment: 15.60 MiB
from io import StringIO
st = StringIO()
@profile(stream=st)
def toprofile(lines):
    gain_dynamique_moyen_par_mot(lines, [1.0] * len(lines))
toprofile(sample1000)
ERROR: Could not find file <ipython-input-12-7512ff5cdee3>
NOTE: %mprun can only be used on functions defined in physical files, and not in the IPython environment.
%%file temp_mem_profile.py

from mlstatpy.nlp.completion import CompletionTrieNode
from memory_profiler import profile

@profile(precision=4)
def gain_dynamique_moyen_par_mot(queries, weights):
    per = list(zip(weights, queries))
    total = sum(weights) * 1.0
    res = []
    trie = CompletionTrieNode.build([(None, q) for _, q in per])
    trie.precompute_stat()
    trie.update_stat_dynamic()
    wks = [(w, p, len(w) - trie.min_keystroke0(w)[0]) for p, w in per]
    wks_dyn = [(w, p, len(w) - trie.min_dynamic_keystroke(w)[0])
               for p, w in per]
    wks_dyn2 = [(w, p, len(w) - trie.min_dynamic_keystroke2(w)[0])
                for p, w in per]
    gain = sum(g * p / total for w, p, g in wks)
    gain_dyn = sum(g * p / total for w, p, g in wks_dyn)
    gain_dyn2 = sum(g * p / total for w, p, g in wks_dyn2)
    ave_length = sum(len(w) * p / total for p, w in per)
    return gain, gain_dyn, gain_dyn2, ave_length

@profile(precision=4)
def toprofile():
    with open("sample1000.txt", "r", encoding="utf-8") as f:
        lines = [_.strip("\n\r ") for _ in f.readlines()]
    gain_dynamique_moyen_par_mot(lines, [1.0] * len(lines))
toprofile()
Overwriting temp_mem_profile.py
import sys
cmd = sys.executable
from pyquickhelper.loghelper import run_cmd
cmd += " -m memory_profiler temp_mem_profile.py"
out, err = run_cmd(cmd, wait=True)
print(out)
Filename: temp_mem_profile.py
Line #    Mem usage    Increment   Line Contents
================================================
     5  56.0625 MiB  56.0625 MiB   @profile(precision=4)
     6                             def gain_dynamique_moyen_par_mot(queries, weights):
     7  56.0625 MiB   0.0000 MiB       per = list(zip(weights, queries))
     8  56.0625 MiB   0.0000 MiB       total = sum(weights) * 1.0
     9  56.0625 MiB   0.0000 MiB       res = []
    10  62.2500 MiB   6.1875 MiB       trie = CompletionTrieNode.build([(None, q) for _, q in per])
    11  69.5625 MiB   7.3125 MiB       trie.precompute_stat()
    12  78.7695 MiB   9.2070 MiB       trie.update_stat_dynamic()
    13  78.7734 MiB   0.0039 MiB       wks = [(w, p, len(w) - trie.min_keystroke0(w)[0]) for p, w in per]
    14  78.7969 MiB   0.0234 MiB       wks_dyn = [(w, p, len(w) - trie.min_dynamic_keystroke(w)[0])
    15  78.7969 MiB   0.0000 MiB                  for p, w in per]
    16  78.7969 MiB   0.0000 MiB       wks_dyn2 = [(w, p, len(w) - trie.min_dynamic_keystroke2(w)[0])
    17  78.7969 MiB   0.0000 MiB                   for p, w in per]
    18  78.7969 MiB   0.0000 MiB       gain = sum(g * p / total for w, p, g in wks)
    19  78.7969 MiB   0.0000 MiB       gain_dyn = sum(g * p / total for w, p, g in wks_dyn)
    20  78.7969 MiB   0.0000 MiB       gain_dyn2 = sum(g * p / total for w, p, g in wks_dyn2)
    21  78.7969 MiB   0.0000 MiB       ave_length = sum(len(w) * p / total for p, w in per)
    22  78.7969 MiB   0.0000 MiB       return gain, gain_dyn, gain_dyn2, ave_length

Filename: temp_mem_profile.py
Line #    Mem usage    Increment   Line Contents
================================================
    24  55.9570 MiB  55.9570 MiB   @profile(precision=4)
    25                             def toprofile():
    26  55.9570 MiB   0.0000 MiB       with open("sample1000.txt", "r", encoding="utf-8") as f:
    27  56.0625 MiB   0.1055 MiB           lines = [_.strip("\n\r ") for _ in f.readlines()]
    28  78.7969 MiB  22.7344 MiB       gain_dynamique_moyen_par_mot(lines, [1.0] * len(lines))
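
memory_profiler also installs the mprof command line tool, which samples the memory of the whole process over time and plots the curve. It has to be run from a shell; a sketch on the script written above:

# mprof run temp_mem_profile.py     # records samples into an mprofile_*.dat file
# mprof plot                        # plots memory usage against time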

line_profiler

See line_profiler.

def lineprofile(lines):
    gain_dynamique_moyen_par_mot(lines, [1.0] * len(lines))
from mlstatpy.nlp.completion import CompletionTrieNode
from line_profiler import LineProfiler
prof = LineProfiler()
prof.add_function(gain_dynamique_moyen_par_mot)
prof.add_function(CompletionTrieNode.precompute_stat)
prof.run("lineprofile(sample1000)")
st = io.StringIO()
prof.print_stats(stream=st)
rem = os.path.normpath(os.path.join(os.getcwd(), "..", "..", ".."))
res = st.getvalue().replace(rem, "")
print(res)
Timer unit: 3.95062e-07 s
Total time: 3.3536 s
File: <ipython-input-3-684f3e860cf5>
Function: gain_dynamique_moyen_par_mot at line 3
Line #      Hits         Time  Per Hit   % Time  Line Contents
==============================================================
     3                                           def gain_dynamique_moyen_par_mot(queries, weights):
     4         1        393.0    393.0      0.0      per = list(zip(weights, queries))
     5         1         23.0     23.0      0.0      total = sum(weights) * 1.0
     6         1          4.0      4.0      0.0      res = []
     7         1     379526.0 379526.0      4.5      trie = CompletionTrieNode.build([(None, q) for _, q in per])
     8         1    6658020.0 6658020.0     78.4      trie.precompute_stat()
     9         1    1241489.0 1241489.0     14.6      trie.update_stat_dynamic()
    10         1      64879.0  64879.0      0.8      wks = [(w, p, len(w) - trie.min_keystroke0(w)[0]) for p, w in per]
    11         1         16.0     16.0      0.0      wks_dyn = [(w, p, len(w) - trie.min_dynamic_keystroke(w)[0])
    12         1      58635.0  58635.0      0.7                 for p, w in per]
    13         1         10.0     10.0      0.0      wks_dyn2 = [(w, p, len(w) - trie.min_dynamic_keystroke2(w)[0])
    14         1      80850.0  80850.0      1.0                  for p, w in per]
    15         1       1318.0   1318.0      0.0      gain = sum(g * p / total for w, p, g in wks)
    16         1       1221.0   1221.0      0.0      gain_dyn = sum(g * p / total for w, p, g in wks_dyn)
    17         1       1073.0   1073.0      0.0      gain_dyn2 = sum(g * p / total for w, p, g in wks_dyn2)
    18         1       1349.0   1349.0      0.0      ave_length = sum(len(w) * p / total for p, w in per)
    19         1          4.0      4.0      0.0      return gain, gain_dyn, gain_dyn2, ave_length
Total time: 2.19801 s
File: src/mlstatpy/nlp/completion.py
Function: precompute_stat at line 416
Line #      Hits         Time  Per Hit   % Time  Line Contents
==============================================================
   416                                               def precompute_stat(self):
   417                                                   """
   418                                                   computes and stores list of completions for each node,
   419                                                   computes mks
   420
   421                                                   @param      clean   clean stat
   422                                                   """
   423         1          5.0      5.0      0.0          stack = deque()
   424         1      78759.0  78759.0      1.4          stack.extend(self.leaves())
   425     52272     168548.0      3.2      3.0          while len(stack) > 0:
   426     52271     169512.0      3.2      3.0              pop = stack.popleft()
   427     52271     148123.0      2.8      2.7              if pop.stat is not None:
   428     17145      42365.0      2.5      0.8                  continue
   429     35126     105270.0      3.0      1.9              if not pop.children:
   430       999       2545.0      2.5      0.0                  pop.stat = CompletionTrieNode._Stat()
   431       999       5598.0      5.6      0.1                  pop.stat.completions = []
   432       999       2812.0      2.8      0.1                  pop.stat.mks0 = len(pop.value)
   433       999       2370.0      2.4      0.0                  pop.stat.mks0_ = len(pop.value)
   434       999       2233.0      2.2      0.0                  if pop.parent is not None:
   435       999       2339.0      2.3      0.0                      stack.append(pop.parent)
   436     34127     397661.0     11.7      7.1              elif all(v.stat is not None for v in pop.children.values()):
   437     15148      55784.0      3.7      1.0                  pop.stat = CompletionTrieNode._Stat()
   438     15148      48358.0      3.2      0.9                  if pop.leave:
   439         1          5.0      5.0      0.0                      pop.stat.mks0 = len(pop.value)
   440         1          5.0      5.0      0.0                      pop.stat.mks0_ = len(pop.value)
   441     15148      76740.0      5.1      1.4                  stack.extend(pop.children.values())
   442     15148    3717484.0    245.4     66.8                  pop.stat.merge_completions(pop.value, pop.children.values())
   443     15148      62928.0      4.2      1.1                  pop.stat.next_nodes = pop.children
   444     15148     292243.0     19.3      5.3                  pop.stat.update_minimum_keystroke(len(pop.value))
   445     15148      55688.0      3.7      1.0                  if pop.parent is not None:
   446     15147      64826.0      4.3      1.2                      stack.append(pop.parent)
   447                                                       else:
   448                                                           # we'll do it again later
   449     18979      61515.0      3.2      1.1                  stack.append(pop)
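
The measurements can also be saved for later inspection: LineProfiler pickles them to a file which python -m line_profiler reads back. A sketch reusing the prof object above:

prof.dump_stats("completion.lprof")
# then, from a shell:
#     python -m line_profiler completion.lprof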

Static Visualization

gprof2dot

See gprof2dot.

import gprof2dot
import sys
# gprof2dot.main() parses sys.argv, so the command line is emulated by hand
sys.argv = ["", "-f", "pstats", "completion.prof", "-o", "completion.dot"]
gprof2dot.main()
from pyquickhelper.helpgen.conf_path_tools import find_graphviz_dot
dot = find_graphviz_dot()
from pyquickhelper.loghelper import run_cmd
out, err = run_cmd('"{0}" completion.dot -Tpng -ocompletion.png'.format(dot), wait=True)
print(out)
from pyquickhelper.helpgen import NbImage
name = "completion.png"
if os.path.exists("images/completion.jpg"):
    # replacing by jpg because latex conversion does not like png with transparency
    name = "images/completion.jpg"
NbImage(name, width=800)
(output: call graph of the completion profile rendered by Graphviz)
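
The same picture can be produced without patching sys.argv by running gprof2dot and dot as external commands, here writing to completion2.dot / completion2.png so the files above are kept. A sketch, assuming python -m gprof2dot and Graphviz's dot work on this machine:

from pyquickhelper.loghelper import run_cmd
out, err = run_cmd(sys.executable + " -m gprof2dot -f pstats completion.prof -o completion2.dot", wait=True)
out, err = run_cmd('"{0}" completion2.dot -Tpng -o completion2.png'.format(dot), wait=True)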

pyinstrument

See pyinstrument.

from pyinstrument import Profiler

profiler = Profiler(use_signal=False)
profiler.start()

toprofile0(sample1000)

profiler.stop()
out = profiler.output_text(unicode=False, color=False)
print(out.replace("\\", "/"))
__main__:3: DeprecationWarning: use_signal is deprecated and should no longer be used.
1.414 gain_dynamique_moyen_par_mot  <ipython-input-3-684f3e860cf5>:3
|- 1.050 precompute_stat  mlstatpy/nlp/completion.py:416
|  |- 0.846 merge_completions  mlstatpy/nlp/completion.py:504
|  |  `- 0.071 <listcomp>  mlstatpy/nlp/completion.py:524
|  |- 0.027 <genexpr>  mlstatpy/nlp/completion.py:436
|  |- 0.026 leaves  mlstatpy/nlp/completion.py:132
|  `- 0.015 update_minimum_keystroke  mlstatpy/nlp/completion.py:543
|- 0.235 update_stat_dynamic  mlstatpy/nlp/completion.py:451
|  |- 0.119 update_dynamic_minimum_keystroke  mlstatpy/nlp/completion.py:556
|  |  `- 0.037 second_step  mlstatpy/nlp/completion.py:589
|  |- 0.041 init_dynamic_minimum_keystroke  mlstatpy/nlp/completion.py:625
|  `- 0.020 unsorted_iter  mlstatpy/nlp/completion.py:97
`- 0.096 build  mlstatpy/nlp/completion.py:203
   `- 0.019 __init__  mlstatpy/nlp/completion.py:20
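
The DeprecationWarning comes from use_signal, which recent pyinstrument releases deprecate and later removed; with a current version the same measurement can be written without it (a sketch, not checked against every release):

from pyinstrument import Profiler

profiler = Profiler()
profiler.start()
toprofile0(sample1000)
profiler.stop()
print(profiler.output_text(unicode=True, color=False))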

JavaScript Visualization

SnakeViz

%load_ext snakeviz

The following instruction launches the default browser with the profiling data.

# %snakeviz toprofile0(sample1000)
from pyquickhelper.helpgen import NbImage
NbImage("images/func_info.jpg", width=400)
(output: screenshot of the SnakeViz function view)
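
The dump saved earlier by cProfile can also be explored after the fact by launching SnakeViz from a shell; it starts a local web server and opens the interactive view in the browser:

# snakeviz completion.prof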

vprof

See vprof.

from vprof import profiler

# needs to be run from a file not from a notebook
# profiler.run(toprofile0, 'cmh', args=(sample1000,), host='localhost', port=8000)
from pyquickhelper.helpgen import NbImage
NbImage("images/vprof.jpg", width=800)
(output: screenshot of the vprof interface)
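
vprof is normally launched from a shell on a script, the configuration string selecting the views. A hedged example on a hypothetical script target_script.py:

# vprof -c cmh target_script.py
# c: flame chart, m: memory graph, h: code heatmap; results open in the browser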