Links: notebook
, html, PDF
, python
, slides, slides(2), GitHub
Profiling avec cProfile, memory_profiler, line_profiler, pyinstrument, snakeviz.
%matplotlib inline
import matplotlib.pyplot as plt
plt.style.use('ggplot')
from jyquickhelper import add_notebook_menu
add_notebook_menu()
from mlstatpy.nlp.completion import CompletionTrieNode
def gain_dynamique_moyen_par_mot(queries, weights):
per = list(zip(weights, queries))
total = sum(weights) * 1.0
res = []
trie = CompletionTrieNode.build([(None, q) for _, q in per])
trie.precompute_stat()
trie.update_stat_dynamic()
wks = [(w, p, len(w) - trie.min_keystroke0(w)[0]) for p, w in per]
wks_dyn = [(w, p, len(w) - trie.min_dynamic_keystroke(w)[0])
for p, w in per]
wks_dyn2 = [(w, p, len(w) - trie.min_dynamic_keystroke2(w)[0])
for p, w in per]
gain = sum(g * p / total for w, p, g in wks)
gain_dyn = sum(g * p / total for w, p, g in wks_dyn)
gain_dyn2 = sum(g * p / total for w, p, g in wks_dyn2)
ave_length = sum(len(w) * p / total for p, w in per)
return gain, gain_dyn, gain_dyn2, ave_length
from mlstatpy.data.wikipedia import download_titles
file_titles = download_titles(country='fr')
len(file_titles)
33
from mlstatpy.data.wikipedia import enumerate_titles
list_titles = list(sorted(set(_ for _ in enumerate_titles(file_titles) if 'A' <= _[0] <= 'Z')))
import random
sample1000 = random.sample(list_titles, 1000)
with open("sample1000.txt", "w", encoding="utf-8") as f:
f.write("\n".join(sample1000))
import cProfile, io, pstats, os
def toprofile0(lines):
gain_dynamique_moyen_par_mot(lines, [1.0] * len(lines))
def doprofile(lines, filename):
pr = cProfile.Profile()
pr.enable()
toprofile0(lines)
pr.disable()
s = io.StringIO()
ps = pstats.Stats(pr, stream=s).sort_stats('cumulative')
ps.print_stats()
rem = os.path.normpath(os.path.join(os.getcwd(), "..", "..", ".."))
res = s.getvalue().replace(rem, "")
ps.dump_stats(filename)
return res
r = doprofile(sample1000, "completion.prof")
print(r)
1225828 function calls in 1.261 seconds Ordered by: cumulative time ncalls tottime percall cumtime percall filename:lineno(function) 1 0.000 0.000 1.261 1.261 <ipython-input-8-d2b6d910fdd7>:3(toprofile0) 1 0.000 0.000 1.261 1.261 <ipython-input-3-684f3e860cf5>:3(gain_dynamique_moyen_par_mot) 1 0.129 0.129 0.916 0.916 srcmlstatpynlpcompletion.py:416(precompute_stat) 15148 0.260 0.000 0.662 0.000 srcmlstatpynlpcompletion.py:504(merge_completions) 15148 0.277 0.000 0.281 0.000 {built-in method builtins.__build_class__} 1 0.051 0.051 0.216 0.216 srcmlstatpynlpcompletion.py:451(update_stat_dynamic) 1 0.087 0.087 0.107 0.107 srcmlstatpynlpcompletion.py:203(build) 16147 0.048 0.000 0.090 0.000 srcmlstatpynlpcompletion.py:556(update_dynamic_minimum_keystroke) 34403 0.061 0.000 0.070 0.000 srcmlstatpynlpcompletion.py:524(<listcomp>) 34127 0.019 0.000 0.037 0.000 {built-in method builtins.all} 16147 0.026 0.000 0.037 0.000 srcmlstatpynlpcompletion.py:625(init_dynamic_minimum_keystroke) 16147 0.029 0.000 0.034 0.000 srcmlstatpynlpcompletion.py:589(second_step) 298714 0.030 0.000 0.030 0.000 {built-in method builtins.len} 15148 0.023 0.000 0.029 0.000 srcmlstatpynlpcompletion.py:543(update_minimum_keystroke) 105060 0.028 0.000 0.028 0.000 {built-in method builtins.hasattr} 15149 0.003 0.000 0.027 0.000 {method 'extend' of 'collections.deque' objects} 16148 0.018 0.000 0.027 0.000 srcmlstatpynlpcompletion.py:97(unsorted_iter) 1001 0.016 0.000 0.023 0.000 srcmlstatpynlpcompletion.py:132(leaves) 15148 0.018 0.000 0.023 0.000 {built-in method builtins.sorted} 93541 0.022 0.000 0.022 0.000 srcmlstatpynlpcompletion.py:436(<genexpr>) 3000 0.013 0.000 0.014 0.000 srcmlstatpynlpcompletion.py:258(find) 16147 0.011 0.000 0.013 0.000 srcmlstatpynlpcompletion.py:20(__init__) 109867 0.013 0.000 0.013 0.000 {method 'values' of 'dict' objects} 22498 0.009 0.000 0.009 0.000 {built-in method builtins.min} 45444 0.009 0.000 0.009 0.000 {method 'extend' of 'list' objects} 1 0.001 0.001 0.009 0.009 <ipython-input-3-684f3e860cf5>:13(<listcomp>) 1000 0.001 0.000 0.008 0.000 srcmlstatpynlpcompletion.py:383(min_dynamic_keystroke2) 48441 0.007 0.000 0.007 0.000 {method 'pop' of 'list' objects} 49552 0.007 0.000 0.007 0.000 {method 'append' of 'list' objects} 19255 0.007 0.000 0.007 0.000 {built-in method builtins.max} 52271 0.006 0.000 0.006 0.000 {method 'popleft' of 'collections.deque' objects} 1 0.001 0.001 0.006 0.006 <ipython-input-3-684f3e860cf5>:10(<listcomp>) 35125 0.005 0.000 0.005 0.000 {method 'append' of 'collections.deque' objects} 1 0.001 0.001 0.005 0.005 <ipython-input-3-684f3e860cf5>:11(<listcomp>) 16146 0.005 0.000 0.005 0.000 srcmlstatpynlpcompletion.py:54(_add) 1000 0.001 0.000 0.005 0.000 srcmlstatpynlpcompletion.py:322(min_keystroke0) 1000 0.001 0.000 0.005 0.000 srcmlstatpynlpcompletion.py:353(min_dynamic_keystroke) 16148 0.005 0.000 0.005 0.000 srcmlstatpynlpcompletion.py:518(<genexpr>) 15148 0.004 0.000 0.004 0.000 srcmlstatpynlpcompletion.py:509(Fake) 15148 0.004 0.000 0.004 0.000 srcmlstatpynlpcompletion.py:512(<listcomp>) 30296 0.003 0.000 0.003 0.000 {method 'items' of 'dict' objects} 17147 0.002 0.000 0.002 0.000 {built-in method builtins.isinstance} 5 0.000 0.000 0.001 0.000 {built-in method builtins.sum} 1 0.000 0.000 0.000 0.000 <ipython-input-3-684f3e860cf5>:7(<listcomp>) 1001 0.000 0.000 0.000 0.000 <ipython-input-3-684f3e860cf5>:18(<genexpr>) 1001 0.000 0.000 0.000 0.000 <ipython-input-3-684f3e860cf5>:15(<genexpr>) 1001 0.000 0.000 0.000 0.000 <ipython-input-3-684f3e860cf5>:16(<genexpr>) 1001 0.000 0.000 0.000 0.000 <ipython-input-3-684f3e860cf5>:17(<genexpr>) 1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}
See memory_profiler.
from memory_profiler import profile
%load_ext memory_profiler
%memit toprofile0(sample1000)
peak memory: 412.05 MiB, increment: 15.60 MiB
from io import StringIO
st = StringIO()
@profile(stream=st)
def toprofile(lines):
gain_dynamique_moyen_par_mot(lines, [1.0] * len(lines))
toprofile(sample1000)
ERROR: Could not find file <ipython-input-12-7512ff5cdee3>
NOTE: %mprun can only be used on functions defined in physical files, and not in the IPython environment.
%%file temp_mem_profile.py
from mlstatpy.nlp.completion import CompletionTrieNode
from memory_profiler import profile
@profile(precision=4)
def gain_dynamique_moyen_par_mot(queries, weights):
per = list(zip(weights, queries))
total = sum(weights) * 1.0
res = []
trie = CompletionTrieNode.build([(None, q) for _, q in per])
trie.precompute_stat()
trie.update_stat_dynamic()
wks = [(w, p, len(w) - trie.min_keystroke0(w)[0]) for p, w in per]
wks_dyn = [(w, p, len(w) - trie.min_dynamic_keystroke(w)[0])
for p, w in per]
wks_dyn2 = [(w, p, len(w) - trie.min_dynamic_keystroke2(w)[0])
for p, w in per]
gain = sum(g * p / total for w, p, g in wks)
gain_dyn = sum(g * p / total for w, p, g in wks_dyn)
gain_dyn2 = sum(g * p / total for w, p, g in wks_dyn2)
ave_length = sum(len(w) * p / total for p, w in per)
return gain, gain_dyn, gain_dyn2, ave_length
@profile(precision=4)
def toprofile():
with open("sample1000.txt", "r", encoding="utf-8") as f:
lines = [_.strip("\n\r ") for _ in f.readlines()]
gain_dynamique_moyen_par_mot(lines, [1.0] * len(lines))
toprofile()
Overwriting temp_mem_profile.py
import sys
cmd = sys.executable
from pyquickhelper.loghelper import run_cmd
cmd += " -m memory_profiler temp_mem_profile.py"
out, err = run_cmd(cmd, wait=True)
print(out)
Filename: temp_mem_profile.py Line # Mem usage Increment Line Contents ================================================ 5 56.0625 MiB 56.0625 MiB @profile(precision=4) 6 def gain_dynamique_moyen_par_mot(queries, weights): 7 56.0625 MiB 0.0000 MiB per = list(zip(weights, queries)) 8 56.0625 MiB 0.0000 MiB total = sum(weights) * 1.0 9 56.0625 MiB 0.0000 MiB res = [] 10 62.2500 MiB 6.1875 MiB trie = CompletionTrieNode.build([(None, q) for _, q in per]) 11 69.5625 MiB 7.3125 MiB trie.precompute_stat() 12 78.7695 MiB 9.2070 MiB trie.update_stat_dynamic() 13 78.7734 MiB 0.0039 MiB wks = [(w, p, len(w) - trie.min_keystroke0(w)[0]) for p, w in per] 14 78.7969 MiB 0.0234 MiB wks_dyn = [(w, p, len(w) - trie.min_dynamic_keystroke(w)[0]) 15 78.7969 MiB 0.0000 MiB for p, w in per] 16 78.7969 MiB 0.0000 MiB wks_dyn2 = [(w, p, len(w) - trie.min_dynamic_keystroke2(w)[0]) 17 78.7969 MiB 0.0000 MiB for p, w in per] 18 78.7969 MiB 0.0000 MiB gain = sum(g * p / total for w, p, g in wks) 19 78.7969 MiB 0.0000 MiB gain_dyn = sum(g * p / total for w, p, g in wks_dyn) 20 78.7969 MiB 0.0000 MiB gain_dyn2 = sum(g * p / total for w, p, g in wks_dyn2) 21 78.7969 MiB 0.0000 MiB ave_length = sum(len(w) * p / total for p, w in per) 22 78.7969 MiB 0.0000 MiB return gain, gain_dyn, gain_dyn2, ave_length Filename: temp_mem_profile.py Line # Mem usage Increment Line Contents ================================================ 24 55.9570 MiB 55.9570 MiB @profile(precision=4) 25 def toprofile(): 26 55.9570 MiB 0.0000 MiB with open("sample1000.txt", "r", encoding="utf-8") as f: 27 56.0625 MiB 0.1055 MiB lines = [_.strip("nr ") for _ in f.readlines()] 28 78.7969 MiB 22.7344 MiB gain_dynamique_moyen_par_mot(lines, [1.0] * len(lines))
See line_profiler.
def lineprofile(lines):
gain_dynamique_moyen_par_mot(lines, [1.0] * len(lines))
from mlstatpy.nlp.completion import CompletionTrieNode
from line_profiler import LineProfiler
prof = LineProfiler()
prof.add_function(gain_dynamique_moyen_par_mot)
prof.add_function(CompletionTrieNode.precompute_stat)
prof.run("lineprofile(sample1000)")
st = io.StringIO()
prof.print_stats(stream=st)
rem = os.path.normpath(os.path.join(os.getcwd(), "..", "..", ".."))
res = st.getvalue().replace(rem, "")
print(res)
Timer unit: 3.95062e-07 s Total time: 3.3536 s File: <ipython-input-3-684f3e860cf5> Function: gain_dynamique_moyen_par_mot at line 3 Line # Hits Time Per Hit % Time Line Contents ============================================================== 3 def gain_dynamique_moyen_par_mot(queries, weights): 4 1 393.0 393.0 0.0 per = list(zip(weights, queries)) 5 1 23.0 23.0 0.0 total = sum(weights) * 1.0 6 1 4.0 4.0 0.0 res = [] 7 1 379526.0 379526.0 4.5 trie = CompletionTrieNode.build([(None, q) for _, q in per]) 8 1 6658020.0 6658020.0 78.4 trie.precompute_stat() 9 1 1241489.0 1241489.0 14.6 trie.update_stat_dynamic() 10 1 64879.0 64879.0 0.8 wks = [(w, p, len(w) - trie.min_keystroke0(w)[0]) for p, w in per] 11 1 16.0 16.0 0.0 wks_dyn = [(w, p, len(w) - trie.min_dynamic_keystroke(w)[0]) 12 1 58635.0 58635.0 0.7 for p, w in per] 13 1 10.0 10.0 0.0 wks_dyn2 = [(w, p, len(w) - trie.min_dynamic_keystroke2(w)[0]) 14 1 80850.0 80850.0 1.0 for p, w in per] 15 1 1318.0 1318.0 0.0 gain = sum(g * p / total for w, p, g in wks) 16 1 1221.0 1221.0 0.0 gain_dyn = sum(g * p / total for w, p, g in wks_dyn) 17 1 1073.0 1073.0 0.0 gain_dyn2 = sum(g * p / total for w, p, g in wks_dyn2) 18 1 1349.0 1349.0 0.0 ave_length = sum(len(w) * p / total for p, w in per) 19 1 4.0 4.0 0.0 return gain, gain_dyn, gain_dyn2, ave_length Total time: 2.19801 s File: srcmlstatpynlpcompletion.py Function: precompute_stat at line 416 Line # Hits Time Per Hit % Time Line Contents ============================================================== 416 def precompute_stat(self): 417 """ 418 computes and stores list of completions for each node, 419 computes mks 420 421 @param clean clean stat 422 """ 423 1 5.0 5.0 0.0 stack = deque() 424 1 78759.0 78759.0 1.4 stack.extend(self.leaves()) 425 52272 168548.0 3.2 3.0 while len(stack) > 0: 426 52271 169512.0 3.2 3.0 pop = stack.popleft() 427 52271 148123.0 2.8 2.7 if pop.stat is not None: 428 17145 42365.0 2.5 0.8 continue 429 35126 105270.0 3.0 1.9 if not pop.children: 430 999 2545.0 2.5 0.0 pop.stat = CompletionTrieNode._Stat() 431 999 5598.0 5.6 0.1 pop.stat.completions = [] 432 999 2812.0 2.8 0.1 pop.stat.mks0 = len(pop.value) 433 999 2370.0 2.4 0.0 pop.stat.mks0_ = len(pop.value) 434 999 2233.0 2.2 0.0 if pop.parent is not None: 435 999 2339.0 2.3 0.0 stack.append(pop.parent) 436 34127 397661.0 11.7 7.1 elif all(v.stat is not None for v in pop.children.values()): 437 15148 55784.0 3.7 1.0 pop.stat = CompletionTrieNode._Stat() 438 15148 48358.0 3.2 0.9 if pop.leave: 439 1 5.0 5.0 0.0 pop.stat.mks0 = len(pop.value) 440 1 5.0 5.0 0.0 pop.stat.mks0_ = len(pop.value) 441 15148 76740.0 5.1 1.4 stack.extend(pop.children.values()) 442 15148 3717484.0 245.4 66.8 pop.stat.merge_completions(pop.value, pop.children.values()) 443 15148 62928.0 4.2 1.1 pop.stat.next_nodes = pop.children 444 15148 292243.0 19.3 5.3 pop.stat.update_minimum_keystroke(len(pop.value)) 445 15148 55688.0 3.7 1.0 if pop.parent is not None: 446 15147 64826.0 4.3 1.2 stack.append(pop.parent) 447 else: 448 # we'll do it again later 449 18979 61515.0 3.2 1.1 stack.append(pop)
See gprof2dot.
import gprof2dot
import sys
sys.argv=["", "-f", "pstats", "completion.prof", "-o", "completion.dot"]
gprof2dot.main()
from pyquickhelper.helpgen.conf_path_tools import find_graphviz_dot
dot = find_graphviz_dot()
from pyquickhelper.loghelper import run_cmd
out, err = run_cmd('"{0}" completion.dot -Tpng -ocompletion.png'.format(dot), wait=True)
print(out)
from pyquickhelper.helpgen import NbImage
name = "completion.png"
if os.path.exists("images/completion.jpg"):
# replacing by jpg because latex conversion does not like png with transparency
name = "images/completion.jpg"
NbImage(name, width=800)
See pyinstrument.
from pyinstrument import Profiler
profiler = Profiler(use_signal=False)
profiler.start()
toprofile0(sample1000)
profiler.stop()
out = profiler.output_text(unicode=False, color=False)
print(out.replace("\\", "/"))
__main__:3: DeprecationWarning: use_signal is deprecated and should no longer be used.
1.414 gain_dynamique_moyen_par_mot <ipython-input-3-684f3e860cf5>:3
|- 1.050 precompute_stat mlstatpy/nlp/completion.py:416
| |- 0.846 merge_completions mlstatpy/nlp/completion.py:504
| | `- 0.071 <listcomp> mlstatpy/nlp/completion.py:524
| |- 0.027 <genexpr> mlstatpy/nlp/completion.py:436
| |- 0.026 leaves mlstatpy/nlp/completion.py:132
| `- 0.015 update_minimum_keystroke mlstatpy/nlp/completion.py:543
|- 0.235 update_stat_dynamic mlstatpy/nlp/completion.py:451
| |- 0.119 update_dynamic_minimum_keystroke mlstatpy/nlp/completion.py:556
| | `- 0.037 second_step mlstatpy/nlp/completion.py:589
| |- 0.041 init_dynamic_minimum_keystroke mlstatpy/nlp/completion.py:625
| `- 0.020 unsorted_iter mlstatpy/nlp/completion.py:97
`- 0.096 build mlstatpy/nlp/completion.py:203
`- 0.019 __init__ mlstatpy/nlp/completion.py:20