Compares dot implementations (numpy, c++, sse, openmp)

numpy has a very fast implementation of the dot product. It is difficult to do better and very easy to do worse. This example looks into a couple of slower implementations written with cython. The tested functions are imported below:

import numpy
import matplotlib.pyplot as plt
from pandas import DataFrame, concat
from td3a_cpp.tutorial.dot_cython import (
    ddot_array_16_sse, ddot_array
)
from td3a_cpp.tutorial.dot_cython_omp import (
    ddot_cython_array_omp,
    ddot_array_openmp,
    get_omp_max_threads,
    ddot_array_openmp_16
)
from td3a_cpp.tools import measure_time_dim


def get_vectors(fct, n, h=250, dtype=numpy.float64):
    """Build one benchmark context per vector size.

    Sizes run over ``range(10, n, h)``. For each size, two random vectors
    are drawn with ``numpy.random.randn`` and cast to *dtype*.

    :param fct: callable stored under the ``'dot'`` key of every context
    :param n: exclusive upper bound of the tested sizes
    :param h: step between two consecutive sizes
    :param dtype: numpy dtype the random vectors are cast to
    :return: list of dictionaries with keys ``va``, ``vb``, ``dot``
        and ``x_name`` (the vector size)
    """
    contexts = []
    for size in range(10, n, h):
        # Draw ``va`` before ``vb`` so the random stream is consumed in
        # the same order for every size.
        left = numpy.random.randn(size).astype(dtype)
        right = numpy.random.randn(size).astype(dtype)
        contexts.append({'va': left, 'vb': right,
                         'dot': fct, 'x_name': size})
    return contexts

Number of threads

# Print the value returned by get_omp_max_threads().
# NOTE(review): presumably the maximum number of OpenMP threads available
# to the parallel implementations -- confirm in td3a_cpp.
print(get_omp_max_threads())

Out:

8

Several cython dot implementations

def numpy_dot(va, vb):
    """Return ``numpy.dot(va, vb)``."""
    return numpy.dot(va, vb)


def ddot_omp(va, vb):
    """Call ``ddot_cython_array_omp(va, vb)`` without a ``schedule``."""
    return ddot_cython_array_omp(va, vb)


def ddot_omp_static(va, vb):
    """Call ``ddot_cython_array_omp(va, vb, schedule=1)``."""
    # NOTE(review): the function name suggests schedule=1 selects a static
    # OpenMP schedule -- confirm against ddot_cython_array_omp.
    return ddot_cython_array_omp(va, vb, schedule=1)


def ddot_omp_dyn(va, vb):
    """Call ``ddot_cython_array_omp(va, vb, schedule=2)``."""
    # NOTE(review): the function name suggests schedule=2 selects a dynamic
    # OpenMP schedule -- confirm against ddot_cython_array_omp.
    return ddot_cython_array_omp(va, vb, schedule=2)


def ddot_omp_cpp(va, vb):
    """Call ``ddot_array_openmp(va, vb)``."""
    # NOTE(review): presumably the C++ OpenMP implementation, judging by
    # the names -- confirm in td3a_cpp.tutorial.dot_cython_omp.
    return ddot_array_openmp(va, vb)


def ddot_omp_cpp_16(va, vb):
    """Call ``ddot_array_openmp_16(va, vb)``."""
    # NOTE(review): the ``_16`` suffix presumably denotes a variant that
    # processes 16 elements at a time -- confirm in td3a_cpp.
    return ddot_array_openmp_16(va, vb)


# Time every implementation on the same grid of vector sizes and keep one
# DataFrame of measurements per function in ``dfs``.
dfs = []
candidates = (
    numpy_dot,
    ddot_array,
    ddot_array_16_sse,
    ddot_omp,
    ddot_omp_static,
    ddot_omp_dyn,
    ddot_omp_cpp,
    ddot_omp_cpp_16,
)
for fct in candidates:
    # One context per size; each one binds ``dot`` to the function under
    # test together with its two input vectors ``va`` and ``vb``.
    ctxs = get_vectors(fct, 40000)

    print(fct.__name__)
    # Presumably the statement is evaluated with the names of each context.
    timings = measure_time_dim('dot(va, vb)', ctxs, verbose=1)
    df = DataFrame(list(timings))
    # Tag the rows so the frames can be concatenated and pivoted later.
    df['fct'] = fct.__name__
    dfs.append(df)
    print(df.tail(n=3))

Out:

numpy_dot

  0%|          | 0/160 [00:00<?, ?it/s]
 10%|#         | 16/160 [00:00<00:00, 156.37it/s]
 20%|##        | 32/160 [00:00<00:00, 131.94it/s]
 29%|##8       | 46/160 [00:00<00:00, 114.54it/s]
 36%|###6      | 58/160 [00:00<00:00, 103.67it/s]
 43%|####3     | 69/160 [00:00<00:01, 90.25it/s]
 49%|####9     | 79/160 [00:00<00:00, 88.51it/s]
 55%|#####5    | 88/160 [00:00<00:00, 86.66it/s]
 61%|######    | 97/160 [00:01<00:00, 84.89it/s]
 66%|######6   | 106/160 [00:01<00:00, 83.23it/s]
 72%|#######1  | 115/160 [00:01<00:00, 81.55it/s]
 78%|#######7  | 124/160 [00:01<00:00, 79.95it/s]
 82%|########2 | 132/160 [00:01<00:00, 77.95it/s]
 88%|########7 | 140/160 [00:01<00:00, 76.41it/s]
 92%|#########2| 148/160 [00:01<00:00, 65.86it/s]
 97%|#########6| 155/160 [00:01<00:00, 55.78it/s]
100%|##########| 160/160 [00:02<00:00, 79.50it/s]
      average     deviation  min_exec  ...  context_size  x_name        fct
157  0.000027  5.372544e-07  0.000027  ...           232   39260  numpy_dot
158  0.000027  3.813533e-07  0.000027  ...           232   39510  numpy_dot
159  0.000028  5.732173e-07  0.000027  ...           232   39760  numpy_dot

[3 rows x 9 columns]
ddot_array

  0%|          | 0/160 [00:00<?, ?it/s]
 14%|#3        | 22/160 [00:00<00:00, 207.20it/s]
 27%|##6       | 43/160 [00:00<00:00, 133.12it/s]
 36%|###6      | 58/160 [00:00<00:00, 102.97it/s]
 44%|####3     | 70/160 [00:00<00:01, 85.52it/s]
 50%|#####     | 80/160 [00:00<00:01, 73.87it/s]
 55%|#####5    | 88/160 [00:01<00:01, 65.90it/s]
 59%|#####9    | 95/160 [00:01<00:01, 59.59it/s]
 64%|######3   | 102/160 [00:01<00:01, 54.04it/s]
 68%|######7   | 108/160 [00:01<00:01, 49.87it/s]
 71%|#######   | 113/160 [00:01<00:01, 46.70it/s]
 74%|#######3  | 118/160 [00:01<00:00, 43.66it/s]
 77%|#######6  | 123/160 [00:01<00:00, 40.97it/s]
 80%|########  | 128/160 [00:02<00:00, 38.69it/s]
 82%|########2 | 132/160 [00:02<00:00, 36.82it/s]
 85%|########5 | 136/160 [00:02<00:00, 35.29it/s]
 88%|########7 | 140/160 [00:02<00:00, 33.94it/s]
 90%|######### | 144/160 [00:02<00:00, 32.65it/s]
 92%|#########2| 148/160 [00:02<00:00, 31.55it/s]
 95%|#########5| 152/160 [00:02<00:00, 30.53it/s]
 98%|#########7| 156/160 [00:03<00:00, 29.65it/s]
 99%|#########9| 159/160 [00:03<00:00, 29.01it/s]
100%|##########| 160/160 [00:03<00:00, 50.17it/s]
      average     deviation  min_exec  ...  context_size  x_name         fct
157  0.000073  7.654904e-07  0.000072  ...           232   39260  ddot_array
158  0.000072  6.005804e-07  0.000071  ...           232   39510  ddot_array
159  0.000074  2.082035e-07  0.000074  ...           232   39760  ddot_array

[3 rows x 9 columns]
ddot_array_16_sse

  0%|          | 0/160 [00:00<?, ?it/s]
 16%|#5        | 25/160 [00:00<00:00, 239.38it/s]
 31%|###       | 49/160 [00:00<00:00, 156.31it/s]
 42%|####1     | 67/160 [00:00<00:00, 120.88it/s]
 51%|#####     | 81/160 [00:00<00:00, 100.41it/s]
 57%|#####7    | 92/160 [00:00<00:00, 87.11it/s]
 64%|######3   | 102/160 [00:01<00:00, 77.04it/s]
 69%|######9   | 111/160 [00:01<00:00, 69.00it/s]
 74%|#######4  | 119/160 [00:01<00:00, 62.57it/s]
 79%|#######8  | 126/160 [00:01<00:00, 57.97it/s]
 82%|########2 | 132/160 [00:01<00:00, 54.12it/s]
 86%|########6 | 138/160 [00:01<00:00, 50.41it/s]
 89%|########9 | 143/160 [00:01<00:00, 47.32it/s]
 92%|#########2| 148/160 [00:02<00:00, 44.90it/s]
 96%|#########5| 153/160 [00:02<00:00, 42.82it/s]
 99%|#########8| 158/160 [00:02<00:00, 40.79it/s]
100%|##########| 160/160 [00:02<00:00, 66.47it/s]
      average     deviation  min_exec  ...  context_size  x_name                fct
157  0.000052  5.656424e-07  0.000052  ...           232   39260  ddot_array_16_sse
158  0.000054  3.769594e-07  0.000054  ...           232   39510  ddot_array_16_sse
159  0.000053  5.202191e-07  0.000052  ...           232   39760  ddot_array_16_sse

[3 rows x 9 columns]
ddot_omp

  0%|          | 0/160 [00:00<?, ?it/s]
  1%|          | 1/160 [00:01<04:56,  1.86s/it]
  2%|1         | 3/160 [00:02<01:26,  1.81it/s]
  6%|6         | 10/160 [00:02<00:30,  4.89it/s]
  7%|6         | 11/160 [00:03<00:39,  3.77it/s]
  8%|8         | 13/160 [00:04<00:47,  3.07it/s]
  9%|9         | 15/160 [00:04<00:37,  3.84it/s]
 10%|#         | 16/160 [00:04<00:35,  4.02it/s]
 11%|#         | 17/160 [00:06<01:22,  1.73it/s]
 12%|#1        | 19/160 [00:06<00:57,  2.47it/s]
 13%|#3        | 21/160 [00:08<01:05,  2.11it/s]
 14%|#4        | 23/160 [00:09<01:16,  1.79it/s]
 15%|#5        | 24/160 [00:09<01:06,  2.03it/s]
 16%|#5        | 25/160 [00:10<01:11,  1.90it/s]
 17%|#6        | 27/160 [00:13<01:49,  1.22it/s]
 18%|#8        | 29/160 [00:14<01:45,  1.25it/s]
 19%|#8        | 30/160 [00:16<02:21,  1.09s/it]
 20%|##        | 32/160 [00:17<01:32,  1.39it/s]
 24%|##4       | 39/160 [00:17<00:32,  3.78it/s]
 29%|##9       | 47/160 [00:17<00:17,  6.62it/s]
 31%|###1      | 50/160 [00:21<00:43,  2.51it/s]
 32%|###2      | 52/160 [00:21<00:40,  2.70it/s]
 34%|###3      | 54/160 [00:26<01:16,  1.38it/s]
 34%|###4      | 55/160 [00:27<01:27,  1.20it/s]
 35%|###5      | 56/160 [00:28<01:24,  1.24it/s]
 36%|###6      | 58/160 [00:28<01:02,  1.63it/s]
 37%|###6      | 59/160 [00:29<00:59,  1.70it/s]
 38%|###7      | 60/160 [00:29<00:56,  1.76it/s]
 39%|###8      | 62/160 [00:30<00:42,  2.32it/s]
 39%|###9      | 63/160 [00:32<01:23,  1.16it/s]
 40%|####      | 64/160 [00:34<01:34,  1.01it/s]
 41%|####      | 65/160 [00:35<01:55,  1.22s/it]
 48%|####8     | 77/160 [00:36<00:19,  4.15it/s]
 52%|#####2    | 84/160 [00:36<00:11,  6.68it/s]
 57%|#####7    | 92/160 [00:36<00:07,  8.97it/s]
 61%|######1   | 98/160 [00:37<00:06,  9.02it/s]
 63%|######3   | 101/160 [00:39<00:14,  3.99it/s]
 65%|######5   | 104/160 [00:40<00:13,  4.15it/s]
 68%|######7   | 108/160 [00:43<00:17,  2.89it/s]
 69%|######8   | 110/160 [00:45<00:24,  2.02it/s]
 69%|######9   | 111/160 [00:45<00:23,  2.07it/s]
 75%|#######5  | 120/160 [00:46<00:08,  4.72it/s]
 81%|########1 | 130/160 [00:46<00:03,  8.62it/s]
 87%|########6 | 139/160 [00:46<00:01, 13.10it/s]
 92%|#########2| 148/160 [00:46<00:00, 18.69it/s]
 97%|#########6| 155/160 [00:46<00:00, 23.39it/s]
100%|##########| 160/160 [00:46<00:00,  3.44it/s]
      average     deviation  min_exec  ...  context_size  x_name       fct
157  0.000022  3.207936e-07  0.000021  ...           232   39260  ddot_omp
158  0.000022  3.701441e-07  0.000022  ...           232   39510  ddot_omp
159  0.000021  5.013417e-07  0.000020  ...           232   39760  ddot_omp

[3 rows x 9 columns]
ddot_omp_static

  0%|          | 0/160 [00:00<?, ?it/s]
  4%|4         | 7/160 [00:00<00:02, 69.17it/s]
  9%|8         | 14/160 [00:02<00:30,  4.75it/s]
 12%|#2        | 20/160 [00:02<00:18,  7.71it/s]
 16%|#6        | 26/160 [00:02<00:11, 11.34it/s]
 19%|#9        | 31/160 [00:02<00:08, 14.88it/s]
 24%|##3       | 38/160 [00:02<00:05, 21.16it/s]
 28%|##7       | 44/160 [00:03<00:04, 26.16it/s]
 32%|###1      | 51/160 [00:03<00:03, 32.86it/s]
 36%|###5      | 57/160 [00:03<00:02, 36.41it/s]
 39%|###9      | 63/160 [00:03<00:02, 36.11it/s]
 43%|####3     | 69/160 [00:03<00:02, 40.37it/s]
 47%|####6     | 75/160 [00:03<00:02, 35.86it/s]
 50%|#####     | 80/160 [00:04<00:02, 29.36it/s]
 52%|#####2    | 84/160 [00:04<00:05, 12.84it/s]
 56%|#####6    | 90/160 [00:05<00:04, 16.53it/s]
 59%|#####8    | 94/160 [00:05<00:03, 18.33it/s]
 61%|######1   | 98/160 [00:05<00:02, 21.03it/s]
 64%|######3   | 102/160 [00:05<00:03, 19.19it/s]
 66%|######5   | 105/160 [00:07<00:10,  5.37it/s]
 67%|######6   | 107/160 [00:08<00:11,  4.56it/s]
 68%|######8   | 109/160 [00:09<00:16,  3.05it/s]
 72%|#######2  | 116/160 [00:09<00:07,  5.82it/s]
 77%|#######6  | 123/160 [00:10<00:03,  9.34it/s]
 81%|########1 | 130/160 [00:10<00:02, 13.64it/s]
 86%|########5 | 137/160 [00:10<00:01, 18.59it/s]
 89%|########9 | 143/160 [00:10<00:00, 23.23it/s]
 93%|#########3| 149/160 [00:10<00:00, 28.04it/s]
 97%|#########6| 155/160 [00:10<00:00, 32.38it/s]
100%|##########| 160/160 [00:10<00:00, 14.89it/s]
      average     deviation  min_exec  ...  context_size  x_name              fct
157  0.000036  4.821721e-06  0.000034  ...           232   39260  ddot_omp_static
158  0.000037  5.205598e-06  0.000035  ...           232   39510  ddot_omp_static
159  0.000043  4.528813e-07  0.000043  ...           232   39760  ddot_omp_static

[3 rows x 9 columns]
ddot_omp_dyn

  0%|          | 0/160 [00:00<?, ?it/s]
  7%|6         | 11/160 [00:00<00:01, 81.31it/s]
 12%|#2        | 20/160 [00:00<00:02, 66.41it/s]
 17%|#6        | 27/160 [00:00<00:02, 65.88it/s]
 21%|##1       | 34/160 [00:00<00:01, 65.48it/s]
 26%|##5       | 41/160 [00:13<01:12,  1.64it/s]
 26%|##6       | 42/160 [00:14<01:16,  1.53it/s]
 29%|##9       | 47/160 [00:20<01:32,  1.22it/s]
 31%|###1      | 50/160 [00:26<02:00,  1.09s/it]
 32%|###2      | 52/160 [00:31<02:25,  1.34s/it]
 34%|###3      | 54/160 [00:33<02:15,  1.28s/it]
 34%|###4      | 55/160 [00:34<02:05,  1.19s/it]
 35%|###5      | 56/160 [00:36<02:13,  1.28s/it]
 36%|###5      | 57/160 [00:37<02:11,  1.28s/it]
 36%|###6      | 58/160 [00:39<02:15,  1.33s/it]
 37%|###6      | 59/160 [00:41<02:50,  1.69s/it]
 38%|###7      | 60/160 [00:44<02:58,  1.79s/it]
 38%|###8      | 61/160 [00:44<02:23,  1.45s/it]
 40%|####      | 64/160 [00:44<01:09,  1.38it/s]
 41%|####      | 65/160 [00:44<00:56,  1.67it/s]
 41%|####1     | 66/160 [00:45<00:48,  1.93it/s]
 42%|####1     | 67/160 [00:45<00:46,  2.00it/s]
 44%|####3     | 70/160 [00:45<00:24,  3.72it/s]
 45%|####5     | 72/160 [00:45<00:17,  4.92it/s]
 46%|####6     | 74/160 [00:45<00:14,  5.97it/s]
 48%|####8     | 77/160 [00:46<00:10,  8.13it/s]
 49%|####9     | 79/160 [00:46<00:08,  9.55it/s]
 51%|#####     | 81/160 [00:46<00:07, 11.11it/s]
 52%|#####1    | 83/160 [00:46<00:08,  8.63it/s]
 53%|#####3    | 85/160 [00:47<00:14,  5.18it/s]
 56%|#####5    | 89/160 [00:47<00:08,  8.44it/s]
 57%|#####7    | 92/160 [00:47<00:06, 10.62it/s]
 59%|#####9    | 95/160 [00:47<00:05, 12.90it/s]
 61%|######1   | 98/160 [00:47<00:04, 15.49it/s]
 63%|######3   | 101/160 [00:48<00:04, 11.84it/s]
 64%|######4   | 103/160 [00:49<00:10,  5.42it/s]
 66%|######5   | 105/160 [00:49<00:10,  5.30it/s]
 67%|######6   | 107/160 [00:52<00:24,  2.13it/s]
 68%|######8   | 109/160 [00:52<00:18,  2.78it/s]
 69%|######9   | 111/160 [00:52<00:14,  3.40it/s]
 70%|#######   | 112/160 [00:52<00:12,  3.69it/s]
 72%|#######1  | 115/160 [00:53<00:08,  5.38it/s]
 73%|#######3  | 117/160 [00:53<00:09,  4.37it/s]
 74%|#######4  | 119/160 [00:54<00:08,  4.86it/s]
 76%|#######5  | 121/160 [00:54<00:07,  5.13it/s]
 77%|#######6  | 123/160 [00:55<00:10,  3.37it/s]
 78%|#######7  | 124/160 [00:55<00:10,  3.43it/s]
 79%|#######8  | 126/160 [00:56<00:11,  2.90it/s]
 79%|#######9  | 127/160 [00:56<00:11,  2.95it/s]
 80%|########  | 128/160 [00:57<00:10,  3.04it/s]
 81%|########1 | 130/160 [00:57<00:06,  4.45it/s]
 83%|########3 | 133/160 [00:57<00:03,  7.07it/s]
 85%|########5 | 136/160 [00:57<00:02,  9.74it/s]
 87%|########6 | 139/160 [00:57<00:01, 12.15it/s]
 88%|########8 | 141/160 [00:57<00:01, 11.64it/s]
 89%|########9 | 143/160 [00:58<00:01, 11.26it/s]
 91%|######### | 145/160 [00:58<00:01,  9.08it/s]
 92%|#########1| 147/160 [00:58<00:01,  9.59it/s]
 94%|#########3| 150/160 [00:58<00:00, 12.29it/s]
 96%|#########5| 153/160 [00:58<00:00, 14.52it/s]
 97%|#########6| 155/160 [00:59<00:00, 15.13it/s]
 98%|#########8| 157/160 [00:59<00:00, 12.13it/s]
 99%|#########9| 159/160 [00:59<00:00, 12.71it/s]
100%|##########| 160/160 [00:59<00:00,  2.69it/s]
      average  deviation  min_exec  ...  context_size  x_name           fct
157  0.000091   0.000012  0.000085  ...           232   39260  ddot_omp_dyn
158  0.000179   0.000206  0.000086  ...           232   39510  ddot_omp_dyn
159  0.000098   0.000019  0.000087  ...           232   39760  ddot_omp_dyn

[3 rows x 9 columns]
ddot_omp_cpp

  0%|          | 0/160 [00:00<?, ?it/s]
  1%|          | 1/160 [00:00<00:17,  9.11it/s]
  4%|4         | 7/160 [00:00<00:04, 35.40it/s]
  7%|6         | 11/160 [00:01<00:17,  8.65it/s]
  9%|8         | 14/160 [00:04<01:07,  2.15it/s]
 12%|#1        | 19/160 [00:04<00:37,  3.72it/s]
 17%|#6        | 27/160 [00:05<00:21,  6.17it/s]
 19%|#8        | 30/160 [00:05<00:20,  6.26it/s]
 20%|##        | 32/160 [00:05<00:20,  6.24it/s]
 21%|##1       | 34/160 [00:08<00:46,  2.72it/s]
 22%|##1       | 35/160 [00:08<00:43,  2.85it/s]
 22%|##2       | 36/160 [00:09<00:59,  2.09it/s]
 23%|##3       | 37/160 [00:11<01:25,  1.43it/s]
 24%|##4       | 39/160 [00:12<01:06,  1.81it/s]
 25%|##5       | 40/160 [00:12<01:01,  1.96it/s]
 26%|##6       | 42/160 [00:12<00:42,  2.77it/s]
 29%|##8       | 46/160 [00:12<00:24,  4.73it/s]
 29%|##9       | 47/160 [00:13<00:33,  3.42it/s]
 30%|###       | 48/160 [00:13<00:30,  3.73it/s]
 31%|###       | 49/160 [00:14<00:33,  3.28it/s]
 33%|###3      | 53/160 [00:14<00:16,  6.31it/s]
 34%|###4      | 55/160 [00:14<00:14,  7.03it/s]
 36%|###5      | 57/160 [00:15<00:19,  5.34it/s]
 38%|###8      | 61/160 [00:15<00:11,  8.68it/s]
 40%|####      | 64/160 [00:15<00:10,  8.83it/s]
 44%|####4     | 71/160 [00:15<00:07, 12.49it/s]
 48%|####8     | 77/160 [00:16<00:05, 15.51it/s]
 54%|#####3    | 86/160 [00:16<00:03, 24.64it/s]
 57%|#####7    | 92/160 [00:16<00:02, 24.25it/s]
 64%|######4   | 103/160 [00:16<00:01, 36.79it/s]
 68%|######8   | 109/160 [00:16<00:01, 32.02it/s]
 76%|#######5  | 121/160 [00:17<00:01, 36.66it/s]
 81%|########  | 129/160 [00:17<00:00, 43.03it/s]
 84%|########4 | 135/160 [00:17<00:00, 34.96it/s]
 90%|######### | 144/160 [00:17<00:00, 36.98it/s]
 94%|#########4| 151/160 [00:17<00:00, 42.15it/s]
 98%|#########8| 157/160 [00:18<00:00, 36.42it/s]
100%|##########| 160/160 [00:18<00:00,  8.83it/s]
      average     deviation  min_exec  ...  context_size  x_name           fct
157  0.000019  3.201027e-07  0.000019  ...           232   39260  ddot_omp_cpp
158  0.000020  3.025570e-07  0.000020  ...           232   39510  ddot_omp_cpp
159  0.000019  3.616108e-07  0.000019  ...           232   39760  ddot_omp_cpp

[3 rows x 9 columns]
ddot_omp_cpp_16

  0%|          | 0/160 [00:00<?, ?it/s]
  1%|1         | 2/160 [00:01<01:45,  1.50it/s]
 11%|#         | 17/160 [00:01<00:13, 10.55it/s]
 17%|#6        | 27/160 [00:02<00:07, 17.40it/s]
 22%|##2       | 36/160 [00:02<00:05, 23.90it/s]
 28%|##8       | 45/160 [00:03<00:08, 13.09it/s]
 31%|###       | 49/160 [00:04<00:10, 10.26it/s]
 38%|###7      | 60/160 [00:04<00:06, 15.40it/s]
 44%|####4     | 71/160 [00:04<00:05, 17.65it/s]
 47%|####6     | 75/160 [00:05<00:05, 16.64it/s]
 49%|####8     | 78/160 [00:06<00:09,  8.28it/s]
 50%|#####     | 80/160 [00:07<00:10,  7.81it/s]
 51%|#####1    | 82/160 [00:08<00:17,  4.45it/s]
 53%|#####3    | 85/160 [00:08<00:14,  5.12it/s]
 54%|#####3    | 86/160 [00:09<00:14,  5.05it/s]
 59%|#####8    | 94/160 [00:09<00:06, 10.27it/s]
 63%|######3   | 101/160 [00:10<00:07,  8.12it/s]
 69%|######8   | 110/160 [00:10<00:04, 10.77it/s]
 71%|#######   | 113/160 [00:12<00:06,  6.75it/s]
 78%|#######7  | 124/160 [00:12<00:02, 12.03it/s]
 81%|########1 | 130/160 [00:12<00:02, 14.76it/s]
 88%|########7 | 140/160 [00:12<00:00, 22.06it/s]
 94%|#########3| 150/160 [00:12<00:00, 30.29it/s]
100%|##########| 160/160 [00:12<00:00, 39.56it/s]
100%|##########| 160/160 [00:12<00:00, 12.64it/s]
      average     deviation  min_exec  ...  context_size  x_name              fct
157  0.000019  4.335894e-07  0.000018  ...           232   39260  ddot_omp_cpp_16
158  0.000019  2.967473e-07  0.000018  ...           232   39510  ddot_omp_cpp_16
159  0.000019  5.319405e-07  0.000018  ...           232   39760  ddot_omp_cpp_16

[3 rows x 9 columns]

Let’s display the results

# Stack the per-function measurements; ``N`` is a copy of ``x_name``.
cc = concat(dfs)
cc['N'] = cc['x_name']

# One row per N, one column per function, cells hold the average time.
# The arguments are passed by keyword because DataFrame.pivot rejects
# positional arguments in pandas >= 2.0.
pivot_kwargs = dict(index='N', columns='fct', values='average')
# The full table feeds two plots, so it is pivoted only once.
all_sizes = cc.pivot(**pivot_kwargs)

fig, ax = plt.subplots(2, 2, figsize=(10, 10))
# Top-left: small vectors only (N <= 1000).
cc[cc.N <= 1000].pivot(**pivot_kwargs).plot(
    logy=True, ax=ax[0, 0])
# Top-right: every function, every size, logarithmic y axis.
all_sizes.plot(
    logy=True, ax=ax[0, 1])
# Bottom-right: same data with both axes logarithmic.
all_sizes.plot(
    logy=True, logx=True, ax=ax[1, 1])
# Bottom-left: only functions whose name contains 'omp' or equals
# 'ddot_array', excluding those whose name contains 'dyn'.
selected = ((cc.fct.str.contains('omp') | (cc.fct == 'ddot_array')) &
            ~cc.fct.str.contains('dyn'))
cc[selected].pivot(**pivot_kwargs).plot(
    logy=True, ax=ax[1, 0])
ax[0, 0].set_title("Comparison of cython ddot implementations")
# NOTE(review): this title says "without dot_product" but ax[0, 1] plots
# every function; the filtered plot is ax[1, 0] -- confirm intended axes.
ax[0, 1].set_title("Comparison of cython ddot implementations"
                   "\nwithout dot_product")

plt.show()
Comparison of cython ddot implementations, Comparison of cython ddot implementations without dot_product

Total running time of the script: ( 2 minutes 43.944 seconds)

Gallery generated by Sphinx-Gallery