MIKE'S NOTE: This doesn't seem to work anymore. This is widely linked to as a very popular post on StackOverflow (https://stackoverflow.com/questions/28301931/how-to-profile-cython-functions-line-by-line) but something seems to have broken since it was posted (2015, now 2023).
This notebook demonstrates the use of Robert Kern's line_profiler
(https://github.com/rkern/line_profiler) to profile cython
functions line by line. Thanks to Robert Bradshaw for implementing this feature (https://groups.google.com/forum/#!topic/cython-users/FH3TYK8BkeA).
#Load Robert Kern's line profiler
%load_ext line_profiler
import line_profiler
# Mike's note: this updated cell is from the accompanying StackOverflow post:
# https://stackoverflow.com/questions/28301931/how-to-profile-cython-functions-line-by-line
# Thanks to @tryptofame for proposing an updated snippet
from Cython.Compiler.Options import get_directive_defaults
directive_defaults = get_directive_defaults()
directive_defaults['linetrace'] = True
directive_defaults['binding'] = True
# Mike's note: also added this, as suggested later in the thread, but doesn't do much
directive_defaults['profile'] = True
# Mike's note: also added this
%load_ext cython
%%cython -a -f --compile-args=-DCYTHON_TRACE_NOGIL=1
# Mike's note: It was also suggested in the above thread to change CYTHON_TRACE_NOGIL to just CYTHON_TRACE
# Doesn't seem to make a difference either way.
#We need to define the macro CYTHON_TRACE=1 (cf. http://docs.cython.org/src/reference/compilation.html)
def cumulative_sum(int n):
cdef int s=0, i
for i in range(n):
s += i
return s
Generated by Cython 0.29.33
Yellow lines hint at Python interaction.
Click on a line that starts with a "+
" to see the C code that Cython generated for it.
01: # Mike's note: It was also suggested in the above thread to change CYTHON_TRACE_NOGIL to just CYTHON_TRACE
02: # Doesn't seem to make a difference either way.
03: #We need to define the macro CYTHON_TRACE=1 (cf. http://docs.cython.org/src/reference/compilation.html)
04:
+05: def cumulative_sum(int n):
/* Python wrapper */ static PyObject *__pyx_pw_46_cython_magic_590a00cc7698b35a9253033ed5711422_1cumulative_sum(PyObject *__pyx_self, PyObject *__pyx_arg_n); /*proto*/ static PyMethodDef __pyx_mdef_46_cython_magic_590a00cc7698b35a9253033ed5711422_1cumulative_sum = {"cumulative_sum", (PyCFunction)__pyx_pw_46_cython_magic_590a00cc7698b35a9253033ed5711422_1cumulative_sum, METH_O, 0}; static PyObject *__pyx_pw_46_cython_magic_590a00cc7698b35a9253033ed5711422_1cumulative_sum(PyObject *__pyx_self, PyObject *__pyx_arg_n) { int __pyx_v_n; PyObject *__pyx_r = 0; __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("cumulative_sum (wrapper)", 0); assert(__pyx_arg_n); { __pyx_v_n = __Pyx_PyInt_As_int(__pyx_arg_n); if (unlikely((__pyx_v_n == (int)-1) && PyErr_Occurred())) __PYX_ERR(0, 5, __pyx_L3_error) } goto __pyx_L4_argument_unpacking_done; __pyx_L3_error:; __Pyx_AddTraceback("_cython_magic_590a00cc7698b35a9253033ed5711422.cumulative_sum", __pyx_clineno, __pyx_lineno, __pyx_filename); __Pyx_RefNannyFinishContext(); return NULL; __pyx_L4_argument_unpacking_done:; __pyx_r = __pyx_pf_46_cython_magic_590a00cc7698b35a9253033ed5711422_cumulative_sum(__pyx_self, ((int)__pyx_v_n)); int __pyx_lineno = 0; const char *__pyx_filename = NULL; int __pyx_clineno = 0; /* function exit code */ __Pyx_RefNannyFinishContext(); return __pyx_r; } static PyObject *__pyx_pf_46_cython_magic_590a00cc7698b35a9253033ed5711422_cumulative_sum(CYTHON_UNUSED PyObject *__pyx_self, int __pyx_v_n) { int __pyx_v_s; int __pyx_v_i; PyObject *__pyx_r = NULL; __Pyx_TraceDeclarations __Pyx_TraceFrameInit(__pyx_codeobj_) __Pyx_RefNannyDeclarations __Pyx_RefNannySetupContext("cumulative_sum", 0); __Pyx_TraceCall("cumulative_sum", __pyx_f[0], 5, 0, __PYX_ERR(0, 5, __pyx_L1_error)); /* … */ /* function exit code */ __pyx_L1_error:; __Pyx_XDECREF(__pyx_t_4); __Pyx_AddTraceback("_cython_magic_590a00cc7698b35a9253033ed5711422.cumulative_sum", __pyx_clineno, __pyx_lineno, __pyx_filename); __pyx_r = NULL; __pyx_L0:; __Pyx_XGIVEREF(__pyx_r); __Pyx_TraceReturn(__pyx_r, 0); __Pyx_RefNannyFinishContext(); return __pyx_r; } /* … */ __pyx_tuple__2 = PyTuple_Pack(4, __pyx_n_s_n, __pyx_n_s_n, __pyx_n_s_s, __pyx_n_s_i); if (unlikely(!__pyx_tuple__2)) __PYX_ERR(0, 5, __pyx_L1_error) __Pyx_GOTREF(__pyx_tuple__2); __Pyx_GIVEREF(__pyx_tuple__2); /* … */ __Pyx_TraceLine(5,0,__PYX_ERR(0, 5, __pyx_L1_error)) __pyx_t_1 = __Pyx_CyFunction_New(&__pyx_mdef_46_cython_magic_590a00cc7698b35a9253033ed5711422_1cumulative_sum, 0, __pyx_n_s_cumulative_sum, NULL, __pyx_n_s_cython_magic_590a00cc7698b35a92, __pyx_d, ((PyObject *)__pyx_codeobj_)); if (unlikely(!__pyx_t_1)) __PYX_ERR(0, 5, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_1); if (PyDict_SetItem(__pyx_d, __pyx_n_s_cumulative_sum, __pyx_t_1) < 0) __PYX_ERR(0, 5, __pyx_L1_error) __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+06: cdef int s=0, i
__Pyx_TraceLine(6,0,__PYX_ERR(0, 6, __pyx_L1_error)) __pyx_v_s = 0;
+07: for i in range(n):
__Pyx_TraceLine(7,0,__PYX_ERR(0, 7, __pyx_L1_error)) __pyx_t_1 = __pyx_v_n; __pyx_t_2 = __pyx_t_1; for (__pyx_t_3 = 0; __pyx_t_3 < __pyx_t_2; __pyx_t_3+=1) { __pyx_v_i = __pyx_t_3;
+08: s += i
__Pyx_TraceLine(8,0,__PYX_ERR(0, 8, __pyx_L1_error)) __pyx_v_s = (__pyx_v_s + __pyx_v_i); }
09:
+10: return s
__Pyx_TraceLine(10,0,__PYX_ERR(0, 10, __pyx_L1_error)) __Pyx_XDECREF(__pyx_r); __pyx_t_4 = __Pyx_PyInt_From_int(__pyx_v_s); if (unlikely(!__pyx_t_4)) __PYX_ERR(0, 10, __pyx_L1_error) __Pyx_GOTREF(__pyx_t_4); __pyx_r = __pyx_t_4; __pyx_t_4 = 0; goto __pyx_L0;
#Print profiling statistics using the `line_profiler` API
profile = line_profiler.LineProfiler(cumulative_sum)
profile.runcall(cumulative_sum, 100)
profile.print_stats()
# Mike's note:
# On at least two computers, both macOS, one a newer M1 running Monterey and one an old Intel running High Sierra
# doesn't work either way.
# This is running line_profiler 4.0.2, and have tested both from pip
# and building directly from this commit (the latest as of this writing):
# https://github.com/pyutils/line_profiler/commit/6ee9f63d7a0b4cbec55e58f02462cdd08f9884e0
# And cython 0.29.33.
# Timer unit: 1e-09 s
Timer unit: 1e-09 s
#Print profiling statistics using the `lprun` magic
# Mike's note: same as above.
%lprun -f cumulative_sum cumulative_sum(100)