#!/usr/bin/env python
# coding: utf-8

# Source: https://github.com/vgrabovets/benchmarkit

# In[1]:


from benchmarkit import benchmark, benchmark_analyze, benchmark_run


# ## Benchmark time

# In[2]:


N = 10000
seq_list = list(range(N))
seq_set = set(range(N))

SAVE_PATH = '/tmp/benchmark_time.jsonl'


# In[3]:


@benchmark(num_iters=100, save_params=True)
def search_in_list(num_items=N):
    return num_items - 1 in seq_list


@benchmark(num_iters=100, save_params=True)
def search_in_set(num_items=N):
    return num_items - 1 in seq_set


# In[4]:


benchmark_results = benchmark_run(
    [search_in_list, search_in_set],
    SAVE_PATH,
    comment='initial benchmark search',
    extra_fields=['num_items'],
)


# Change `N` and repeat the benchmark (it is enough to change `N` in cell 2 and rerun cells 3 and 4):

# In[5]:


N = 1000000
seq_list = list(range(N))
seq_set = set(range(N))


@benchmark(num_iters=100, save_params=True)
def search_in_list(num_items=N):
    return num_items - 1 in seq_list


@benchmark(num_iters=100, save_params=True)
def search_in_set(num_items=N):
    return num_items - 1 in seq_set


benchmark_results = benchmark_run(
    [search_in_list, search_in_set],
    SAVE_PATH,
    comment='million items',
    extra_fields=['num_items'],
)


# `benchmark_results` contains the benchmark data for the last run.

# In[6]:


benchmark_results


# Run `benchmark_run` from the command line (drop the leading `!` in a real terminal):

# In[7]:


get_ipython().system('benchmark_run ../test_data/time/benchmark_functions.py --save_dir /tmp/ --comment "million items" --extra_fields num_items')


# `benchmark_analyze` outputs the results of the benchmarks stored in the file.

# In[8]:


benchmark_df = benchmark_analyze(SAVE_PATH, extra_fields=['num_items'])


# `benchmark_df` is a pandas DataFrame with the results.

# In[9]:


benchmark_df


# Run `benchmark_analyze` from the command line (drop the leading `!` in a real terminal):

# In[10]:


get_ipython().system('benchmark_analyze /tmp/benchmark_time.jsonl --extra_fields num_items')
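# An optional aside (not part of the original notebook): since the benchmark
# results are saved as plain JSONL, the raw file can also be inspected directly
# with pandas. Only the `SAVE_PATH` defined above is assumed here; the exact
# column names depend on benchmarkit's output schema and are not assumed.

import pandas as pd

# Each line of the JSONL file is one JSON record describing a benchmarked run.
raw_records_df = pd.read_json(SAVE_PATH, lines=True)
raw_records_df.head()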
# ## Benchmark model

# In[11]:


from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression


# In[12]:


MODEL_BENCHMARK_SAVE_FILE = '/tmp/benchmark_model.jsonl'


# In[13]:


x, y = load_iris(return_X_y=True)


# Only the parameters passed to the decorated function `log_regression` will be saved (the regularization parameter `C` and `fit_intercept`).
#
# In order to save outputs, the decorated function `log_regression` should return a `dict` with the results that need to be saved.

# In[14]:


@benchmark(save_params=True, save_output=True)
def log_regression(C=1.0, fit_intercept=True):
    clf = LogisticRegression(
        random_state=0,
        solver='lbfgs',
        multi_class='multinomial',
        C=C,
        fit_intercept=fit_intercept,
    )
    clf.fit(x, y)
    score = clf.score(x, y)
    return {'score': score}


# In[15]:


model_benchmark_results = benchmark_run(
    log_regression,
    MODEL_BENCHMARK_SAVE_FILE,
    comment='baseline model',
    extra_fields=['C', 'fit_intercept'],
    metric='score',
    bigger_is_better=True,
)


# Change the hyperparameter `C`:

# In[16]:


@benchmark(save_params=True, save_output=True)
def log_regression(C=0.5, fit_intercept=True):
    clf = LogisticRegression(
        random_state=0,
        solver='lbfgs',
        multi_class='multinomial',
        C=C,
        fit_intercept=fit_intercept,
    )
    clf.fit(x, y)
    score = clf.score(x, y)
    return {'score': score}


model_benchmark_results = benchmark_run(
    log_regression,
    MODEL_BENCHMARK_SAVE_FILE,
    comment='stronger regularization',
    extra_fields=['C', 'fit_intercept'],
    metric='score',
    bigger_is_better=True,
)


# In[17]:


model_benchmark_results


# In[19]:


model_benchmark_df = benchmark_analyze(
    MODEL_BENCHMARK_SAVE_FILE,
    metric='score',
    bigger_is_better=True,
    extra_fields=['C', 'fit_intercept'],
)


# In[20]:


model_benchmark_df


# Run `benchmark_analyze` from the command line (drop the leading `!` in a real terminal):

# In[21]:


get_ipython().system('benchmark_analyze /tmp/benchmark_model.jsonl --metric score --bigger_is_better --extra_fields C fit_intercept')
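# A final optional sketch (not part of the original notebook): because
# `benchmark_analyze` returns a pandas DataFrame, the recorded runs can be
# ranked directly. This assumes the metric passed above ('score') appears as a
# column in `model_benchmark_df`; the exact schema is benchmarkit's, not
# guaranteed here.

best_run = model_benchmark_df.sort_values('score', ascending=False).head(1)
best_run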