from jupyterhub import orm
def init_db():
db = orm.new_session_factory("sqlite:///:memory:")()
user = orm.User(name="test")
db.add(user)
db.add(orm.OAuthClient(identifier="jupyterhub"))
db.commit()
return db
def add_tokens(db, n, all_tokens=None):
user = db.query(orm.User).one()
all_tokens = all_tokens or []
for i in range(n):
all_tokens.append(user.new_api_token())
return all_tokens
db = init_db()
all_tokens = add_tokens(db, 2);
[I 2022-05-25 15:09:34.677 alembic.runtime.migration migration:201] Context impl SQLiteImpl. [I 2022-05-25 15:09:34.677 alembic.runtime.migration migration:204] Will assume non-transactional DDL. [I 2022-05-25 15:09:34.681 alembic.runtime.migration migration:615] Running stamp_revision -> 651f5419b74d
db.get_bind().echo = True
orm.APIToken.find_prefix = orm.APIToken.find_prefix_startswith
orm.APIToken.find(db, all_tokens[0])
2022-05-25 15:09:37,058 INFO sqlalchemy.engine.Engine SELECT 1
[I 2022-05-25 15:09:37.058 sqlalchemy.engine.Engine base:1772] SELECT 1
2022-05-25 15:09:37,065 INFO sqlalchemy.engine.Engine [cached since 2.41s ago] ()
[I 2022-05-25 15:09:37.065 sqlalchemy.engine.Engine base:1777] [cached since 2.41s ago] ()
2022-05-25 15:09:37,068 INFO sqlalchemy.engine.Engine BEGIN (implicit)
[I 2022-05-25 15:09:37.068 sqlalchemy.engine.Engine base:953] BEGIN (implicit)
2022-05-25 15:09:37,072 INFO sqlalchemy.engine.Engine SELECT api_tokens.user_id AS api_tokens_user_id, api_tokens.service_id AS api_tokens_service_id, api_tokens.id AS api_tokens_id, api_tokens.hashed AS api_tokens_hashed, api_tokens.prefix AS api_tokens_prefix, api_tokens.client_id AS api_tokens_client_id, api_tokens.session_id AS api_tokens_session_id, api_tokens.created AS api_tokens_created, api_tokens.expires_at AS api_tokens_expires_at, api_tokens.last_activity AS api_tokens_last_activity, api_tokens.note AS api_tokens_note, api_tokens.scopes AS api_tokens_scopes FROM api_tokens WHERE (? LIKE api_tokens.prefix || '%') AND (api_tokens.expires_at IS NULL OR api_tokens.expires_at >= ?)
[I 2022-05-25 15:09:37.072 sqlalchemy.engine.Engine base:1772] SELECT api_tokens.user_id AS api_tokens_user_id, api_tokens.service_id AS api_tokens_service_id, api_tokens.id AS api_tokens_id, api_tokens.hashed AS api_tokens_hashed, api_tokens.prefix AS api_tokens_prefix, api_tokens.client_id AS api_tokens_client_id, api_tokens.session_id AS api_tokens_session_id, api_tokens.created AS api_tokens_created, api_tokens.expires_at AS api_tokens_expires_at, api_tokens.last_activity AS api_tokens_last_activity, api_tokens.note AS api_tokens_note, api_tokens.scopes AS api_tokens_scopes FROM api_tokens WHERE (? LIKE api_tokens.prefix || '%') AND (api_tokens.expires_at IS NULL OR api_tokens.expires_at >= ?)
2022-05-25 15:09:37,072 INFO sqlalchemy.engine.Engine [generated in 0.00103s] ('fc72', '2022-05-25 13:09:37.057629')
[I 2022-05-25 15:09:37.072 sqlalchemy.engine.Engine base:1777] [generated in 0.00103s] ('fc72', '2022-05-25 13:09:37.057629')
2022-05-25 15:09:37,074 INFO sqlalchemy.engine.Engine SELECT users.id AS users_id, users.name AS users_name, users.admin AS users_admin, users.created AS users_created, users.last_activity AS users_last_activity, users.cookie_id AS users_cookie_id, users.state AS users_state, users.encrypted_auth_state AS users_encrypted_auth_state FROM users WHERE users.id = ?
[I 2022-05-25 15:09:37.074 sqlalchemy.engine.Engine base:1772] SELECT users.id AS users_id, users.name AS users_name, users.admin AS users_admin, users.created AS users_created, users.last_activity AS users_last_activity, users.cookie_id AS users_cookie_id, users.state AS users_state, users.encrypted_auth_state AS users_encrypted_auth_state FROM users WHERE users.id = ?
2022-05-25 15:09:37,075 INFO sqlalchemy.engine.Engine [generated in 0.00051s] (1,)
[I 2022-05-25 15:09:37.075 sqlalchemy.engine.Engine base:1777] [generated in 0.00051s] (1,)
<APIToken('fc72...', user='test', client_id='jupyterhub')>
orm.APIToken.find_prefix = orm.APIToken.find_prefix_equal
orm.APIToken.find(db, all_tokens[0])
2022-05-25 15:09:37,228 INFO sqlalchemy.engine.Engine SELECT api_tokens.user_id AS api_tokens_user_id, api_tokens.service_id AS api_tokens_service_id, api_tokens.id AS api_tokens_id, api_tokens.hashed AS api_tokens_hashed, api_tokens.prefix AS api_tokens_prefix, api_tokens.client_id AS api_tokens_client_id, api_tokens.session_id AS api_tokens_session_id, api_tokens.created AS api_tokens_created, api_tokens.expires_at AS api_tokens_expires_at, api_tokens.last_activity AS api_tokens_last_activity, api_tokens.note AS api_tokens_note, api_tokens.scopes AS api_tokens_scopes FROM api_tokens WHERE api_tokens.prefix = ? AND (api_tokens.expires_at IS NULL OR api_tokens.expires_at >= ?)
[I 2022-05-25 15:09:37.228 sqlalchemy.engine.Engine base:1772] SELECT api_tokens.user_id AS api_tokens_user_id, api_tokens.service_id AS api_tokens_service_id, api_tokens.id AS api_tokens_id, api_tokens.hashed AS api_tokens_hashed, api_tokens.prefix AS api_tokens_prefix, api_tokens.client_id AS api_tokens_client_id, api_tokens.session_id AS api_tokens_session_id, api_tokens.created AS api_tokens_created, api_tokens.expires_at AS api_tokens_expires_at, api_tokens.last_activity AS api_tokens_last_activity, api_tokens.note AS api_tokens_note, api_tokens.scopes AS api_tokens_scopes FROM api_tokens WHERE api_tokens.prefix = ? AND (api_tokens.expires_at IS NULL OR api_tokens.expires_at >= ?)
2022-05-25 15:09:37,230 INFO sqlalchemy.engine.Engine [generated in 0.00301s] ('fc72', '2022-05-25 13:09:37.225421')
[I 2022-05-25 15:09:37.230 sqlalchemy.engine.Engine base:1777] [generated in 0.00301s] ('fc72', '2022-05-25 13:09:37.225421')
<APIToken('fc72...', user='test', client_id='jupyterhub')>
db.get_bind().echo = False
import random
%timeit random.choice(all_tokens)
283 ns ± 1.83 ns per loop (mean ± std. dev. of 7 runs, 1,000,000 loops each)
no_match = 'x' * len(all_tokens[0])
start = time.perf_counter()
deadline = start + 60
records = []
db = init_db()
all_tokens = []
def add_rows(tr, match, found):
n = len(all_tokens)
for run in tr.all_runs:
per_loop = run / tr.loops
records.append((n, per_loop, match, found, tr.loops))
while time.perf_counter() < deadline:
all_tokens = add_tokens(db, len(all_tokens) or 100, all_tokens)
print(len(all_tokens))
for match in ('equal', 'startswith'):
orm.APIToken.find_prefix = getattr(orm.APIToken, f"find_prefix_{match}")
for found in (True, False):
if found:
tr = %timeit -o -n 500 orm.APIToken.find(db, random.choice(all_tokens))
else:
tr = %timeit -o -n 500 orm.APIToken.find(db, no_match)
add_rows(tr, match, found)
[I 2022-05-25 15:23:16.941 alembic.runtime.migration migration:201] Context impl SQLiteImpl. [I 2022-05-25 15:23:16.941 alembic.runtime.migration migration:204] Will assume non-transactional DDL. [I 2022-05-25 15:23:16.944 alembic.runtime.migration migration:615] Running stamp_revision -> 651f5419b74d
100 158 µs ± 1.63 µs per loop (mean ± std. dev. of 7 runs, 500 loops each) 140 µs ± 378 ns per loop (mean ± std. dev. of 7 runs, 500 loops each) 164 µs ± 1.66 µs per loop (mean ± std. dev. of 7 runs, 500 loops each) 145 µs ± 1.96 µs per loop (mean ± std. dev. of 7 runs, 500 loops each) 200 161 µs ± 2.84 µs per loop (mean ± std. dev. of 7 runs, 500 loops each) 141 µs ± 1.85 µs per loop (mean ± std. dev. of 7 runs, 500 loops each) 181 µs ± 2.79 µs per loop (mean ± std. dev. of 7 runs, 500 loops each) 157 µs ± 1.17 µs per loop (mean ± std. dev. of 7 runs, 500 loops each) 400 158 µs ± 3.19 µs per loop (mean ± std. dev. of 7 runs, 500 loops each) 139 µs ± 1.5 µs per loop (mean ± std. dev. of 7 runs, 500 loops each) 197 µs ± 1.78 µs per loop (mean ± std. dev. of 7 runs, 500 loops each) 177 µs ± 1.19 µs per loop (mean ± std. dev. of 7 runs, 500 loops each) 800 163 µs ± 2.89 µs per loop (mean ± std. dev. of 7 runs, 500 loops each) 140 µs ± 1.93 µs per loop (mean ± std. dev. of 7 runs, 500 loops each) 241 µs ± 2.38 µs per loop (mean ± std. dev. of 7 runs, 500 loops each) 221 µs ± 2.1 µs per loop (mean ± std. dev. of 7 runs, 500 loops each) 1600 161 µs ± 2.52 µs per loop (mean ± std. dev. of 7 runs, 500 loops each) 142 µs ± 1.16 µs per loop (mean ± std. dev. of 7 runs, 500 loops each) 325 µs ± 2.07 µs per loop (mean ± std. dev. of 7 runs, 500 loops each) 306 µs ± 3.63 µs per loop (mean ± std. dev. of 7 runs, 500 loops each) 3200 160 µs ± 1.52 µs per loop (mean ± std. dev. of 7 runs, 500 loops each) 140 µs ± 1.98 µs per loop (mean ± std. dev. of 7 runs, 500 loops each) 497 µs ± 3.21 µs per loop (mean ± std. dev. of 7 runs, 500 loops each) 475 µs ± 2.38 µs per loop (mean ± std. dev. of 7 runs, 500 loops each) 6400 162 µs ± 1.88 µs per loop (mean ± std. dev. of 7 runs, 500 loops each) 139 µs ± 1.69 µs per loop (mean ± std. dev. of 7 runs, 500 loops each) 841 µs ± 4.1 µs per loop (mean ± std. dev. of 7 runs, 500 loops each) 818 µs ± 7.91 µs per loop (mean ± std. dev. of 7 runs, 500 loops each) 12800 162 µs ± 4.63 µs per loop (mean ± std. dev. of 7 runs, 500 loops each) 139 µs ± 1.43 µs per loop (mean ± std. dev. of 7 runs, 500 loops each) 1.53 ms ± 15.8 µs per loop (mean ± std. dev. of 7 runs, 500 loops each) 1.51 ms ± 19.6 µs per loop (mean ± std. dev. of 7 runs, 500 loops each) 25600 165 µs ± 3.12 µs per loop (mean ± std. dev. of 7 runs, 500 loops each) 141 µs ± 1.1 µs per loop (mean ± std. dev. of 7 runs, 500 loops each) 3.04 ms ± 45.3 µs per loop (mean ± std. dev. of 7 runs, 500 loops each) 2.96 ms ± 13.9 µs per loop (mean ± std. dev. of 7 runs, 500 loops each)
import pandas as pd
df = pd.DataFrame(records, columns=["n", "t", "match", "found", "loops"])
df["
import altair as alt
df["ms"] = df.t * 1e3
chart = (
alt.Chart(df)
.mark_point()
.encode(
x=alt.X(
"n",
axis=alt.Axis(title="number of tokens"),
),
y=alt.Y(
"ms",
scale=alt.Scale(type="log"),
axis=alt.Axis(title="lookup time (ms)"),
),
color="match",
shape="found",
opacity=alt.value(0.25),
)
)
chart
(
chart
+ chart.transform_regression(
"n",
"t",
groupby=["match"],
method="exp",
).mark_line()
)