import random

import numpy as np
import pandas as pd
from pandas import DataFrame, Series
# Fix the NumPy RNG seed so the random benchmark data is reproducible.
np.random.seed(1234)
# Cap DataFrame display at 12 rows. The fully qualified option name is used
# because the bare 'max_rows' pattern can match more than one pandas option,
# which makes set_option raise instead of applying the setting.
pd.set_option('display.max_rows', 12)
def create_frame(N):
    """Build an N-row DataFrame of standard-normal samples.

    Parameters
    ----------
    N : int or integral float
        Number of rows. Values such as ``1e6`` are accepted and converted
        with ``int()`` (fractional parts are truncated), because NumPy
        rejects non-integer array dimensions.

    Returns
    -------
    DataFrame
        Shape ``(N, 2)`` with columns ``'A'`` and ``'B'``.
    """
    n_rows = int(N)
    return DataFrame(np.random.randn(n_rows, 2), columns=list('AB'))
# need top/bottom 5 elements
def by_sorting(df, n=5):
    """Return the n smallest and n largest values of column ``A`` via a full sort.

    Parameters
    ----------
    df : DataFrame
        Must have a column named ``A``.
    n : int, default 5
        How many values to take from each end of the sorted column.

    Returns
    -------
    DataFrame
        Columns ``'min'`` and ``'max'``, each in ascending order.
    """
    # Series.order() no longer exists in pandas; sort_values() is the
    # equivalent ascending sort.
    s = df.A.sort_values()
    return DataFrame({'min': s.head(n).values,
                      'max': s.tail(n).values})
def by_n(df, n=5):
    """Return the n smallest and n largest values of column ``A`` via partial selection.

    Uses ``nsmallest`` / ``nlargest`` instead of sorting the whole column.

    Parameters
    ----------
    df : DataFrame
        Must have a column named ``A``.
    n : int, default 5
        How many values to take from each end.

    Returns
    -------
    DataFrame
        Columns ``'min'`` and ``'max'``, each in ascending order.
    """
    smallest = df.A.nsmallest(n)
    # nlargest yields values in descending order; re-sort ascending so the
    # 'max' column lines up with a sort-based result. Series.order() no
    # longer exists in pandas, so sort_values() is used.
    largest = df.A.nlargest(n).sort_values()
    return DataFrame({'min': smallest.values,
                      'max': largest.values})
# Benchmark size: one million rows. Written as an integer because the value
# is forwarded to create_frame, which uses it as an array dimension.
N = 10**6
df = create_frame(N)
# Compute the five smallest/largest values of column A with both approaches
# and check that they produce identical frames.
result_by_sorting = by_sorting(df)
result_by_n = by_n(df)
result_by_sorting.equals(result_by_n)
True
by_n(df)
 | max | min |
---|---|---|
0 | 4.339092 | -4.727822 |
1 | 4.359975 | -4.622794 |
2 | 4.447241 | -4.556683 |
3 | 4.465920 | -4.504740 |
4 | 5.190941 | -4.387650 |
%timeit by_sorting(df)
10 loops, best of 3: 176 ms per loop
%timeit by_n(df)
10 loops, best of 3: 47.6 ms per loop
# Input for the unique-values benchmark: ten million random integers drawn
# from only 10000 possible values, so the Series is dominated by repeats.
s = Series(np.random.randint(0,10000,size=10000000))
def f_pandas(s):
    """Return the distinct values of *s* as computed by ``pd.unique``."""
    distinct_values = pd.unique(s)
    return distinct_values
def f_numpy(df):
    """Return the sorted distinct values of *df* using ``np.unique``.

    Parameters
    ----------
    df : Series
        Object exposing ``.values``. The name ``df`` is kept for
        compatibility with existing keyword callers, although the
        benchmark passes a Series.

    Returns
    -------
    numpy.ndarray
        The unique values in ascending order.
    """
    # Operate on the argument itself; the previous body read a module-level
    # ``s`` and silently ignored whatever was passed in.
    return np.unique(df.values)
# Both implementations must find the same set of distinct values. Sort each
# result first so the comparison does not depend on the order they come back
# in; array_equal also copes with results of different lengths, which an
# elementwise == followed by .all() does not.
np.array_equal(np.sort(f_pandas(s)), np.sort(f_numpy(s)))
True
%timeit f_pandas(s)
10 loops, best of 3: 66.6 ms per loop
%timeit f_numpy(s)
1 loops, best of 3: 572 ms per loop