In this notebook, we present several simple datasets that are used to illustrate computational techniques.
References:
Kaggle https://www.kaggle.com/datasets
UCI Machine Learning Repository https://archive.ics.uci.edu/ml/datasets.html
Scikit-learn already includes several standard datasets http://scikit-learn.org/stable/datasets/
Movielens is a famous dataset for movie recommendation http://grouplens.org/datasets/movielens/
The Turkish Statistical Institute (TurkStat, TÜİK) also publishes some interesting datasets http://www.tuik.gov.tr
A Handbook of Small Data Sets edited by D.J. Hand, F. Daly, A.D. Lunn, K.J. McConway and E. Ostrowski from Chapman and Hall, ISBN 0 412 39920 2. https://www.stat.ncsu.edu/research/sas/sicl/data/
import numpy as np
import pandas as pd
# The vehicle, accident and energy tables use ';' as the field separator.
df_arac = pd.read_csv(u'data/arac.csv', sep=';')
df_kaza = pd.read_csv(u'data/kaza.csv', sep=';')
df_enerji = pd.read_csv(u'data/enerji.csv', sep=';')

# The iris table is separated by single spaces.
df_iris = pd.read_csv(u'data/iris.txt', sep=' ')

# The well-log file is read with an explicit column name, 'y', for its
# single column.
df_welllog = pd.read_csv(u'data/well-log.csv', names=['y'])

# Display the vehicle table.
df_arac
Year | All | Car | Minibus | Bus | SmallTruck | Truck | Motorcycle | SpecialVehicles | Machinery | Tractor | |
---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1966 | 231977 | 91469 | 10913 | 12041 | 31462 | 47931 | 32099 | 3610 | 2452.0 | NaN |
1 | 1967 | 284194 | 112367 | 16008 | 13332 | 39927 | 56889 | 39647 | 3641 | 2383.0 | NaN |
2 | 1968 | 318768 | 125375 | 18967 | 13948 | 43441 | 62616 | 47062 | 4033 | 3326.0 | NaN |
3 | 1969 | 354398 | 137345 | 20540 | 15529 | 48655 | 69478 | 52959 | 4568 | 5324.0 | NaN |
4 | 1970 | 369808 | 137771 | 20916 | 15980 | 52152 | 70730 | 60994 | 5070 | 6195.0 | NaN |
5 | 1971 | 403880 | 153676 | 22380 | 17140 | 57011 | 73433 | 68417 | 5349 | 6474.0 | NaN |
6 | 1972 | 460087 | 187272 | 25559 | 18504 | 62796 | 78920 | 74402 | 5747 | 6887.0 | NaN |
7 | 1973 | 543318 | 240360 | 30055 | 20011 | 71043 | 86780 | 80860 | 6420 | 7789.0 | NaN |
8 | 1974 | 647947 | 313160 | 34122 | 21404 | 81025 | 95309 | 86028 | 7338 | 9561.0 | NaN |
9 | 1975 | 785920 | 403546 | 40623 | 23763 | 98579 | 108381 | 91421 | 8450 | 11157.0 | NaN |
10 | 1976 | 920141 | 488894 | 46066 | 25388 | 116861 | 122176 | 96984 | 9224 | 14548.0 | NaN |
11 | 1977 | 1042239 | 560424 | 51999 | 27096 | 134213 | 138093 | 102127 | 10137 | 18150.0 | NaN |
12 | 1978 | 1142561 | 624438 | 56836 | 28559 | 144695 | 146551 | 109890 | 10698 | 20894.0 | NaN |
13 | 1979 | 1566405 | 688687 | 61596 | 30634 | 155278 | 157095 | 120378 | 11291 | 22875.0 | 318571.0 |
14 | 1980 | 1696681 | 742252 | 64707 | 32783 | 165821 | 164893 | 137931 | 11777 | 24090.0 | 352427.0 |
15 | 1981 | 1802742 | 776432 | 66514 | 33839 | 172269 | 172372 | 160557 | 12459 | 26246.0 | 382054.0 |
16 | 1982 | 1901926 | 811465 | 69598 | 35432 | 178762 | 180772 | 182795 | 13386 | 30160.0 | 399556.0 |
17 | 1983 | 2041244 | 856350 | 73585 | 38478 | 186427 | 190277 | 217327 | 14705 | 33532.0 | 430563.0 |
18 | 1984 | 2215174 | 919577 | 80697 | 43638 | 198106 | 197721 | 256338 | 16312 | 39445.0 | 463340.0 |
19 | 1985 | 2391357 | 983444 | 87951 | 47119 | 212505 | 205496 | 289052 | 17639 | 45561.0 | 502590.0 |
20 | 1986 | 2641353 | 1087234 | 97917 | 50798 | 224755 | 217111 | 327326 | 19448 | 50819.0 | 565945.0 |
21 | 1987 | 2887287 | 1193021 | 106314 | 53554 | 233480 | 225872 | 369894 | 21236 | 55129.0 | 628787.0 |
22 | 1988 | 3140265 | 1310257 | 112885 | 56172 | 240718 | 234166 | 420889 | 23301 | 58300.0 | 683577.0 |
23 | 1989 | 3388259 | 1434830 | 118026 | 58859 | 248567 | 241392 | 472853 | 25060 | 60191.0 | 728481.0 |
24 | 1990 | 3750678 | 1649879 | 125399 | 63700 | 263407 | 257353 | 531941 | 26519 | 63024.0 | 769456.0 |
25 | 1991 | 4101975 | 1864344 | 133632 | 68973 | 280891 | 273409 | 590488 | 28606 | 66981.0 | 794651.0 |
26 | 1992 | 4584717 | 2181388 | 145312 | 75592 | 308180 | 287160 | 655347 | 31158 | 72000.0 | 828580.0 |
27 | 1993 | 5250622 | 2619852 | 159900 | 84254 | 354290 | 305511 | 743320 | 33703 | 79233.0 | 870559.0 |
28 | 1994 | 5606712 | 2861640 | 166424 | 87545 | 374473 | 313771 | 788786 | 35495 | 83072.0 | 895506.0 |
29 | 1995 | 5922859 | 3058511 | 173051 | 90197 | 397743 | 321421 | 819922 | 37272 | 87214.0 | 937528.0 |
30 | 1996 | 6305707 | 3274156 | 182694 | 94978 | 442788 | 333269 | 854150 | 40212 | 95318.0 | 988142.0 |
31 | 1997 | 6863462 | 3570105 | 197057 | 101896 | 529838 | 353586 | 905121 | 45327 | 107151.0 | 1053381.0 |
32 | 1998 | 7371541 | 3838288 | 211495 | 108361 | 626004 | 371163 | 940935 | 49925 | 117913.0 | 1107457.0 |
33 | 1999 | 7758511 | 4072326 | 221683 | 112186 | 692935 | 378967 | 975746 | 52105 | 120937.0 | 1131626.0 |
34 | 2000 | 8320449 | 4422180 | 235885 | 118454 | 794459 | 394283 | 1011284 | 55677 | 129157.0 | 1159070.0 |
35 | 2001 | 8521956 | 4534803 | 239381 | 119306 | 833175 | 396493 | 1031221 | 57490 | 131019.0 | 1179068.0 |
36 | 2002 | 8655170 | 4600140 | 241700 | 120097 | 875381 | 399025 | 1046907 | 58790 | 133003.0 | 1180127.0 |
37 | 2003 | 8903843 | 4700343 | 245394 | 123500 | 973457 | 405034 | 1073415 | 60511 | 137933.0 | 1184256.0 |
38 | 2004 | 10236357 | 5400440 | 318954 | 152712 | 1259867 | 647420 | 1218677 | 28004 | NaN | 1210283.0 |
39 | 2005 | 11145826 | 5772745 | 338539 | 163390 | 1475057 | 676929 | 1441066 | 30333 | NaN | 1247767.0 |
40 | 2006 | 12227393 | 6140992 | 357523 | 175949 | 1695624 | 709535 | 1822831 | 34260 | NaN | 1290679.0 |
41 | 2007 | 13022945 | 6472156 | 372601 | 189128 | 1890459 | 729202 | 2003492 | 38573 | NaN | 1327334.0 |
42 | 2008 | 13765395 | 6796629 | 383548 | 199934 | 2066007 | 744217 | 2181383 | 35100 | NaN | 1358577.0 |
43 | 2009 | 14316700 | 7093964 | 384053 | 201033 | 2204951 | 727302 | 2303261 | 34104 | NaN | 1368032.0 |
44 | 2010 | 15095603 | 7544871 | 386973 | 208510 | 2399038 | 726359 | 2389488 | 35492 | NaN | 1404872.0 |
45 | 2011 | 16089528 | 8113111 | 389435 | 219906 | 2611104 | 728458 | 2527190 | 34116 | NaN | 1466208.0 |
46 | 2012 | 17033413 | 8648875 | 396119 | 235949 | 2794606 | 751650 | 2657722 | 33071 | NaN | 1515421.0 |
47 | 2013 | 17939447 | 9283923 | 421848 | 219885 | 2933050 | 755950 | 2722826 | 36148 | NaN | 1565817.0 |
48 | 2014 | 18828721 | 9857915 | 427264 | 211200 | 3062479 | 773728 | 2828466 | 40731 | NaN | 1626938.0 |
49 | 2015 | 19882069 | 10509258 | 446822 | 216566 | 3235304 | 802615 | 2938821 | 45138 | NaN | 1687545.0 |
We show each time series $x_t$ as a plot. Here, $t = 1966, \dots, 2015$.
To get a feel for the proportional change over time, it is also useful to visualize the log-ratio $$r_t = \log \frac{x_t}{x_{t-1}}$$
When displaying several plots, it is important to match the scales in order not to miss relevant structure.
%matplotlib inline
import matplotlib as mpl
import matplotlib.pylab as plt
import numpy as np
# Reload the vehicle table so that this cell can be run on its own.
df_arac = pd.read_csv(u'data/arac.csv',sep=';')
df = df_arac

# Marker face colour used for each plotted column.
cols = {'All':'r', 'Bus':'g', 'Car':'b', 'Minibus': 'm', 'Motorcycle':'y', 'SmallTruck':'r', 'Truck':'k', 'SpecialVehicles':'w', 'Machinery':'g'}
N = len(cols)

plt.figure(figsize=(12,38))
# One row per column name (alphabetical order): raw series on the left,
# year-over-year log10 ratio on the right.
for row, name in enumerate(sorted(cols)):
    series = df[name]
    face = cols[name]

    # Left panel: the raw counts. (A shared y-limit of [0, 1E7] for all
    # panels but the first was left disabled in the original cell.)
    plt.subplot(N, 2, 2*row + 1)
    plt.plot(df['Year'], series, 'o-', markerfacecolor=face)
    plt.title(name)

    # Right panel: log10(x_t) - log10(x_{t-1}); the first entry is NaN
    # because the shifted series has no predecessor there.
    lr = np.log10(series) - np.log10(series.shift(1))
    plt.subplot(N, 2, 2*row + 2)
    plt.plot(df['Year'], lr, 'o-', markerfacecolor=face)
    # Same y-range on every ratio panel so the panels are comparable.
    plt.ylim([-0.1,0.2])
    plt.title(name)
plt.show()
df_kaza
Year | NumberOfAccidents | MaterialLossOnly | PersonalInjury | PersonsKilled | PersonsInjured | |
---|---|---|---|---|---|---|
0 | 2002 | 439777 | 374029 | 65748 | 4093 | 116412 |
1 | 2003 | 455637 | 388606 | 67031 | 3946 | 118214 |
2 | 2004 | 537352 | 460344 | 77008 | 4427 | 136437 |
3 | 2005 | 620789 | 533516 | 87273 | 4505 | 154086 |
4 | 2006 | 728755 | 632627 | 96128 | 4633 | 169080 |
5 | 2007 | 825561 | 718567 | 106994 | 5007 | 189057 |
6 | 2008 | 950120 | 845908 | 104212 | 4236 | 184468 |
7 | 2009 | 1053346 | 942225 | 111121 | 4324 | 201380 |
8 | 2010 | 1106201 | 989397 | 116804 | 4045 | 211496 |
9 | 2011 | 1228928 | 1097083 | 131845 | 3835 | 238074 |
10 | 2012 | 1296634 | 1143082 | 153552 | 3750 | 268079 |
11 | 2013 | 1207354 | 1046048 | 161306 | 3685 | 274829 |
12 | 2014 | 1199010 | 1030498 | 168512 | 3524 | 285059 |
# Line/marker colour for each accident statistic.
cols = {'NumberOfAccidents': 'b', 'MaterialLossOnly':'g', 'PersonalInjury':'k', 'PersonsKilled':'r', 'PersonsInjured':'m'}
x = df_kaza['Year']
N = len(cols)

plt.figure(figsize=(12,14))
# One stacked panel per statistic, in the dictionary's iteration order.
for panel, (name, colour) in enumerate(cols.items(), start=1):
    plt.subplot(N, 1, panel)
    y = df_kaza[name]
    # The format string is '<colour>o-': circles joined by a solid line.
    plt.plot(x, y, colour+'o-', markerfacecolor=colour)
    plt.title(name)
plt.show()
This data set contains the total energy consumption in gigawatt-hours (GWh) and its percentage distribution over different sectors.
df_enerji
Year | Total | Household | Commercial | Government | Industrial | Illumination | Other | |
---|---|---|---|---|---|---|---|---|
0 | 1970 | 7307.80000 | 15.899450 | 4.774351 | 4.129834 | 64.173896 | 2.641014 | 8.381455 |
1 | 1971 | 8289.30000 | 16.263134 | 4.567334 | 4.130626 | 64.478303 | 2.412749 | 8.147853 |
2 | 1972 | 9527.30000 | 16.087454 | 4.676036 | 3.815352 | 64.999528 | 2.183200 | 8.238431 |
3 | 1973 | 10530.10000 | 14.793782 | 4.299104 | 3.517535 | 67.287110 | 2.059809 | 8.042659 |
4 | 1974 | 11358.70000 | 15.157544 | 5.075405 | 3.828783 | 66.720663 | 1.957971 | 7.259634 |
5 | 1975 | 13491.70000 | 17.485565 | 4.887449 | 3.675593 | 64.819852 | 1.857438 | 7.274102 |
6 | 1976 | 16078.90000 | 17.549708 | 4.653303 | 3.455460 | 65.334693 | 1.591527 | 7.415308 |
7 | 1977 | 17968.80000 | 17.701794 | 4.988090 | 3.086461 | 66.688371 | 1.418013 | 6.117270 |
8 | 1978 | 18933.80000 | 18.901647 | 4.878577 | 3.172105 | 65.523561 | 1.460879 | 6.063231 |
9 | 1979 | 19633.10000 | 20.140477 | 5.725535 | 3.168119 | 63.858993 | 1.479644 | 5.627232 |
10 | 1980 | 20398.20000 | 21.507290 | 5.621574 | 2.986538 | 63.769842 | 1.419243 | 4.695512 |
11 | 1981 | 22030.00000 | 20.944167 | 5.705402 | 2.896505 | 64.485247 | 1.354517 | 4.614163 |
12 | 1982 | 23586.80000 | 20.886258 | 5.832923 | 2.527261 | 64.433073 | 1.310055 | 5.010430 |
13 | 1983 | 24465.10000 | 21.029548 | 5.720394 | 2.808082 | 63.664976 | 1.211113 | 5.565888 |
14 | 1984 | 27635.20000 | 19.802281 | 5.680798 | 2.774360 | 65.232023 | 1.197024 | 5.313513 |
15 | 1985 | 29708.60000 | 18.965215 | 5.454649 | 3.000815 | 66.000081 | 1.370983 | 5.208256 |
16 | 1986 | 32209.70000 | 18.951123 | 5.215820 | 3.217354 | 64.843510 | 2.067700 | 5.704493 |
17 | 1987 | 36697.30000 | 18.920193 | 4.762748 | 3.184703 | 65.053560 | 2.142664 | 5.936132 |
18 | 1988 | 39721.50000 | 20.025175 | 4.988231 | 3.195750 | 63.586471 | 2.052793 | 6.151580 |
19 | 1989 | 43120.00000 | 19.565399 | 5.334416 | 2.964518 | 64.013683 | 2.123609 | 5.998377 |
20 | 1990 | 46820.00000 | 19.569201 | 5.463050 | 3.125374 | 62.391713 | 2.630073 | 6.820589 |
21 | 1991 | 49282.90000 | 21.998908 | 6.197078 | 3.782854 | 57.853332 | 2.877063 | 7.290764 |
22 | 1992 | 53984.70000 | 21.268433 | 6.057827 | 3.720684 | 58.415810 | 3.444865 | 7.092380 |
23 | 1993 | 59237.00000 | 21.201276 | 6.086399 | 3.825987 | 57.813698 | 3.832571 | 7.240070 |
24 | 1994 | 61400.91700 | 21.904705 | 6.033646 | 5.399170 | 55.598624 | 4.075099 | 6.988756 |
25 | 1995 | 67393.85100 | 21.504259 | 6.224939 | 4.468614 | 56.395955 | 4.608581 | 6.797653 |
26 | 1996 | 74156.63800 | 22.107522 | 7.741592 | 4.048820 | 54.800583 | 4.159951 | 7.141532 |
27 | 1997 | 81884.91240 | 22.610262 | 8.368328 | 4.644799 | 53.112755 | 4.042548 | 7.221309 |
28 | 1998 | 87704.61670 | 22.842661 | 8.817956 | 4.870464 | 52.607228 | 4.208718 | 6.652974 |
29 | 1999 | 91201.87700 | 24.762927 | 8.999833 | 4.139236 | 50.964237 | 4.589034 | 6.544733 |
30 | 2000 | 98295.70716 | 24.301732 | 9.501314 | 4.179161 | 49.688534 | 4.636738 | 7.692520 |
31 | 2001 | 97070.04100 | 24.268381 | 10.206843 | 4.501882 | 48.407351 | 5.035778 | 7.579765 |
32 | 2002 | 102947.86150 | 22.884812 | 10.556112 | 4.449368 | 49.043653 | 4.957773 | 8.108282 |
33 | 2003 | 111766.06690 | 22.542526 | 11.516827 | 4.074626 | 49.298672 | 4.451090 | 8.116259 |
34 | 2004 | 121141.85240 | 22.798859 | 12.923817 | 3.740024 | 49.170396 | 3.658957 | 7.707947 |
35 | 2005 | 130262.75920 | 23.748135 | 14.235676 | 3.579472 | 47.821971 | 3.180485 | 7.434261 |
36 | 2006 | 143070.49860 | 24.090251 | 14.158324 | 4.225048 | 47.547686 | 2.761137 | 7.217555 |
37 | 2007 | 155135.25970 | 23.512272 | 14.916764 | 4.469121 | 47.567871 | 2.612328 | 6.921644 |
38 | 2008 | 161947.52830 | 24.442236 | 14.759924 | 4.534958 | 46.218837 | 2.451552 | 7.592493 |
39 | 2009 | 156894.06990 | 24.951552 | 15.946336 | 4.455007 | 44.915704 | 2.450592 | 7.280809 |
40 | 2010 | 172050.62810 | 24.068907 | 16.118511 | 4.127845 | 46.108899 | 2.190216 | 7.385622 |
41 | 2011 | 186099.55100 | 23.788930 | 16.402637 | 3.907820 | 47.275875 | 2.141934 | 6.482804 |
42 | 2012 | 194923.34950 | 23.278451 | 16.327404 | 4.511259 | 47.352835 | 1.992931 | 6.537121 |
43 | 2013 | 198045.18050 | 22.707689 | 18.911461 | 4.130228 | 47.086118 | 1.936747 | 5.227757 |
# List the available columns before plotting.
print(df_enerji.columns)

x = df_enerji['Year']
# Divide by 1000 so the axis matches the TWh unit named in the labels.
y = df_enerji['Total'] / 1000

plt.plot(x, y, 'o-')
plt.title('Total Energy consumption in Turkey (in Tera-Watt-hours TWh)')
plt.xlabel('Year')
plt.ylabel('Energy Consumption/TWh')
plt.show()
Index(['Year', 'Total', 'Household', 'Commercial', 'Government', 'Industrial', 'Illumination', 'Other'], dtype='object')
Famous data set from (Fisher, 1936)
Class 1 | Class 2 | Class 3 | ? | |
---|---|---|---|---|
Iris Setosa | Iris Versicolour | Iris Virginica | ? | ? |
![]() |
![]() |
![]() |
![]() |
![]() |
Features
df_iris
sl | sw | pl | pw | c | |
---|---|---|---|---|---|
0 | 5.1 | 3.5 | 1.4 | 0.2 | 1 |
1 | 4.9 | 3.0 | 1.4 | 0.2 | 1 |
2 | 4.7 | 3.2 | 1.3 | 0.2 | 1 |
3 | 4.6 | 3.1 | 1.5 | 0.2 | 1 |
4 | 5.0 | 3.6 | 1.4 | 0.2 | 1 |
5 | 5.4 | 3.9 | 1.7 | 0.4 | 1 |
6 | 4.6 | 3.4 | 1.4 | 0.3 | 1 |
7 | 5.0 | 3.4 | 1.5 | 0.2 | 1 |
8 | 4.4 | 2.9 | 1.4 | 0.2 | 1 |
9 | 4.9 | 3.1 | 1.5 | 0.1 | 1 |
10 | 5.4 | 3.7 | 1.5 | 0.2 | 1 |
11 | 4.8 | 3.4 | 1.6 | 0.2 | 1 |
12 | 4.8 | 3.0 | 1.4 | 0.1 | 1 |
13 | 4.3 | 3.0 | 1.1 | 0.1 | 1 |
14 | 5.8 | 4.0 | 1.2 | 0.2 | 1 |
15 | 5.7 | 4.4 | 1.5 | 0.4 | 1 |
16 | 5.4 | 3.9 | 1.3 | 0.4 | 1 |
17 | 5.1 | 3.5 | 1.4 | 0.3 | 1 |
18 | 5.7 | 3.8 | 1.7 | 0.3 | 1 |
19 | 5.1 | 3.8 | 1.5 | 0.3 | 1 |
20 | 5.4 | 3.4 | 1.7 | 0.2 | 1 |
21 | 5.1 | 3.7 | 1.5 | 0.4 | 1 |
22 | 4.6 | 3.6 | 1.0 | 0.2 | 1 |
23 | 5.1 | 3.3 | 1.7 | 0.5 | 1 |
24 | 4.8 | 3.4 | 1.9 | 0.2 | 1 |
25 | 5.0 | 3.0 | 1.6 | 0.2 | 1 |
26 | 5.0 | 3.4 | 1.6 | 0.4 | 1 |
27 | 5.2 | 3.5 | 1.5 | 0.2 | 1 |
28 | 5.2 | 3.4 | 1.4 | 0.2 | 1 |
29 | 4.7 | 3.2 | 1.6 | 0.2 | 1 |
... | ... | ... | ... | ... | ... |
120 | 6.9 | 3.2 | 5.7 | 2.3 | 3 |
121 | 5.6 | 2.8 | 4.9 | 2.0 | 3 |
122 | 7.7 | 2.8 | 6.7 | 2.0 | 3 |
123 | 6.3 | 2.7 | 4.9 | 1.8 | 3 |
124 | 6.7 | 3.3 | 5.7 | 2.1 | 3 |
125 | 7.2 | 3.2 | 6.0 | 1.8 | 3 |
126 | 6.2 | 2.8 | 4.8 | 1.8 | 3 |
127 | 6.1 | 3.0 | 4.9 | 1.8 | 3 |
128 | 6.4 | 2.8 | 5.6 | 2.1 | 3 |
129 | 7.2 | 3.0 | 5.8 | 1.6 | 3 |
130 | 7.4 | 2.8 | 6.1 | 1.9 | 3 |
131 | 7.9 | 3.8 | 6.4 | 2.0 | 3 |
132 | 6.4 | 2.8 | 5.6 | 2.2 | 3 |
133 | 6.3 | 2.8 | 5.1 | 1.5 | 3 |
134 | 6.1 | 2.6 | 5.6 | 1.4 | 3 |
135 | 7.7 | 3.0 | 6.1 | 2.3 | 3 |
136 | 6.3 | 3.4 | 5.6 | 2.4 | 3 |
137 | 6.4 | 3.1 | 5.5 | 1.8 | 3 |
138 | 6.0 | 3.0 | 4.8 | 1.8 | 3 |
139 | 6.9 | 3.1 | 5.4 | 2.1 | 3 |
140 | 6.7 | 3.1 | 5.6 | 2.4 | 3 |
141 | 6.9 | 3.1 | 5.1 | 2.3 | 3 |
142 | 5.8 | 2.7 | 5.1 | 1.9 | 3 |
143 | 6.8 | 3.2 | 5.9 | 2.3 | 3 |
144 | 6.7 | 3.3 | 5.7 | 2.5 | 3 |
145 | 6.7 | 3.0 | 5.2 | 2.3 | 3 |
146 | 6.3 | 2.5 | 5.0 | 1.9 | 3 |
147 | 6.5 | 3.0 | 5.2 | 2.0 | 3 |
148 | 6.2 | 3.4 | 5.4 | 2.3 | 3 |
149 | 5.9 | 3.0 | 5.1 | 1.8 | 3 |
150 rows × 5 columns
from itertools import product
# Feature columns of the iris table; 'c' holds the class label.
keys = ['sl','sw','pl','pw']
N = len(keys)

plt.figure(figsize=(15,15))
# N x N grid over all ordered feature pairs: histograms on the diagonal,
# class-coloured scatter plots elsewhere.
for cell, (v, w) in enumerate(product(keys, repeat=2), start=1):
    plt.subplot(N, N, cell)
    if v != w:
        plt.scatter(df_iris[v], df_iris[w], c=df_iris['c'], cmap='prism')
        plt.xlabel(v)
        plt.ylabel(w)
    else:
        plt.hist(df_iris[v], bins=20)
        plt.xlabel(v)
plt.show()
http://scikit-learn.org/stable/datasets/
Scikit-learn has a set of utilities for loading standard datasets used in Machine Learning.
from matplotlib.pylab import plt
from sklearn.datasets import fetch_openml

# fetch_mldata() has been removed from scikit-learn; fetch_openml() is the
# replacement loader.  as_frame=False requests plain NumPy arrays so that
# mnist.data[k] is a flat vector of 784 pixel values.
# NOTE(review): the sample order of the OpenML copy may differ from that of
# the old 'MNIST original' copy, so the digits shown at the indices below
# can differ from the previously rendered figure -- confirm if this matters.
mnist = fetch_openml('mnist_784', version=1, as_frame=False)

N = 8
M = 8
plt.figure(figsize=(14,14))
# Show an M x N grid of sample digits picked at regular index offsets.
for i, (v, w) in enumerate(product(range(N), range(M)), start=1):
    plt.subplot(M, N, i)
    plt.imshow(mnist.data[10000+5000*v+20*w].reshape(28,28), cmap='gray_r', interpolation='nearest')
plt.show()
import pandas as pd
%matplotlib inline
import matplotlib as mpl
import matplotlib.pylab as plt
import numpy as np
# u.data is tab-separated and is read with header=None, so the column names
# are supplied explicitly.
rnames = ['user_id', 'movie_id', 'rating', 'timestamp']
ratings = pd.read_table(
    'data/ml-100k/u.data',
    sep='\t',
    header=None,
    names=rnames,
)
# Display the ratings table.
ratings
user_id | movie_id | rating | timestamp | |
---|---|---|---|---|
0 | 196 | 242 | 3 | 881250949 |
1 | 186 | 302 | 3 | 891717742 |
2 | 22 | 377 | 1 | 878887116 |
3 | 244 | 51 | 2 | 880606923 |
4 | 166 | 346 | 1 | 886397596 |
5 | 298 | 474 | 4 | 884182806 |
6 | 115 | 265 | 2 | 881171488 |
7 | 253 | 465 | 5 | 891628467 |
8 | 305 | 451 | 3 | 886324817 |
9 | 6 | 86 | 3 | 883603013 |
10 | 62 | 257 | 2 | 879372434 |
11 | 286 | 1014 | 5 | 879781125 |
12 | 200 | 222 | 5 | 876042340 |
13 | 210 | 40 | 3 | 891035994 |
14 | 224 | 29 | 3 | 888104457 |
15 | 303 | 785 | 3 | 879485318 |
16 | 122 | 387 | 5 | 879270459 |
17 | 194 | 274 | 2 | 879539794 |
18 | 291 | 1042 | 4 | 874834944 |
19 | 234 | 1184 | 2 | 892079237 |
20 | 119 | 392 | 4 | 886176814 |
21 | 167 | 486 | 4 | 892738452 |
22 | 299 | 144 | 4 | 877881320 |
23 | 291 | 118 | 2 | 874833878 |
24 | 308 | 1 | 4 | 887736532 |
25 | 95 | 546 | 2 | 879196566 |
26 | 38 | 95 | 5 | 892430094 |
27 | 102 | 768 | 2 | 883748450 |
28 | 63 | 277 | 4 | 875747401 |
29 | 160 | 234 | 5 | 876861185 |
... | ... | ... | ... | ... |
99970 | 449 | 120 | 1 | 879959573 |
99971 | 661 | 762 | 2 | 876037121 |
99972 | 721 | 874 | 3 | 877137447 |
99973 | 821 | 151 | 4 | 874792889 |
99974 | 764 | 596 | 3 | 876243046 |
99975 | 537 | 443 | 3 | 886031752 |
99976 | 618 | 628 | 2 | 891308019 |
99977 | 487 | 291 | 3 | 883445079 |
99978 | 113 | 975 | 5 | 875936424 |
99979 | 943 | 391 | 2 | 888640291 |
99980 | 864 | 685 | 4 | 888891900 |
99981 | 750 | 323 | 3 | 879445877 |
99982 | 279 | 64 | 1 | 875308510 |
99983 | 646 | 750 | 3 | 888528902 |
99984 | 654 | 370 | 2 | 887863914 |
99985 | 617 | 582 | 4 | 883789294 |
99986 | 913 | 690 | 3 | 880824288 |
99987 | 660 | 229 | 2 | 891406212 |
99988 | 421 | 498 | 4 | 892241344 |
99989 | 495 | 1091 | 4 | 888637503 |
99990 | 806 | 421 | 4 | 882388897 |
99991 | 676 | 538 | 4 | 892685437 |
99992 | 721 | 262 | 3 | 877137285 |
99993 | 913 | 209 | 2 | 881367150 |
99994 | 378 | 78 | 3 | 880056976 |
99995 | 880 | 476 | 3 | 880175444 |
99996 | 716 | 204 | 5 | 879795543 |
99997 | 276 | 1090 | 1 | 874795795 |
99998 | 13 | 225 | 2 | 882399156 |
99999 | 12 | 203 | 3 | 879959583 |
100000 rows × 4 columns
# Build a dense (users x movies) rating matrix; cells without a rating
# stay NaN.  The matrix is sized by the largest user and movie id.
NRows = max(ratings['user_id'])
NCols = max(ratings['movie_id'])
X = np.full((NRows, NCols), np.nan)

# Ids are shifted by one to obtain zero-based row/column positions.
# A single vectorised assignment replaces the per-row Python loop over
# ratings, which performed three .loc lookups per rating.
user_idx = ratings['user_id'].values - 1
movie_idx = ratings['movie_id'].values - 1
X[user_idx, movie_idx] = ratings['rating'].values

print(NRows, NCols)
943 1682
# Preview the top-left 10 x 20 corner of the rating matrix.
corner = X[:10, :20]
plt.imshow(corner, interpolation='nearest')
plt.colorbar()
plt.show()
import pandas as pd
# The ml-1m files use the two-character separator '::' and are read with
# header=None and pandas' Python parsing engine, so column names are
# supplied explicitly.
unames = ['user_id', 'gender', 'age', 'occupation', 'zip']
rnames = ['user_id', 'movie_id', 'rating', 'timestamp']
mnames = ['movie_id', 'title', 'genres']

users = pd.read_table(
    'data/ml-1m/users.dat', sep='::', header=None, names=unames, engine='python'
)
ratings = pd.read_table(
    'data/ml-1m/ratings.dat', sep='::', header=None, names=rnames, engine='python'
)
movies = pd.read_table(
    'data/ml-1m/movies.dat', sep='::', header=None, names=mnames, engine='python'
)

# Display the ratings table (users and movies are loaded but not shown).
ratings
user_id | movie_id | rating | timestamp | |
---|---|---|---|---|
0 | 1 | 1193 | 5 | 978300760 |
1 | 1 | 661 | 3 | 978302109 |
2 | 1 | 914 | 3 | 978301968 |
3 | 1 | 3408 | 4 | 978300275 |
4 | 1 | 2355 | 5 | 978824291 |
5 | 1 | 1197 | 3 | 978302268 |
6 | 1 | 1287 | 5 | 978302039 |
7 | 1 | 2804 | 5 | 978300719 |
8 | 1 | 594 | 4 | 978302268 |
9 | 1 | 919 | 4 | 978301368 |
10 | 1 | 595 | 5 | 978824268 |
11 | 1 | 938 | 4 | 978301752 |
12 | 1 | 2398 | 4 | 978302281 |
13 | 1 | 2918 | 4 | 978302124 |
14 | 1 | 1035 | 5 | 978301753 |
15 | 1 | 2791 | 4 | 978302188 |
16 | 1 | 2687 | 3 | 978824268 |
17 | 1 | 2018 | 4 | 978301777 |
18 | 1 | 3105 | 5 | 978301713 |
19 | 1 | 2797 | 4 | 978302039 |
20 | 1 | 2321 | 3 | 978302205 |
21 | 1 | 720 | 3 | 978300760 |
22 | 1 | 1270 | 5 | 978300055 |
23 | 1 | 527 | 5 | 978824195 |
24 | 1 | 2340 | 3 | 978300103 |
25 | 1 | 48 | 5 | 978824351 |
26 | 1 | 1097 | 4 | 978301953 |
27 | 1 | 1721 | 4 | 978300055 |
28 | 1 | 1545 | 4 | 978824139 |
29 | 1 | 745 | 3 | 978824268 |
... | ... | ... | ... | ... |
1000179 | 6040 | 2762 | 4 | 956704584 |
1000180 | 6040 | 1036 | 3 | 956715455 |
1000181 | 6040 | 508 | 4 | 956704972 |
1000182 | 6040 | 1041 | 4 | 957717678 |
1000183 | 6040 | 3735 | 4 | 960971654 |
1000184 | 6040 | 2791 | 4 | 956715569 |
1000185 | 6040 | 2794 | 1 | 956716438 |
1000186 | 6040 | 527 | 5 | 956704219 |
1000187 | 6040 | 2003 | 1 | 956716294 |
1000188 | 6040 | 535 | 4 | 964828734 |
1000189 | 6040 | 2010 | 5 | 957716795 |
1000190 | 6040 | 2011 | 4 | 956716113 |
1000191 | 6040 | 3751 | 4 | 964828782 |
1000192 | 6040 | 2019 | 5 | 956703977 |
1000193 | 6040 | 541 | 4 | 956715288 |
1000194 | 6040 | 1077 | 5 | 964828799 |
1000195 | 6040 | 1079 | 2 | 956715648 |
1000196 | 6040 | 549 | 4 | 956704746 |
1000197 | 6040 | 2020 | 3 | 956715288 |
1000198 | 6040 | 2021 | 3 | 956716374 |
1000199 | 6040 | 2022 | 5 | 956716207 |
1000200 | 6040 | 2028 | 5 | 956704519 |
1000201 | 6040 | 1080 | 4 | 957717322 |
1000202 | 6040 | 1089 | 4 | 956704996 |
1000203 | 6040 | 1090 | 3 | 956715518 |
1000204 | 6040 | 1091 | 1 | 956716541 |
1000205 | 6040 | 1094 | 5 | 956704887 |
1000206 | 6040 | 562 | 5 | 956704746 |
1000207 | 6040 | 1096 | 4 | 956715648 |
1000208 | 6040 | 1097 | 4 | 956715569 |
1000209 rows × 4 columns
# Export the ratings to a plain-text file with the layout
#   line 1: the literal 2
#   line 2: "<nrows> <ncols>"   (largest user id, largest movie id)
#   line 3: the number of ratings
#   then one "<user_id-1> <movie_id-1> <rating>" line per rating.
nnz = len(ratings)
nrows = max(ratings['user_id'])
ncols = max(ratings['movie_id'])
print(2)
print(nrows, ncols)
print(nnz)

# Plain Python lists iterate far faster than one ratings.iloc[i] lookup
# per row, and str() of their elements yields the same text.
user_ids = ratings['user_id'].tolist()
movie_ids = ratings['movie_id'].tolist()
rating_values = ratings['rating'].tolist()

# The with-block closes the file even if a write fails.
with open('movie_lens1M.txt', 'w') as f:
    f.write('2\n' + str(nrows) + " " + str(ncols) + "\n" + str(nnz) + "\n")
    f.writelines(
        str(u - 1) + " " + str(m - 1) + " " + str(r) + '\n'
        for u, m, r in zip(user_ids, movie_ids, rating_values)
    )
2 6040 3952 1000209
Read the main figures from Hamlet
import re
from collections import Counter
from itertools import islice

# Only the first MAXLINES lines of the file are processed.
MAXLINES = 10000

# Raw string: '\W' inside a plain string literal is an invalid escape
# sequence.  The capturing group makes split() return the separators too;
# the isalpha() filter below discards them.
_token_split = re.compile(r'(\W+)')

# NOTE(review): this is an absolute, machine-specific path, while the other
# cells read from the relative data/ directory -- confirm and relocate.
words = []
with open('/Users/cemgil/src/ipynb/notes/data/books-eng/hamlet.txt') as f:
    for l in islice(f, MAXLINES):
        words.extend(w for w in _token_split.split(l) if w.isalpha())

# Number of occurrences of every alphabetic token.
Counts = Counter(words)

# Tokens written entirely in upper case (longer than one letter), in
# alphabetical order, together with their counts.
for w in sorted(Counts):
    if w.isupper() and len(w)>1:
        print(w, Counts[w])

# The same upper-case tokens, in their order of appearance in the text.
for w in words:
    if w.isupper() and len(w)>1:
        print(w)
BERNARDO 26 CLAUDIUS 105 CORNELIUS 6 FORTINBRAS 5 FRANCISCO 10 GERTRUDE 76 GHOST 1 GUILDENSTERN 54 HAMLET 355 HORATIO 118 KING 104 LAERTES 54 LORD 86 LUCIANUS 3 MARCELLUS 41 MARGARET 1 OPHELIA 68 OSRIC 19 POLONIUS 114 PRINCE 2 QUEEN 76 REYNALDO 16 ROSENCRANTZ 70 VOLTIMAND 7 CLAUDIUS HAMLET POLONIUS HORATIO LAERTES LUCIANUS VOLTIMAND CORNELIUS ROSENCRANTZ GUILDENSTERN OSRIC MARCELLUS BERNARDO FRANCISCO REYNALDO FORTINBRAS GERTRUDE OPHELIA FRANCISCO BERNARDO BERNARDO FRANCISCO BERNARDO FRANCISCO BERNARDO FRANCISCO BERNARDO FRANCISCO BERNARDO FRANCISCO BERNARDO FRANCISCO HORATIO MARCELLUS HORATIO MARCELLUS FRANCISCO MARCELLUS FRANCISCO MARCELLUS BERNARDO HORATIO BERNARDO MARCELLUS BERNARDO MARCELLUS HORATIO BERNARDO HORATIO BERNARDO MARCELLUS BERNARDO MARCELLUS BERNARDO HORATIO BERNARDO MARCELLUS HORATIO MARCELLUS BERNARDO HORATIO MARCELLUS BERNARDO HORATIO MARCELLUS HORATIO MARCELLUS HORATIO MARCELLUS HORATIO BERNARDO HORATIO MARCELLUS HORATIO BERNARDO HORATIO MARCELLUS BERNARDO HORATIO MARCELLUS HORATIO MARCELLUS KING CLAUDIUS QUEEN GERTRUDE HAMLET POLONIUS LAERTES VOLTIMAND CORNELIUS KING CLAUDIUS CORNELIUS VOLTIMAND KING CLAUDIUS VOLTIMAND CORNELIUS LAERTES KING CLAUDIUS LORD POLONIUS KING CLAUDIUS HAMLET KING CLAUDIUS HAMLET QUEEN GERTRUDE HAMLET QUEEN GERTRUDE HAMLET KING CLAUDIUS QUEEN GERTRUDE HAMLET KING CLAUDIUS HAMLET HAMLET HORATIO MARCELLUS BERNARDO HORATIO HAMLET HORATIO HAMLET MARCELLUS HAMLET HORATIO HAMLET HORATIO HAMLET HORATIO HAMLET HORATIO HAMLET HORATIO HAMLET HORATIO HAMLET HORATIO HAMLET HORATIO HAMLET HORATIO HAMLET MARCELLUS HAMLET HORATIO HAMLET HORATIO HAMLET MARCELLUS BERNARDO HAMLET MARCELLUS BERNARDO HAMLET MARCELLUS BERNARDO HAMLET HORATIO HAMLET HORATIO HAMLET HORATIO HAMLET HORATIO HAMLET HORATIO HAMLET HORATIO MARCELLUS BERNARDO HORATIO HAMLET HORATIO HAMLET HORATIO HAMLET HAMLET HAMLET LAERTES OPHELIA LAERTES OPHELIA LAERTES OPHELIA LAERTES OPHELIA LAERTES POLONIUS LORD POLONIUS LAERTES LORD POLONIUS LAERTES OPHELIA LAERTES LORD 
POLONIUS OPHELIA LORD POLONIUS OPHELIA LORD POLONIUS OPHELIA LORD POLONIUS OPHELIA LORD POLONIUS OPHELIA LORD POLONIUS OPHELIA HAMLET HORATIO MARCELLUS HAMLET HORATIO HAMLET HORATIO HAMLET HORATIO HAMLET HORATIO HAMLET HORATIO HAMLET HAMLET HORATIO MARCELLUS HORATIO HAMLET HORATIO HAMLET HORATIO HAMLET MARCELLUS HAMLET HORATIO HAMLET HAMLET HORATIO MARCELLUS HORATIO MARCELLUS HORATIO MARCELLUS GHOST HAMLET HAMLET HAMLET HAMLET HAMLET HAMLET HAMLET HAMLET HAMLET HAMLET HAMLET MARCELLUS HORATIO MARCELLUS HORATIO HAMLET HORATIO HAMLET HORATIO MARCELLUS MARCELLUS HORATIO HAMLET HORATIO HAMLET HORATIO MARCELLUS HAMLET HORATIO MARCELLUS HAMLET HORATIO HAMLET HORATIO HAMLET HORATIO HAMLET HORATIO HAMLET HORATIO MARCELLUS HAMLET HORATIO MARCELLUS HAMLET MARCELLUS HAMLET HAMLET HORATIO HAMLET HAMLET HAMLET HORATIO HAMLET HAMLET POLONIUS POLONIUS REYNALDO LORD POLONIUS REYNALDO LORD POLONIUS REYNALDO LORD POLONIUS REYNALDO LORD POLONIUS REYNALDO LORD POLONIUS REYNALDO LORD POLONIUS REYNALDO LORD POLONIUS REYNALDO LORD POLONIUS REYNALDO LORD POLONIUS REYNALDO LORD POLONIUS REYNALDO LORD POLONIUS REYNALDO LORD POLONIUS REYNALDO LORD POLONIUS REYNALDO LORD POLONIUS REYNALDO OPHELIA OPHELIA LORD POLONIUS OPHELIA LORD POLONIUS OPHELIA LORD POLONIUS OPHELIA LORD POLONIUS OPHELIA LORD POLONIUS KING CLAUDIUS QUEEN GERTRUDE ROSENCRANTZ GUILDENSTERN KING CLAUDIUS QUEEN GERTRUDE ROSENCRANTZ GUILDENSTERN KING CLAUDIUS QUEEN GERTRUDE GUILDENSTERN QUEEN GERTRUDE ROSENCRANTZ GUILDENSTERN POLONIUS LORD POLONIUS KING CLAUDIUS LORD POLONIUS KING CLAUDIUS LORD POLONIUS KING CLAUDIUS POLONIUS QUEEN GERTRUDE KING CLAUDIUS POLONIUS VOLTIMAND CORNELIUS VOLTIMAND KING CLAUDIUS VOLTIMAND CORNELIUS LORD POLONIUS QUEEN GERTRUDE LORD POLONIUS QUEEN GERTRUDE LORD POLONIUS HAMLET KING CLAUDIUS LORD POLONIUS KING CLAUDIUS LORD POLONIUS KING CLAUDIUS QUEEN GERTRUDE LORD POLONIUS KING CLAUDIUS LORD POLONIUS KING CLAUDIUS LORD POLONIUS QUEEN GERTRUDE LORD POLONIUS KING CLAUDIUS QUEEN GERTRUDE LORD POLONIUS 
KING CLAUDIUS QUEEN GERTRUDE HAMLET HAMLET LORD POLONIUS HAMLET LORD POLONIUS HAMLET LORD POLONIUS HAMLET LORD POLONIUS HAMLET LORD POLONIUS HAMLET LORD POLONIUS HAMLET LORD POLONIUS HAMLET LORD POLONIUS HAMLET LORD POLONIUS HAMLET LORD POLONIUS HAMLET LORD POLONIUS HAMLET ROSENCRANTZ GUILDENSTERN LORD POLONIUS ROSENCRANTZ POLONIUS POLONIUS GUILDENSTERN ROSENCRANTZ HAMLET ROSENCRANTZ GUILDENSTERN HAMLET ROSENCRANTZ HAMLET GUILDENSTERN HAMLET ROSENCRANTZ HAMLET GUILDENSTERN HAMLET ROSENCRANTZ HAMLET ROSENCRANTZ HAMLET ROSENCRANTZ HAMLET GUILDENSTERN HAMLET ROSENCRANTZ HAMLET ROSENCRANTZ GUILDENSTERN HAMLET ROSENCRANTZ HAMLET GUILDENSTERN HAMLET ROSENCRANTZ HAMLET ROSENCRANTZ GUILDENSTERN HAMLET GUILDENSTERN HAMLET ROSENCRANTZ HAMLET ROSENCRANTZ HAMLET ROSENCRANTZ HAMLET ROSENCRANTZ HAMLET ROSENCRANTZ HAMLET ROSENCRANTZ HAMLET ROSENCRANTZ HAMLET GUILDENSTERN HAMLET ROSENCRANTZ HAMLET GUILDENSTERN HAMLET GUILDENSTERN HAMLET POLONIUS LORD POLONIUS HAMLET ROSENCRANTZ HAMLET LORD POLONIUS HAMLET LORD POLONIUS HAMLET LORD POLONIUS HAMLET LORD POLONIUS HAMLET LORD POLONIUS HAMLET LORD POLONIUS HAMLET LORD POLONIUS HAMLET LORD POLONIUS HAMLET HAMLET LORD POLONIUS LORD POLONIUS HAMLET HAMLET LORD POLONIUS LORD POLONIUS HAMLET LORD POLONIUS HAMLET LORD POLONIUS HAMLET POLONIUS HAMLET HAMLET ROSENCRANTZ HAMLET ROSENCRANTZ GUILDENSTERN KING CLAUDIUS QUEEN GERTRUDE POLONIUS OPHELIA ROSENCRANTZ GUILDENSTERN KING CLAUDIUS ROSENCRANTZ GUILDENSTERN QUEEN GERTRUDE ROSENCRANTZ GUILDENSTERN ROSENCRANTZ QUEEN GERTRUDE ROSENCRANTZ LORD POLONIUS KING CLAUDIUS ROSENCRANTZ ROSENCRANTZ GUILDENSTERN KING CLAUDIUS QUEEN GERTRUDE OPHELIA QUEEN GERTRUDE LORD POLONIUS OPHELIA KING CLAUDIUS LORD POLONIUS KING CLAUDIUS POLONIUS HAMLET HAMLET OPHELIA HAMLET OPHELIA HAMLET OPHELIA HAMLET OPHELIA HAMLET OPHELIA HAMLET OPHELIA HAMLET OPHELIA HAMLET OPHELIA HAMLET OPHELIA HAMLET OPHELIA HAMLET OPHELIA HAMLET OPHELIA KING CLAUDIUS POLONIUS KING CLAUDIUS LORD POLONIUS KING CLAUDIUS HAMLET HAMLET HAMLET 
HAMLET POLONIUS ROSENCRANTZ GUILDENSTERN LORD POLONIUS HAMLET POLONIUS ROSENCRANTZ GUILDENSTERN ROSENCRANTZ GUILDENSTERN HAMLET HORATIO HORATIO HAMLET HORATIO HAMLET HORATIO HAMLET KING CLAUDIUS QUEEN GERTRUDE POLONIUS OPHELIA ROSENCRANTZ GUILDENSTERN KING CLAUDIUS HAMLET KING CLAUDIUS HAMLET POLONIUS LORD POLONIUS HAMLET LORD POLONIUS HAMLET ROSENCRANTZ QUEEN GERTRUDE HAMLET LORD POLONIUS KING CLAUDIUS HAMLET OPHELIA OPHELIA HAMLET OPHELIA HAMLET OPHELIA HAMLET OPHELIA HAMLET OPHELIA HAMLET OPHELIA HAMLET OPHELIA HAMLET OPHELIA HAMLET OPHELIA HAMLET OPHELIA HAMLET OPHELIA HAMLET OPHELIA HAMLET HAMLET HAMLET HAMLET QUEEN GERTRUDE HAMLET KING CLAUDIUS HAMLET KING CLAUDIUS HAMLET LUCIANUS OPHELIA HAMLET OPHELIA HAMLET OPHELIA HAMLET LUCIANUS HAMLET OPHELIA HAMLET QUEEN GERTRUDE LORD POLONIUS KING CLAUDIUS HAMLET HORATIO HAMLET HORATIO HAMLET HORATIO HAMLET HORATIO HAMLET HORATIO HAMLET ROSENCRANTZ GUILDENSTERN GUILDENSTERN HAMLET GUILDENSTERN HAMLET GUILDENSTERN HAMLET GUILDENSTERN HAMLET GUILDENSTERN HAMLET GUILDENSTERN HAMLET GUILDENSTERN HAMLET GUILDENSTERN HAMLET ROSENCRANTZ HAMLET ROSENCRANTZ HAMLET ROSENCRANTZ HAMLET ROSENCRANTZ HAMLET ROSENCRANTZ HAMLET GUILDENSTERN HAMLET GUILDENSTERN HAMLET GUILDENSTERN HAMLET GUILDENSTERN HAMLET GUILDENSTERN HAMLET POLONIUS LORD POLONIUS HAMLET LORD POLONIUS HAMLET LORD POLONIUS HAMLET LORD POLONIUS HAMLET LORD POLONIUS HAMLET POLONIUS HAMLET KING CLAUDIUS ROSENCRANTZ GUILDENSTERN KING CLAUDIUS GUILDENSTERN ROSENCRANTZ KING CLAUDIUS ROSENCRANTZ ROSENCRANTZ GUILDENSTERN POLONIUS LORD POLONIUS KING CLAUDIUS POLONIUS HAMLET HAMLET KING CLAUDIUS QUEEN MARGARET POLONIUS LORD POLONIUS HAMLET QUEEN GERTRUDE POLONIUS HAMLET HAMLET QUEEN GERTRUDE HAMLET QUEEN GERTRUDE HAMLET QUEEN GERTRUDE HAMLET QUEEN GERTRUDE HAMLET QUEEN GERTRUDE HAMLET QUEEN GERTRUDE LORD POLONIUS HAMLET LORD POLONIUS QUEEN GERTRUDE HAMLET QUEEN GERTRUDE HAMLET QUEEN GERTRUDE HAMLET POLONIUS QUEEN GERTRUDE HAMLET QUEEN GERTRUDE HAMLET QUEEN GERTRUDE HAMLET QUEEN 
GERTRUDE HAMLET QUEEN GERTRUDE HAMLET QUEEN GERTRUDE HAMLET HAMLET QUEEN GERTRUDE HAMLET QUEEN GERTRUDE HAMLET QUEEN GERTRUDE HAMLET QUEEN GERTRUDE HAMLET QUEEN GERTRUDE HAMLET QUEEN GERTRUDE HAMLET POLONIUS QUEEN GERTRUDE HAMLET QUEEN GERTRUDE HAMLET QUEEN GERTRUDE HAMLET HAMLET POLONIUS KING CLAUDIUS QUEEN GERTRUDE ROSENCRANTZ GUILDENSTERN KING CLAUDIUS QUEEN GERTRUDE ROSENCRANTZ GUILDENSTERN KING CLAUDIUS QUEEN GERTRUDE KING CLAUDIUS QUEEN GERTRUDE KING CLAUDIUS ROSENCRANTZ GUILDENSTERN ROSENCRANTZ GUILDENSTERN HAMLET HAMLET ROSENCRANTZ GUILDENSTERN HAMLET ROSENCRANTZ GUILDENSTERN ROSENCRANTZ HAMLET ROSENCRANTZ HAMLET ROSENCRANTZ HAMLET ROSENCRANTZ HAMLET ROSENCRANTZ HAMLET ROSENCRANTZ HAMLET GUILDENSTERN HAMLET KING CLAUDIUS KING CLAUDIUS ROSENCRANTZ ROSENCRANTZ KING CLAUDIUS ROSENCRANTZ KING CLAUDIUS ROSENCRANTZ HAMLET GUILDENSTERN KING CLAUDIUS HAMLET KING CLAUDIUS HAMLET KING CLAUDIUS HAMLET KING CLAUDIUS HAMLET KING CLAUDIUS HAMLET KING CLAUDIUS HAMLET KING CLAUDIUS HAMLET KING CLAUDIUS HAMLET KING CLAUDIUS HAMLET KING CLAUDIUS HAMLET KING CLAUDIUS ROSENCRANTZ GUILDENSTERN FORTINBRAS PRINCE FORTINBRAS PRINCE FORTINBRAS FORTINBRAS HAMLET ROSENCRANTZ GUILDENSTERN HAMLET HAMLET HAMLET HAMLET HAMLET HAMLET ROSENCRANTZ HAMLET HAMLET QUEEN GERTRUDE HORATIO QUEEN GERTRUDE QUEEN GERTRUDE HORATIO QUEEN GERTRUDE HORATIO HORATIO OPHELIA OPHELIA QUEEN GERTRUDE OPHELIA QUEEN GERTRUDE OPHELIA QUEEN GERTRUDE OPHELIA KING CLAUDIUS QUEEN GERTRUDE OPHELIA KING CLAUDIUS OPHELIA KING CLAUDIUS OPHELIA KING CLAUDIUS OPHELIA KING CLAUDIUS OPHELIA KING CLAUDIUS HORATIO QUEEN GERTRUDE KING CLAUDIUS QUEEN GERTRUDE KING CLAUDIUS LAERTES LAERTES LAERTES LAERTES QUEEN GERTRUDE LAERTES KING CLAUDIUS LAERTES KING CLAUDIUS QUEEN GERTRUDE KING CLAUDIUS LAERTES KING CLAUDIUS LAERTES KING CLAUDIUS LAERTES KING CLAUDIUS LAERTES KING CLAUDIUS LAERTES OPHELIA OPHELIA LAERTES OPHELIA LAERTES OPHELIA LAERTES OPHELIA LAERTES OPHELIA LAERTES KING CLAUDIUS LAERTES KING CLAUDIUS HORATIO HORATIO 
HORATIO HORATIO HORATIO HAMLET KING CLAUDIUS LAERTES KING CLAUDIUS LAERTES KING CLAUDIUS LAERTES KING CLAUDIUS KING CLAUDIUS KING CLAUDIUS HAMLET LAERTES KING CLAUDIUS LAERTES KING CLAUDIUS LAERTES KING CLAUDIUS LAERTES KING CLAUDIUS LAERTES KING CLAUDIUS LAERTES KING CLAUDIUS LAERTES KING CLAUDIUS LAERTES KING CLAUDIUS LAERTES KING CLAUDIUS LAERTES KING CLAUDIUS LAERTES KING CLAUDIUS LAERTES KING CLAUDIUS QUEEN GERTRUDE QUEEN GERTRUDE LAERTES QUEEN GERTRUDE LAERTES QUEEN GERTRUDE LAERTES KING CLAUDIUS HAMLET HORATIO HAMLET HORATIO HAMLET HAMLET HORATIO HAMLET HORATIO HAMLET HAMLET HORATIO HAMLET HORATIO HAMLET HAMLET HAMLET HAMLET HAMLET HAMLET HAMLET HAMLET HAMLET HAMLET HAMLET HAMLET HAMLET HAMLET HAMLET HAMLET HAMLET HAMLET HAMLET HORATIO HAMLET HORATIO HAMLET HORATIO HAMLET HORATIO HAMLET OPHELIA LAERTES KING CLAUDIUS QUEEN GERTRUDE HORATIO LAERTES HAMLET LAERTES LAERTES LAERTES HAMLET QUEEN GERTRUDE LAERTES HAMLET LAERTES HAMLET KING CLAUDIUS QUEEN GERTRUDE HORATIO HAMLET QUEEN GERTRUDE HAMLET KING CLAUDIUS QUEEN GERTRUDE HAMLET QUEEN GERTRUDE HAMLET KING CLAUDIUS HORATIO LAERTES HAMLET HORATIO HAMLET HORATIO HAMLET HORATIO HAMLET HORATIO HAMLET HORATIO HAMLET HORATIO HAMLET HORATIO HAMLET HORATIO HAMLET HORATIO HAMLET HORATIO HAMLET HORATIO OSRIC OSRIC HAMLET HORATIO HAMLET OSRIC HAMLET OSRIC HAMLET OSRIC HAMLET OSRIC HAMLET HAMLET OSRIC HAMLET OSRIC HAMLET OSRIC HORATIO HAMLET OSRIC HORATIO HAMLET OSRIC HAMLET OSRIC HAMLET OSRIC HAMLET OSRIC HAMLET OSRIC HAMLET HORATIO OSRIC HAMLET OSRIC HAMLET OSRIC HAMLET
Consists of 4050 nuclear magnetic resonance measurements taken from a drill while drilling a well.
%matplotlib inline
import matplotlib as mpl
import matplotlib.pylab as plt
# Read the well-log measurements (a single column, labelled 'y' here) and
# draw them as a line on a wide canvas.
df_welllog = pd.read_csv(u'data/well-log.csv', names=['y'])
plt.figure(figsize=(14, 5))
plt.plot(df_welllog['y'])
plt.show()
%matplotlib inline
import matplotlib as mpl
import matplotlib.pylab as plt
import pandas as pd
# Read the wind-turbine table and draw its wind_speed column as a line on a
# wide canvas.
df_windturbine = pd.read_csv(u'data/wind_turbine.csv')
plt.figure(figsize=(14, 5))
plt.plot(df_windturbine['wind_speed'])
plt.show()
Since one of the dimensions corresponds to an angle, it is natural to visualize the data using a polar (radar-style) plot.
import numpy as np
import matplotlib.pyplot as plt
# Polar scatter of the turbine data: angle from wind_direction, radius from
# wind_speed, marker area proportional to |production| relative to its maximum.
production = df_windturbine['production']
r = df_windturbine['wind_speed']
# Scale by 2*pi/360, i.e. wind_direction is treated as an angle in degrees.
theta = 2 * np.pi * df_windturbine['wind_direction'] / 360.
area = 100 * np.abs(production) / production.max()

fig = plt.figure(figsize=(8, 8))
ax = fig.add_subplot(111, projection='polar')
c = ax.scatter(theta, r, alpha=0.9, s=area, color='b', edgecolor='w')
c.set_alpha(0.75)
plt.show()
import numpy as np
import matplotlib.pyplot as plt
# Polar line plot of the turbine data: angle from wind_direction, radius from
# wind_speed, with consecutive rows joined by a line.
r = df_windturbine['wind_speed']
# Scale by 2*pi/360, i.e. wind_direction is treated as an angle in degrees.
theta = 2 * np.pi * df_windturbine['wind_direction'] / 360.

plt.figure(figsize=(8, 8))
ax = plt.subplot(111, projection='polar')
# The marker-area computation was dropped: a line plot has no per-point size,
# so the value was never used. The final transparency (0.75) is set directly.
c, = ax.plot(theta, r, alpha=0.75, color='b')
plt.show()
# Dot plot of production against wind speed, one point per row.
speed_values = df_windturbine['wind_speed']
production_values = df_windturbine['production']
plt.plot(speed_values, production_values, '.')
plt.show()
# Seaborn is another visualization library
import seaborn as sns
sns.set(color_codes=True)
df = pd.read_csv(u'data/wind_turbine.csv')
# jointplot is a figure-level function that builds its own figure, so calling
# plt.figure() beforehand only leaves an empty figure behind. The size is
# requested through `height` instead (seaborn >= 0.9; older releases call
# this parameter `size`).
sns.jointplot(x="wind_speed", y="production", data=df, height=5)
plt.show()
<matplotlib.figure.Figure at 0x11a812080>
from __future__ import print_function, division
import pandas as pd
# Load the daily demand forecasting orders table (semicolon-separated) and
# display it.
df = pd.read_csv("data/Daily_Demand_Forecasting_Orders.csv", sep=';')
df
Week of the month (first week, second, third, fourth or fifth week | Day of the week (Monday to Friday) | Non-urgent order | Urgent order | Order type A | Order type B | Order type C | Fiscal sector orders | Orders from the traffic controller sector | Banking orders (1) | Banking orders (2) | Banking orders (3) | Target (Total orders) | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 4 | 316307 | 223270 | 61543 | 175586 | 302448 | 0 | 65556 | 44914 | 188411 | 14793 | 539577 |
1 | 1 | 5 | 128633 | 96042 | 38058 | 56037 | 130580 | 0 | 40419 | 21399 | 89461 | 7679 | 224675 |
2 | 1 | 6 | 43651 | 84375 | 21826 | 25125 | 82461 | 1386 | 11992 | 3452 | 21305 | 14947 | 129412 |
3 | 2 | 2 | 171297 | 127667 | 41542 | 113294 | 162284 | 18156 | 49971 | 33703 | 69054 | 18423 | 317120 |
4 | 2 | 3 | 90532 | 113526 | 37679 | 56618 | 116220 | 6459 | 48534 | 19646 | 16411 | 20257 | 210517 |
5 | 2 | 4 | 110925 | 96360 | 30792 | 50704 | 125868 | 79 | 52042 | 8773 | 47522 | 24966 | 207364 |
6 | 2 | 5 | 144124 | 118919 | 43304 | 66371 | 153368 | 0 | 46573 | 33597 | 48269 | 20973 | 263043 |
7 | 2 | 6 | 119379 | 113870 | 38584 | 85961 | 124413 | 15709 | 35033 | 26278 | 56665 | 18502 | 248958 |
8 | 3 | 2 | 218856 | 124381 | 33973 | 148274 | 162044 | 1054 | 66612 | 19461 | 103376 | 10458 | 344291 |
9 | 3 | 3 | 146518 | 101045 | 36399 | 43306 | 168723 | 865 | 58224 | 7742 | 82395 | 11948 | 248428 |
10 | 3 | 4 | 178433 | 102793 | 45706 | 111036 | 124678 | 194 | 47046 | 17299 | 108719 | 15560 | 281420 |
11 | 3 | 5 | 145865 | 91180 | 43851 | 66277 | 133440 | 6523 | 66910 | 17768 | 36693 | 29046 | 243568 |
12 | 3 | 6 | 170566 | 114412 | 43339 | 136434 | 128405 | 23200 | 32529 | 34002 | 78153 | 31949 | 308178 |
13 | 4 | 2 | 220343 | 141406 | 46241 | 120865 | 196296 | 1653 | 34878 | 32905 | 117137 | 29188 | 363402 |
14 | 4 | 3 | 193768 | 141854 | 56519 | 136709 | 143644 | 1250 | 57858 | 23956 | 101048 | 30134 | 336872 |
15 | 4 | 4 | 122736 | 124256 | 56167 | 78101 | 112724 | 0 | 52321 | 10046 | 62799 | 24233 | 246992 |
16 | 4 | 5 | 144051 | 158408 | 51660 | 92272 | 164948 | 6421 | 47167 | 6440 | 91784 | 15973 | 308880 |
17 | 4 | 6 | 105415 | 108688 | 47717 | 71474 | 113935 | 19023 | 42737 | 26020 | 27873 | 17600 | 233126 |
18 | 5 | 2 | 240660 | 163720 | 59135 | 157681 | 187564 | 0 | 39273 | 32917 | 155617 | 9203 | 404380 |
19 | 1 | 3 | 131067 | 166649 | 90476 | 80509 | 127575 | 844 | 60543 | 19141 | 78378 | 73839 | 298560 |
20 | 1 | 4 | 130129 | 98927 | 42904 | 43962 | 142383 | 193 | 54760 | 9163 | 29874 | 46992 | 229249 |
21 | 1 | 5 | 123286 | 103551 | 47331 | 72444 | 116529 | 9467 | 48732 | 21196 | 47793 | 47574 | 236304 |
22 | 1 | 6 | 190816 | 87629 | 32077 | 127358 | 137739 | 18729 | 46368 | 36798 | 92701 | 31098 | 297174 |
23 | 2 | 2 | 266741 | 141437 | 58721 | 139034 | 211646 | 1223 | 58081 | 43333 | 135314 | 29716 | 409401 |
24 | 2 | 3 | 123143 | 106083 | 36017 | 75813 | 119205 | 1809 | 45340 | 22109 | 55584 | 29803 | 231035 |
25 | 2 | 4 | 148139 | 85310 | 35576 | 79997 | 123253 | 5377 | 59686 | 14188 | 67617 | 32319 | 238826 |
26 | 2 | 5 | 118552 | 100417 | 54401 | 75613 | 105584 | 16629 | 40423 | 24682 | 47563 | 35314 | 235598 |
27 | 2 | 6 | 146959 | 95153 | 37656 | 59907 | 144549 | 0 | 50908 | 45733 | 43930 | 28998 | 242112 |
28 | 3 | 2 | 299770 | 133375 | 57810 | 236248 | 196732 | 57645 | 71772 | 57756 | 159373 | 29160 | 490790 |
29 | 3 | 3 | 151341 | 131788 | 43359 | 89382 | 156916 | 6528 | 53573 | 42638 | 62732 | 32386 | 289657 |
30 | 3 | 4 | 206206 | 92160 | 45555 | 148718 | 104186 | 93 | 49110 | 36904 | 126632 | 33237 | 298459 |
31 | 3 | 5 | 170868 | 131463 | 45550 | 120548 | 157505 | 21272 | 42534 | 79556 | 50433 | 36483 | 323603 |
32 | 4 | 3 | 435304 | 181149 | 67884 | 267342 | 281227 | 0 | 64867 | 210508 | 177229 | 30514 | 616453 |
33 | 4 | 4 | 235106 | 110874 | 70376 | 154242 | 121417 | 55 | 23257 | 163452 | 63699 | 33805 | 346035 |
34 | 4 | 5 | 168179 | 125119 | 71068 | 100544 | 136033 | 14347 | 28072 | 95989 | 50763 | 55445 | 307645 |
35 | 4 | 6 | 172783 | 77371 | 64137 | 109062 | 80648 | 3693 | 46321 | 66498 | 61593 | 31625 | 253847 |
36 | 5 | 2 | 381768 | 140041 | 118178 | 260632 | 152134 | 9135 | 34236 | 194216 | 136035 | 47601 | 530944 |
37 | 5 | 3 | 221438 | 111392 | 51199 | 124660 | 157500 | 529 | 39964 | 136119 | 66745 | 31031 | 333359 |
38 | 5 | 4 | 193957 | 111859 | 47002 | 99892 | 159462 | 540 | 59179 | 94460 | 54772 | 34616 | 306356 |
39 | 1 | 6 | 275076 | 121697 | 109888 | 131165 | 175777 | 20057 | 37906 | 138536 | 85378 | 14020 | 416830 |
40 | 2 | 2 | 252298 | 150708 | 77388 | 154863 | 182936 | 12181 | 32133 | 69093 | 169088 | 12516 | 415187 |
41 | 2 | 3 | 165472 | 102530 | 46295 | 96870 | 124837 | 0 | 48458 | 43112 | 72840 | 11304 | 268002 |
42 | 2 | 4 | 126030 | 108055 | 53366 | 69150 | 111987 | 418 | 42201 | 13736 | 70191 | 16710 | 234503 |
43 | 2 | 5 | 112246 | 106641 | 47399 | 77610 | 109715 | 15837 | 35316 | 25876 | 38646 | 13989 | 234724 |
44 | 2 | 6 | 123302 | 94315 | 48081 | 72826 | 109157 | 12447 | 43284 | 30138 | 52112 | 12632 | 230064 |
45 | 3 | 2 | 187810 | 167455 | 59042 | 130098 | 168254 | 2129 | 37817 | 36445 | 103567 | 10443 | 357394 |
46 | 3 | 3 | 119863 | 139383 | 44809 | 99072 | 115365 | 0 | 54584 | 17242 | 59231 | 12543 | 259246 |
47 | 3 | 4 | 127805 | 114813 | 39025 | 110740 | 94470 | 1617 | 33366 | 21103 | 84558 | 16683 | 244235 |
48 | 3 | 5 | 120629 | 112703 | 39600 | 240922 | 122085 | 169275 | 37387 | 20246 | 63778 | 13886 | 402607 |
49 | 3 | 6 | 130465 | 105273 | 57467 | 88462 | 109132 | 19323 | 27200 | 41713 | 59513 | 12260 | 255061 |
50 | 4 | 2 | 222282 | 120324 | 41418 | 135189 | 165999 | 0 | 39446 | 29290 | 154144 | 10811 | 342606 |
51 | 4 | 3 | 150257 | 116959 | 34193 | 115536 | 118911 | 1424 | 51346 | 19782 | 89704 | 12182 | 268640 |
52 | 4 | 4 | 96494 | 87294 | 32653 | 81576 | 74372 | 4813 | 34631 | 22420 | 49644 | 15390 | 188601 |
53 | 4 | 5 | 89526 | 99756 | 51985 | 51930 | 98107 | 12740 | 31850 | 32150 | 21573 | 13807 | 202022 |
54 | 4 | 6 | 134425 | 79084 | 36748 | 71353 | 105408 | 0 | 33970 | 28701 | 65199 | 11023 | 213509 |
55 | 5 | 2 | 158716 | 158133 | 59131 | 92639 | 165079 | 0 | 32027 | 33282 | 128269 | 9287 | 316849 |
56 | 5 | 3 | 150784 | 133069 | 54224 | 115746 | 116442 | 2559 | 51235 | 34421 | 87708 | 11354 | 286412 |
57 | 5 | 4 | 193534 | 109639 | 58378 | 142382 | 102687 | 274 | 28364 | 88404 | 91367 | 15003 | 303447 |
58 | 5 | 5 | 196555 | 108395 | 76763 | 96478 | 131709 | 0 | 37011 | 109931 | 50112 | 12957 | 304950 |
59 | 5 | 6 | 192116 | 121106 | 107568 | 121152 | 103180 | 18678 | 27328 | 108072 | 56015 | 10690 | 331900 |
# Show the row labelled 0 as a Series with one entry per column.
df.loc[0]
Week of the month (first week, second, third, fourth or fifth week 1 Day of the week (Monday to Friday) 4 Non-urgent order 316307 Urgent order 223270 Order type A 61543 Order type B 175586 Order type C 302448 Fiscal sector orders 0 Orders from the traffic controller sector 65556 Banking orders (1) 44914 Banking orders (2) 188411 Banking orders (3) 14793 Target (Total orders) 539577 Name: 0, dtype: int64
The data fields are quite complicated, so it is better to organize the data as a tensor.
# List the column labels of the orders table.
df.columns
Index(['Week of the month (first week, second, third, fourth or fifth week', 'Day of the week (Monday to Friday)', 'Non-urgent order', 'Urgent order', 'Order type A', 'Order type B', 'Order type C', 'Fiscal sector orders', 'Orders from the traffic controller sector', 'Banking orders (1)', 'Banking orders (2)', 'Banking orders (3)', 'Target (Total orders)'], dtype='object')
'Non-urgent order' $+$ 'Urgent order' is almost equal to 'Target (Total orders)'
'Order type A' $+$ 'Order type B' $+$ 'Order type C' = 'Target (Total orders)'
Data is better visualized if organized in weeks and day-of-week
'Fiscal sector orders' $+$ 'Orders from the traffic controller sector' $+$ 'Banking orders (1)' $+$ 'Banking orders (2)' $+$ 'Banking orders (3)' add up to about $60$ percent of 'Target (Total orders)'
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import scipy as sc
The total orders series, when viewed as a plain time series, is hard to interpret.
# Draw the total-orders column against the row index.
x = df['Target (Total orders)']
plt.plot(x.index, x.values)
plt.show()
Arrange the data by week and day of week
# Week-of-month and day-of-week labels for every record.
t1 = df['Week of the month (first week, second, third, fourth or fifth week']
t2 = df['Day of the week (Monday to Friday)']

# Columns stacked along the last axis of X; vmax is the colour-scale ceiling
# used when X is displayed.
vmax = 600000
z = [df[col] for col in ['Order type A', 'Order type B', 'Order type C', 'Target (Total orders)']]
# Alternative column selections:
#z = [df[col] for col in [u'Non-urgent order', u'Urgent order','Target (Total orders)']]
#z = [df[col] for col in [u'Fiscal sector orders', u'Orders from the traffic controller sector', u'Banking orders (1)', u'Banking orders (2)', u'Banking orders (3)']]

# X[week, day, quantity]: 13 week rows by 5 day columns; cells that never
# receive a record stay at zero.
X = np.zeros([13, 5, len(z)])
# Day and week features:
# F[week, day, 0] holds the week-of-month label, F[week, day, 1] the running
# month counter.
F = np.zeros([13, 5, 2])

m = 0  # months seen so far
w = 0  # weeks seen so far
# Start values larger than any label in the table shown above, so the first
# record opens month 1 and week 1.
prev_week_label, prev_day_label = 6, 7
for i, (week_label, day_label) in enumerate(zip(t1, t2)):
    # A drop in the week-of-month label starts a new month; a drop in the
    # day-of-week label starts a new week.
    if week_label < prev_week_label:
        m += 1
    if day_label < prev_day_label:
        w += 1
    prev_week_label, prev_day_label = week_label, day_label

    row_idx = w - 1
    col_idx = day_label - 2  # the smallest day label in the table is 2 -> column 0
    for j, series in enumerate(z):
        X[row_idx, col_idx, j] = series[i]
    F[row_idx, col_idx, 0] = week_label
    F[row_idx, col_idx, 1] = m
# One heat map per layer of X, side by side, all on the same 0..vmax scale.
layer_count = len(z)
plt.figure(figsize=(10, 5))
for j in range(layer_count):
    plt.subplot(1, layer_count, j + 1)
    plt.imshow(X[:, :, j], vmin=0, vmax=vmax)
    # plt.colorbar()
plt.show()
# Show the two layers of F side by side: the week-of-month label (left) and
# the running month counter (right).
plt.figure(figsize=(10, 5))
plt.subplot(1, 2, 1)
plt.imshow(F[:, :, 0])
plt.subplot(1, 2, 2)
plt.imshow(F[:, :, 1])
# End with plt.show() like the other plotting cells; without it the cell's
# last expression (an AxesImage object) is echoed as text output.
plt.show()
<matplotlib.image.AxesImage at 0x1a233b4f98>
# Heat maps of every layer of X except the last, each divided by the last
# layer. Adding 1 to the denominator keeps all-zero cells from dividing by
# zero.
denominator = 1 + X[:, :, -1]
plt.figure(figsize=(10, 5))
for j in range(len(z) - 1):
    plt.subplot(1, len(z), j + 1)
    plt.imshow(X[:, :, j] / denominator, vmin=0, vmax=1)
    # plt.colorbar()
plt.show()
https://archive.ics.uci.edu/ml/datasets/NIPS+Conference+Papers+1987-2015
http://www.sidc.be/silso/infosnytot
Filename: SN_y_tot_V2.0.csv
Format: Comma Separated values (adapted for import in spreadsheets)
The separator is the semicolon ';'.
Contents:
Column 1: Gregorian calendar year (mid-year date)
Column 2: Yearly mean total sunspot number.
Column 3: Yearly mean standard deviation of the input sunspot numbers from individual stations.
Column 4: Number of observations used to compute the yearly mean total sunspot number.
Column 5: Definitive/provisional marker. '1' indicates that the value is definitive. '0' indicates that the value is still provisional.
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
from __future__ import print_function, division
import pandas as pd
# Load the semicolon-separated yearly sunspot table, supplying the column
# names here, and display it.
sunspot_columns = ['Year', 'mean_Sunspots', 'mean_std', 'N', 'Provisional']
df = pd.read_csv("data/sunspots.csv", sep=';', names=sunspot_columns)
df
Year | mean_Sunspots | mean_std | N | Provisional | |
---|---|---|---|---|---|
0 | 1700.5 | 8.3 | -1.0 | -1 | 1 |
1 | 1701.5 | 18.3 | -1.0 | -1 | 1 |
2 | 1702.5 | 26.7 | -1.0 | -1 | 1 |
3 | 1703.5 | 38.3 | -1.0 | -1 | 1 |
4 | 1704.5 | 60.0 | -1.0 | -1 | 1 |
5 | 1705.5 | 96.7 | -1.0 | -1 | 1 |
6 | 1706.5 | 48.3 | -1.0 | -1 | 1 |
7 | 1707.5 | 33.3 | -1.0 | -1 | 1 |
8 | 1708.5 | 16.7 | -1.0 | -1 | 1 |
9 | 1709.5 | 13.3 | -1.0 | -1 | 1 |
10 | 1710.5 | 5.0 | -1.0 | -1 | 1 |
11 | 1711.5 | 0.0 | -1.0 | -1 | 1 |
12 | 1712.5 | 0.0 | -1.0 | -1 | 1 |
13 | 1713.5 | 3.3 | -1.0 | -1 | 1 |
14 | 1714.5 | 18.3 | -1.0 | -1 | 1 |
15 | 1715.5 | 45.0 | -1.0 | -1 | 1 |
16 | 1716.5 | 78.3 | -1.0 | -1 | 1 |
17 | 1717.5 | 105.0 | -1.0 | -1 | 1 |
18 | 1718.5 | 100.0 | -1.0 | -1 | 1 |
19 | 1719.5 | 65.0 | -1.0 | -1 | 1 |
20 | 1720.5 | 46.7 | -1.0 | -1 | 1 |
21 | 1721.5 | 43.3 | -1.0 | -1 | 1 |
22 | 1722.5 | 36.7 | -1.0 | -1 | 1 |
23 | 1723.5 | 18.3 | -1.0 | -1 | 1 |
24 | 1724.5 | 35.0 | -1.0 | -1 | 1 |
25 | 1725.5 | 66.7 | -1.0 | -1 | 1 |
26 | 1726.5 | 130.0 | -1.0 | -1 | 1 |
27 | 1727.5 | 203.3 | -1.0 | -1 | 1 |
28 | 1728.5 | 171.7 | -1.0 | -1 | 1 |
29 | 1729.5 | 121.7 | -1.0 | -1 | 1 |
... | ... | ... | ... | ... | ... |
288 | 1988.5 | 123.0 | 8.4 | 6556 | 1 |
289 | 1989.5 | 211.1 | 12.8 | 6932 | 1 |
290 | 1990.5 | 191.8 | 11.2 | 7108 | 1 |
291 | 1991.5 | 203.3 | 12.7 | 6932 | 1 |
292 | 1992.5 | 133.0 | 8.9 | 7845 | 1 |
293 | 1993.5 | 76.1 | 5.8 | 8010 | 1 |
294 | 1994.5 | 44.9 | 4.4 | 8524 | 1 |
295 | 1995.5 | 25.1 | 3.7 | 8429 | 1 |
296 | 1996.5 | 11.6 | 3.1 | 7614 | 1 |
297 | 1997.5 | 28.9 | 3.6 | 7294 | 1 |
298 | 1998.5 | 88.3 | 6.6 | 6353 | 1 |
299 | 1999.5 | 136.3 | 9.3 | 6413 | 1 |
300 | 2000.5 | 173.9 | 10.1 | 5953 | 1 |
301 | 2001.5 | 170.4 | 10.5 | 6558 | 1 |
302 | 2002.5 | 163.6 | 9.8 | 6588 | 1 |
303 | 2003.5 | 99.3 | 7.1 | 7087 | 1 |
304 | 2004.5 | 65.3 | 5.9 | 6882 | 1 |
305 | 2005.5 | 45.8 | 4.7 | 7084 | 1 |
306 | 2006.5 | 24.7 | 3.5 | 6370 | 1 |
307 | 2007.5 | 12.6 | 2.7 | 6841 | 1 |
308 | 2008.5 | 4.2 | 2.5 | 6644 | 1 |
309 | 2009.5 | 4.8 | 2.5 | 6465 | 1 |
310 | 2010.5 | 24.9 | 3.4 | 6328 | 1 |
311 | 2011.5 | 80.8 | 6.7 | 6077 | 1 |
312 | 2012.5 | 84.5 | 6.7 | 5753 | 1 |
313 | 2013.5 | 94.0 | 6.9 | 5347 | 1 |
314 | 2014.5 | 113.3 | 8.0 | 5273 | 1 |
315 | 2015.5 | 69.8 | 6.4 | 8903 | 1 |
316 | 2016.5 | 39.8 | 3.9 | 9940 | 1 |
317 | 2017.5 | 21.7 | 2.6 | 11020 | 0 |
318 rows × 5 columns
# Plot column N against Year, skipping the first `start` rows.
start = 150
tail_rows = df.iloc[start:]
plt.plot(tail_rows['Year'], tail_rows['N'])
plt.show()
%connect_info
{ "shell_port": 51197, "iopub_port": 51198, "stdin_port": 51199, "control_port": 51200, "hb_port": 51201, "ip": "127.0.0.1", "key": "5f99ee29-5b6c2ecab90750c0df552698", "transport": "tcp", "signature_scheme": "hmac-sha256", "kernel_name": "" } Paste the above JSON into a file, and connect with: $> jupyter <app> --existing <file> or, if you are local, you can connect with just: $> jupyter <app> --existing kernel-9ff888c2-016d-4625-8341-97f073d6ee8b.json or even just: $> jupyter <app> --existing if this is the most recent Jupyter kernel you have started.