In [1]:
from db import DB

db = DB(filename="./baseball-archive-2012.sqlite", dbtype="sqlite")
db
Indexing schema. This will take a second...finished!
Out[1]:
DB[sqlite][localhost]:5432 > [email protected]
In [2]:
db.tables.allstarfull
Out[2]:
Column Type
playerID TEXT
yearID INTEGER
gameNum INTEGER
gameID TEXT
teamID TEXT
lgID TEXT
GP INTEGER
startingPos INTEGER
In [3]:
db.tables.allstarfull.head()
Out[3]:
playerID yearID gameNum gameID teamID lgID GP startingPos
0 aaronha01 1955 0 NLS195507120 ML1 NL 1 NaN
1 aaronha01 1956 0 ALS195607100 ML1 NL 1 NaN
2 aaronha01 1957 0 NLS195707090 ML1 NL 1 9
3 aaronha01 1958 0 ALS195807080 ML1 NL 1 9
4 aaronha01 1959 1 NLS195907070 ML1 NL 1 9
5 aaronha01 1959 2 NLS195908030 ML1 NL 1 9
In [4]:
db.tables.allstarfull.sample()
Out[4]:
playerID yearID gameNum gameID teamID lgID GP startingPos
0 kellech01 1941 0 ALS194107080 NYA AL 1 NaN
1 burdele01 1959 1 NLS195907070 ML1 NL 1 NaN
2 vaughar01 1936 0 NLS193607070 PIT NL 0 NaN
3 benchjo01 1969 0 ALS196907230 CIN NL 1 2
4 reesepe01 1951 0 ALS195107100 BRO NL 1 NaN
5 glavito02 2002 0 NLS200207090 ATL NL 0 NaN
6 dimagjo01 1939 0 ALS193907110 NYA AL 1 8
7 harrebu01 1971 0 ALS197107130 NYN NL 1 6
8 truckvi01 1949 0 NLS194907120 DET AL 1 NaN
9 patekfr01 1976 0 NLS197607130 KCA AL 1 NaN
In [5]:
db.tables.allstarfull.playerID
Out[5]:
Table Name Type
allstarfull playerID TEXT
In [6]:
db.tables.allstarfull.playerID.head()
Out[6]:
0    aaronha01
1    aaronha01
2    aaronha01
3    aaronha01
4    aaronha01
5    aaronha01
Name: playerID, dtype: object
In [7]:
db.tables.allstarfull.playerID.unique().count()
Out[7]:
1637
In [8]:
db.find_column("*player*")
Out[8]:
Table Column Name Type
allstarfull playerID TEXT
appearances playerID TEXT
awardsplayers playerID TEXT
awardsshareplayers playerID TEXT
battingpost playerID TEXT
fielding playerID TEXT
fieldingof playerID TEXT
fieldingpost playerID TEXT
master playerID TEXT
pitching playerID TEXT
pitchingpost playerID TEXT
salaries playerID TEXT
schoolsplayers playerID TEXT
tmp_batting playerID TEXT
In [9]:
db.find_column("*ID*")
Out[9]:
Table Column Name Type
allstarfull lgID TEXT
allstarfull gameID TEXT
allstarfull playerID TEXT
allstarfull teamID TEXT
allstarfull yearID INTEGER
appearances playerID TEXT
appearances teamID TEXT
appearances yearID INTEGER
appearances lgID TEXT
awardsmanagers lgID TEXT
awardsmanagers managerID TEXT
awardsmanagers yearID INTEGER
awardsmanagers awardID TEXT
awardsplayers lgID TEXT
awardsplayers playerID TEXT
awardsplayers yearID INTEGER
awardsplayers awardID TEXT
awardssharemanagers lgID TEXT
awardssharemanagers managerID TEXT
awardssharemanagers yearID INTEGER
awardssharemanagers awardID TEXT
awardsshareplayers lgID TEXT
awardsshareplayers playerID TEXT
awardsshareplayers yearID INTEGER
awardsshareplayers awardID TEXT
battingpost playerID TEXT
battingpost teamID TEXT
battingpost yearID INTEGER
battingpost GIDP INTEGER
battingpost lgID TEXT
fielding playerID TEXT
fielding teamID TEXT
fielding yearID INTEGER
fielding lgID TEXT
fieldingof playerID TEXT
fieldingof yearID INTEGER
fieldingpost lgID TEXT
fieldingpost playerID TEXT
fieldingpost teamID TEXT
fieldingpost yearID INTEGER
halloffame hofID TEXT
hofold hofID TEXT
managers lgID TEXT
managers teamID TEXT
managers managerID TEXT
managers yearID INTEGER
managershalf lgID TEXT
managershalf teamID TEXT
managershalf managerID TEXT
managershalf yearID INTEGER
master lahman40ID TEXT
master bbrefID TEXT
master retroID TEXT
master lahmanID INTEGER
master playerID TEXT
master hofID TEXT
master holtzID TEXT
master managerID TEXT
master lahman45ID TEXT
pitching playerID TEXT
pitching teamID TEXT
pitching yearID INTEGER
pitching GIDP INTEGER
pitching lgID TEXT
pitchingpost playerID TEXT
pitchingpost teamID TEXT
pitchingpost yearID INTEGER
pitchingpost GIDP INTEGER
pitchingpost lgID TEXT
salaries lgID TEXT
salaries playerID TEXT
salaries teamID TEXT
salaries yearID INTEGER
schools schoolID TEXT
schoolsplayers schoolID TEXT
schoolsplayers playerID TEXT
seriespost lgIDwinner TEXT
seriespost lgIDloser TEXT
seriespost teamIDwinner TEXT
seriespost yearID INTEGER
seriespost teamIDloser TEXT
teams teamID TEXT
teams teamIDBR TEXT
teams teamIDretro TEXT
teams yearID INTEGER
teams teamIDlahman45 TEXT
teams lgID TEXT
teams franchID TEXT
teams divID TEXT
teamsfranchises franchID TEXT
teamshalf lgID TEXT
teamshalf divID TEXT
teamshalf teamID TEXT
teamshalf yearID INTEGER
tmp_batting playerID TEXT
tmp_batting teamID TEXT
tmp_batting yearID INTEGER
tmp_batting GIDP INTEGER
tmp_batting lgID TEXT
In [10]:
db.find_column("HR")
Out[10]:
Table Column Name Type
battingpost HR INTEGER
pitching HR INTEGER
pitchingpost HR INTEGER
teams HR INTEGER
tmp_batting HR INTEGER
In [11]:
db.find_column("HR", "INTEGER")
Out[11]:
Table Column Name Type
battingpost HR INTEGER
pitching HR INTEGER
pitchingpost HR INTEGER
teams HR INTEGER
tmp_batting HR INTEGER
In [12]:
db.find_column("*", "INTEGER")
Out[12]:
Table Column Name Type
allstarfull GP INTEGER
allstarfull yearID INTEGER
allstarfull startingPos INTEGER
allstarfull gameNum INTEGER
appearances G_cf INTEGER
appearances G_all INTEGER
appearances G_2b INTEGER
appearances G_batting INTEGER
appearances G_p INTEGER
appearances G_ss INTEGER
appearances G_of INTEGER
appearances G_c INTEGER
appearances G_dh INTEGER
appearances yearID INTEGER
appearances G_3b INTEGER
appearances G_rf INTEGER
appearances G_1b INTEGER
appearances G_lf INTEGER
appearances G_defense INTEGER
appearances G_ph INTEGER
appearances G_pr INTEGER
awardsmanagers yearID INTEGER
awardsplayers yearID INTEGER
awardssharemanagers votesFirst INTEGER
awardssharemanagers pointsWon INTEGER
awardssharemanagers yearID INTEGER
awardssharemanagers pointsMax INTEGER
awardsshareplayers yearID INTEGER
awardsshareplayers pointsMax INTEGER
battingpost RBI INTEGER
battingpost BB INTEGER
battingpost HR INTEGER
battingpost IBB INTEGER
battingpost 3B INTEGER
battingpost HBP INTEGER
battingpost AB INTEGER
battingpost G INTEGER
battingpost H INTEGER
battingpost yearID INTEGER
battingpost R INTEGER
battingpost 2B INTEGER
battingpost CS INTEGER
battingpost GIDP INTEGER
battingpost SF INTEGER
battingpost SH INTEGER
battingpost SO INTEGER
battingpost SB INTEGER
fielding WP INTEGER
fielding E INTEGER
fielding stint INTEGER
fielding DP INTEGER
fielding PB INTEGER
fielding PO INTEGER
fielding A INTEGER
fielding GS INTEGER
fielding G INTEGER
fielding yearID INTEGER
fielding CS INTEGER
fielding InnOuts INTEGER
fielding SB INTEGER
fieldingof Gcf INTEGER
fieldingof Glf INTEGER
fieldingof Grf INTEGER
fieldingof yearID INTEGER
fieldingof stint INTEGER
fieldingpost A INTEGER
fieldingpost G INTEGER
fieldingpost TP INTEGER
fieldingpost PB INTEGER
fieldingpost InnOuts INTEGER
fieldingpost yearID INTEGER
fieldingpost PO INTEGER
fieldingpost SB INTEGER
fieldingpost CS INTEGER
fieldingpost GS INTEGER
fieldingpost E INTEGER
fieldingpost DP INTEGER
halloffame votes INTEGER
halloffame needed INTEGER
halloffame yearid INTEGER
halloffame ballots INTEGER
hofold votes INTEGER
hofold yearid INTEGER
hofold ballots INTEGER
managers G INTEGER
managers rank INTEGER
managers W INTEGER
managers yearID INTEGER
managers inseason INTEGER
managers L INTEGER
managershalf G INTEGER
managershalf rank INTEGER
managershalf W INTEGER
managershalf yearID INTEGER
managershalf inseason INTEGER
managershalf half INTEGER
managershalf L INTEGER
master weight INTEGER
master lahmanID INTEGER
master birthMonth INTEGER
master deathMonth INTEGER
master deathYear INTEGER
master birthYear INTEGER
master birthDay INTEGER
master deathDay INTEGER
pitching BB INTEGER
pitching HR INTEGER
pitching IBB INTEGER
pitching IPouts INTEGER
pitching BK INTEGER
pitching WP INTEGER
pitching stint INTEGER
pitching HBP INTEGER
pitching SH INTEGER
pitching ER INTEGER
pitching GS INTEGER
pitching G INTEGER
pitching H INTEGER
pitching CG INTEGER
pitching L INTEGER
pitching GF INTEGER
pitching yearID INTEGER
pitching BFP INTEGER
pitching W INTEGER
pitching GIDP INTEGER
pitching SHO INTEGER
pitching SV INTEGER
pitching R INTEGER
pitching SO INTEGER
pitching SF INTEGER
pitchingpost BB INTEGER
pitchingpost HR INTEGER
pitchingpost IBB INTEGER
pitchingpost IPouts INTEGER
pitchingpost BK INTEGER
pitchingpost WP INTEGER
pitchingpost HBP INTEGER
pitchingpost SH INTEGER
pitchingpost ER INTEGER
pitchingpost GS INTEGER
pitchingpost G INTEGER
pitchingpost H INTEGER
pitchingpost CG INTEGER
pitchingpost L INTEGER
pitchingpost GF INTEGER
pitchingpost yearID INTEGER
pitchingpost BFP INTEGER
pitchingpost W INTEGER
pitchingpost GIDP INTEGER
pitchingpost SHO INTEGER
pitchingpost SV INTEGER
pitchingpost SF INTEGER
pitchingpost R INTEGER
pitchingpost SO INTEGER
salaries yearID INTEGER
schoolsplayers yearMax INTEGER
schoolsplayers yearMin INTEGER
seriespost wins INTEGER
seriespost yearID INTEGER
seriespost losses INTEGER
seriespost ties INTEGER
teams W INTEGER
teams BB INTEGER
teams BPF INTEGER
teams HR INTEGER
teams IPouts INTEGER
teams Ghome INTEGER
teams 3B INTEGER
teams HA INTEGER
teams HBP INTEGER
teams DP INTEGER
teams SOA INTEGER
teams attendance INTEGER
teams PPF INTEGER
teams RA INTEGER
teams SHO INTEGER
teams AB INTEGER
teams E INTEGER
teams G INTEGER
teams H INTEGER
teams CG INTEGER
teams L INTEGER
teams BBA INTEGER
teams yearID INTEGER
teams R INTEGER
teams 2B INTEGER
teams CS INTEGER
teams HRA INTEGER
teams ER INTEGER
teams SV INTEGER
teams Rank INTEGER
teams SO INTEGER
teams SB INTEGER
teams SF INTEGER
teamshalf G INTEGER
teamshalf Rank INTEGER
teamshalf L INTEGER
teamshalf yearID INTEGER
teamshalf W INTEGER
tmp_batting RBI INTEGER
tmp_batting BB INTEGER
tmp_batting HR INTEGER
tmp_batting IBB INTEGER
tmp_batting 3B INTEGER
tmp_batting G_old INTEGER
tmp_batting stint INTEGER
tmp_batting G_batting INTEGER
tmp_batting HBP INTEGER
tmp_batting AB INTEGER
tmp_batting G INTEGER
tmp_batting H INTEGER
tmp_batting yearID INTEGER
tmp_batting R INTEGER
tmp_batting 2B INTEGER
tmp_batting CS INTEGER
tmp_batting GIDP INTEGER
tmp_batting SH INTEGER
tmp_batting SO INTEGER
tmp_batting SB INTEGER
tmp_batting SF INTEGER
In [13]:
db.find_table("*")
Out[13]:
Table Columns
allstarfull playerID, yearID, gameNum, gameID, teamID, lgID, GP, startingPos
appearances yearID, teamID, lgID, playerID, G_all, G_batting, G_defense, G_p, G_c, G_1b, G_2
b, G_3b, G_ss, G_lf, G_cf, G_rf, G_of, G_dh, G_ph, G_pr
awardsmanagers managerID, awardID, yearID, lgID, tie, notes
awardsplayers playerID, awardID, yearID, lgID, tie, notes
awardssharemanagers awardID, yearID, lgID, managerID, pointsWon, pointsMax, votesFirst
awardsshareplayers awardID, yearID, lgID, playerID, pointsWon, pointsMax, votesFirst
battingpost yearID, round, playerID, teamID, lgID, G, AB, R, H, 2B, 3B, HR, RBI, SB, CS, BB,
SO, IBB, HBP, SH, SF, GIDP
fielding playerID, yearID, stint, teamID, lgID, POS, G, GS, InnOuts, PO, A, E, DP, PB, WP
, SB, CS, ZR
fieldingof playerID, yearID, stint, Glf, Gcf, Grf
fieldingpost playerID, yearID, teamID, lgID, round, POS, G, GS, InnOuts, PO, A, E, DP, TP, PB
, SB, CS
halloffame hofID, yearid, votedBy, ballots, needed, votes, inducted, category
hofold hofID, yearid, votedBy, ballots, votes, inducted, category
managers managerID, yearID, teamID, lgID, inseason, G, W, L, rank, plyrMgr
managershalf managerID, yearID, teamID, lgID, inseason, half, G, W, L, rank
master lahmanID, playerID, managerID, hofID, birthYear, birthMonth, birthDay, birthCoun
try, birthState, birthCity, deathYear, deathMonth, deathDay, deathCountry, death
State, deathCity, nameFirst, nameLast, nameNote, nameGiven, nameNick, weight, he
ight, bats, throws, debut, finalGame, college, lahman40ID, lahman45ID, retroID,
holtzID, bbrefID
pitching playerID, yearID, stint, teamID, lgID, W, L, G, GS, CG, SHO, SV, IPouts, H, ER,
HR, BB, SO, BAOpp, ERA, IBB, WP, HBP, BK, BFP, GF, R, SH, SF, GIDP
pitchingpost playerID, yearID, round, teamID, lgID, W, L, G, GS, CG, SHO, SV, IPouts, H, ER,
HR, BB, SO, BAOpp, ERA, IBB, WP, HBP, BK, BFP, GF, R, SH, SF, GIDP
salaries yearID, teamID, lgID, playerID, salary
schools schoolID, schoolName, schoolCity, schoolState, schoolNick
schoolsplayers playerID, schoolID, yearMin, yearMax
seriespost yearID, round, teamIDwinner, lgIDwinner, teamIDloser, lgIDloser, wins, losses, t
ies
teams yearID, lgID, teamID, franchID, divID, Rank, G, Ghome, W, L, DivWin, WCWin, LgWi
n, WSWin, R, AB, H, 2B, 3B, HR, BB, SO, SB, CS, HBP, SF, RA, ER, ERA, CG, SHO, S
V, IPouts, HA, HRA, BBA, SOA, E, DP, FP, name, park, attendance, BPF, PPF, teamI
DBR, teamIDlahman45, teamIDretro
teamsfranchises franchID, franchName, active, NAassoc
teamshalf yearID, lgID, teamID, Half, divID, DivWin, Rank, G, W, L
tmp_batting playerID, yearID, stint, teamID, lgID, G, G_batting, AB, R, H, 2B, 3B, HR, RBI,
SB, CS, BB, SO, IBB, HBP, SH, SF, GIDP, G_old
In [14]:
db.find_table("*batting*")
Out[14]:
Table Columns
battingpost yearID, round, playerID, teamID, lgID, G, AB, R, H, 2B, 3B, HR, RBI, SB, CS, BB,
SO, IBB, HBP, SH, SF, GIDP
tmp_batting playerID, yearID, stint, teamID, lgID, G, G_batting, AB, R, H, 2B, 3B, HR, RBI,
SB, CS, BB, SO, IBB, HBP, SH, SF, GIDP, G_old
In [15]:
db.save_credentials("baseball")
In [16]:
from db import DB
In [17]:
db = DB(profile="baseball", dbtype="sqlite")
Indexing schema. This will take a second...finished!
In [18]:
db.tables
Out[18]:
Table Columns
allstarfull playerID, yearID, gameNum, gameID, teamID, lgID, GP, startingPos
appearances yearID, teamID, lgID, playerID, G_all, G_batting, G_defense, G_p, G_c, G_1b, G_2
b, G_3b, G_ss, G_lf, G_cf, G_rf, G_of, G_dh, G_ph, G_pr
awardsmanagers managerID, awardID, yearID, lgID, tie, notes
awardsplayers playerID, awardID, yearID, lgID, tie, notes
awardssharemanagers awardID, yearID, lgID, managerID, pointsWon, pointsMax, votesFirst
awardsshareplayers awardID, yearID, lgID, playerID, pointsWon, pointsMax, votesFirst
battingpost yearID, round, playerID, teamID, lgID, G, AB, R, H, 2B, 3B, HR, RBI, SB, CS, BB,
SO, IBB, HBP, SH, SF, GIDP
fielding playerID, yearID, stint, teamID, lgID, POS, G, GS, InnOuts, PO, A, E, DP, PB, WP
, SB, CS, ZR
fieldingof playerID, yearID, stint, Glf, Gcf, Grf
fieldingpost playerID, yearID, teamID, lgID, round, POS, G, GS, InnOuts, PO, A, E, DP, TP, PB
, SB, CS
halloffame hofID, yearid, votedBy, ballots, needed, votes, inducted, category
hofold hofID, yearid, votedBy, ballots, votes, inducted, category
managers managerID, yearID, teamID, lgID, inseason, G, W, L, rank, plyrMgr
managershalf managerID, yearID, teamID, lgID, inseason, half, G, W, L, rank
master lahmanID, playerID, managerID, hofID, birthYear, birthMonth, birthDay, birthCoun
try, birthState, birthCity, deathYear, deathMonth, deathDay, deathCountry, death
State, deathCity, nameFirst, nameLast, nameNote, nameGiven, nameNick, weight, he
ight, bats, throws, debut, finalGame, college, lahman40ID, lahman45ID, retroID,
holtzID, bbrefID
pitching playerID, yearID, stint, teamID, lgID, W, L, G, GS, CG, SHO, SV, IPouts, H, ER,
HR, BB, SO, BAOpp, ERA, IBB, WP, HBP, BK, BFP, GF, R, SH, SF, GIDP
pitchingpost playerID, yearID, round, teamID, lgID, W, L, G, GS, CG, SHO, SV, IPouts, H, ER,
HR, BB, SO, BAOpp, ERA, IBB, WP, HBP, BK, BFP, GF, R, SH, SF, GIDP
salaries yearID, teamID, lgID, playerID, salary
schools schoolID, schoolName, schoolCity, schoolState, schoolNick
schoolsplayers playerID, schoolID, yearMin, yearMax
seriespost yearID, round, teamIDwinner, lgIDwinner, teamIDloser, lgIDloser, wins, losses, t
ies
teams yearID, lgID, teamID, franchID, divID, Rank, G, Ghome, W, L, DivWin, WCWin, LgWi
n, WSWin, R, AB, H, 2B, 3B, HR, BB, SO, SB, CS, HBP, SF, RA, ER, ERA, CG, SHO, S
V, IPouts, HA, HRA, BBA, SOA, E, DP, FP, name, park, attendance, BPF, PPF, teamI
DBR, teamIDlahman45, teamIDretro
teamsfranchises franchID, franchName, active, NAassoc
teamshalf yearID, lgID, teamID, Half, divID, DivWin, Rank, G, W, L
tmp_batting playerID, yearID, stint, teamID, lgID, G, G_batting, AB, R, H, 2B, 3B, HR, RBI,
SB, CS, BB, SO, IBB, HBP, SH, SF, GIDP, G_old
In [19]:
db.tables.appearances.head()
Out[19]:
yearID teamID lgID playerID G_all G_batting G_defense G_p G_c G_1b G_2b G_3b G_ss G_lf G_cf G_rf G_of G_dh G_ph G_pr
0 1871 BS1 NA barnero01 31 None 31 31 0 0 0 16 0 15 0 0 0 0 None None
1 1871 BS1 NA barrofr01 18 None 18 18 0 0 0 1 0 0 13 0 4 17 None None
2 1871 BS1 NA birdsda01 29 None 29 29 0 7 0 0 0 0 0 0 27 27 None None
3 1871 BS1 NA conefr01 19 None 19 19 0 0 0 0 0 0 18 0 1 18 None None
4 1871 BS1 NA gouldch01 31 None 31 31 0 0 30 0 0 0 0 0 1 1 None None
5 1871 BS1 NA jackssa01 16 None 16 16 0 0 0 14 0 1 0 1 0 1 None None
In [19]: