%matplotlib inline
from datetime import datetime, time
import pandas as pd
import numpy as np
import psycopg2 as pg
import pandas.io.sql as sql
CRIME_DATE_FORMAT = '%m/%d/%Y %H:%M:%S %p'
from sqlalchemy import create_engine
con = create_engine('postgresql://wesm:foo@localhost:5432/strata_2015')
con
Engine(postgresql://wesm:***@localhost:5432/strata_2015)
table = pd.read_sql("select * from crimes limit 10;", con)
crimes = pd.read_csv('sf_crimes.csv')
pd.to_datetime(crimes.Date[:1000])
0 2014-01-30 1 2014-07-14 2 2015-01-18 3 2014-09-21 4 2014-03-05 5 2014-03-08 6 2014-04-06 7 2014-09-02 8 2014-07-19 9 2014-09-29 10 2014-04-14 11 2014-07-16 12 2014-01-27 13 2014-05-04 14 2014-08-16 ... 985 2014-08-29 986 2015-01-24 987 2014-07-09 988 2014-01-21 989 2014-05-17 990 2014-09-30 991 2014-02-11 992 2014-05-03 993 2014-06-30 994 2014-12-10 995 2014-07-03 996 2014-09-20 997 2014-01-19 998 2014-12-16 999 2014-07-24 Name: Date, Length: 1000, dtype: datetime64[ns]
%timeit pd.to_datetime(crimes.Date[:1000])
10 loops, best of 3: 99.5 ms per loop
%timeit pd.to_datetime(crimes.Date[:1000], format=CRIME_DATE_FORMAT)
100 loops, best of 3: 5.63 ms per loop
def null_on_error(series, converter):
def f(x):
try:
return converter(x)
except (TypeError, ValueError):
return np.nan
return series.map(f)
def number_with_comma(x):
try:
x = x.replace(',', '')
return float(x)
except (ValueError, AttributeError):
return np.nan
def to_time(x):
h, m = x.split(':')
return time(int(h), int(m))
crimes = pd.read_csv('sf_crimes.csv')
crimes.columns = crimes.columns.map(str.lower)
crimes.date = pd.to_datetime(crimes.date, format=CRIME_DATE_FORMAT)
crimes.time = crimes.time.map(to_time)
companies = pd.read_csv('companies.csv',
parse_dates=['founded_at', 'first_funding_at'])
companies['funding_total_usd'] = null_on_error(
companies.pop(' funding_total_usd '), number_with_comma)
investments = pd.read_csv('investments.csv')
acquisitions = pd.read_csv('acquisitions.csv')
rounds = pd.read_csv('rounds.csv')
/Users/wesm/anaconda/lib/python2.7/site-packages/pandas/io/parsers.py:1154: DtypeWarning: Columns (0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,20,21) have mixed types. Specify dtype option on import or set low_memory=False. data = self._reader.read(nrows)
companies.columns = companies.columns.map(str.strip)
rounds.columns = rounds.columns.map(str.strip)
investments.columns = investments.columns.map(str.strip)
acquisitions.columns = acquisitions.columns.map(str.strip)
acquisitions['price_amount'] = null_on_error(acquisitions.price_amount, number_with_comma)
crimes.loc[0]
incidntnum 140090226 category WARRANTS descript ENROUTE TO PAROLE OFFICER dayofweek Thursday date 2014-01-30 12:00:00 time 21:56:00 pddistrict MISSION resolution ARREST, BOOKED address 400 Block of SOUTH VAN NESS AV x -122.4177 y 37.76627 location (37.7662744315825, -122.417662868855) Name: 0, dtype: object
crimes.groupby('category').size().order(ascending=False)
category LARCENY/THEFT 41300 OTHER OFFENSES 22324 NON-CRIMINAL 20838 ASSAULT 13422 VANDALISM 7712 VEHICLE THEFT 7671 WARRANTS 7267 BURGLARY 6525 DRUG/NARCOTIC 5801 SUSPICIOUS OCC 5633 MISSING PERSON 5052 ROBBERY 3758 FRAUD 3176 SECONDARY CODES 2042 WEAPON LAWS 1726 TRESPASS 1247 STOLEN PROPERTY 1165 SEX OFFENSES, FORCIBLE 912 FORGERY/COUNTERFEITING 807 DRUNKENNESS 666 KIDNAPPING 559 PROSTITUTION 477 DRIVING UNDER THE INFLUENCE 408 DISORDERLY CONDUCT 389 ARSON 292 RUNAWAY 233 LIQUOR LAWS 192 EMBEZZLEMENT 153 FAMILY OFFENSES 94 SUICIDE 75 BRIBERY 58 LOITERING 38 BAD CHECKS 35 EXTORTION 33 SEX OFFENSES, NON FORCIBLE 20 GAMBLING 9 PORNOGRAPHY/OBSCENE MAT 2 TREA 1 dtype: int64
query = """
select category, count(*) as occs
from crimes
group by 1
order by occs desc;
"""
pd.read_sql(query, con)[:10]
category | occs | |
---|---|---|
0 | LARCENY/THEFT | 41300 |
1 | OTHER OFFENSES | 22324 |
2 | NON-CRIMINAL | 20838 |
3 | ASSAULT | 13422 |
4 | VANDALISM | 7712 |
5 | VEHICLE THEFT | 7671 |
6 | WARRANTS | 7267 |
7 | BURGLARY | 6525 |
8 | DRUG/NARCOTIC | 5801 |
9 | SUSPICIOUS OCC | 5633 |
def top(x, k=10):
return x.order(ascending=False)[:k]
crimes.groupby([crimes.date.dt.year,
crimes.time.map(lambda x: x.hour)]).size()
time 2014 0 7217 1 4353 2 3642 3 2537 4 1709 5 1636 6 2195 7 3553 8 5124 9 5844 10 6210 11 6464 12 8803 13 7122 14 7373 15 8111 16 8671 17 9389 18 9865 19 9087 20 8452 21 7743 22 7924 23 7092 2015 0 504 1 420 2 332 3 224 4 156 5 143 6 179 7 320 8 403 9 530 10 497 11 533 12 596 13 539 14 576 15 627 16 681 17 691 18 819 19 766 20 694 21 634 22 616 23 516 dtype: int64
query = """
select extract(year from date) as year,
extract(hour from time) as hourofday,
count(*) as occs
from crimes
group by 1, 2;
"""
sql.read_sql(query, con)
year | hourofday | occs | |
---|---|---|---|
0 | 2014 | 0 | 7217 |
1 | 2014 | 1 | 4353 |
2 | 2014 | 2 | 3642 |
3 | 2014 | 3 | 2537 |
4 | 2014 | 4 | 1709 |
5 | 2014 | 5 | 1636 |
6 | 2014 | 6 | 2195 |
7 | 2014 | 7 | 3553 |
8 | 2014 | 8 | 5124 |
9 | 2014 | 9 | 5844 |
10 | 2014 | 10 | 6210 |
11 | 2014 | 11 | 6464 |
12 | 2014 | 12 | 8803 |
13 | 2014 | 13 | 7122 |
14 | 2014 | 14 | 7373 |
15 | 2014 | 15 | 8111 |
16 | 2014 | 16 | 8671 |
17 | 2014 | 17 | 9389 |
18 | 2014 | 18 | 9865 |
19 | 2014 | 19 | 9087 |
20 | 2014 | 20 | 8452 |
21 | 2014 | 21 | 7743 |
22 | 2014 | 22 | 7924 |
23 | 2014 | 23 | 7092 |
24 | 2015 | 0 | 504 |
25 | 2015 | 1 | 420 |
26 | 2015 | 2 | 332 |
27 | 2015 | 3 | 224 |
28 | 2015 | 4 | 156 |
29 | 2015 | 5 | 143 |
30 | 2015 | 6 | 179 |
31 | 2015 | 7 | 320 |
32 | 2015 | 8 | 403 |
33 | 2015 | 9 | 530 |
34 | 2015 | 10 | 497 |
35 | 2015 | 11 | 533 |
36 | 2015 | 12 | 596 |
37 | 2015 | 13 | 539 |
38 | 2015 | 14 | 576 |
39 | 2015 | 15 | 627 |
40 | 2015 | 16 | 681 |
41 | 2015 | 17 | 691 |
42 | 2015 | 18 | 819 |
43 | 2015 | 19 | 766 |
44 | 2015 | 20 | 694 |
45 | 2015 | 21 | 634 |
46 | 2015 | 22 | 616 |
47 | 2015 | 23 | 516 |
crimes[:1].T
0 | |
---|---|
incidntnum | 140090226 |
category | WARRANTS |
descript | ENROUTE TO PAROLE OFFICER |
dayofweek | Thursday |
date | 2014-01-30 12:00:00 |
time | 21:56:00 |
pddistrict | MISSION |
resolution | ARREST, BOOKED |
address | 400 Block of SOUTH VAN NESS AV |
x | -122.4177 |
y | 37.76627 |
location | (37.7662744315825, -122.417662868855) |
crimes.groupby('category').size().order(ascending=False)
category LARCENY/THEFT 41300 OTHER OFFENSES 22324 NON-CRIMINAL 20838 ASSAULT 13422 VANDALISM 7712 VEHICLE THEFT 7671 WARRANTS 7267 BURGLARY 6525 DRUG/NARCOTIC 5801 SUSPICIOUS OCC 5633 MISSING PERSON 5052 ROBBERY 3758 FRAUD 3176 SECONDARY CODES 2042 WEAPON LAWS 1726 TRESPASS 1247 STOLEN PROPERTY 1165 SEX OFFENSES, FORCIBLE 912 FORGERY/COUNTERFEITING 807 DRUNKENNESS 666 KIDNAPPING 559 PROSTITUTION 477 DRIVING UNDER THE INFLUENCE 408 DISORDERLY CONDUCT 389 ARSON 292 RUNAWAY 233 LIQUOR LAWS 192 EMBEZZLEMENT 153 FAMILY OFFENSES 94 SUICIDE 75 BRIBERY 58 LOITERING 38 BAD CHECKS 35 EXTORTION 33 SEX OFFENSES, NON FORCIBLE 20 GAMBLING 9 PORNOGRAPHY/OBSCENE MAT 2 TREA 1 dtype: int64
category_mapping = {
'LARCENY/THEFT': 'THEFT',
'ROBBERY': 'THEFT',
'VEHICLE THEFT': 'THEFT'
}
coarse_cat = crimes.category.map(lambda x: category_mapping.get(x, x))
top(crimes.groupby(coarse_cat).size(), 20)
category THEFT 52729 OTHER OFFENSES 22324 NON-CRIMINAL 20838 ASSAULT 13422 VANDALISM 7712 WARRANTS 7267 BURGLARY 6525 DRUG/NARCOTIC 5801 SUSPICIOUS OCC 5633 MISSING PERSON 5052 FRAUD 3176 SECONDARY CODES 2042 WEAPON LAWS 1726 TRESPASS 1247 STOLEN PROPERTY 1165 SEX OFFENSES, FORCIBLE 912 FORGERY/COUNTERFEITING 807 DRUNKENNESS 666 KIDNAPPING 559 PROSTITUTION 477 dtype: int64
#crimes.category.map(category_mapping.__getitem__)
crimes.category.map(lambda x: category_mapping[x])
--------------------------------------------------------------------------- KeyError Traceback (most recent call last) <ipython-input-22-d4a241dd2ad9> in <module>() 1 #crimes.category.map(category_mapping.__getitem__) ----> 2 crimes.category.map(lambda x: category_mapping[x]) /Users/wesm/anaconda/lib/python2.7/site-packages/pandas/core/series.pyc in map(self, arg, na_action) 2012 index=self.index).__finalize__(self) 2013 else: -> 2014 mapped = map_f(values, arg) 2015 return self._constructor(mapped, 2016 index=self.index).__finalize__(self) /Users/wesm/anaconda/lib/python2.7/site-packages/pandas/lib.so in pandas.lib.map_infer (pandas/lib.c:56502)() <ipython-input-22-d4a241dd2ad9> in <lambda>(x) 1 #crimes.category.map(category_mapping.__getitem__) ----> 2 crimes.category.map(lambda x: category_mapping[x]) KeyError: 'WARRANTS'
query = """
select
case
when category in ('LARCENY/THEFT', 'ROBBERY', 'VEHICLE THEFT')
then 'THEFT'
else category
end as coarse_cat,
count(*) as occs
from crimes
group by 1
order by occs desc;
"""
sql.read_sql(query, con)[:10]
coarse_cat | occs | |
---|---|---|
0 | THEFT | 52729 |
1 | OTHER OFFENSES | 22324 |
2 | NON-CRIMINAL | 20838 |
3 | ASSAULT | 13422 |
4 | VANDALISM | 7712 |
5 | WARRANTS | 7267 |
6 | BURGLARY | 6525 |
7 | DRUG/NARCOTIC | 5801 |
8 | SUSPICIOUS OCC | 5633 |
9 | MISSING PERSON | 5052 |
keys = [crimes.date.dt.year,
crimes.time.map(lambda x: x.hour)]
counts = crimes.groupby(keys).size()
counts = counts.unstack(0)
share = counts / counts.sum()
share.plot(kind='bar')
<matplotlib.axes.AxesSubplot at 0x107ee7390>
crimes['coarse_cat'] = coarse_cat
thefts = crimes[crimes.coarse_cat == 'THEFT']
thefts.groupby(thefts.date.dt.weekday).size()
0 7201 1 7082 2 7353 3 7243 4 8259 5 8368 6 7223 dtype: int64
day_names = pd.Series(['Weekday'] * 5 + ['Weekend'] * 2)
day_names
0 Weekday 1 Weekday 2 Weekday 3 Weekday 4 Weekday 5 Weekend 6 Weekend dtype: object
day_type = day_names.take(thefts.date.dt.weekday)
thefts.groupby(day_type.values).size()
Weekday 37138 Weekend 15591 dtype: int64
ORDER BY
+ LIMIT
to findquery = """
with t0 as (
select descript, count(*) as occs
from crimes
group by 1
order by occs desc
limit 5
)
select descript, dayofweek, count(*) as occs
from crimes
where descript in (select descript from t0)
group by 1, 2
order by descript, dayofweek;
"""
sql.read_sql(query, con)
descript | dayofweek | occs | |
---|---|---|---|
0 | AIDED CASE, MENTAL DISTURBED | Friday | 704 |
1 | AIDED CASE, MENTAL DISTURBED | Monday | 718 |
2 | AIDED CASE, MENTAL DISTURBED | Saturday | 747 |
3 | AIDED CASE, MENTAL DISTURBED | Sunday | 710 |
4 | AIDED CASE, MENTAL DISTURBED | Thursday | 745 |
5 | AIDED CASE, MENTAL DISTURBED | Tuesday | 735 |
6 | AIDED CASE, MENTAL DISTURBED | Wednesday | 781 |
7 | DRIVERS LICENSE, SUSPENDED OR REVOKED | Friday | 899 |
8 | DRIVERS LICENSE, SUSPENDED OR REVOKED | Monday | 876 |
9 | DRIVERS LICENSE, SUSPENDED OR REVOKED | Saturday | 785 |
10 | DRIVERS LICENSE, SUSPENDED OR REVOKED | Sunday | 778 |
11 | DRIVERS LICENSE, SUSPENDED OR REVOKED | Thursday | 829 |
12 | DRIVERS LICENSE, SUSPENDED OR REVOKED | Tuesday | 904 |
13 | DRIVERS LICENSE, SUSPENDED OR REVOKED | Wednesday | 878 |
14 | GRAND THEFT FROM LOCKED AUTO | Friday | 2492 |
15 | GRAND THEFT FROM LOCKED AUTO | Monday | 2267 |
16 | GRAND THEFT FROM LOCKED AUTO | Saturday | 2605 |
17 | GRAND THEFT FROM LOCKED AUTO | Sunday | 2162 |
18 | GRAND THEFT FROM LOCKED AUTO | Thursday | 2281 |
19 | GRAND THEFT FROM LOCKED AUTO | Tuesday | 2234 |
20 | GRAND THEFT FROM LOCKED AUTO | Wednesday | 2241 |
21 | LOST PROPERTY | Friday | 860 |
22 | LOST PROPERTY | Monday | 606 |
23 | LOST PROPERTY | Saturday | 907 |
24 | LOST PROPERTY | Sunday | 728 |
25 | LOST PROPERTY | Thursday | 637 |
26 | LOST PROPERTY | Tuesday | 593 |
27 | LOST PROPERTY | Wednesday | 691 |
28 | PETTY THEFT OF PROPERTY | Friday | 825 |
29 | PETTY THEFT OF PROPERTY | Monday | 558 |
30 | PETTY THEFT OF PROPERTY | Saturday | 985 |
31 | PETTY THEFT OF PROPERTY | Sunday | 749 |
32 | PETTY THEFT OF PROPERTY | Thursday | 673 |
33 | PETTY THEFT OF PROPERTY | Tuesday | 607 |
34 | PETTY THEFT OF PROPERTY | Wednesday | 644 |
query = """
select c.descript, c.dayofweek, count(*) as occs
from crimes c
inner join (
select descript, count(*) as occs
from crimes
group by 1
order by occs desc
limit 5
) c2 on c.descript = c2.descript
group by 1, 2;
"""
sql.read_sql(query, con)
descript | dayofweek | occs | |
---|---|---|---|
0 | PETTY THEFT OF PROPERTY | Thursday | 673 |
1 | LOST PROPERTY | Wednesday | 691 |
2 | PETTY THEFT OF PROPERTY | Wednesday | 644 |
3 | LOST PROPERTY | Thursday | 637 |
4 | LOST PROPERTY | Friday | 860 |
5 | PETTY THEFT OF PROPERTY | Friday | 825 |
6 | GRAND THEFT FROM LOCKED AUTO | Friday | 2492 |
7 | DRIVERS LICENSE, SUSPENDED OR REVOKED | Sunday | 778 |
8 | DRIVERS LICENSE, SUSPENDED OR REVOKED | Monday | 876 |
9 | GRAND THEFT FROM LOCKED AUTO | Thursday | 2281 |
10 | AIDED CASE, MENTAL DISTURBED | Saturday | 747 |
11 | GRAND THEFT FROM LOCKED AUTO | Wednesday | 2241 |
12 | AIDED CASE, MENTAL DISTURBED | Wednesday | 781 |
13 | AIDED CASE, MENTAL DISTURBED | Thursday | 745 |
14 | GRAND THEFT FROM LOCKED AUTO | Saturday | 2605 |
15 | AIDED CASE, MENTAL DISTURBED | Friday | 704 |
16 | LOST PROPERTY | Saturday | 907 |
17 | PETTY THEFT OF PROPERTY | Saturday | 985 |
18 | DRIVERS LICENSE, SUSPENDED OR REVOKED | Tuesday | 904 |
19 | PETTY THEFT OF PROPERTY | Tuesday | 607 |
20 | AIDED CASE, MENTAL DISTURBED | Monday | 718 |
21 | LOST PROPERTY | Tuesday | 593 |
22 | DRIVERS LICENSE, SUSPENDED OR REVOKED | Saturday | 785 |
23 | AIDED CASE, MENTAL DISTURBED | Sunday | 710 |
24 | GRAND THEFT FROM LOCKED AUTO | Tuesday | 2234 |
25 | LOST PROPERTY | Monday | 606 |
26 | PETTY THEFT OF PROPERTY | Monday | 558 |
27 | AIDED CASE, MENTAL DISTURBED | Tuesday | 735 |
28 | PETTY THEFT OF PROPERTY | Sunday | 749 |
29 | LOST PROPERTY | Sunday | 728 |
30 | DRIVERS LICENSE, SUSPENDED OR REVOKED | Friday | 899 |
31 | GRAND THEFT FROM LOCKED AUTO | Sunday | 2162 |
32 | DRIVERS LICENSE, SUSPENDED OR REVOKED | Wednesday | 878 |
33 | DRIVERS LICENSE, SUSPENDED OR REVOKED | Thursday | 829 |
34 | GRAND THEFT FROM LOCKED AUTO | Monday | 2267 |
counts = crimes.groupby('descript').size()
counts
descript ABANDONMENT OF CHILD 4 ABORTION 1 ACCESS CARD INFORMATION, PUBLICATION OF 1 ACCESS CARD INFORMATION, THEFT OF 135 ACCIDENTAL BURNS 1 ACCIDENTAL SHOOTING 1 ACTS AGAINST PUBLIC TRANSIT 34 ADVERTISING DISTRIBUTORS PERMIT VIOLATION 2 AEROSOL CONTAINER; SALE, PURCHASE OR POSSESSION OF 3 AFFIXING ADVERTISMENTS TO POLES 1 AGGRAVATED ASSAULT OF POLICE OFFICER,BODILY FORCE 39 AGGRAVATED ASSAULT ON POLICE OFFICER WITH A KNIFE 2 AGGRAVATED ASSAULT WITH A DEADLY WEAPON 1102 AGGRAVATED ASSAULT WITH A GUN 158 AGGRAVATED ASSAULT WITH A KNIFE 447 ... VIOLATION OF STAY AWAY ORDER 131 WARRANT ARREST 4042 WEAPON, ASSAULT, POSSESSION, MANUFACTURE, OR SALE 29 WEAPON, ASSAULT, REGISTRATION OR TRANSFER VIOLATION 1 WEAPON, DEADLY, CARRYING WITH INTENT TO COMMIT ASSAULT 37 WEAPON, DEADLY, EXHIBITING TO RESIST ARREST 6 WEAPON, DEADLY, POSSESSION OF TO VIOLATE 136.1 PC 1 WEAPON, POSSESS OR BRING OTHER ON SCHOOL GROUNDS 33 WEAPON, POSSESSING IN PUBLIC BUILDING OR OPEN MEETING 7 WEAPON, TAKING OR ATTEMPTING TO TAKE FROM PEACE OFFICER 3 WEAPONS POSSESSION BY JUVENILE SUSPECT 2 WEARING MASK OR DISGUISE FOR UNLAWFUL PURPOSE 2 WILLFUL CRUELTY TO CHILD 25 WIRETAPS, UNAUTHORIZED 1 YOUTH COURT 1 Length: 744, dtype: int64
counts.order(ascending=False)[:5].index
Index([u'GRAND THEFT FROM LOCKED AUTO', u'DRIVERS LICENSE, SUSPENDED OR REVOKED', u'AIDED CASE, MENTAL DISTURBED', u'PETTY THEFT OF PROPERTY', u'LOST PROPERTY'], dtype='object')
# this could be very large
K = 5
counts = crimes.groupby('descript').size()
top_descripts = counts.order(ascending=False)[:K].index
top_descripts
filtered = crimes[crimes.descript.isin(top_descripts)]
result = (filtered
.groupby(['descript', filtered.time.map(lambda x: x.hour)])
.size())
#result
result.unstack('descript')
descript | AIDED CASE, MENTAL DISTURBED | DRIVERS LICENSE, SUSPENDED OR REVOKED | GRAND THEFT FROM LOCKED AUTO | LOST PROPERTY | PETTY THEFT OF PROPERTY |
---|---|---|---|---|---|
time | |||||
0 | 170 | 384 | 526 | 250 | 284 |
1 | 120 | 250 | 327 | 177 | 234 |
2 | 114 | 193 | 237 | 106 | 83 |
3 | 103 | 121 | 133 | 42 | 43 |
4 | 74 | 131 | 78 | 11 | 27 |
5 | 77 | 97 | 96 | 32 | 22 |
6 | 123 | 45 | 144 | 44 | 35 |
7 | 165 | 126 | 194 | 97 | 83 |
8 | 217 | 201 | 276 | 167 | 142 |
9 | 234 | 208 | 405 | 206 | 207 |
10 | 245 | 204 | 588 | 265 | 227 |
11 | 284 | 207 | 665 | 246 | 229 |
12 | 288 | 286 | 655 | 395 | 283 |
13 | 282 | 262 | 633 | 289 | 228 |
14 | 254 | 243 | 652 | 264 | 270 |
15 | 265 | 236 | 666 | 321 | 294 |
16 | 331 | 329 | 779 | 319 | 317 |
17 | 275 | 386 | 1015 | 289 | 335 |
18 | 263 | 358 | 1731 | 277 | 345 |
19 | 300 | 291 | 1746 | 242 | 308 |
20 | 286 | 227 | 1595 | 243 | 274 |
21 | 252 | 279 | 1208 | 241 | 223 |
22 | 220 | 449 | 1121 | 238 | 239 |
23 | 198 | 436 | 812 | 261 | 309 |
acquisitions.price_amount.isnull().value_counts()
True 51424 False 3816 dtype: int64
acqs_known = acquisitions[acquisitions.price_amount.notnull()]
acqs_known[acqs_known.price_amount > acqs_known.price_amount.mean()]
company_permalink | company_name | company_category_list | company_market | company_country_code | company_state_code | company_region | company_city | acquirer_permalink | acquirer_name | ... | acquirer_country_code | acquirer_state_code | acquirer_region | acquirer_city | acquired_at | acquired_month | acquired_quarter | acquired_year | price_amount | price_currency_code | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
31 | /organization/21st-century-insurance | 21st Century Insurance | |Finance|Business Services|Insurance| | Business Services | USA | AL | AL - Other | De Armanville | /organization/american-international-group | American International Group | ... | USA | NY | New York City | New York | 2007-09-01 | 2007-09 | 2007-Q3 | 2007 | 749000000 | USD |
56 | /organization/3com | 3Com | |Curated Web| | Curated Web | USA | MA | Boston | Marlborough | /organization/hewlett-packard | Hewlett-Packard | ... | USA | CA | SF Bay Area | Palo Alto | 2009-11-11 | 2009-11 | 2009-Q4 | 2009 | 2700000000 | USD |
69 | /organization/3par | 3PAR | |Services|Cloud Data Services|Software| | Cloud Data Services | USA | CA | SF Bay Area | Fremont | /organization/hewlett-packard | Hewlett-Packard | ... | USA | CA | SF Bay Area | Palo Alto | 2010-09-02 | 2010-09 | 2010-Q3 | 2010 | 2350000000 | USD |
108 | /organization/91-wireless | 91 Wireless | |Mobile| | Mobile | CHN | NaN | Fuzhou Shi | Fuzhou Shi | /organization/baidu | Baidu | ... | CHN | NaN | Beijing | Beijing | 2013-07-15 | 2013-07 | 2013-Q3 | 2013 | 1900000000 | USD |
136 | /organization/abc | ABC | |News| | News | NaN | NaN | NaN | NaN | /organization/the-walt-disney-company | The Walt Disney Company | ... | USA | CA | Los Angeles | Burbank | 1995-08-05 | 1995-08 | 1995-Q3 | 1995 | 19000000000 | USD |
152 | /organization/abovenet | AboveNet | |Information Technology|Internet|Service Provi... | Internet | USA | NY | New York City | White Plains | /organization/zayo-group | Zayo | ... | USA | CO | Denver | Boulder | 2012-03-19 | 2012-03 | 2012-Q1 | 2012 | 2300000000 | USD |
155 | /organization/abraxis-bioscience | Abraxis BioScience | |Pharmaceuticals|Biotechnology| | Biotechnology | USA | CA | Los Angeles | Los Angeles | /organization/celgene | Celgene | ... | USA | NJ | Newark | Summit | 2010-06-30 | 2010-06 | 2010-Q2 | 2010 | 2900000000 | USD |
173 | /organization/accelrys | Accelrys | |Software| | Software | USA | CA | San Diego | San Diego | /organization/dassault | Dassault Systemes | ... | FRA | NaN | NaN | NaN | 2014-01-30 | 2014-01 | 2014-Q1 | 2014 | 750000000 | USD |
187 | /organization/access-midstream-partners-lp-inv... | Access Midstream Partners LP Investments | |Natural Gas Uses| | Natural Gas Uses | USA | OK | Oklahoma City | Oklahoma City | /organization/the-williams-companies | The Williams Companies | ... | USA | OK | OK - Other | Snyder | 2012-12-21 | 2012-12 | 2012-Q4 | 2012 | 2250000000 | USD |
195 | /organization/acclarent | Acclarent | |Medical|Biotechnology| | Biotechnology | USA | CA | SF Bay Area | Menlo Park | /organization/johnson-johnson | Johnson & Johnson | ... | USA | NJ | Newark | New Brunswick | 2009-12-17 | 2009-12 | 2009-Q4 | 2009 | 785000000 | USD |
343 | /organization/admob | AdMob | |Mobile|Advertising| | Advertising | USA | CA | SF Bay Area | Mountain View | /organization/google | ... | USA | CA | SF Bay Area | Mountain View | 2009-11-09 | 2009-11 | 2009-Q4 | 2009 | 750000000 | USD | |
382 | /organization/advanced-computer-software | Advanced Computer Software | |Health Care|Information Technology|Software| | Software | GBR | NaN | Cobham | Cobham | /organization/vista-equity-partners | Vista Equity Partners | ... | USA | TX | Austin | Austin | 2014-11-25 | 2014-11 | 2014-Q4 | 2014 | 725000000 | GBP |
406 | /organization/advanstar-communications | Advanstar Communications | |Market Research|Events|Business Services| | Events | USA | CA | Los Angeles | Santa Monica | /organization/ubm-electronics | UBM Electronics | ... | USA | CA | SF Bay Area | San Francisco | 2014-10-01 | 2014-10 | 2014-Q4 | 2014 | 972000000 | USD |
420 | /organization/advo | Advo | NaN | NaN | NaN | NaN | NaN | NaN | /organization/valassis | Valassis | ... | USA | MI | Detroit | Livonia | 2006-07-06 | 2006-07 | 2006-Q3 | 2006 | 1300000000 | USD |
430 | /organization/aeluros | Aeluros | |Semiconductors| | Semiconductors | USA | CA | SF Bay Area | Mountain View | /organization/broadcom | Broadcom | ... | USA | CA | Anaheim | Irvine | 2012-02-17 | 2012-02 | 2012-Q1 | 2012 | 3700000000 | USD |
443 | /organization/affiliated-computer-services | Affiliated Computer Services | |Consulting| | Consulting | USA | TX | Dallas | Dallas | /organization/xerox | Xerox | ... | USA | AL | AL - Other | Normal | 2009-09-28 | 2009-09 | 2009-Q3 | 2009 | 5750000000 | USD |
516 | /organization/airwatch | AirWatch | |Mobile Devices|Mobile| | Mobile | USA | GA | Atlanta | Atlanta | /organization/vmware | VMware | ... | USA | CA | SF Bay Area | Palo Alto | 2014-01-22 | 2014-01 | 2014-Q1 | 2014 | 1540000000 | USD |
536 | /organization/aktiv-kapital | Aktiv Kapital | |Finance| | Finance | NaN | NaN | NaN | NaN | /organization/portfolio-recovery-associates | Portfolio Recovery Associates | ... | USA | VA | Norfolk - Virginia Beach | Norfolk | 2014-02-20 | 2014-02 | 2014-Q1 | 2014 | 1300000000 | USD |
540 | /organization/alabama-gas-corp | Alabama Gas Corp | NaN | NaN | USA | AL | Birmingham | Birmingham | /organization/laclede-group | Laclede Group | ... | USA | MO | St. Louis | St Louis | 2014-04-07 | 2014-04 | 2014-Q2 | 2014 | 1340000000 | USD |
576 | /organization/algeta | Algeta | |Biotechnology| | Biotechnology | NOR | NaN | Oslo | Oslo | /organization/bayer-ag-germany | Bayer AG | ... | DEU | NaN | Leverkusen | Leverkusen | 2013-12-19 | 2013-12 | 2013-Q4 | 2013 | 2900000000 | USD |
588 | /organization/alios-biopharma | Alios BioPharma | |Biotechnology| | Biotechnology | USA | CA | SF Bay Area | South San Francisco | /organization/johnson-johnson | Johnson & Johnson | ... | USA | NJ | Newark | New Brunswick | 2014-09-30 | 2014-09 | 2014-Q3 | 2014 | 1750000000 | USD |
606 | /organization/allergan | Allergan | |Medical|Pharmaceuticals|Biotechnology| | Medical | USA | CA | Anaheim | Irvine | /organization/actavis | Actavis | ... | USA | NJ | Newark | Parsippany | 2014-11-16 | 2014-11 | 2014-Q4 | 2014 | 65500000000 | USD |
618 | /organization/adl-media-inc | AllMedia Inc | |Advertising| | Advertising | USA | IA | IA - Other | Plano | /organization/liberty-global | Liberty Global | ... | USA | CO | Denver | Englewood | 2014-05-08 | 2014-05 | 2014-Q2 | 2014 | 930000000 | USD |
629 | /organization/alltel | Alltel | |Public Relations| | Public Relations | USA | AR | Little Rock | Little Rock | /organization/verizon | Verizon Communications | ... | USA | NY | New York City | New York | 2008-06-05 | 2008-06 | 2008-Q2 | 2008 | 28100000000 | USD |
654 | /organization/altalink | AltaLink | |Local Businesses|Energy|Utilities| | Utilities | CAN | AB | Calgary | Calgary | /organization/midamerican-energy | Berkshire Hathaway Energy | ... | USA | IA | Des Moines | Des Moines | 2014-05-02 | 2014-05 | 2014-Q2 | 2014 | 3520000000 | USD |
691 | /organization/amcol-international | AMCOL International | |Manufacturing| | Manufacturing | USA | IL | Chicago | Hoffman Estates | /organization/minerals-technologies | Minerals Technologies | ... | USA | NY | New York City | New York | 2014-03-10 | 2014-03 | 2014-Q1 | 2014 | 1700000000 | USD |
703 | /organization/americanexpress | American Express | |Finance| | Finance | USA | NY | New York City | New York | /organization/standard-chartered-bank | Standard Chartered Bank | ... | GBR | NaN | London | London | 2008-02-28 | 2008-02 | 2008-Q1 | 2008 | 823000000 | USD |
716 | /organization/american-petroleum-tankers | American Petroleum Tankers | NaN | NaN | USA | PA | Philadelphia | Plymouth Meeting | /organization/kinder-morgan | Kinder Morgan Energy | ... | USA | TX | Houston | Houston | 2013-12-23 | 2013-12 | 2013-Q4 | 2013 | 962000000 | USD |
719 | /organization/american-realty-capital-healthca... | American Realty Capital Healthcare Trust | NaN | NaN | USA | MA | Boston | Boston | /organization/ventas | Ventas | ... | USA | IL | Chicago | Chicago | 2014-06-02 | 2014-06 | 2014-Q2 | 2014 | 2600000000 | USD |
723 | /organization/ami-semiconductor | AMI Semiconductor | |Electronics|Manufacturing|Design| | Design | USA | ID | Idaho Falls | Pocatello | /organization/on-semiconductor | ON Semiconductor | ... | USA | AZ | Phoenix | Phoenix | 2007-12-14 | 2007-12 | 2007-Q4 | 2007 | 915000000 | USD |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
12702 | /organization/viawest | ViaWest | |Data Centers|Services|Web Hosting| | Web Hosting | USA | CO | Denver | Greenwood Village | /organization/shaw-communications | Shaw Communications | ... | CAN | AB | Calgary | Calgary | 2014-08-04 | 2014-08 | 2014-Q3 | 2014 | 1200000000 | USD |
12704 | /organization/viber-media | Viber Media | |Android|iPhone|VoIP|Messaging| | Android | CYP | NaN | Cyprus | Limassol | /organization/rakuten | Rakuten | ... | JPN | NaN | Tokyo | Tokyo | 2014-02-14 | 2014-02 | 2014-Q1 | 2014 | 900000000 | USD |
12717 | /organization/video-gaming-technologies | Video Gaming Technologies | |Video Games| | Video Games | USA | TN | Nashville | Franklin | /organization/aristocrat-technologies-inc | Aristocrat Technologies, Inc | ... | AUS | NaN | AUS - Other | NaN | 2014-07-07 | 2014-07 | 2014-Q3 | 2014 | 1300000000 | USD |
12749 | /organization/vion-food-group | Vion Food Group | |Hospitality| | Hospitality | NLD | NaN | Eindhoven | Eindhoven | /organization/darling-international | Darling Ingredients | ... | USA | TX | Dallas | Irving | 2013-10-07 | 2013-10 | 2013-Q4 | 2013 | 1600000000 | EUR |
12764 | /organization/viropharma | ViroPharma | |Pharmaceuticals|Biotechnology| | Biotechnology | USA | PA | Philadelphia | Exton | /organization/shire | Shire | ... | IRL | NaN | Dublin | Dublin | 2013-11-10 | 2013-11 | 2013-Q4 | 2013 | 4200000000 | USD |
12843 | /organization/vizada | Vizada | |DOD/Military|Mobile| | Mobile | USA | MD | Washington, D.C. | Rockville | /organization/eads-astrium | EADS Astrium | ... | USA | TX | Houston | Houston | 2011-08-03 | 2011-08 | 2011-Q3 | 2011 | 960000000 | USD |
12854 | /organization/vk | Vkontake | |Communities|Social Network Media|Social Media| | Communities | RUS | NaN | St. Petersburg | Saint Petersburg | /organization/mail-ru | Mail.Ru Group | ... | RUS | NaN | Moscow | Moscow | 2014-09-16 | 2014-09 | 2014-Q3 | 2014 | 1470000000 | USD |
12887 | /organization/volvo-rents-construction | Volvo Rents Construction | NaN | NaN | CHE | NaN | CHE - Other | Eison | /organization/platinum-equity-llc | Platinum Equity | ... | USA | CA | Los Angeles | Beverly Hills | 2013-12-10 | 2013-12 | 2013-Q4 | 2013 | 1100000000 | USD |
12957 | /organization/warner-chilcott | Warner Chilcott | |Pharmaceuticals|Biotechnology| | Biotechnology | USA | NJ | Newark | Rockaway | /organization/actavis | Actavis | ... | USA | NJ | Newark | Parsippany | 2013-05-20 | 2013-05 | 2013-Q2 | 2013 | 8500000000 | USD |
12981 | /organization/waze | Waze | |Navigation|Transportation| | Transportation | USA | CA | SF Bay Area | Palo Alto | /organization/google | ... | USA | CA | SF Bay Area | Mountain View | 2013-06-11 | 2013-06 | 2013-Q2 | 2013 | 996000000 | USD | |
13011 | /organization/webex | WebEx | |Curated Web| | Curated Web | USA | CA | SF Bay Area | Santa Clara | /organization/cisco | Cisco | ... | USA | CA | SF Bay Area | San Jose | 2007-03-15 | 2007-03 | 2007-Q1 | 2007 | 3200000000 | USD |
13030 | /organization/webtrends | Webtrends | |SEO|Curated Web| | Curated Web | USA | OR | Portland, Oregon | Portland | /organization/netiq | NetIQ | ... | USA | CA | SF Bay Area | San Jose | 2001-01-17 | 2001-01 | 2001-Q1 | 2001 | 1000000000 | USD |
13061 | /organization/western-gas-resources | Western Gas Resources | |Clean Energy|Oil| | Clean Energy | USA | TX | Seminole | Seminole | /organization/anadarko-petroleum-corporation | Anadarko Petroleum Corporation | ... | USA | TX | Seminole | Seminole | 2006-06-01 | 2006-06 | 2006-Q2 | 2006 | 4800000000 | USD |
13069 | /organization/weyerhaeuser | Weyerhaeuser | |Manufacturing| | Manufacturing | USA | WA | Seattle | Federal Way | /organization/tri-pointe-homes | Tri Pointe Homes | ... | USA | CA | SF Bay Area | San Ramon | 2013-11-04 | 2013-11 | 2013-Q4 | 2013 | 2700000000 | USD |
13080 | /organization/whatsapp | |Messaging| | Messaging | USA | CA | SF Bay Area | Santa Clara | /organization/facebook | ... | USA | CA | SF Bay Area | Menlo Park | 2014-02-19 | 2014-02 | 2014-Q1 | 2014 | 19000000000 | USD | ||
13081 | /organization/wheelabrator-technologies | Wheelabrator Technologies | |Waste Management| | Waste Management | USA | NH | Manchester, New Hampshire | Hampton | /organization/energy-capital-partners | Energy Capital Partners | ... | USA | NJ | Newark | Short Hills | 2014-07-29 | 2014-07 | 2014-Q3 | 2014 | 1940000000 | USD |
13121 | /organization/wild-flavors | WILD Flavors | |Food Processing| | Food Processing | DEU | NaN | Frankfurt | Heidelberg | /organization/archer-daniels-midland-company | Archer Daniels Midland Company | ... | USA | IL | Chicago | Chicago | 2014-07-07 | 2014-07 | 2014-Q3 | 2014 | 3000000000 | USD |
13143 | /organization/wind-river | Wind River | |Software| | Software | USA | CA | SF Bay Area | Alameda | /organization/intel | Intel | ... | USA | CA | SF Bay Area | Santa Clara | 2009-07-17 | 2009-07 | 2009-Q3 | 2009 | 884000000 | USD |
13146 | /organization/windsor-foods | Windsor Foods | |Consumer Goods|Food Processing| | Food Processing | USA | TX | Houston | Houston | /organization/ajinomoto | Ajinomoto | ... | JPN | NaN | Tokyo | Tokyo | 2014-09-10 | 2014-09 | 2014-Q3 | 2014 | 800000000 | USD |
13174 | /organization/wittur | Wittur | |Manufacturing|Public Safety|Heavy Industry| | Heavy Industry | NaN | NaN | NaN | NaN | /organization/bain-capital-2 | Bain Capital | ... | USA | MA | Boston | Boston | 2014-12-23 | 2014-12 | 2014-Q4 | 2014 | 750000000 | USD |
13238 | /organization/wyeth-pharmaceuticals | Wyeth Pharmaceuticals | |Biotechnology|Pharmaceuticals|Health and Well... | Biotechnology | NaN | NaN | NaN | NaN | /organization/nestl | Nestl | ... | CHE | NaN | Vevey | Vevey | 2012-04-01 | 2012-04 | 2012-Q2 | 2012 | 11850000000 | USD |
13239 | /organization/wyeth-pharmaceuticals | Wyeth Pharmaceuticals | |Biotechnology|Pharmaceuticals|Health and Well... | Biotechnology | NaN | NaN | NaN | NaN | /organization/pfizer | Pfizer | ... | USA | NY | New York City | New York | 2009-01-23 | 2009-01 | 2009-Q1 | 2009 | 68000000000 | USD |
13277 | /organization/xircom | Xircom | |Web Hosting| | Web Hosting | USA | CA | Los Angeles | Thousand Oaks | /organization/intel | Intel | ... | USA | CA | SF Bay Area | Santa Clara | 2001-01-15 | 2001-01 | 2001-Q1 | 2001 | 748000000 | USD |
13323 | /organization/yammer | Yammer | |Twitter Applications|Networking|Social Media|... | Enterprise Software | USA | CA | SF Bay Area | San Francisco | /organization/microsoft | Microsoft | ... | USA | WA | Seattle | Redmond | 2012-06-25 | 2012-06 | 2012-Q2 | 2012 | 1200000000 | USD |
13324 | /organization/yankee-candle-company | Yankee Candle Company | |Manufacturing| | Manufacturing | USA | AR | Fayetteville | Rogers | /organization/jarden | Jarden | ... | USA | NY | New York City | Rye | 2013-09-03 | 2013-09 | 2013-Q3 | 2013 | 1750000000 | USD |
13356 | /organization/yoplait | Yoplait | |Food Processing| | Food Processing | USA | MN | Minneapolis | Minneapolis | /organization/generalmills | General Mills | ... | USA | MN | Minneapolis | Minneapolis | 2011-07-01 | 2011-07 | 2011-Q3 | 2011 | 1200000000 | USD |
13357 | /organization/york-risk-services-group | York Risk Services Group | |Insurance| | Insurance | USA | NJ | Newark | Parsippany | /organization/onex | Onex | ... | CAN | ON | Toronto | Toronto | 2014-07-16 | 2014-07 | 2014-Q3 | 2014 | 1330000000 | USD |
13371 | /organization/youtube | YouTube | |Video|Online Rental|Entertainment|Games| | Games | USA | CA | SF Bay Area | San Bruno | /organization/google | ... | USA | CA | SF Bay Area | Mountain View | 2006-10-01 | 2006-10 | 2006-Q4 | 2006 | 1650000000 | USD | |
13396 | /organization/zappos | Zappos | |Curated Web| | Curated Web | USA | NV | Las Vegas | Las Vegas | /organization/amazon | Amazon | ... | USA | WA | Seattle | Seattle | 2009-07-22 | 2009-07 | 2009-Q3 | 2009 | 1200000000 | USD |
13437 | /organization/ziggo | Ziggo | |Information Services|Entertainment|Investment... | Investment Management | NLD | NaN | Utrecht | Utrecht | /organization/liberty-global | Liberty Global | ... | USA | CO | Denver | Englewood | 2014-01-27 | 2014-01 | 2014-Q1 | 2014 | 13700000000 | USD |
537 rows × 22 columns
query = """
select *
from acquisitions a
where price_amount > (
select avg(price_amount)
from acquisitions
);
"""
sql.read_sql(query, con)
company_permalink | company_name | company_category_list | company_market | company_country_code | company_state_code | company_region | company_city | acquirer_permalink | acquirer_name | ... | acquirer_country_code | acquirer_state_code | acquirer_region | acquirer_city | acquired_at | acquired_month | acquired_quarter | acquired_year | price_amount | price_currency_code | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | /organization/21st-century-insurance | 21st Century Insurance | |Finance|Business Services|Insurance| | Business Services | USA | AL | AL - Other | De Armanville | /organization/american-international-group | American International Group | ... | USA | NY | New York City | New York | 2007-09-01 | 2007-09 | 2007-Q3 | 2007 | 749000000 | USD |
1 | /organization/3com | 3Com | |Curated Web| | Curated Web | USA | MA | Boston | Marlborough | /organization/hewlett-packard | Hewlett-Packard | ... | USA | CA | SF Bay Area | Palo Alto | 2009-11-11 | 2009-11 | 2009-Q4 | 2009 | 2700000000 | USD |
2 | /organization/3par | 3PAR | |Services|Cloud Data Services|Software| | Cloud Data Services | USA | CA | SF Bay Area | Fremont | /organization/hewlett-packard | Hewlett-Packard | ... | USA | CA | SF Bay Area | Palo Alto | 2010-09-02 | 2010-09 | 2010-Q3 | 2010 | 2350000000 | USD |
3 | /organization/91-wireless | 91 Wireless | |Mobile| | Mobile | CHN | None | Fuzhou Shi | Fuzhou Shi | /organization/baidu | Baidu | ... | CHN | None | Beijing | Beijing | 2013-07-15 | 2013-07 | 2013-Q3 | 2013 | 1900000000 | USD |
4 | /organization/abc | ABC | |News| | News | None | None | None | None | /organization/the-walt-disney-company | The Walt Disney Company | ... | USA | CA | Los Angeles | Burbank | 1995-08-05 | 1995-08 | 1995-Q3 | 1995 | 19000000000 | USD |
5 | /organization/abovenet | AboveNet | |Information Technology|Internet|Service Provi... | Internet | USA | NY | New York City | White Plains | /organization/zayo-group | Zayo | ... | USA | CO | Denver | Boulder | 2012-03-19 | 2012-03 | 2012-Q1 | 2012 | 2300000000 | USD |
6 | /organization/abraxis-bioscience | Abraxis BioScience | |Pharmaceuticals|Biotechnology| | Biotechnology | USA | CA | Los Angeles | Los Angeles | /organization/celgene | Celgene | ... | USA | NJ | Newark | Summit | 2010-06-30 | 2010-06 | 2010-Q2 | 2010 | 2900000000 | USD |
7 | /organization/accelrys | Accelrys | |Software| | Software | USA | CA | San Diego | San Diego | /organization/dassault | Dassault Systemes | ... | FRA | None | None | None | 2014-01-30 | 2014-01 | 2014-Q1 | 2014 | 750000000 | USD |
8 | /organization/access-midstream-partners-lp-inv... | Access Midstream Partners LP Investments | |Natural Gas Uses| | Natural Gas Uses | USA | OK | Oklahoma City | Oklahoma City | /organization/the-williams-companies | The Williams Companies | ... | USA | OK | OK - Other | Snyder | 2012-12-21 | 2012-12 | 2012-Q4 | 2012 | 2250000000 | USD |
9 | /organization/acclarent | Acclarent | |Medical|Biotechnology| | Biotechnology | USA | CA | SF Bay Area | Menlo Park | /organization/johnson-johnson | Johnson & Johnson | ... | USA | NJ | Newark | New Brunswick | 2009-12-17 | 2009-12 | 2009-Q4 | 2009 | 785000000 | USD |
10 | /organization/admob | AdMob | |Mobile|Advertising| | Advertising | USA | CA | SF Bay Area | Mountain View | /organization/google | ... | USA | CA | SF Bay Area | Mountain View | 2009-11-09 | 2009-11 | 2009-Q4 | 2009 | 750000000 | USD | |
11 | /organization/advanced-computer-software | Advanced Computer Software | |Health Care|Information Technology|Software| | Software | GBR | None | Cobham | Cobham | /organization/vista-equity-partners | Vista Equity Partners | ... | USA | TX | Austin | Austin | 2014-11-25 | 2014-11 | 2014-Q4 | 2014 | 725000000 | GBP |
12 | /organization/advanstar-communications | Advanstar Communications | |Market Research|Events|Business Services| | Events | USA | CA | Los Angeles | Santa Monica | /organization/ubm-electronics | UBM Electronics | ... | USA | CA | SF Bay Area | San Francisco | 2014-10-01 | 2014-10 | 2014-Q4 | 2014 | 972000000 | USD |
13 | /organization/advo | Advo | None | None | None | None | None | None | /organization/valassis | Valassis | ... | USA | MI | Detroit | Livonia | 2006-07-06 | 2006-07 | 2006-Q3 | 2006 | 1300000000 | USD |
14 | /organization/aeluros | Aeluros | |Semiconductors| | Semiconductors | USA | CA | SF Bay Area | Mountain View | /organization/broadcom | Broadcom | ... | USA | CA | Anaheim | Irvine | 2012-02-17 | 2012-02 | 2012-Q1 | 2012 | 3700000000 | USD |
15 | /organization/affiliated-computer-services | Affiliated Computer Services | |Consulting| | Consulting | USA | TX | Dallas | Dallas | /organization/xerox | Xerox | ... | USA | AL | AL - Other | Normal | 2009-09-28 | 2009-09 | 2009-Q3 | 2009 | 5750000000 | USD |
16 | /organization/airwatch | AirWatch | |Mobile Devices|Mobile| | Mobile | USA | GA | Atlanta | Atlanta | /organization/vmware | VMware | ... | USA | CA | SF Bay Area | Palo Alto | 2014-01-22 | 2014-01 | 2014-Q1 | 2014 | 1540000000 | USD |
17 | /organization/aktiv-kapital | Aktiv Kapital | |Finance| | Finance | None | None | None | None | /organization/portfolio-recovery-associates | Portfolio Recovery Associates | ... | USA | VA | Norfolk - Virginia Beach | Norfolk | 2014-02-20 | 2014-02 | 2014-Q1 | 2014 | 1300000000 | USD |
18 | /organization/alabama-gas-corp | Alabama Gas Corp | None | None | USA | AL | Birmingham | Birmingham | /organization/laclede-group | Laclede Group | ... | USA | MO | St. Louis | St Louis | 2014-04-07 | 2014-04 | 2014-Q2 | 2014 | 1340000000 | USD |
19 | /organization/algeta | Algeta | |Biotechnology| | Biotechnology | NOR | None | Oslo | Oslo | /organization/bayer-ag-germany | Bayer AG | ... | DEU | None | Leverkusen | Leverkusen | 2013-12-19 | 2013-12 | 2013-Q4 | 2013 | 2900000000 | USD |
20 | /organization/alios-biopharma | Alios BioPharma | |Biotechnology| | Biotechnology | USA | CA | SF Bay Area | South San Francisco | /organization/johnson-johnson | Johnson & Johnson | ... | USA | NJ | Newark | New Brunswick | 2014-09-30 | 2014-09 | 2014-Q3 | 2014 | 1750000000 | USD |
21 | /organization/allergan | Allergan | |Medical|Pharmaceuticals|Biotechnology| | Medical | USA | CA | Anaheim | Irvine | /organization/actavis | Actavis | ... | USA | NJ | Newark | Parsippany | 2014-11-16 | 2014-11 | 2014-Q4 | 2014 | 65500000000 | USD |
22 | /organization/adl-media-inc | AllMedia Inc | |Advertising| | Advertising | USA | IA | IA - Other | Plano | /organization/liberty-global | Liberty Global | ... | USA | CO | Denver | Englewood | 2014-05-08 | 2014-05 | 2014-Q2 | 2014 | 930000000 | USD |
23 | /organization/alltel | Alltel | |Public Relations| | Public Relations | USA | AR | Little Rock | Little Rock | /organization/verizon | Verizon Communications | ... | USA | NY | New York City | New York | 2008-06-05 | 2008-06 | 2008-Q2 | 2008 | 28100000000 | USD |
24 | /organization/altalink | AltaLink | |Local Businesses|Energy|Utilities| | Utilities | CAN | AB | Calgary | Calgary | /organization/midamerican-energy | Berkshire Hathaway Energy | ... | USA | IA | Des Moines | Des Moines | 2014-05-02 | 2014-05 | 2014-Q2 | 2014 | 3520000000 | USD |
25 | /organization/amcol-international | AMCOL International | |Manufacturing| | Manufacturing | USA | IL | Chicago | Hoffman Estates | /organization/minerals-technologies | Minerals Technologies | ... | USA | NY | New York City | New York | 2014-03-10 | 2014-03 | 2014-Q1 | 2014 | 1700000000 | USD |
26 | /organization/americanexpress | American Express | |Finance| | Finance | USA | NY | New York City | New York | /organization/standard-chartered-bank | Standard Chartered Bank | ... | GBR | None | London | London | 2008-02-28 | 2008-02 | 2008-Q1 | 2008 | 823000000 | USD |
27 | /organization/american-petroleum-tankers | American Petroleum Tankers | None | None | USA | PA | Philadelphia | Plymouth Meeting | /organization/kinder-morgan | Kinder Morgan Energy | ... | USA | TX | Houston | Houston | 2013-12-23 | 2013-12 | 2013-Q4 | 2013 | 962000000 | USD |
28 | /organization/american-realty-capital-healthca... | American Realty Capital Healthcare Trust | None | None | USA | MA | Boston | Boston | /organization/ventas | Ventas | ... | USA | IL | Chicago | Chicago | 2014-06-02 | 2014-06 | 2014-Q2 | 2014 | 2600000000 | USD |
29 | /organization/ami-semiconductor | AMI Semiconductor | |Electronics|Manufacturing|Design| | Design | USA | ID | Idaho Falls | Pocatello | /organization/on-semiconductor | ON Semiconductor | ... | USA | AZ | Phoenix | Phoenix | 2007-12-14 | 2007-12 | 2007-Q4 | 2007 | 915000000 | USD |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
507 | /organization/viawest | ViaWest | |Data Centers|Services|Web Hosting| | Web Hosting | USA | CO | Denver | Greenwood Village | /organization/shaw-communications | Shaw Communications | ... | CAN | AB | Calgary | Calgary | 2014-08-04 | 2014-08 | 2014-Q3 | 2014 | 1200000000 | USD |
508 | /organization/viber-media | Viber Media | |Android|iPhone|VoIP|Messaging| | Android | CYP | None | Cyprus | Limassol | /organization/rakuten | Rakuten | ... | JPN | None | Tokyo | Tokyo | 2014-02-14 | 2014-02 | 2014-Q1 | 2014 | 900000000 | USD |
509 | /organization/video-gaming-technologies | Video Gaming Technologies | |Video Games| | Video Games | USA | TN | Nashville | Franklin | /organization/aristocrat-technologies-inc | Aristocrat Technologies, Inc | ... | AUS | None | AUS - Other | None | 2014-07-07 | 2014-07 | 2014-Q3 | 2014 | 1300000000 | USD |
510 | /organization/vion-food-group | Vion Food Group | |Hospitality| | Hospitality | NLD | None | Eindhoven | Eindhoven | /organization/darling-international | Darling Ingredients | ... | USA | TX | Dallas | Irving | 2013-10-07 | 2013-10 | 2013-Q4 | 2013 | 1600000000 | EUR |
511 | /organization/viropharma | ViroPharma | |Pharmaceuticals|Biotechnology| | Biotechnology | USA | PA | Philadelphia | Exton | /organization/shire | Shire | ... | IRL | None | Dublin | Dublin | 2013-11-10 | 2013-11 | 2013-Q4 | 2013 | 4200000000 | USD |
512 | /organization/vizada | Vizada | |DOD/Military|Mobile| | Mobile | USA | MD | Washington, D.C. | Rockville | /organization/eads-astrium | EADS Astrium | ... | USA | TX | Houston | Houston | 2011-08-03 | 2011-08 | 2011-Q3 | 2011 | 960000000 | USD |
513 | /organization/vk | Vkontake | |Communities|Social Network Media|Social Media| | Communities | RUS | None | St. Petersburg | Saint Petersburg | /organization/mail-ru | Mail.Ru Group | ... | RUS | None | Moscow | Moscow | 2014-09-16 | 2014-09 | 2014-Q3 | 2014 | 1470000000 | USD |
514 | /organization/volvo-rents-construction | Volvo Rents Construction | None | None | CHE | None | CHE - Other | Eison | /organization/platinum-equity-llc | Platinum Equity | ... | USA | CA | Los Angeles | Beverly Hills | 2013-12-10 | 2013-12 | 2013-Q4 | 2013 | 1100000000 | USD |
515 | /organization/warner-chilcott | Warner Chilcott | |Pharmaceuticals|Biotechnology| | Biotechnology | USA | NJ | Newark | Rockaway | /organization/actavis | Actavis | ... | USA | NJ | Newark | Parsippany | 2013-05-20 | 2013-05 | 2013-Q2 | 2013 | 8500000000 | USD |
516 | /organization/waze | Waze | |Navigation|Transportation| | Transportation | USA | CA | SF Bay Area | Palo Alto | /organization/google | ... | USA | CA | SF Bay Area | Mountain View | 2013-06-11 | 2013-06 | 2013-Q2 | 2013 | 996000000 | USD | |
517 | /organization/webex | WebEx | |Curated Web| | Curated Web | USA | CA | SF Bay Area | Santa Clara | /organization/cisco | Cisco | ... | USA | CA | SF Bay Area | San Jose | 2007-03-15 | 2007-03 | 2007-Q1 | 2007 | 3200000000 | USD |
518 | /organization/webtrends | Webtrends | |SEO|Curated Web| | Curated Web | USA | OR | Portland, Oregon | Portland | /organization/netiq | NetIQ | ... | USA | CA | SF Bay Area | San Jose | 2001-01-17 | 2001-01 | 2001-Q1 | 2001 | 1000000000 | USD |
519 | /organization/western-gas-resources | Western Gas Resources | |Clean Energy|Oil| | Clean Energy | USA | TX | Seminole | Seminole | /organization/anadarko-petroleum-corporation | Anadarko Petroleum Corporation | ... | USA | TX | Seminole | Seminole | 2006-06-01 | 2006-06 | 2006-Q2 | 2006 | 4800000000 | USD |
520 | /organization/weyerhaeuser | Weyerhaeuser | |Manufacturing| | Manufacturing | USA | WA | Seattle | Federal Way | /organization/tri-pointe-homes | Tri Pointe Homes | ... | USA | CA | SF Bay Area | San Ramon | 2013-11-04 | 2013-11 | 2013-Q4 | 2013 | 2700000000 | USD |
521 | /organization/whatsapp | |Messaging| | Messaging | USA | CA | SF Bay Area | Santa Clara | /organization/facebook | ... | USA | CA | SF Bay Area | Menlo Park | 2014-02-19 | 2014-02 | 2014-Q1 | 2014 | 19000000000 | USD | ||
522 | /organization/wheelabrator-technologies | Wheelabrator Technologies | |Waste Management| | Waste Management | USA | NH | Manchester, New Hampshire | Hampton | /organization/energy-capital-partners | Energy Capital Partners | ... | USA | NJ | Newark | Short Hills | 2014-07-29 | 2014-07 | 2014-Q3 | 2014 | 1940000000 | USD |
523 | /organization/wild-flavors | WILD Flavors | |Food Processing| | Food Processing | DEU | None | Frankfurt | Heidelberg | /organization/archer-daniels-midland-company | Archer Daniels Midland Company | ... | USA | IL | Chicago | Chicago | 2014-07-07 | 2014-07 | 2014-Q3 | 2014 | 3000000000 | USD |
524 | /organization/wind-river | Wind River | |Software| | Software | USA | CA | SF Bay Area | Alameda | /organization/intel | Intel | ... | USA | CA | SF Bay Area | Santa Clara | 2009-07-17 | 2009-07 | 2009-Q3 | 2009 | 884000000 | USD |
525 | /organization/windsor-foods | Windsor Foods | |Consumer Goods|Food Processing| | Food Processing | USA | TX | Houston | Houston | /organization/ajinomoto | Ajinomoto | ... | JPN | None | Tokyo | Tokyo | 2014-09-10 | 2014-09 | 2014-Q3 | 2014 | 800000000 | USD |
526 | /organization/wittur | Wittur | |Manufacturing|Public Safety|Heavy Industry| | Heavy Industry | None | None | None | None | /organization/bain-capital-2 | Bain Capital | ... | USA | MA | Boston | Boston | 2014-12-23 | 2014-12 | 2014-Q4 | 2014 | 750000000 | USD |
527 | /organization/wyeth-pharmaceuticals | Wyeth Pharmaceuticals | |Biotechnology|Pharmaceuticals|Health and Well... | Biotechnology | None | None | None | None | /organization/nestl | Nestl | ... | CHE | None | Vevey | Vevey | 2012-04-01 | 2012-04 | 2012-Q2 | 2012 | 11850000000 | USD |
528 | /organization/wyeth-pharmaceuticals | Wyeth Pharmaceuticals | |Biotechnology|Pharmaceuticals|Health and Well... | Biotechnology | None | None | None | None | /organization/pfizer | Pfizer | ... | USA | NY | New York City | New York | 2009-01-23 | 2009-01 | 2009-Q1 | 2009 | 68000000000 | USD |
529 | /organization/xircom | Xircom | |Web Hosting| | Web Hosting | USA | CA | Los Angeles | Thousand Oaks | /organization/intel | Intel | ... | USA | CA | SF Bay Area | Santa Clara | 2001-01-15 | 2001-01 | 2001-Q1 | 2001 | 748000000 | USD |
530 | /organization/yammer | Yammer | |Twitter Applications|Networking|Social Media|... | Enterprise Software | USA | CA | SF Bay Area | San Francisco | /organization/microsoft | Microsoft | ... | USA | WA | Seattle | Redmond | 2012-06-25 | 2012-06 | 2012-Q2 | 2012 | 1200000000 | USD |
531 | /organization/yankee-candle-company | Yankee Candle Company | |Manufacturing| | Manufacturing | USA | AR | Fayetteville | Rogers | /organization/jarden | Jarden | ... | USA | NY | New York City | Rye | 2013-09-03 | 2013-09 | 2013-Q3 | 2013 | 1750000000 | USD |
532 | /organization/yoplait | Yoplait | |Food Processing| | Food Processing | USA | MN | Minneapolis | Minneapolis | /organization/generalmills | General Mills | ... | USA | MN | Minneapolis | Minneapolis | 2011-07-01 | 2011-07 | 2011-Q3 | 2011 | 1200000000 | USD |
533 | /organization/york-risk-services-group | York Risk Services Group | |Insurance| | Insurance | USA | NJ | Newark | Parsippany | /organization/onex | Onex | ... | CAN | ON | Toronto | Toronto | 2014-07-16 | 2014-07 | 2014-Q3 | 2014 | 1330000000 | USD |
534 | /organization/youtube | YouTube | |Video|Online Rental|Entertainment|Games| | Games | USA | CA | SF Bay Area | San Bruno | /organization/google | ... | USA | CA | SF Bay Area | Mountain View | 2006-10-01 | 2006-10 | 2006-Q4 | 2006 | 1650000000 | USD | |
535 | /organization/zappos | Zappos | |Curated Web| | Curated Web | USA | NV | Las Vegas | Las Vegas | /organization/amazon | Amazon | ... | USA | WA | Seattle | Seattle | 2009-07-22 | 2009-07 | 2009-Q3 | 2009 | 1200000000 | USD |
536 | /organization/ziggo | Ziggo | |Information Services|Entertainment|Investment... | Investment Management | NLD | None | Utrecht | Utrecht | /organization/liberty-global | Liberty Global | ... | USA | CO | Denver | Englewood | 2014-01-27 | 2014-01 | 2014-Q1 | 2014 | 13700000000 | USD |
537 rows × 22 columns
companies.loc[0]
permalink /organization/-fame name #fame homepage_url http://livfame.com category_list |Media| market Media status operating country_code IND state_code NaN region Mumbai city Mumbai funding_rounds 1 founded_at nan founded_month NaN founded_quarter NaN founded_year NaN first_funding_at 2015-01-05 last_funding_at 2015-01-05 funding_total_usd 1e+07 Name: 0, dtype: object
SEMI JOIN
or ANTI JOIN
rounds.loc[0]
company_permalink /organization/-fame company_name #fame company_category_list |Media| company_market Media company_country_code IND company_state_code NaN company_region Mumbai company_city Mumbai funding_round_permalink /funding-round/9a01d05418af9f794eebff7ace91f638 funding_round_type venture funding_round_code B funded_at 2015-01-05 funded_month 2015-01 funded_quarter 2015-Q1 funded_year 2015 raised_amount_usd 10,000,000 Unnamed: 16 NaN Name: 0, dtype: object
companies.loc[0]
permalink /organization/-fame name #fame homepage_url http://livfame.com category_list |Media| market Media status operating country_code IND state_code NaN region Mumbai city Mumbai funding_rounds 1 founded_at nan founded_month NaN founded_quarter NaN founded_year NaN first_funding_at 2015-01-05 last_funding_at 2015-01-05 funding_total_usd 1e+07 Name: 0, dtype: object
len(companies)
54292
query = """
select status, count(*) as count
from companies c
where not exists (
select 1
from rounds r
where c.permalink = r.company_permalink
and r.funding_round_type = 'venture'
)
and c.founded_at > '2010-01-01'
group by 1;
"""
sql.read_sql(query, con)
status | count | |
---|---|---|
0 | None | 250 |
1 | operating | 12309 |
2 | closed | 542 |
3 | acquired | 288 |
QUERY PLAN
----------------------------------------------------------------------------------
HashAggregate (cost=8454.36..8454.38 rows=2 width=9)
-> Hash Semi Join (cost=4466.18..8388.91 rows=13090 width=9)
Hash Cond: ((c.permalink)::text = (r.company_permalink)::text)
-> Seq Scan on companies c (cost=0.00..2143.65 rows=18526 width=36)
Filter: (founded_at > '2010-01-01'::date)
-> Hash (cost=3635.51..3635.51 rows=42933 width=26)
-> Seq Scan on rounds r (cost=0.00..3635.51 rows=42933 width=26)
Filter: ((funding_round_type)::text = 'venture'::text)
(8 rows)
rounds[:20]
company_permalink | company_name | company_category_list | company_market | company_country_code | company_state_code | company_region | company_city | funding_round_permalink | funding_round_type | funding_round_code | funded_at | funded_month | funded_quarter | funded_year | raised_amount_usd | Unnamed: 16 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | /organization/-fame | #fame | |Media| | Media | IND | NaN | Mumbai | Mumbai | /funding-round/9a01d05418af9f794eebff7ace91f638 | venture | B | 2015-01-05 | 2015-01 | 2015-Q1 | 2015 | 10,000,000 | NaN |
1 | /organization/hashoff | #HASHOFF | |Digital Media|Internet|Social Media| | Digital Media | USA | CO | Denver | Denver | /funding-round/669d6203c0374e6cf0e8d10f75ba0b8a | debt_financing | NaN | 2014-12-08 | 2014-12 | 2014-Q4 | 2014 | 455,000 | NaN |
2 | /organization/waywire | #waywire | |Entertainment|Politics|Social Media|News| | News | USA | NY | New York City | New York | /funding-round/cc409188fa2b63482bd9008f682c2efa | seed | NaN | 2012-06-30 | 2012-06 | 2012-Q2 | 2012 | 1,750,000 | NaN |
3 | /organization/tv-communications | &TV Communications | |Games| | Games | USA | CA | Los Angeles | Los Angeles | /funding-round/86d22afc65107b6941e6c43c671ecbb8 | venture | NaN | 2010-06-04 | 2010-06 | 2010-Q2 | 2010 | 1,000,000 | NaN |
4 | /organization/tv-communications | &TV Communications | |Games| | Games | USA | CA | Los Angeles | Los Angeles | /funding-round/59a3669a64e39360c2b939300bcda162 | venture | NaN | 2010-09-23 | 2010-09 | 2010-Q3 | 2010 | 3,000,000 | NaN |
5 | /organization/rock-your-paper | 'Rock' Your Paper | |Publishing|Education| | Publishing | EST | NaN | Tallinn | Tallinn | /funding-round/f06b420775f7cb6c1541a9db526534bb | seed | NaN | 2012-08-09 | 2012-08 | 2012-Q3 | 2012 | 40,000 | NaN |
6 | /organization/in-touch-network | (In)Touch Network | |Electronics|Guides|Coffee|Restaurants|Music|i... | Electronics | GBR | NaN | London | London | /funding-round/33c3f135f05d7b734b8d7b7c8ae82647 | seed | NaN | 2011-04-01 | 2011-04 | 2011-Q2 | 2011 | 1,500,000 | NaN |
7 | /organization/r-ranch-and-mine | -R- Ranch and Mine | |Tourism|Entertainment|Games| | Entertainment | USA | TX | Dallas | Fort Worth | /funding-round/029720f7eeb218f51c43df5155671472 | equity_crowdfunding | NaN | 2014-08-17 | 2014-08 | 2014-Q3 | 2014 | 10,000 | NaN |
8 | /organization/r-ranch-and-mine | -R- Ranch and Mine | |Tourism|Entertainment|Games| | Entertainment | USA | TX | Dallas | Fort Worth | /funding-round/766b0bcead9ca8560af5e1ade579fb7f | equity_crowdfunding | NaN | 2014-09-26 | 2014-09 | 2014-Q3 | 2014 | 50,000 | NaN |
9 | /organization/club-domains | .Club Domains | |Software| | Software | USA | FL | Ft. Lauderdale | Oakland Park | /funding-round/aee671c9707f8278a9544c8ae37650e0 | venture | B | 2013-05-31 | 2013-05 | 2013-Q2 | 2013 | 7,000,000 | NaN |
10 | /organization/fox-networks | .Fox Networks | |Advertising| | Advertising | NaN | NaN | NaN | NaN | /funding-round/69a1536a6f4506538afd7aa7241ddbe1 | undisclosed | NaN | 2007-01-16 | 2007-01 | 2007-Q1 | 2007 | 4,912,393 | NaN |
11 | /organization/0-6-com | 0-6.com | |Curated Web| | Curated Web | NaN | NaN | NaN | NaN | /funding-round/5727accaeaa57461bd22a9bdd945382d | venture | A | 2008-03-19 | 2008-03 | 2008-Q1 | 2008 | 2,000,000 | NaN |
12 | /organization/004-technologies | 004 Technologies | |Software| | Software | USA | IL | Springfield, Illinois | Champaign | /funding-round/1278dd4e6a37fa4b7d7e06c21b3c1830 | venture | NaN | 2014-07-24 | 2014-07 | 2014-Q3 | 2014 | NaN | NaN |
13 | /organization/01games-technology | 01Games Technology | |Games| | Games | HKG | NaN | Hong Kong | Hong Kong | /funding-round/7d53696f2b4f607a2f2a8cbb83d01839 | seed | NaN | 2014-07-01 | 2014-07 | 2014-Q3 | 2014 | 41,250 | NaN |
14 | /organization/1-2-3-listo | 1,2,3 Listo | |E-Commerce| | E-Commerce | CHL | NaN | Santiago | Las Condes | /funding-round/6de4609e894495105bc791ed64361288 | seed | NaN | 2013-02-18 | 2013-02 | 2013-Q1 | 2013 | 40,000 | NaN |
15 | /organization/1-4-all | 1-4 All | |Entertainment|Games|Software| | Software | USA | NC | NC - Other | Connellys Springs | /funding-round/e97a192e13ea0ee3c4f71136b4f3ec16 | equity_crowdfunding | NaN | 2013-04-21 | 2013-04 | 2013-Q2 | 2013 | NaN | NaN |
16 | /organization/1-800-dentist | 1-800-DENTIST | |Health and Wellness| | Health and Wellness | USA | CA | Los Angeles | Los Angeles | /funding-round/5274aacc211163fc7c86539ce94bbacc | undisclosed | NaN | 2010-08-19 | 2010-08 | 2010-Q3 | 2010 | NaN | NaN |
17 | /organization/1-800-doctors | 1-800-DOCTORS | |Health and Wellness| | Health and Wellness | USA | NJ | Newark | Iselin | /funding-round/9eb8c7790a0c200d79e75785d1c4aa12 | convertible_note | NaN | 2011-03-02 | 2011-03 | 2011-Q1 | 2011 | 1,750,000 | NaN |
18 | /organization/1-618-technology | 1.618 Technology | |Real Estate| | Real Estate | USA | FL | Orlando | Orlando | /funding-round/83b8f4c7d37ecef5e001a5e953bf461a | equity_crowdfunding | NaN | 2014-01-22 | 2014-01 | 2014-Q1 | 2014 | NaN | NaN |
19 | /organization/10-minutes-with | 10 Minutes With | |Education| | Education | GBR | NaN | London | London | /funding-round/f245a74b4c54610ae843e17bdf4d1113 | seed | NaN | 2013-01-01 | 2013-01 | 2013-Q1 | 2013 | 400,000 | NaN |
companies_with_round = rounds.company_permalink.unique()
companies.permalink.isin(companies_with_round)
0 True 1 True 2 True 3 True 4 True 5 True 6 True 7 True 8 True 9 True 10 True 11 True 12 True 13 True 14 True ... 54277 False 54278 False 54279 False 54280 False 54281 False 54282 False 54283 False 54284 False 54285 False 54286 False 54287 False 54288 False 54289 False 54290 False 54291 False Name: permalink, Length: 54292, dtype: bool
query = """
select year, hour, occs,
occs / sum(occs) OVER (PARTITION BY YEAR) as share
from (
select extract(year from date) as year,
extract(hour from time) as hour,
count(*) as occs
from crimes
group by 1, 2
) t0
"""
results = sql.read_sql(query, con)
results
year | hour | occs | share | |
---|---|---|---|---|
0 | 2014 | 0 | 7217 | 0.048076 |
1 | 2014 | 1 | 4353 | 0.028998 |
2 | 2014 | 2 | 3642 | 0.024261 |
3 | 2014 | 3 | 2537 | 0.016900 |
4 | 2014 | 4 | 1709 | 0.011385 |
5 | 2014 | 5 | 1636 | 0.010898 |
6 | 2014 | 6 | 2195 | 0.014622 |
7 | 2014 | 7 | 3553 | 0.023668 |
8 | 2014 | 8 | 5124 | 0.034134 |
9 | 2014 | 9 | 5844 | 0.038930 |
10 | 2014 | 10 | 6210 | 0.041368 |
11 | 2014 | 11 | 6464 | 0.043060 |
12 | 2014 | 12 | 8803 | 0.058641 |
13 | 2014 | 13 | 7122 | 0.047443 |
14 | 2014 | 14 | 7373 | 0.049115 |
15 | 2014 | 15 | 8111 | 0.054032 |
16 | 2014 | 16 | 8671 | 0.057762 |
17 | 2014 | 17 | 9389 | 0.062545 |
18 | 2014 | 18 | 9865 | 0.065716 |
19 | 2014 | 19 | 9087 | 0.060533 |
20 | 2014 | 20 | 8452 | 0.056303 |
21 | 2014 | 21 | 7743 | 0.051580 |
22 | 2014 | 22 | 7924 | 0.052786 |
23 | 2014 | 23 | 7092 | 0.047243 |
24 | 2015 | 0 | 504 | 0.042014 |
25 | 2015 | 1 | 420 | 0.035012 |
26 | 2015 | 2 | 332 | 0.027676 |
27 | 2015 | 3 | 224 | 0.018673 |
28 | 2015 | 4 | 156 | 0.013004 |
29 | 2015 | 5 | 143 | 0.011921 |
30 | 2015 | 6 | 179 | 0.014922 |
31 | 2015 | 7 | 320 | 0.026676 |
32 | 2015 | 8 | 403 | 0.033595 |
33 | 2015 | 9 | 530 | 0.044181 |
34 | 2015 | 10 | 497 | 0.041430 |
35 | 2015 | 11 | 533 | 0.044431 |
36 | 2015 | 12 | 596 | 0.049683 |
37 | 2015 | 13 | 539 | 0.044932 |
38 | 2015 | 14 | 576 | 0.048016 |
39 | 2015 | 15 | 627 | 0.052267 |
40 | 2015 | 16 | 681 | 0.056769 |
41 | 2015 | 17 | 691 | 0.057603 |
42 | 2015 | 18 | 819 | 0.068273 |
43 | 2015 | 19 | 766 | 0.063855 |
44 | 2015 | 20 | 694 | 0.057853 |
45 | 2015 | 21 | 634 | 0.052851 |
46 | 2015 | 22 | 616 | 0.051350 |
47 | 2015 | 23 | 516 | 0.043014 |
keys = [crimes.date.dt.year,
crimes.time.map(lambda x: x.hour)]
counts = crimes.groupby(keys).size()
counts
time 2014 0 7217 1 4353 2 3642 3 2537 4 1709 5 1636 6 2195 7 3553 8 5124 9 5844 10 6210 11 6464 12 8803 13 7122 14 7373 15 8111 16 8671 17 9389 18 9865 19 9087 20 8452 21 7743 22 7924 23 7092 2015 0 504 1 420 2 332 3 224 4 156 5 143 6 179 7 320 8 403 9 530 10 497 11 533 12 596 13 539 14 576 15 627 16 681 17 691 18 819 19 766 20 694 21 634 22 616 23 516 dtype: int64
keys = [crimes.date.dt.year,
crimes.time.map(lambda x: x.hour)]
counts = crimes.groupby(keys).size()
counts.name = 'count'
counts.index.names = ['year', 'hour']
counts2 = counts.reset_index()
counts2
year | hour | count | |
---|---|---|---|
0 | 2014 | 0 | 7217 |
1 | 2014 | 1 | 4353 |
2 | 2014 | 2 | 3642 |
3 | 2014 | 3 | 2537 |
4 | 2014 | 4 | 1709 |
5 | 2014 | 5 | 1636 |
6 | 2014 | 6 | 2195 |
7 | 2014 | 7 | 3553 |
8 | 2014 | 8 | 5124 |
9 | 2014 | 9 | 5844 |
10 | 2014 | 10 | 6210 |
11 | 2014 | 11 | 6464 |
12 | 2014 | 12 | 8803 |
13 | 2014 | 13 | 7122 |
14 | 2014 | 14 | 7373 |
15 | 2014 | 15 | 8111 |
16 | 2014 | 16 | 8671 |
17 | 2014 | 17 | 9389 |
18 | 2014 | 18 | 9865 |
19 | 2014 | 19 | 9087 |
20 | 2014 | 20 | 8452 |
21 | 2014 | 21 | 7743 |
22 | 2014 | 22 | 7924 |
23 | 2014 | 23 | 7092 |
24 | 2015 | 0 | 504 |
25 | 2015 | 1 | 420 |
26 | 2015 | 2 | 332 |
27 | 2015 | 3 | 224 |
28 | 2015 | 4 | 156 |
29 | 2015 | 5 | 143 |
30 | 2015 | 6 | 179 |
31 | 2015 | 7 | 320 |
32 | 2015 | 8 | 403 |
33 | 2015 | 9 | 530 |
34 | 2015 | 10 | 497 |
35 | 2015 | 11 | 533 |
36 | 2015 | 12 | 596 |
37 | 2015 | 13 | 539 |
38 | 2015 | 14 | 576 |
39 | 2015 | 15 | 627 |
40 | 2015 | 16 | 681 |
41 | 2015 | 17 | 691 |
42 | 2015 | 18 | 819 |
43 | 2015 | 19 | 766 |
44 | 2015 | 20 | 694 |
45 | 2015 | 21 | 634 |
46 | 2015 | 22 | 616 |
47 | 2015 | 23 | 516 |
def add_share(x):
x['share'] = x['count'] / x['count'].sum()
return x
counts2.groupby('year').apply(add_share)
year | hour | count | share | |
---|---|---|---|---|
0 | 2014 | 0 | 7217 | 0.048076 |
1 | 2014 | 1 | 4353 | 0.028998 |
2 | 2014 | 2 | 3642 | 0.024261 |
3 | 2014 | 3 | 2537 | 0.016900 |
4 | 2014 | 4 | 1709 | 0.011385 |
5 | 2014 | 5 | 1636 | 0.010898 |
6 | 2014 | 6 | 2195 | 0.014622 |
7 | 2014 | 7 | 3553 | 0.023668 |
8 | 2014 | 8 | 5124 | 0.034134 |
9 | 2014 | 9 | 5844 | 0.038930 |
10 | 2014 | 10 | 6210 | 0.041368 |
11 | 2014 | 11 | 6464 | 0.043060 |
12 | 2014 | 12 | 8803 | 0.058641 |
13 | 2014 | 13 | 7122 | 0.047443 |
14 | 2014 | 14 | 7373 | 0.049115 |
15 | 2014 | 15 | 8111 | 0.054032 |
16 | 2014 | 16 | 8671 | 0.057762 |
17 | 2014 | 17 | 9389 | 0.062545 |
18 | 2014 | 18 | 9865 | 0.065716 |
19 | 2014 | 19 | 9087 | 0.060533 |
20 | 2014 | 20 | 8452 | 0.056303 |
21 | 2014 | 21 | 7743 | 0.051580 |
22 | 2014 | 22 | 7924 | 0.052786 |
23 | 2014 | 23 | 7092 | 0.047243 |
24 | 2015 | 0 | 504 | 0.042014 |
25 | 2015 | 1 | 420 | 0.035012 |
26 | 2015 | 2 | 332 | 0.027676 |
27 | 2015 | 3 | 224 | 0.018673 |
28 | 2015 | 4 | 156 | 0.013004 |
29 | 2015 | 5 | 143 | 0.011921 |
30 | 2015 | 6 | 179 | 0.014922 |
31 | 2015 | 7 | 320 | 0.026676 |
32 | 2015 | 8 | 403 | 0.033595 |
33 | 2015 | 9 | 530 | 0.044181 |
34 | 2015 | 10 | 497 | 0.041430 |
35 | 2015 | 11 | 533 | 0.044431 |
36 | 2015 | 12 | 596 | 0.049683 |
37 | 2015 | 13 | 539 | 0.044932 |
38 | 2015 | 14 | 576 | 0.048016 |
39 | 2015 | 15 | 627 | 0.052267 |
40 | 2015 | 16 | 681 | 0.056769 |
41 | 2015 | 17 | 691 | 0.057603 |
42 | 2015 | 18 | 819 | 0.068273 |
43 | 2015 | 19 | 766 | 0.063855 |
44 | 2015 | 20 | 694 | 0.057853 |
45 | 2015 | 21 | 634 | 0.052851 |
46 | 2015 | 22 | 616 | 0.051350 |
47 | 2015 | 23 | 516 | 0.043014 |
rounds[:1].T
0 | |
---|---|
company_permalink | /organization/-fame |
company_name | #fame |
company_category_list | |Media| |
company_market | Media |
company_country_code | IND |
company_state_code | NaN |
company_region | Mumbai |
company_city | Mumbai |
funding_round_permalink | /funding-round/9a01d05418af9f794eebff7ace91f638 |
funding_round_type | venture |
funding_round_code | B |
funded_at | 2015-01-05 |
funded_month | 2015-01 |
funded_quarter | 2015-Q1 |
funded_year | 2015 |
raised_amount_usd | 10,000,000 |
Unnamed: 16 | NaN |
Use ORDER BY in the window clause to achieve cumulative statistics
query = """
select company_permalink, funded_at,
sum(case when raised_amount_usd is null then 0 else raised_amount_usd end)
OVER (PARTITION BY company_permalink ORDER BY funded_at)
as cumulative_raised
from rounds
order by company_permalink;
"""
sql.read_sql(query, con)
company_permalink | funded_at | cumulative_raised | |
---|---|---|---|
0 | /organization/-fame | 2015-01-05 | 10000000 |
1 | /organization/-qounter | 2014-03-01 | 700000 |
2 | /organization/-qounter | 2014-10-14 | 700000 |
3 | /organization/0-6-com | 2008-03-19 | 2000000 |
4 | /organization/004-technologies | 2014-07-24 | 0 |
5 | /organization/01games-technology | 2014-07-01 | 41250 |
6 | /organization/0ndine-biomedical-inc | 2009-09-11 | 43360 |
7 | /organization/0ndine-biomedical-inc | 2009-12-21 | 762851 |
8 | /organization/0xdata | 2013-01-03 | 1700000 |
9 | /organization/0xdata | 2014-07-19 | 10600000 |
10 | /organization/1 | 2011-07-20 | 1000050 |
11 | /organization/1 | 2013-02-05 | 1000050 |
12 | /organization/1 | 2014-02-05 | 1150050 |
13 | /organization/1-2-3-listo | 2013-02-18 | 40000 |
14 | /organization/1-4-all | 2013-04-21 | 0 |
15 | /organization/1-618-technology | 2014-01-22 | 0 |
16 | /organization/1-800-dentist | 2010-08-19 | 0 |
17 | /organization/1-800-doctors | 2011-03-02 | 1750000 |
18 | /organization/10-20-media | 2009-06-18 | 500000 |
19 | /organization/10-20-media | 2010-03-30 | 750000 |
20 | /organization/10-20-media | 2011-01-11 | 1550000 |
21 | /organization/10-20-media | 2011-12-28 | 2050000 |
22 | /organization/10-minutes-with | 2013-01-01 | 400000 |
23 | /organization/10-minutes-with | 2014-10-09 | 4400000 |
24 | /organization/1000-corks | 2011-08-23 | 40000 |
25 | /organization/1000-markets | 2009-05-15 | 500000 |
26 | /organization/1000chi | 2010-04-01 | 43923865 |
27 | /organization/1000jobboersen-de | 2011-09-16 | 0 |
28 | /organization/1000memories | 2010-01-01 | 15000 |
29 | /organization/1000memories | 2011-02-16 | 2535000 |
... | ... | ... | ... |
87131 | /organization/zynga | 2010-04-24 | 70213000 |
87132 | /organization/zynga | 2010-06-14 | 370213000 |
87133 | /organization/zynga | 2010-10-12 | 376550786 |
87134 | /organization/zynga | 2011-02-18 | 866550786 |
87135 | /organization/zyngenia | 2010-09-09 | 25000000 |
87136 | /organization/zynstra | 2012-03-03 | 225000 |
87137 | /organization/zynstra | 2012-09-18 | 2550000 |
87138 | /organization/zynstra | 2013-09-12 | 6350000 |
87139 | /organization/zynstra | 2014-07-09 | 14750000 |
87140 | /organization/zyomyx-inc | 2010-01-29 | 555016 |
87141 | /organization/zyomyx-inc | 2013-06-30 | 12555016 |
87142 | /organization/zyomyx-inc | 2013-07-11 | 26775015 |
87143 | /organization/zyomyx-inc | 2014-03-26 | 34275015 |
87144 | /organization/zypsee | 2013-08-13 | 2150000 |
87145 | /organization/zyraz-technology | 2008-01-01 | 2007363 |
87146 | /organization/zyraz-technology | 2009-09-11 | 7407363 |
87147 | /organization/zyraz-technology | 2009-10-09 | 15398910 |
87148 | /organization/zyraz-technology | 2013-02-15 | 15419877 |
87149 | /organization/zyrra | 2010-10-21 | 50000 |
87150 | /organization/zyrra | 2010-11-15 | 495000 |
87151 | /organization/zyrra | 2010-12-15 | 1370000 |
87152 | /organization/zyrra | 2012-10-18 | 1510500 |
87153 | /organization/zytoprotec | 2013-01-29 | 2686600 |
87154 | /organization/zzish | 2014-03-24 | 320000 |
87155 | /organization/zznode-science-and-technology-co... | 2012-04-01 | 1587301 |
87156 | /organization/zzzzapp-com | 2011-11-01 | 7000 |
87157 | /organization/zzzzapp-com | 2013-03-19 | 39360 |
87158 | /organization/zzzzapp-com | 2013-08-01 | 71525 |
87159 | /organization/zzzzapp-com | 2014-01-01 | 84480 |
87160 | /organization/zzzzapp-com | 2014-09-10 | 97398 |
87161 rows × 3 columns
cut
(linear / ad hoc binning) and qcut
(quantile binning) functionsacquisitions.price_amount.isnull().value_counts()
True 51424 False 3816 dtype: int64
acqs_known = acquisitions[acquisitions.price_amount.notnull()]
acqs_known.price_amount.describe()
count 3.816000e+03 mean 7.182556e+08 std 3.884648e+09 min 1.000000e+00 25% 1.822500e+07 50% 8.295000e+07 75% 3.450000e+08 max 1.500000e+11 Name: price_amount, dtype: float64
top = 1000000000.
acqs = acqs_known[(acqs_known.price_amount <= top) & (acqs_known.price_amount > 0)]
pd.cut(acqs.price_amount, 4)
2 (-999998.999, 250000000.75] 4 (-999998.999, 250000000.75] 7 (-999998.999, 250000000.75] 22 (-999998.999, 250000000.75] 23 (-999998.999, 250000000.75] 25 (250000000.75, 500000000.5] 31 (500000000.5, 750000000.25] 35 (500000000.5, 750000000.25] 38 (-999998.999, 250000000.75] 39 (-999998.999, 250000000.75] 44 (250000000.75, 500000000.5] 46 (250000000.75, 500000000.5] 53 (-999998.999, 250000000.75] 65 (-999998.999, 250000000.75] 70 (250000000.75, 500000000.5] ... 13458 (-999998.999, 250000000.75] 13463 (-999998.999, 250000000.75] 13471 (-999998.999, 250000000.75] 13472 (500000000.5, 750000000.25] 13473 (-999998.999, 250000000.75] 13475 (-999998.999, 250000000.75] 13476 (-999998.999, 250000000.75] 13480 (-999998.999, 250000000.75] 13483 (-999998.999, 250000000.75] 13488 (-999998.999, 250000000.75] 13491 (-999998.999, 250000000.75] 13495 (-999998.999, 250000000.75] 13496 (250000000.75, 500000000.5] 13505 (-999998.999, 250000000.75] 13506 (-999998.999, 250000000.75] Name: price_amount, Length: 3400, dtype: category Categories (4, object): [(-999998.999, 250000000.75] < (250000000.75, 500000000.5] < (500000000.5, 750000000.25] < (750000000.25, 1000000000]]
bins = pd.cut(acqs.price_amount, [0, top / 4, top / 2, 3 * top / 4, top])
acqs.groupby(bins).size()
price_amount (0, 250000000] 2650 (250000000, 500000000] 464 (500000000, 750000000] 188 (750000000, 1000000000] 98 dtype: int64
# Get Quantile numbers
(pd.qcut(acqs.price_amount, 4, labels=False) + 1).value_counts()
1 871 3 856 4 844 2 829 dtype: int64
quartiles = pd.qcut(acqs.price_amount, 4, labels=False)
grouped = acqs.groupby(quartiles)
def f(x):
return pd.Series({
'lb': x.price_amount.min(),
'ub': x.price_amount.max(),
'num': len(x)
})
grouped.apply(f)
lb | num | ub | |
---|---|---|---|
0 | 1 | 871 | 15000000 |
1 | 15040000 | 829 | 59000000 |
2 | 60000000 | 856 | 210000000 |
3 | 212000000 | 844 | 1000000000 |
query = """
with t0 as ( select
company_permalink, price_amount
from acquisitions a
where price_amount is not null
and price_amount <= 1000000000
)
select t2.bucket, sum(t2.price_amount), count(*)
from (
select t0.company_permalink, t0.price_amount,
ceil((t0.price_amount - lb) / ((ub - lb) / 4)) as bucket
from t0
cross join (
select min(price_amount) - 1e-14 as lb, max(price_amount) as ub
from t0
) t1
) t2
group by 1;
"""
sql.read_sql(query, con)
bucket | sum | count | |
---|---|---|---|
0 | 1 | 1.588106e+11 | 2650 |
1 | 2 | 1.677637e+11 | 464 |
2 | 3 | 1.173550e+11 | 188 |
3 | 4 | 8.616420e+10 | 98 |
query = """
select bucket, min(price_amount) as lb,
max(price_amount) as ub, count(*)
from (
select
ntile(4) over (order by price_amount) AS bucket,
price_amount
from acquisitions a
where price_amount is not null
and price_amount < 1000000000
) t0
group by 1
order by bucket;
"""
sql.read_sql(query, con)
bucket | lb | ub | count | |
---|---|---|---|---|
0 | 1 | 1 | 15000000 | 847 |
1 | 2 | 15000000 | 57500000 | 847 |
2 | 3 | 57900000 | 206000000 | 847 |
3 | 4 | 206000000 | 996000000 | 846 |