import pandas
import matplotlib.pyplot as plt
# Load the city table; its Longitude/Latitude columns are converted into
# point geometries further down.
cities = pandas.read_csv('US_cities.csv')
import geopandas
from geopandas import GeoDataFrame
from shapely.geometry import Point
# One shapely Point per city, built from (longitude, latitude) pairs.
geometry = [
    Point(lon, lat)
    for lon, lat in zip(cities.Longitude, cities.Latitude)
]

# The raw coordinate columns are redundant once the geometry list exists.
cities = cities.drop(['Longitude', 'Latitude'], axis='columns')

# Declare the coordinates as EPSG:4326 (WGS 84 longitude/latitude).
crs = {'init': 'epsg:4326'}
cities = GeoDataFrame(cities, geometry=geometry, crs=crs)

# Reprojection to EPSG:3395 was left disabled in the original notebook:
# cities = cities.to_crs({'init': 'epsg:3395'})
cities.head()
id | Country | City | AccentCity | Region | Population | geometry | |
---|---|---|---|---|---|---|---|
0 | 2907755 | us | alabaster | Alabaster | AL | 26738.0 | POINT (-86.81638890000001 33.2441667) |
1 | 2907759 | us | albertville | Albertville | AL | 18368.0 | POINT (-86.20888890000001 34.2675) |
2 | 2907765 | us | alexander city | Alexander City | AL | 14993.0 | POINT (-85.95388890000002 32.9438889) |
3 | 2907804 | us | anniston | Anniston | AL | 23423.0 | POINT (-85.8316667 33.6597222) |
4 | 2907848 | us | athens | Athens | AL | 20470.0 | POINT (-86.9716667 34.8027778) |
# Read the cafe features from the GeoJSON file (presumably an OpenStreetMap
# export, judging by the file name) and preview the first rows.
cafes = geopandas.read_file('cafes_us_OSM.geojson')
cafes.head()
id | @id | amenity | building | cuisine | name | operator | wheelchair | website | addr:city | ... | karaoke | gambling | reservation | tourism:disused | indoor:level | landuse | diet:raw | name:it | amenity:ice_cream | geometry | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | node/31327678 | node/31327678 | cafe | yes | None | None | None | None | None | None | ... | None | None | None | None | None | None | None | None | None | POINT (-122.330085 47.5454108) |
1 | node/32153673 | node/32153673 | cafe | None | coffee_shop | Siuslaw Coffee Roasters | None | None | None | None | ... | None | None | None | None | None | None | None | None | None | POINT (-124.1081239 43.9662488) |
2 | node/33979948 | node/33979948 | cafe | None | None | Country Style | MTY Group | yes | None | None | ... | None | None | None | None | None | None | None | None | None | POINT (-76.4835513 44.2569218) |
3 | node/34084322 | node/34084322 | cafe | None | coffee_shop | Tim Hortons | Restaurant Brands International Inc. | None | http://www.timhortons.com/ | None | ... | None | None | None | None | None | None | None | None | None | POINT (-76.5066058 44.2598381) |
4 | node/35332415 | node/35332415 | cafe | None | coffee_shop | Starbucks | None | None | None | None | ... | None | None | None | None | None | None | None | None | None | POINT (-76.9346255 40.2387067) |
5 rows × 400 columns
# Show one random cafe as a column and drop its empty tags, so only the
# attributes that are actually filled in for that cafe remain visible.
cafes.sample().T.dropna()
6889 | |
---|---|
id | node/2157948579 |
@id | node/2157948579 |
amenity | cafe |
cuisine | bubble_tea |
name | Verde Tea Cafe |
addr:city | Mountain View |
addr:housenumber | 852 |
addr:street | Villa Street |
geometry | POINT (-122.0794931 37.3941483) |
# A constant column of ones lets a grouped sum act as a row counter.
cafes['count'] = 1

# Ten most frequent cuisine tags, most common first.
(
    cafes[['cuisine', 'count']]
    .groupby('cuisine')
    .sum()
    .sort_values('count', ascending=False)
    .head(10)
)
count | |
---|---|
cuisine | |
coffee_shop | 6166 |
donut | 625 |
ice_cream | 290 |
sandwich | 284 |
american | 220 |
coffee | 78 |
cafe | 73 |
regional | 71 |
bagel | 64 |
tea | 52 |
# First 20 column names in sorted order (sorted() already returns a list).
sorted(cafes.columns)[:20]
['@id', 'Door', 'FIXME', 'Notes', 'access', 'addr', 'addr.source:street', 'addr:city', 'addr:country', 'addr:county', 'addr:direction', 'addr:door', 'addr:flats', 'addr:floor', 'addr:full', 'addr:house', 'addr:housename', 'addr:housenumber', 'addr:interpolation', 'addr:place']
# Fraction of missing values per column: the mean of a boolean mask is the
# share of True entries.
cafes.isnull().mean()
id 0.000000 @id 0.000000 amenity 0.000000 building 0.998607 cuisine 0.552107 name 0.030798 operator 0.970932 wheelchair 0.950800 website 0.814443 addr:city 0.763946 addr:country 0.961514 addr:housenumber 0.665257 addr:province 0.971412 addr:street 0.657954 internet_access 0.852784 opening_hours 0.866766 payment:amex 0.985154 payment:coins 0.984817 payment:notes 0.984817 recycling:coffee_cups 0.999856 created_by 0.995628 addr:postcode 0.764186 description 0.992024 hst_number 0.999760 self_checkout 0.999952 capacity 0.998847 smoking 0.948734 wifi 0.971748 wheelchair:description 0.998462 source 0.943689 ... name:nl 0.999952 source:phone 0.999952 payment:cheque 0.999904 dessert:type 0.999952 payment:discover 0.999952 payment:paypal 0.999952 cat 0.999952 name:zh_pinyin 0.999952 service:bicycle:cleaning 0.999952 service:bicycle:parts 0.999952 service:bicycle:pump 0.999952 Door 0.999952 automatic_door 0.999952 addr:street:prefix 0.999952 black_owned 0.999952 ref:miaaddr 0.999952 service 0.999952 name:etymology:wikidata 0.999952 name:pt 0.999904 karaoke 0.999952 gambling 0.999952 reservation 0.999952 tourism:disused 0.999952 indoor:level 0.999856 landuse 0.999952 diet:raw 0.999952 name:it 0.999952 amenity:ice_cream 0.999952 geometry 0.000000 count 0.000000 dtype: float64
# Keep only the columns that are missing in fewer than 60% of the rows.
# Selecting columns with a boolean mask through .loc avoids transposing the
# ~400-column frame twice, which is slow and casts every column to object
# dtype (presumably the reason the frame is re-wrapped in a GeoDataFrame
# later on).  The explicit copy keeps later in-place edits off a slice.
cafes = cafes.loc[:, cafes.isnull().sum() / len(cafes) < 0.6].copy()
cafes.sample(5)
id | @id | amenity | cuisine | name | geometry | count | |
---|---|---|---|---|---|---|---|
12245 | node/3367217305 | node/3367217305 | cafe | None | Tropical Smoothie Cafe | POINT (-113.5616411 37.1121392) | 1 |
9133 | node/2605729050 | node/2605729050 | cafe | coffee_shop | Dunkin Donuts (Summer Only) | POINT (-70.3747085 43.515823) | 1 |
15173 | node/4045366894 | node/4045366894 | cafe | donut | Dunkin' Donuts | POINT (-70.5956206 41.566752) | 1 |
17698 | node/4514634690 | node/4514634690 | cafe | coffee_shop | Starbucks | POINT (-78.6799591 35.8571021) | 1 |
7638 | node/2332676900 | node/2332676900 | cafe | None | Starbucks | POINT (-111.8926801 40.588471) | 1 |
# Remove both OSM identifier columns in place; they are not used afterwards.
cafes.drop(['id', '@id'], axis='columns', inplace=True)
cafes.head()
amenity | cuisine | name | geometry | count | |
---|---|---|---|---|---|
0 | cafe | None | None | POINT (-122.330085 47.5454108) | 1 |
1 | cafe | coffee_shop | Siuslaw Coffee Roasters | POINT (-124.1081239 43.9662488) | 1 |
2 | cafe | None | Country Style | POINT (-76.4835513 44.2569218) | 1 |
3 | cafe | coffee_shop | Tim Hortons | POINT (-76.5066058 44.2598381) | 1 |
4 | cafe | coffee_shop | Starbucks | POINT (-76.9346255 40.2387067) | 1 |
# Load the state outlines and tag them as EPSG:4326 (WGS 84 lon/lat).
us_geo = geopandas.read_file('us_20m.json')
us_geo.crs = {'init': 'epsg:4326'}

# Drop the three shapes named below so the map only covers the rest.
us_geo = us_geo[~us_geo.NAME.isin(["Hawaii", "Alaska", "Puerto Rico"])]

us_geo.plot()
plt.show()
# Make sure `cafes` is a GeoDataFrame tagged as EPSG:4326 before reprojecting.
cafes = geopandas.GeoDataFrame(cafes)
cafes.crs = {'init': 'epsg:4326'}

# Put both layers in EPSG:3395 (World Mercator) so they line up on one plot.
us_geo = us_geo.to_crs({'init': 'epsg:3395'})
cafes = cafes.to_crs({'init': 'epsg:3395'})

# State outlines form the base axes; cafes are drawn on top as small dots.
base = us_geo.plot(
    figsize=(20, 20),
    color='#e9eeee',
    edgecolor='#555555',
)
cafes.plot(ax=base, marker='o', markersize=1, color='green', alpha=0.5)
plt.show()
# Same base map, but cafes tagged with the 'donut' cuisine are highlighted.
base = us_geo.plot(
    figsize=(20, 20),
    color='#e9eeee',
    edgecolor='#555555',
)

# Compute the cuisine mask once and use it (and its negation) for both layers.
is_donut = cafes['cuisine'] == 'donut'
cafes[~is_donut].plot(ax=base, marker='o', markersize=1, color='green', alpha=0.5)
cafes[is_donut].plot(ax=base, marker='o', markersize=5, color='red', alpha=1)
plt.show()
# Return both layers to EPSG:4326 so their coordinates are comparable with
# `cities`, which was created in EPSG:4326 and is used for the lookup below.
cafes = cafes.to_crs({'init': 'epsg:4326'})
us_geo = us_geo.to_crs({'init': 'epsg:4326'})
from shapely.ops import nearest_points
# Merge every city point into a single geometry so that one nearest_points
# call can search across all cities at once.
multipoint = cities.geometry.unary_union
def near(point, pts=multipoint):
    """Return the ``City`` value of the city closest to ``point``.

    Args:
        point: shapely geometry to search from (a cafe location here).
        pts: geometry holding the candidate locations; defaults to the
            union of all city points, bound when the function is defined.

    Returns:
        The ``City`` entry of the first row in ``cities`` whose geometry
        equals the nearest candidate.

    Raises:
        IndexError: if no row of ``cities`` matches the nearest candidate,
            which can only happen when ``pts`` is not built from ``cities``.

    Note:
        shapely measures distance in the plane of the input coordinates, so
        with longitude/latitude inputs "nearest" is in degrees, not in
        ground distance.
    """
    # nearest_points returns one point per input geometry, in input order;
    # index 1 is therefore the nearest location inside `pts`.
    closest = nearest_points(point, pts)[1]
    is_closest = cities.geometry == closest
    # Series.get_values() was removed in pandas 1.0; .values is available in
    # both old and new releases and returns the same array.
    return cities[is_closest].City.values[0]
# Work on a random sample of 50 cafes: `near` compares against every city
# on each call, so running it over the full table would be slow.
cafes_sample = cafes.sample(50)

# Look up the closest city for each sampled cafe, in row order.
cafes_sample['nearest'] = [near(spot) for spot in cafes_sample.geometry]
cafes_sample[['name', 'nearest', 'geometry']]
name | nearest | geometry | |
---|---|---|---|
19188 | Dooney's Cafe | niagara falls | POINT (-79.42479849999999 43.6623165999999) |
15660 | Firelands Café | sandusky | POINT (-82.59462509999999 41.39778199999985) |
5068 | Starbucks | fort collins | POINT (-105.0921089 40.55302779999984) |
9178 | Mitchell's Coffee House | lakeland | POINT (-81.9550215 28.04630939999938) |
7915 | Bagel and More | new york | POINT (-74.00000740000002 40.73848729999983) |
4036 | Starbucks Coffee | oakland | POINT (-122.273898 37.80044439999977) |
12121 | Starbucks | mount vernon | POINT (-73.8283531 40.95044309999985) |
14026 | The Wydown | washington | POINT (-77.03219110000001 38.91645309999979) |
7614 | Belle Epicurean | seattle | POINT (-122.2920724 47.62608069998335) |
17372 | Cafe Ross | san anselmo | POINT (-122.5555492 37.96018919999976) |
15504 | None | golden | POINT (-105.2218426 39.75571879999981) |
4858 | Lazy Daisy's | niagara falls | POINT (-79.31956969999999 43.67267709999989) |
13221 | Heirloom Salad Company | iowa city | POINT (-91.53248619999998 41.65992709999987) |
5497 | Starbucks | morgan hill | POINT (-121.6436148 37.11249939999973) |
4587 | Coffee Time | erie | POINT (-80.54027480000002 43.4857523999999) |
10416 | aroma espresso bar | niagara falls | POINT (-79.3938836 43.64589749999989) |
1439 | Starbucks Coffee | niagara falls | POINT (-79.38845619999999 43.6701574999999) |
11772 | Tim Hortons | niagara falls | POINT (-79.18708129999999 43.7833634999999) |
5148 | Sweet Sue's Bakeshop | parole | POINT (-76.5314048 38.70842029999979) |
20659 | Cacao Coffee House | santa monica | POINT (-118.4544201 34.04468719999961) |
19773 | Subway | morgan hill | POINT (-121.639992 37.11093679999973) |
1723 | The Bean | las cruces | POINT (-106.7955689 32.27744659999957) |
4756 | Tim Hortons | niagara falls | POINT (-79.6083119 43.5715151999999) |
7973 | Blue Dot Cafe & Coffee Bar | alameda | POINT (-122.2543493 37.76682479999977) |
4452 | Tim Hortons | ogdensburg | POINT (-75.8779526 45.30398529999992) |
1682 | Starbucks | simpsonville | POINT (-82.25589590000001 34.71154639999966) |
12911 | Starbucks | jacksonville beach | POINT (-81.52557749999998 30.25847729999948) |
20467 | Ramen Yamadaya | beverly hills | POINT (-118.4430765 34.05781379999963) |
15700 | Starbucks | newberg | POINT (-123.17308 45.22743829999992) |
11611 | Bubble Tea | university city | POINT (-90.307902 38.65640029999979) |
3876 | Fraticelli's Authentic Italian Grill | niagara falls | POINT (-79.38241399999998 43.8638979999999) |
9824 | Starbucks | guttenberg | POINT (-73.9546503 40.76999359999984) |
13241 | Starbucks | south salt lake | POINT (-111.9002395 40.72960419999983) |
13378 | Biggby Coffee | okemos | POINT (-84.4209608 42.71937419999987) |
1602 | The Coffee House | gallup | POINT (-108.7425899 35.52710149999967) |
6107 | North Star Coffeehouse | portland | POINT (-122.681791 45.57780759999992) |
17066 | Sweet Phee's | astoria | POINT (-124.0547878 46.34944119999994) |
6657 | Iris Cafe | newport | POINT (-84.51223830000001 39.1106949999998) |
10078 | Over Easy Cafe | asheville | POINT (-82.55205099999999 35.59628989999968) |
6013 | Starbucks | la vista | POINT (-96.09052419999999 41.26213729999984) |
1169 | Meshuggah Cafe | university city | POINT (-90.30181019999999 38.65578739999978) |
8122 | Oak Tree Teriyaki | shoreline | POINT (-122.3439809 47.70209019998354) |
4541 | Zaya Cafe | oakland | POINT (-122.2689682 37.80750479999976) |
12264 | Market Square Bakehouse | amesbury | POINT (-70.93043590000001 42.85812609999989) |
2091 | Bean Traders | durham | POINT (-78.93879529999998 35.90966479999969) |
3367 | Kaneko Dining | salem | POINT (-123.0271285 44.93283059999991) |
18961 | Starbucks | wellesley | POINT (-71.2916949 42.30060659999987) |
20441 | Starbucks | ferndale | POINT (-123.1211255 49.17761419998703) |
2100 | Zoo Bar Cafe | washington | POINT (-77.05554120000001 38.9298749999998) |
8457 | Espresso | anacortes | POINT (-122.6124295 48.4966675999855) |
How would you compute the distance from a given cafe to the nearest Starbucks?
Geopandas mapping: http://geopandas.org/mapping.html