#!/usr/bin/env python
# coding: utf-8

# # Tokyo Photographs
#
# Notebook export that reads the Tokyo photographs table, draws a
# reproducible random subset of rows, reprojects the longitude/latitude
# coordinates to EPSG:3857, and writes the table with the projected
# coordinates in separate `x` and `y` columns to `tokyo_clean.csv`.

# In[1]:


from IPython.display import display_markdown

# Use a context manager so the README file handle is closed as soon as
# its contents have been read.
with open("README.md") as readme:
    display_markdown(readme.read(), raw=True)


# In[65]:


get_ipython().run_line_magic('matplotlib', 'inline')

import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np


# In[72]:


db = pd.read_csv('data/tokyo.csv')


# ## Randomly subsetting

# In[73]:


# Set the "seed" so every run generates the same random numbers
np.random.seed(1234)
# Create a sequence of length equal to the number of rows in the table
ri = np.arange(len(db))
# Randomly reorganize (shuffle) the values
np.random.shuffle(ri)
# Reindex the table by using only the first 10,000 numbers
# of the (now randomly arranged) sequence
db = db.iloc[ri[:10000], :]


# ## Reproject XY coordinates in separate columns

# In[74]:


# Build the point geometries with geopandas' vectorized constructor.
# The previous row-wise `Point(...)` lambda relied on a `Point` name that
# this script never imports, so the cell failed with a NameError.
get_ipython().run_cell_magic(
    'time',
    '',
    "pts = gpd.points_from_xy(db['longitude'], db['latitude'])\n",
)


# In[75]:


# Declare the source CRS with an authority string; the proj4-style
# {'init': ...} dictionary form is deprecated.
gdb = gpd.GeoDataFrame(db.assign(geometry=pts), crs="EPSG:4326")


# In[76]:


get_ipython().run_cell_magic('time', '', 'gdb = gdb.to_crs(epsg=3857)\n')


# In[77]:


# Read the projected coordinates through the vectorized GeoSeries
# accessors instead of constructing one pandas Series per point.
get_ipython().run_cell_magic(
    'time',
    '',
    "gdb['x'] = gdb['geometry'].x\ngdb['y'] = gdb['geometry'].y\n",
)


# In[79]:


# Geometry objects are dropped before writing; the projected coordinates
# are kept in the plain `x` and `y` columns.
gdb.drop('geometry', axis=1).to_csv('tokyo_clean.csv', index=False)


# ---
#
# ## Download link
#
# {download}`[Download the *tokyo_clean.csv* file] `