#!/usr/bin/env python
# coding: utf-8

# ### Install

# In[1]:

get_ipython().system('pip3 install pydeck')

# In[3]:

get_ipython().system('jupyter nbextension install --sys-prefix --symlink --overwrite --py pydeck')
get_ipython().system('jupyter nbextension enable --sys-prefix --py pydeck')

# In[1]:

# export MAPBOX_API_KEY="your key"

# ### Official homepage example

# In[11]:

import pydeck

# 2014 locations of car accidents in the UK
UK_ACCIDENTS_DATA = ('https://raw.githubusercontent.com/uber-common/'
                     'deck.gl-data/master/examples/3d-heatmap/heatmap-data.csv')

# Define a layer to display on a map
layer = pydeck.Layer(
    'HexagonLayer',
    UK_ACCIDENTS_DATA,
    get_position='[lng, lat]',
    auto_highlight=True,
    elevation_scale=50,
    pickable=True,
    elevation_range=[0, 3000],
    extruded=True,
    coverage=1)

# Set the viewport location
view_state = pydeck.ViewState(
    longitude=-1.415,
    latitude=52.2323,
    zoom=6,
    min_zoom=5,
    max_zoom=15,
    pitch=40.5,
    bearing=-27.36)

# Render
r = pydeck.Deck(layers=[layer], initial_view_state=view_state)
# r.to_html('demo.html')
r.show()

# ### Conway's Game of Life
# - [Example](https://github.com/uber/deck.gl/blob/master/bindings/python/pydeck/examples/06%20-%20Conway's%20Game%20of%20Life.ipynb)

# In[12]:

import random


def new_board(x, y, num_live_cells=2, num_dead_cells=3):
    """Initialize a random board for Conway's Game of Life.

    Args:
        x: Number of columns.
        y: Number of rows.
        num_live_cells: Relative weight of live cells (value 1).
        num_dead_cells: Relative weight of dead cells (value 0).

    Returns:
        A list of ``y`` rows, each a list of ``x`` cells holding 0 or 1.
    """
    # Defaults to a 3:2 dead cell:live cell ratio.  The weighted pool is the
    # same for every cell, so build it once instead of once per cell.
    choices = [0] * num_dead_cells + [1] * num_live_cells
    return [[random.choice(choices) for _ in range(x)] for _ in range(y)]


def get(board, x, y):
    """Return the value at location (x, y) on a board, wrapping around if out-of-bounds."""
    return board[y % len(board)][x % len(board[0])]


def assign(board, x, y, value):
    """Assign a value at location (x, y) on a board, wrapping around if out-of-bounds."""
    board[y % len(board)][x % len(board[0])] = value


def count_neighbors(board, x, y):
    """Count the living neighbors of the cell at (x, y), wrapping at the edges."""
    return sum(
        get(board, x + dx, y + dy)
        for dx in (-1, 0, 1)
        for dy in (-1, 0, 1)
        if (dx, dy) != (0, 0))


def process_life(board):
    """Create the next generation from a passed state of Conway's Game of Life.

    A live cell survives with two or three live neighbors; a dead cell comes
    alive with exactly three; every other cell is dead in the next generation.

    Args:
        board: Rectangular list of rows holding 0 (dead) or 1 (alive).

    Returns:
        A new board of the same dimensions; ``board`` itself is not modified.
    """
    if not board:
        return []
    # Start from an all-dead board: every cell is written below, so there is
    # no reason to draw (and discard) random values for it.
    next_board = [[0] * len(board[0]) for _ in board]
    for y, row in enumerate(board):
        for x, cell in enumerate(row):
            num_neighbors = count_neighbors(board, x, y)
            is_alive = cell == 1
            survives = is_alive and 2 <= num_neighbors <= 3
            is_born = not is_alive and num_neighbors == 3
            assign(next_board, x, y, 1 if survives or is_born else 0)
    return next_board


# In[13]:

from IPython.display import clear_output
import time


def draw_board(board):
    """Render a board as text, one line per row.

    Live cells are drawn as ``'* '`` and dead cells as two spaces, so every
    cell is two characters wide and the columns line up.
    """
    return ''.join(
        ''.join('* ' if cell == 1 else '  ' for cell in row) + '\n'
        for row in board)


board = new_board(20, 20)
NUM_ITERATIONS = 100
for i in range(0, NUM_ITERATIONS):
    print('Iteration ' + str(i + 1))
    board = process_life(board)
    res = draw_board(board)
    print(res)
    time.sleep(0.1)
    # wait=True defers the clear until the next output arrives.
    clear_output(wait=True)

# In[14]:

import numpy as np
import pandas as pd
import pydeck as deck

PINK = [155, 155, 255, 245]
PURPLE = [255, 155, 255, 245]
SCALING_FACTOR = 1000.0


def convert_board_to_df(board):
    """Flatten the board matrix into a DataFrame with one row per cell.

    Args:
        board: Rectangular list of rows holding 0 (dead) or 1 (alive).

    Returns:
        A DataFrame with a ``position`` column (``[x, y]`` divided by
        ``SCALING_FACTOR``) and a ``color`` column (``PURPLE`` for live
        cells, ``PINK`` for dead ones).  An empty board yields an empty frame.
    """
    num_columns = len(board[0]) if board else 0
    rows = [
        [[x / SCALING_FACTOR, y / SCALING_FACTOR],
         PURPLE if board[y][x] else PINK]
        for x in range(num_columns)
        for y in range(len(board))]
    return pd.DataFrame(rows, columns=['position', 'color'])


board = new_board(30, 30)
records = convert_board_to_df(board)
layer = deck.Layer(
    'PointCloudLayer',
    records,
    get_position='position',
    get_color='color',
    get_radius=40)
view_state = deck.ViewState(latitude=0.00, longitude=0.00, zoom=13, bearing=44, pitch=45)
# An empty map_style means no base map is requested behind the points.
r = deck.Deck(layers=[layer], initial_view_state=view_state, map_style='')
r.show()
# In[17]:

# `display` is only injected automatically inside IPython; import it so this
# cell does not depend on a later cell having run first.
from IPython.display import display

NUM_ITERATIONS = 100
display(r.show())
for _ in range(NUM_ITERATIONS):
    board = process_life(board)
    records = convert_board_to_df(board)
    # Swap the layer's data, then push the change to the rendered widget.
    layer.data = records
    r.update()
    time.sleep(0.1)

# In[20]:

import pandas as pd

UK_ACCIDENTS_DATA = 'https://raw.githubusercontent.com/uber-common/deck.gl-data/master/examples/3d-heatmap/heatmap-data.csv'
pd.read_csv(UK_ACCIDENTS_DATA).head()

# In[24]:

UK_ACCIDENTS_DATA

# In[29]:

layer = pydeck.Layer(
    'HexagonLayer',
    UK_ACCIDENTS_DATA,
    get_position='[lng,lat]',
    auto_highlight=True,
    elevation_scale=50,
    pickable=True,
    elevation_range=[0, 3000],
    extruded=True,
    coverage=1)

# Set the viewport location
view_state = pydeck.ViewState(
    longitude=-1.415,
    latitude=52.2323,
    zoom=6,
    min_zoom=5,
    max_zoom=15,
    pitch=40.5,
    bearing=-27.36)

# Combine all of it and render a viewport
r = pydeck.Deck(layers=[layer], initial_view_state=view_state)
r.show()

# In[33]:

layer.elevation_range = [0, 10000]
r.update()

# In[26]:

import pydeck as pdk

# In[28]:

get_ipython().run_line_magic('pinfo', 'pdk.Deck')

# In[34]:

import time

r.show()

# In[35]:

# Step the upper elevation bound from 0 to 9000 in increments of 1000.
for i in range(0, 10000, 1000):
    layer.elevation_range = [0, i]
    r.update()
    time.sleep(0.1)

# ### Scatter Plots

# In[36]:

import pandas as pd
from pydeck import (
    data_utils,
    Deck,
    Layer
)

# First, let's use Pandas to download our data
URL = 'https://raw.githubusercontent.com/ajduberstein/data_sets/master/beijing_subway_station.csv'
df = pd.read_csv(URL)
df.head()

# In[37]:

from ast import literal_eval

# The CSV encodes the [R, G, B, A] color values as a string, so parse each
# one back into a list.
df['color'] = df['color'].apply(literal_eval)

# In[38]:

df.head()

# In[39]:

# Use pydeck's data_utils module to fit a viewport to the central 90% of the data
viewport = data_utils.compute_view(points=df[['lng', 'lat']], view_proportion=0.9)
auto_zoom_map = Deck(layers=None, initial_view_state=viewport)
auto_zoom_map.show()

# In[40]:

from IPython.display import display
import ipywidgets

year = 2019

scatterplot = Layer(
    'ScatterplotLayer',
    df,
    id='scatterplot-layer',
    get_radius=500,
    get_fill_color='color',
    get_position='[lng, lat]')
r = Deck(layers=[scatterplot], initial_view_state=viewport)

# Create an HTML header to display the year
display_el = ipywidgets.HTML('<h1>{}</h1>'.format(year))
display(display_el)

# Show the current visualization
r.show()

# In[41]:

import time

for y in range(1971, 2020):
    # NOTE(review): this is a string comparison; it assumes `opening_date`
    # holds year-first text so that lexical order matches date order.
    scatterplot.data = df[df['opening_date'] <= str(y)]
    year = y
    # Reset the header to display the year
    display_el.value = '<h1>{}</h1>'.format(year)
    r.update()
    time.sleep(0.2)

# ### Using pydeck to manipulate data

# In[42]:

import pydeck as pdk

DATA_URL = 'https://api.data.gov.sg/v1/transport/taxi-availability'
COLOR_RANGE = [
    [255, 255, 178, 25],
    [254, 217, 118, 85],
    [254, 178, 76, 127],
    [253, 141, 60, 170],
    [240, 59, 32, 212],
    [189, 0, 38, 255]
]

# In[43]:

import pandas as pd
import requests

# Fail loudly on a hung connection or an HTTP error instead of trying to
# index into an error payload.
response = requests.get(DATA_URL, timeout=30)
response.raise_for_status()
payload = response.json()
df = pd.DataFrame(
    payload["features"][0]["geometry"]["coordinates"],
    columns=['lng', 'lat'])
viewport = pdk.data_utils.compute_view(df[['lng', 'lat']])
layer = pdk.Layer(
    'ScreenGridLayer',
    df,
    cell_size_pixels=20,
    color_range=COLOR_RANGE,
    get_position='[lng, lat]',
    pickable=True,
    auto_highlight=True)
r = pdk.Deck(layers=[layer], initial_view_state=viewport)

# In[44]:

r.show()

# In[46]:

pd.DataFrame([r.deck_widget.selected_data])

# ### Plotting massive data sets.ipynb

# In[47]:

import pandas as pd

all_lidar = pd.concat([
    pd.read_csv('https://raw.githubusercontent.com/ajduberstein/kitti_subset/master/kitti_1.csv'),
    pd.read_csv('https://raw.githubusercontent.com/ajduberstein/kitti_subset/master/kitti_2.csv'),
    pd.read_csv('https://raw.githubusercontent.com/ajduberstein/kitti_subset/master/kitti_3.csv'),
    pd.read_csv('https://raw.githubusercontent.com/ajduberstein/kitti_subset/master/kitti_4.csv'),
])

# Filter to one frame of data.  Work on a copy so the rescaling below writes
# to this frame only and does not trigger pandas' chained-assignment warning.
lidar = all_lidar[all_lidar['source'] == 136].copy()
lidar.loc[:, ['x', 'y']] = lidar[['x', 'y']] / 10000

# In[48]:

import pydeck as pdk

point_cloud = pdk.Layer(
    'PointCloudLayer',
    lidar[['x', 'y', 'z']],
    get_position='[x, y, z * 10]',
    get_normal=[0, 0, 1],
    get_color=[255, 0, 100, 200],
    pickable=True,
    auto_highlight=True,
    point_size=1)

view_state = pdk.data_utils.compute_view(lidar[['x', 'y']], 0.9)
view_state.max_pitch = 360
view_state.pitch = 80
view_state.bearing = 120

r = pdk.Deck(
    point_cloud,
    initial_view_state=view_state,
    map_style='')
r.show()

# In[49]:

import time
from collections import deque

# Choose a handful of frames to loop through
frame_buffer = deque([42, 56, 81, 95])

# The position accessor is the same for every frame, so set it once rather
# than on each pass through the loop.  The raw frames are unscaled, hence the
# division here instead of in pandas.
r.layers[0].get_position = '[x / 10000, y / 10000, z * 10]'

print('Press the stop icon to exit')
try:
    while True:
        current_frame = frame_buffer[0]
        lidar = all_lidar[all_lidar['source'] == current_frame]
        r.layers[0].data = lidar.to_dict(orient='records')
        frame_buffer.rotate()
        r.update()
        time.sleep(0.5)
except KeyboardInterrupt:
    # Interrupting is the intended way to end the animation; swallow it so
    # the code after this cell can still run.
    pass

# ### Interacting with other Jupyter widgets.ipynb

# In[50]:

LIGHTS_URL = 'https://raw.githubusercontent.com/ajduberstein/lights_at_night/master/chengdu_lights_at_night.csv'
df = pd.read_csv(LIGHTS_URL)
df.head()

# In[ ]:

df['color'] = df['brightness'].apply(lambda val: [255, val * 4, 255, 255])
df.sample(10)

# In[71]:

plottable = df[df['year'] == 1993].to_dict(orient='records')

# min_zoom == max_zoom pins the zoom level.
view_state = pdk.ViewState(
    latitude=31.0,
    longitude=104.5,
    zoom=8,
    max_zoom=8,
    min_zoom=8)

scatterplot = pdk.Layer(
    'HeatmapLayer',
    data=plottable,
    get_position='[lng, lat]',
    get_weight='brightness',
    opacity=0.5,
    pickable=False,
    get_radius=800)

r = pdk.Deck(
    layers=[scatterplot],
    initial_view_state=view_state,
    views=[pdk.View(type='MapView', controller=None)])
r.show()

# In[72]:

import ipywidgets as widgets
from IPython.display import display

# Start on 1993, the year already plotted above and the slider's minimum
# (the previous initial value, 1992, was below `min`).
slider = widgets.IntSlider(1993, min=1993, max=2013, step=2)


def on_change(change):
    """Replot the heatmap for the year the slider was moved to.

    Args:
        change: The change notification passed by ``slider.observe``; its
            ``'new'`` entry holds the newly selected year.
    """
    results = df[df['year'] == change['new']].to_dict(orient='records')
    scatterplot.data = results
    r.update()


slider.observe(on_change, names='value')
display(slider)

# In[ ]:

tooltip = {
    "html": "Elevation Value: {elevationValue}",
    "style": {
        "backgroundColor": "steelblue",
        "color": "white"
    }
}

# ### Tooltip

# In[55]:

import pydeck as pdk

layer = pdk.Layer(
    'HexagonLayer',
    UK_ACCIDENTS_DATA,
    get_position='[lng, lat]',
    auto_highlight=True,
    elevation_scale=50,
    pickable=True,
    elevation_range=[0, 3000],
    extruded=True,
    coverage=1)

# Set the viewport location
view_state = pdk.ViewState(
    longitude=-1.415,
    latitude=52.2323,
    zoom=6,
    min_zoom=5,
    max_zoom=15,
    pitch=40.5,
    bearing=-27.36)

# Combine all of it and render a viewport
r = pdk.Deck(
    layers=[layer],
    initial_view_state=view_state,
    tooltip={
        'html': 'Elevation Value: {elevationValue}',
        'style': {
            'color': 'white'
        }
    }
)
r.show()

# - Using plain text instead of HTML
#

# In[54]:

import pydeck as pdk

layer = pdk.Layer(
    'HexagonLayer',
    UK_ACCIDENTS_DATA,
    get_position='[lng, lat]',
    auto_highlight=True,
    elevation_scale=50,
    pickable=True,
    elevation_range=[0, 3000],
    extruded=True,
    coverage=1)

# Set the viewport location
view_state = pdk.ViewState(
    longitude=-1.415,
    latitude=52.2323,
    zoom=6,
    min_zoom=5,
    max_zoom=15,
    pitch=40.5,
    bearing=-27.36)

# Combine all of it and render a viewport
r = pdk.Deck(
    layers=[layer],
    initial_view_state=view_state,
    tooltip={
        "text": "Elevation: {elevationValue}"
    }
)
r.show()

# - Passing just True as the tooltip
#

# In[56]:

import pydeck as pdk

layer = pdk.Layer(
    'HexagonLayer',
    UK_ACCIDENTS_DATA,
    get_position='[lng, lat]',
    auto_highlight=True,
    elevation_scale=50,
    pickable=True,
    elevation_range=[0, 3000],
    extruded=True,
    coverage=1)

# Set the viewport location
view_state = pdk.ViewState(
    longitude=-1.415,
    latitude=52.2323,
    zoom=6,
    min_zoom=5,
    max_zoom=15,
    pitch=40.5,
    bearing=-27.36)

# Combine all of it and render a viewport
r = pdk.Deck(
    layers=[layer],
    initial_view_state=view_state,
    tooltip=True
)
r.show()

# In[57]:

UK_ACCIDENTS_DATA = 'https://raw.githubusercontent.com/uber-common/deck.gl-data/master/examples/3d-heatmap/heatmap-data.csv'
uk_data = pd.read_csv(UK_ACCIDENTS_DATA)

# In[58]:

uk_data.head()

# ### Visualizing US taxi data

# In[87]:

query = (
    " SELECT * "
    "FROM `bigquery-public-data.new_york_taxi_trips.tlc_yellow_trips_2015` "
    "WHERE EXTRACT(MONTH from pickup_datetime) = 1 "
    "LIMIT 100000 "
)

# In[63]:

get_ipython().run_cell_magic('time', '', "taxi_df = pd.read_gbq(query=query, dialect='standard', project_id='geultto')\n")

# ### GridLayer
# - 100,000 rows

# In[91]:

grid_layer = pdk.Layer(
    'GridLayer',
    taxi_df,
    get_position='[pickup_longitude, pickup_latitude]',
    pickable=True,
    auto_highlight=True,
)

nyc_center = [-73.9808, 40.7648]
view_state = pdk.ViewState(longitude=nyc_center[0], latitude=nyc_center[1], zoom=9)

# `tooltip` is an option of the Deck, not of a layer, so it is passed here.
r = pdk.Deck(layers=[grid_layer], initial_view_state=view_state, tooltip=True)
r.show()

# ### Arc Layer

# In[106]:

# The three zip-code queries below share one `base_data` CTE: January 2015
# yellow-cab trips joined to the NY zip code containing the pickup point and
# the NY zip code containing the dropoff point.  The CTE is split into a head
# and a tail so a query can place an extra clause (e.g. a LIMIT) at the end
# of the inner trips subquery.
_ZIP_CODES_SUBQUERY = (
    "( "
    "SELECT zip_code, state_code, state_name, city, county, zip_code_geom, "
    "internal_point_lat, internal_point_lon "
    "FROM `bigquery-public-data.geo_us_boundaries.zip_codes` "
    "WHERE state_code='NY' "
    ") "
)

_BASE_DATA_CTE_HEAD = (
    " WITH base_data AS ( "
    "SELECT nyc_taxi.*, "
    "pickup.zip_code as pickup_zip_code, "
    "pickup.internal_point_lat as pickup_zip_code_lat, "
    "pickup.internal_point_lon as pickup_zip_code_lon, "
    "dropoff.zip_code as dropoff_zip_code, "
    "dropoff.internal_point_lat as dropoff_zip_code_lat, "
    "dropoff.internal_point_lon as dropoff_zip_code_lon "
    "FROM ( "
    "SELECT * "
    "FROM `bigquery-public-data.new_york_taxi_trips.tlc_yellow_trips_2015` "
    "WHERE EXTRACT(MONTH from pickup_datetime) = 1 "
    "and pickup_latitude <= 90 and pickup_latitude >= -90 "
    "and dropoff_latitude <= 90 and dropoff_latitude >= -90 "
)

_BASE_DATA_CTE_TAIL = (
    ") AS nyc_taxi "
    "JOIN " + _ZIP_CODES_SUBQUERY +
    "AS pickup "
    "ON ST_CONTAINS(pickup.zip_code_geom, st_geogpoint(pickup_longitude, pickup_latitude)) "
    "JOIN " + _ZIP_CODES_SUBQUERY +
    "AS dropoff "
    "ON ST_CONTAINS(dropoff.zip_code_geom, st_geogpoint(dropoff_longitude, dropoff_latitude)) "
    ") "
)

zip_code_query = (
    _BASE_DATA_CTE_HEAD
    + _BASE_DATA_CTE_TAIL
    + "SELECT * FROM base_data limit 10000 "
)

# In[107]:

get_ipython().run_cell_magic('time', '', "taxi_df_by_zipcode = pd.read_gbq(query=zip_code_query, dialect='standard', project_id='geultto')\n")

# In[108]:

taxi_df_by_zipcode.head(3)

# In[114]:

# No width accessor is set here, so arcs use the layer's default width.  An
# earlier version passed a misspelled `get_widht='elevationValue'`, which had
# no effect and named a column this query does not return.
arc_layer = pdk.Layer(
    'ArcLayer',
    taxi_df_by_zipcode,
    get_source_position='[pickup_zip_code_lon, pickup_zip_code_lat]',
    get_target_position='[dropoff_zip_code_lon, dropoff_zip_code_lat]',
    get_source_color='[255, 255, 120]',
    get_target_color='[255, 0, 0]',
    pickable=True,
    auto_highlight=True,
)

nyc_center = [-73.9808, 40.7648]
view_state = pdk.ViewState(longitude=nyc_center[0], latitude=nyc_center[1], zoom=9)

r = pdk.Deck(layers=[arc_layer], initial_view_state=view_state)
r.show()

# ### Aggregate

# In[160]:

# One row per (pickup zip, dropoff zip) pair with its trip count.
agg_query = (
    _BASE_DATA_CTE_HEAD
    + _BASE_DATA_CTE_TAIL
    + "SELECT pickup_zip_code, pickup_zip_code_lat, pickup_zip_code_lon, "
    "dropoff_zip_code, dropoff_zip_code_lat, dropoff_zip_code_lon, "
    "COUNT(*) AS cnt "
    "FROM base_data "
    "GROUP BY 1,2,3,4,5,6 "
    "limit 10000 "
)

# In[161]:

get_ipython().run_cell_magic('time', '', "agg_df = pd.read_gbq(query=agg_query, dialect='standard', project_id='geultto')\n")

# In[162]:

agg_df.head()

# In[164]:

agg_df = agg_df.sort_values('cnt', ascending=False)

# In[165]:

# Keep only the 100 busiest zip-code pairs.
agg_df = agg_df[:100]

# In[226]:

arc_layer = pdk.Layer(
    'ArcLayer',
    agg_df,
    get_source_position='[pickup_zip_code_lon, pickup_zip_code_lat]',
    get_target_position='[dropoff_zip_code_lon, dropoff_zip_code_lat]',
    get_source_color='[255, 255, 120]',
    get_target_color='[255, 0, 0]',
    width_units='meters',
    # Width grows with the trip count: 1 plus 10 for every 500 trips.
    get_width="1+10*cnt/500",
    pickable=True,
    auto_highlight=True,
)

nyc_center = [-73.9808, 40.7648]
view_state = pdk.ViewState(longitude=nyc_center[0], latitude=nyc_center[1], zoom=9)

r = pdk.Deck(
    layers=[arc_layer],
    initial_view_state=view_state,
    tooltip={
        'html': 'count: {cnt}',
        'style': {
            'color': 'white'
        }
    }
)
r.show()

# ### Widget by day of week

# In[230]:

# Same aggregation, additionally grouped by weekday.  The '%u' format value
# minus 1 gives the `weekday` column, which the slider below selects with
# values 0-6.  The inner trips subquery is capped at 100000 rows.
agg_query2 = (
    _BASE_DATA_CTE_HEAD
    + "LIMIT 100000 "
    + _BASE_DATA_CTE_TAIL
    + "SELECT CAST(format_datetime('%u', pickup_datetime) AS INT64) -1 AS weekday, "
    "pickup_zip_code, pickup_zip_code_lat, pickup_zip_code_lon, "
    "dropoff_zip_code, dropoff_zip_code_lat, dropoff_zip_code_lon, "
    "COUNT(*) AS cnt "
    "FROM base_data "
    "GROUP BY 1,2,3,4,5,6,7 "
)

# In[242]:

get_ipython().run_cell_magic('time', '', "agg_df2 = pd.read_gbq(query=agg_query2, dialect='standard', project_id='geultto')\n")

# In[243]:

agg_df2.head()

# In[244]:

default_data = agg_df2[agg_df2['weekday'] == 0].to_dict(orient='records')

# In[250]:

arc_layer = pdk.Layer(
    'ArcLayer',
    default_data,
    get_source_position='[pickup_zip_code_lon, pickup_zip_code_lat]',
    get_target_position='[dropoff_zip_code_lon, dropoff_zip_code_lat]',
    get_source_color='[255, 255, 120]',
    get_target_color='[255, 0, 0]',
    width_units='meters',
    get_width="1+10*cnt/500",
    pickable=True,
    auto_highlight=True,
)

nyc_center = [-73.9808, 40.7648]
view_state = pdk.ViewState(longitude=nyc_center[0], latitude=nyc_center[1], zoom=9)

r = pdk.Deck(
    layers=[arc_layer],
    initial_view_state=view_state,
    tooltip={
        'html': 'count: {cnt}',
        'style': {
            'color': 'white'
        }
    }
)
r.show()

# In[246]:

# Create the slider widget
import ipywidgets as widgets
from IPython.display import display

slider = widgets.IntSlider(0, min=0, max=6, step=1)


# Define the callback used by the widget
def on_change(change):
    """Show the arcs for the weekday the slider was moved to.

    Args:
        change: The change notification passed by ``slider.observe``; its
            ``'new'`` entry holds the newly selected weekday (0-6).
    """
    results = agg_df2[agg_df2['weekday'] == change['new']].to_dict(orient='records')
    arc_layer.data = results
    r.update()


# Connect the slider to the Deck
slider.observe(on_change, names='value')
display(slider)

# In[ ]: