#!/usr/bin/env python
# coding: utf-8
# ### Install
# In[1]:
# Install pydeck with pip3 through the IPython shell escape.
# NOTE: `get_ipython()` is only defined inside an IPython/Jupyter session.
get_ipython().system('pip3 install pydeck')
# In[3]:
# Install the pydeck notebook extension first, then enable it (the order matters).
get_ipython().system('jupyter nbextension install --sys-prefix --symlink --overwrite --py pydeck')
get_ipython().system('jupyter nbextension enable --sys-prefix --py pydeck')
# In[1]:
# export MAPBOX_API_KEY="your key"
# ### 공식 홈페이지 예시
# In[11]:
import pydeck
# 2014 locations of car accidents in the UK
# Home-page demo: draw the UK accident CSV as an extruded HexagonLayer.
# The two adjacent string literals below are concatenated into a single URL.
UK_ACCIDENTS_DATA = ('https://raw.githubusercontent.com/uber-common/'
'deck.gl-data/master/examples/3d-heatmap/heatmap-data.csv')
# Define a layer to display on a map
# The data argument is the CSV URL; `get_position` names the `lng`/`lat` fields to read.
layer = pydeck.Layer(
'HexagonLayer',
UK_ACCIDENTS_DATA,
get_position='[lng, lat]',
auto_highlight=True,
elevation_scale=50,
pickable=True,
elevation_range=[0, 3000],
extruded=True,
coverage=1)
# Set the viewport location
view_state = pydeck.ViewState(
longitude=-1.415,
latitude=52.2323,
zoom=6,
min_zoom=5,
max_zoom=15,
pitch=40.5,
bearing=-27.36)
# Render
r = pydeck.Deck(layers=[layer], initial_view_state=view_state)
# r.to_html('demo.html')
r.show()
# ### Conway의 생명 게임
# - [Example](https://github.com/uber/deck.gl/blob/master/bindings/python/pydeck/examples/06%20-%20Conway's%20Game%20of%20Life.ipynb)
# In[12]:
import random
def new_board(x, y, num_live_cells=2, num_dead_cells=3):
    """Initialize a random board for Conway's Game of Life.

    Args:
        x: Number of cells in each row.
        y: Number of rows.
        num_live_cells: Relative weight of live cells (1) in the random draw.
        num_dead_cells: Relative weight of dead cells (0) in the random draw.

    Returns:
        A list of ``y`` rows, each a list of ``x`` cells holding 0 or 1.

    Raises:
        IndexError: If both weights are 0 and at least one cell must be drawn.
    """
    # Defaults to a 3:2 dead cell:live cell ratio. The pool is identical for
    # every cell, so build it once rather than once per cell.
    cell_pool = [0] * num_dead_cells + [1] * num_live_cells
    return [[random.choice(cell_pool) for _ in range(x)] for _ in range(y)]
def get(board, x, y):
    """Look up the cell at (x, y); out-of-range coordinates wrap around the board."""
    # The row index wraps on the number of rows, the column index on the width of row 0.
    row = board[y % len(board)]
    return row[x % len(board[0])]
def assign(board, x, y, value):
    """Store ``value`` at (x, y) in place; out-of-range coordinates wrap around the board."""
    # The row index wraps on the number of rows, the column index on the width of row 0.
    row = board[y % len(board)]
    row[x % len(board[0])] = value
def count_neighbors(board, x, y):
    """Sum the values of the eight cells surrounding (x, y).

    With 0/1 cells this is the number of living neighbors. Lookups go through
    ``get``, so neighbors past an edge wrap around to the opposite side.
    """
    neighbor_offsets = (
        (-1, 0), (1, 0), (0, -1), (0, 1),
        (1, 1), (1, -1), (-1, 1), (-1, -1),
    )
    return sum(get(board, x + dx, y + dy) for dx, dy in neighbor_offsets)
def process_life(board):
    """Compute the next generation of Conway's Game of Life.

    Args:
        board: List of rows of 0/1 cells. It is only read, never modified.
            Rows are assumed to share the width of the first row.

    Returns:
        A new board with the same number of rows and the width of row 0.
    """
    # Start from an all-dead board. Every cell is written below, so there is
    # no reason to generate (and then overwrite) a random board here.
    next_board = [[0] * len(board[0]) for _ in range(len(board))]
    for y in range(len(board)):
        for x in range(len(board[y])):
            num_neighbors = count_neighbors(board, x, y)
            is_alive = get(board, x, y) == 1
            # A live cell survives with 2 or 3 neighbors and a dead cell is
            # born with exactly 3; every other cell is dead next generation.
            lives = num_neighbors == 3 or (is_alive and num_neighbors == 2)
            assign(next_board, x, y, 1 if lives else 0)
    return next_board
# In[13]:
from IPython.display import clear_output
import time
def draw_board(board):
    """Render a board as text, one line per row.

    Args:
        board: Iterable of rows; a cell equal to 1 is drawn as alive.

    Returns:
        A string with one newline-terminated line per row. Every cell is two
        characters wide, so columns line up from row to row.
    """
    live_cell = '* '
    # A dead cell must be as wide as a live one, otherwise the grid is skewed.
    dead_cell = ' ' * len(live_cell)
    return ''.join(
        ''.join(live_cell if col == 1 else dead_cell for col in row) + '\n'
        for row in board
    )
# Play the game as text in the cell output, redrawing the board every generation.
board = new_board(20, 20)
NUM_ITERATIONS = 100
for i in range(NUM_ITERATIONS):
    print('Iteration {}'.format(i + 1))
    board = process_life(board)
    res = draw_board(board)
    print(res)
    time.sleep(0.1)
    # wait=True holds the old frame until the next one is printed.
    clear_output(wait=True)
# In[14]:
import numpy as np
import pandas as pd
import pydeck as deck
# Four-component colors handed to the layer's `get_color`:
# PURPLE marks truthy (live) cells and PINK marks the rest.
PINK = [155, 155, 255, 245]
PURPLE = [255, 155, 255, 245]
# Cell indices are divided by this to get the coordinates that are plotted.
SCALING_FACTOR = 1000.0


def convert_board_to_df(board):
    """Flatten a board into a DataFrame with one row per cell.

    Args:
        board: List of equal-length rows of 0/1 cells, indexed ``board[y][x]``.

    Returns:
        A DataFrame with columns ``position`` (``[x, y]`` scaled down by
        ``SCALING_FACTOR``) and ``color`` (``PURPLE`` for live cells,
        ``PINK`` otherwise). Rows are ordered column by column (x outer,
        y inner). An empty board yields an empty DataFrame with both columns.
    """
    width = len(board[0]) if board else 0
    rows = [
        [[x / SCALING_FACTOR, y / SCALING_FACTOR], PURPLE if board[y][x] else PINK]
        for x in range(width)
        for y in range(len(board))
    ]
    return pd.DataFrame(rows, columns=['position', 'color'])
# Draw a fresh 30x30 board as a PointCloudLayer, with one point per cell.
board = new_board(30, 30)
records = convert_board_to_df(board)
# `position` and `color` are the DataFrame columns produced by convert_board_to_df.
layer = deck.Layer(
'PointCloudLayer',
records,
get_position='position',
get_color='color',
get_radius=40)
view_state = deck.ViewState(latitude=0.00, longitude=0.00, zoom=13, bearing=44, pitch=45)
# An empty map_style is passed — presumably to render without a base map; confirm.
r = deck.Deck(layers=[layer], initial_view_state=view_state, map_style='')
r.show()
# In[17]:
# Animate the board inside the deck: advance one generation, swap the layer's
# data and push the change to the already displayed widget.
NUM_ITERATIONS = 100
display(r.show())
for i in range(NUM_ITERATIONS):
    board = process_life(board)
    records = convert_board_to_df(board)
    layer.data = records
    r.update()
    time.sleep(0.1)
# In[20]:
import pandas as pd
# Download the accident CSV with pandas and preview the first rows.
UK_ACCIDENTS_DATA = 'https://raw.githubusercontent.com/uber-common/deck.gl-data/master/examples/3d-heatmap/heatmap-data.csv'
pd.read_csv(UK_ACCIDENTS_DATA).head()
# In[24]:
# A bare expression: a notebook cell echoes the URL string.
UK_ACCIDENTS_DATA
# In[29]:
# Rebuild the hexagon demo so `layer` and `r` can be modified by the cells that follow.
layer = pydeck.Layer(
'HexagonLayer',
UK_ACCIDENTS_DATA,
get_position='[lng,lat]',
auto_highlight=True,
elevation_scale=50,
pickable=True,
elevation_range=[0, 3000],
extruded=True,
coverage=1)
# Set the viewport location
view_state = pydeck.ViewState(
longitude=-1.415,
latitude=52.2323,
zoom=6,
min_zoom=5,
max_zoom=15,
pitch=40.5,
bearing=-27.36)
# Combine the layer and the view state, then render
r = pydeck.Deck(layers=[layer], initial_view_state=view_state)
r.show()
# In[33]:
# Change a layer property after rendering and push it to the displayed deck.
layer.elevation_range = [0, 10000]
r.update()
# In[26]:
import pydeck as pdk
# In[28]:
# `pinfo` is the magic behind `pdk.Deck?`; it prints the object's help text.
get_ipython().run_line_magic('pinfo', 'pdk.Deck')
# In[34]:
import time
r.show()
# In[35]:
# Step the upper elevation bound from 0 to 9000 in increments of 1000,
# updating the displayed deck after each change.
for i in range(0, 10000, 1000):
    layer.elevation_range = [0, i]
    r.update()
    time.sleep(0.1)
# ### Scatter Plots
# In[36]:
import pandas as pd
from pydeck import (
data_utils,
Deck,
Layer
)
# First, let's use Pandas to download our data
# Load the Beijing subway station CSV into `df` and preview it.
URL = 'https://raw.githubusercontent.com/ajduberstein/data_sets/master/beijing_subway_station.csv'
df = pd.read_csv(URL)
df.head()
# In[37]:
from ast import literal_eval
# The CSV encodes each [R, G, B, A] color as a string, so parse the column
# back into real lists. Only the `color` column is needed, so apply
# `literal_eval` to that Series instead of walking every row of the frame.
df['color'] = df['color'].apply(literal_eval)
# In[38]:
df.head()
# In[39]:
# Use pydeck's data_utils module to fit a viewport to the central 90% of the data
# `view_proportion=0.9` is the share of points the computed viewport should fit.
viewport = data_utils.compute_view(points=df[['lng', 'lat']], view_proportion=0.9)
# No layers are passed here; the deck is created only to show the computed viewport.
auto_zoom_map = Deck(layers=None, initial_view_state=viewport)
auto_zoom_map.show()
# In[40]:
from IPython.core.display import display
import ipywidgets
year = 2019
# One circle per station, colored by the parsed `color` column.
scatterplot = Layer(
    'ScatterplotLayer',
    df,
    id='scatterplot-layer',
    get_radius=500,
    get_fill_color='color',
    get_position='[lng, lat]')
r = Deck(layers=[scatterplot], initial_view_state=viewport)
# Create an HTML header to display the year. The template has to be a
# single-line literal: a quoted string may not span several source lines.
display_el = ipywidgets.HTML('<h1>{}</h1>'.format(year))
display(display_el)
# Show the current visualization
r.show()
# In[41]:
import time
# Replay the network's growth year by year.
for y in range(1971, 2020):
    # String comparison against the year — assumes `opening_date` holds
    # ISO-style date strings that sort chronologically; TODO confirm.
    scatterplot.data = df[df['opening_date'] <= str(y)]
    year = y
    # Reset the header to display the year (single-line literal, see the
    # widget's initial value).
    display_el.value = '<h1>{}</h1>'.format(year)
    r.update()
    time.sleep(0.2)
# ### Using pydeck to manipulate data
# In[42]:
import pydeck as pdk
# Endpoint queried for taxi availability data.
DATA_URL = 'https://api.data.gov.sg/v1/transport/taxi-availability'
# Six four-component colors running from pale yellow to dark red; the last
# component rises from 25 to 255. Passed as the layer's `color_range`.
COLOR_RANGE = [
[255, 255, 178, 25],
[254, 217, 118, 85],
[254, 178, 76, 127],
[253, 141, 60, 170],
[240, 59, 32, 212],
[189, 0, 38, 255]
]
# In[43]:
import pandas as pd
import requests
# Bound the wait and surface HTTP errors here, rather than as a confusing
# KeyError on "features" further down.
response = requests.get(DATA_URL, timeout=30)
response.raise_for_status()
# NOTE: `json` shadows the standard-library module name; the name is kept
# because it is a notebook-level variable.
json = response.json()
# The first feature's geometry holds the list of [lng, lat] coordinate pairs.
df = pd.DataFrame(json["features"][0]["geometry"]["coordinates"])
df.columns = ['lng', 'lat']
viewport = pdk.data_utils.compute_view(df[['lng', 'lat']])
layer = pdk.Layer(
    'ScreenGridLayer',
    df,
    cell_size_pixels=20,
    color_range=COLOR_RANGE,
    get_position='[lng, lat]',
    pickable=True,
    auto_highlight=True)
r = pdk.Deck(layers=[layer], initial_view_state=viewport)
# In[44]:
r.show()
# In[46]:
# Wrap the widget's `selected_data` in a one-row DataFrame for display.
# presumably this holds the most recently picked object — verify against the pydeck widget docs
pd.DataFrame([r.deck_widget.selected_data])
# ### Plotting massive data sets.ipynb
# In[47]:
import pandas as pd
# Load the four CSV parts of the KITTI subset and stack them into one frame.
all_lidar = pd.concat([
    pd.read_csv(
        'https://raw.githubusercontent.com/ajduberstein/kitti_subset/master/kitti_{}.csv'.format(part))
    for part in range(1, 5)
])
# Filter to one frame of data. Take an explicit copy so the rescaling below
# writes to an independent DataFrame instead of a slice of `all_lidar`
# (assigning into the slice triggers pandas' SettingWithCopyWarning).
lidar = all_lidar[all_lidar['source'] == 136].copy()
# Shrink x/y by a factor of 10000 before they are used as map coordinates.
lidar.loc[:, ['x', 'y']] = lidar[['x', 'y']] / 10000
# In[48]:
import pydeck as pdk
# Plot the selected lidar frame as a point cloud; z is multiplied by 10 in the accessor.
point_cloud = pdk.Layer(
'PointCloudLayer',
lidar[['x', 'y', 'z']],
get_position='[x, y, z * 10]',
get_normal=[0, 0, 1],
get_color=[255, 0, 100, 200],
pickable=True,
auto_highlight=True,
point_size=1)
# Fit the view to the x/y extent (0.9 is the second positional argument of compute_view).
view_state = pdk.data_utils.compute_view(lidar[['x', 'y']], 0.9)
view_state.max_pitch = 360
view_state.pitch = 80
view_state.bearing = 120
# The layer is passed directly as the first positional argument rather than in a list.
r = pdk.Deck(
point_cloud,
initial_view_state=view_state,
map_style='')
r.show()
# In[49]:
import time
from collections import deque
# Choose a handful of frames to loop through
# Choose a handful of frames to loop through
frame_buffer = deque([42, 56, 81, 95])
print('Press the stop icon to exit')
# Frames taken straight from `all_lidar` are not pre-scaled, so the accessor
# does the scaling. It is the same for every frame, so set it once up front.
r.layers[0].get_position = '[x / 10000, y / 10000, z * 10]'
# Runs until the kernel is interrupted.
while True:
    current_frame = frame_buffer[0]
    lidar = all_lidar[all_lidar['source'] == current_frame]
    r.layers[0].data = lidar.to_dict(orient='records')
    # Rotate the deque so a different frame id is at the front next time.
    frame_buffer.rotate()
    r.update()
    time.sleep(0.5)
# ### Interacting with other Jupyter widgets.ipynb
# In[50]:
# Load the Chengdu night-lights CSV and preview it.
LIGHTS_URL = 'https://raw.githubusercontent.com/ajduberstein/lights_at_night/master/chengdu_lights_at_night.csv'
df = pd.read_csv(LIGHTS_URL)
df.head()
# In[ ]:
# Build a per-row color list whose second component is four times the brightness.
df['color'] = df['brightness'].apply(lambda val: [255, val * 4, 255, 255])
df.sample(10)
# In[71]:
# Start with the rows for 1993, converted to a list of record dicts.
plottable = df[df['year'] == 1993].to_dict(orient='records')
# min_zoom and max_zoom both equal the initial zoom, which pins the zoom level.
view_state = pdk.ViewState(
latitude=31.0,
longitude=104.5,
zoom=8,
max_zoom=8,
min_zoom=8)
# NOTE: the variable is called `scatterplot`, but the layer type is HeatmapLayer.
# NOTE(review): `get_radius` may not be a HeatmapLayer property (deck.gl documents
# `radiusPixels`) — confirm whether it has any effect.
scatterplot = pdk.Layer(
'HeatmapLayer',
data=plottable,
get_position='[lng, lat]',
get_weight='brightness',
opacity=0.5,
pickable=False,
get_radius=800)
# controller=None on the view — presumably disables pan/zoom interaction; confirm.
r = pdk.Deck(
layers=[scatterplot],
initial_view_state=view_state,
views=[pdk.View(type='MapView', controller=None)])
r.show()
# In[72]:
import ipywidgets as widgets
from IPython.display import display
# Start on 1993: it is the slider minimum and the year the deck is first
# drawn with, so the initial value is no longer below `min`.
slider = widgets.IntSlider(1993, min=1993, max=2013, step=2)


def on_change(v):
    """Show the rows for the year currently selected on the slider.

    Args:
        v: Change notification passed by the widget. It is not read; the
            current value is taken from ``slider`` directly.
    """
    results = df[df['year'] == slider.value].to_dict(orient='records')
    scatterplot.data = results
    r.update()


# Call on_change whenever the slider's value changes.
slider.observe(on_change, names='value')
display(slider)
# In[ ]:
# Tooltip specification: an HTML template plus the CSS applied to the tooltip box.
tooltip = dict(
    html="Elevation Value: {elevationValue}",
    style=dict(
        backgroundColor="steelblue",
        color="white",
    ),
)
# ### Tooltip
# In[55]:
import pydeck as pdk
# Hexagon demo with an HTML tooltip: `{elevationValue}` is a placeholder
# inside the template string.
layer = pdk.Layer(
'HexagonLayer',
UK_ACCIDENTS_DATA,
get_position='[lng, lat]',
auto_highlight=True,
elevation_scale=50,
pickable=True,
elevation_range=[0, 3000],
extruded=True,
coverage=1)
# Set the viewport location
view_state = pdk.ViewState(
longitude=-1.415,
latitude=52.2323,
zoom=6,
min_zoom=5,
max_zoom=15,
pitch=40.5,
bearing=-27.36)
# Combine the layer, view state and tooltip, then render
r = pdk.Deck(
layers=[layer],
initial_view_state=view_state,
tooltip={
'html': 'Elevation Value: {elevationValue}',
'style': {
'color': 'white'
}
}
)
r.show()
# - 그냥 텍스트로 하기
#
# In[54]:
import pydeck as pdk
# Same deck, but the tooltip is given as a plain-text template under the "text" key.
layer = pdk.Layer(
'HexagonLayer',
UK_ACCIDENTS_DATA,
get_position='[lng, lat]',
auto_highlight=True,
elevation_scale=50,
pickable=True,
elevation_range=[0, 3000],
extruded=True,
coverage=1)
# Set the viewport location
view_state = pdk.ViewState(
longitude=-1.415,
latitude=52.2323,
zoom=6,
min_zoom=5,
max_zoom=15,
pitch=40.5,
bearing=-27.36)
# Combine the layer, view state and tooltip, then render
r = pdk.Deck(
layers=[layer],
initial_view_state=view_state,
tooltip = {
"text": "Elevation: {elevationValue}"
}
)
r.show()
# - Tooltip을 그냥 True값만 주기
#
# In[56]:
import pydeck as pdk
# Same deck again, this time with `tooltip=True` instead of a template dict.
layer = pdk.Layer(
'HexagonLayer',
UK_ACCIDENTS_DATA,
get_position='[lng, lat]',
auto_highlight=True,
elevation_scale=50,
pickable=True,
elevation_range=[0, 3000],
extruded=True,
coverage=1)
# Set the viewport location
view_state = pdk.ViewState(
longitude=-1.415,
latitude=52.2323,
zoom=6,
min_zoom=5,
max_zoom=15,
pitch=40.5,
bearing=-27.36)
# Combine the layer and view state, then render
r = pdk.Deck(
layers=[layer],
initial_view_state=view_state,
tooltip=True
)
r.show()
# In[57]:
# Load the accident CSV into a DataFrame so the raw rows can be inspected.
UK_ACCIDENTS_DATA = 'https://raw.githubusercontent.com/uber-common/deck.gl-data/master/examples/3d-heatmap/heatmap-data.csv'
uk_data = pd.read_csv(UK_ACCIDENTS_DATA)
# In[58]:
uk_data.head()
# ### 미국 택시 데이터 시각화
# In[87]:
# BigQuery standard SQL: up to 100000 rows of the 2015 yellow-taxi table whose
# pickup month is January. There is no ORDER BY, so which rows come back is
# not fixed.
query = """
SELECT
*
FROM `bigquery-public-data.new_york_taxi_trips.tlc_yellow_trips_2015`
WHERE EXTRACT(MONTH from pickup_datetime) = 1
LIMIT 100000
"""
# In[63]:
# Run the query under the %%time cell magic and keep the result in `taxi_df`.
get_ipython().run_cell_magic('time', '', "taxi_df = pd.read_gbq(query=query, dialect='standard', project_id='geultto')\n")
# ### GridLayer
# - 10만개 데이터
# In[91]:
# NOTE: the variable is called `arc_layer`, but this layer is a GridLayer over
# the pickup coordinates.
arc_layer = pdk.Layer(
    'GridLayer',
    taxi_df,
    get_position='[pickup_longitude, pickup_latitude]',
    pickable=True,
    auto_highlight=True,
)
nyc_center = [-73.9808, 40.7648]
view_state = pdk.ViewState(longitude=nyc_center[0], latitude=nyc_center[1], zoom=9)
# Tooltips are configured on the Deck, not on an individual layer.
r = pdk.Deck(layers=[arc_layer], initial_view_state=view_state, tooltip=True)
r.show()
# ### Arc Layer
# In[106]:
# BigQuery standard SQL. `base_data` takes January 2015 yellow-taxi trips whose
# pickup and dropoff latitudes lie within [-90, 90], and joins each trip twice
# against the NY rows of the zip-code boundary table: once on the zip polygon
# containing the pickup point and once on the one containing the dropoff point.
# Each trip gains the zip code and the internal-point lat/lon of both ends.
# The final SELECT returns up to 10000 rows with no ORDER BY.
zip_code_query = """
WITH base_data AS
(
SELECT
nyc_taxi.*,
pickup.zip_code as pickup_zip_code,
pickup.internal_point_lat as pickup_zip_code_lat,
pickup.internal_point_lon as pickup_zip_code_lon,
dropoff.zip_code as dropoff_zip_code,
dropoff.internal_point_lat as dropoff_zip_code_lat,
dropoff.internal_point_lon as dropoff_zip_code_lon
FROM (
SELECT *
FROM `bigquery-public-data.new_york_taxi_trips.tlc_yellow_trips_2015`
WHERE
EXTRACT(MONTH from pickup_datetime) = 1
and pickup_latitude <= 90 and pickup_latitude >= -90
and dropoff_latitude <= 90 and dropoff_latitude >= -90
) AS nyc_taxi
JOIN (
SELECT zip_code, state_code, state_name, city, county, zip_code_geom, internal_point_lat, internal_point_lon
FROM `bigquery-public-data.geo_us_boundaries.zip_codes`
WHERE state_code='NY'
) AS pickup
ON ST_CONTAINS(pickup.zip_code_geom, st_geogpoint(pickup_longitude, pickup_latitude))
JOIN (
SELECT zip_code, state_code, state_name, city, county, zip_code_geom, internal_point_lat, internal_point_lon
FROM `bigquery-public-data.geo_us_boundaries.zip_codes`
WHERE state_code='NY'
) AS dropoff
ON ST_CONTAINS(dropoff.zip_code_geom, st_geogpoint(dropoff_longitude, dropoff_latitude))
)
SELECT
*
FROM base_data
limit 10000
"""
# In[107]:
# Run the query under the %%time cell magic and keep the result in `taxi_df_by_zipcode`.
get_ipython().run_cell_magic('time', '', "taxi_df_by_zipcode = pd.read_gbq(query=zip_code_query, dialect='standard', project_id='geultto')\n")
# In[108]:
taxi_df_by_zipcode.head(3)
# In[114]:
# One arc per trip, from the pickup zip code's internal point to the dropoff's.
arc_layer = pdk.Layer(
    'ArcLayer',
    taxi_df_by_zipcode,
    get_source_position='[pickup_zip_code_lon, pickup_zip_code_lat]',
    get_target_position='[dropoff_zip_code_lon, dropoff_zip_code_lat]',
    get_source_color='[255, 255, 120]',
    get_target_color='[255, 0, 0]',
    # Constant width: the rows are individual trips, so there is no per-arc
    # weight column to scale by (`elevationValue` is not a column here).
    get_width=1,
    pickable=True,
    auto_highlight=True,
)
nyc_center = [-73.9808, 40.7648]
view_state = pdk.ViewState(longitude=nyc_center[0], latitude=nyc_center[1], zoom=9)
r = pdk.Deck(layers=[arc_layer], initial_view_state=view_state)
r.show()
# ### Aggregate
# In[160]:
# BigQuery standard SQL. `base_data` attaches the pickup and dropoff zip code
# (and each zip's internal-point lat/lon) to every January 2015 yellow-taxi
# trip with valid latitudes. The final SELECT counts trips per
# (pickup zip, dropoff zip) pair.
# The ORDER BY makes the LIMIT keep the 10000 busiest pairs; without it the
# rows returned are arbitrary, and a later "top N by cnt" selection would be
# taken from an arbitrary subset.
agg_query = """
WITH base_data AS
(
SELECT
nyc_taxi.*,
pickup.zip_code as pickup_zip_code,
pickup.internal_point_lat as pickup_zip_code_lat,
pickup.internal_point_lon as pickup_zip_code_lon,
dropoff.zip_code as dropoff_zip_code,
dropoff.internal_point_lat as dropoff_zip_code_lat,
dropoff.internal_point_lon as dropoff_zip_code_lon
FROM (
SELECT *
FROM `bigquery-public-data.new_york_taxi_trips.tlc_yellow_trips_2015`
WHERE
EXTRACT(MONTH from pickup_datetime) = 1
and pickup_latitude <= 90 and pickup_latitude >= -90
and dropoff_latitude <= 90 and dropoff_latitude >= -90
) AS nyc_taxi
JOIN (
SELECT zip_code, state_code, state_name, city, county, zip_code_geom, internal_point_lat, internal_point_lon
FROM `bigquery-public-data.geo_us_boundaries.zip_codes`
WHERE state_code='NY'
) AS pickup
ON ST_CONTAINS(pickup.zip_code_geom, st_geogpoint(pickup_longitude, pickup_latitude))
JOIN (
SELECT zip_code, state_code, state_name, city, county, zip_code_geom, internal_point_lat, internal_point_lon
FROM `bigquery-public-data.geo_us_boundaries.zip_codes`
WHERE state_code='NY'
) AS dropoff
ON ST_CONTAINS(dropoff.zip_code_geom, st_geogpoint(dropoff_longitude, dropoff_latitude))
)
SELECT
pickup_zip_code,
pickup_zip_code_lat,
pickup_zip_code_lon,
dropoff_zip_code,
dropoff_zip_code_lat,
dropoff_zip_code_lon,
COUNT(*) AS cnt
FROM base_data
GROUP BY 1,2,3,4,5,6
ORDER BY cnt DESC
limit 10000
"""
# In[161]:
# Run the aggregate query under the %%time cell magic and keep the result in `agg_df`.
get_ipython().run_cell_magic('time', '', "agg_df = pd.read_gbq(query=agg_query, dialect='standard', project_id='geultto')\n")
# In[162]:
agg_df.head()
# In[164]:
# Order the rows by trip count, largest first...
agg_df = agg_df.sort_values('cnt', ascending=False)
# In[165]:
# ...and keep only the first 100 of them.
agg_df = agg_df[:100]
# In[226]:
# One arc per zip-code pair; the width expression grows with the pair's trip count.
arc_layer = pdk.Layer(
'ArcLayer',
agg_df,
get_source_position='[pickup_zip_code_lon, pickup_zip_code_lat]',
get_target_position='[dropoff_zip_code_lon, dropoff_zip_code_lat]',
get_source_color='[255, 255, 120]',
get_target_color='[255, 0, 0]',
width_units='meters',
get_width="1+10*cnt/500",
pickable=True,
auto_highlight=True,
)
nyc_center = [-73.9808, 40.7648]
view_state = pdk.ViewState(longitude=nyc_center[0], latitude=nyc_center[1], zoom=9)
# The tooltip template refers to the `cnt` column of the hovered row.
r = pdk.Deck(layers=[arc_layer], initial_view_state=view_state,
tooltip={
'html': 'count: {cnt}',
'style': {
'color': 'white'
}
}
)
r.show()
# ### 요일별 위젯
# In[230]:
# BigQuery standard SQL. Same zip-code join as the other queries, but the trip
# subquery is capped at 100000 rows (with no ORDER BY, so the sample is not
# fixed), and the final SELECT also groups by `weekday`. `weekday` is
# format_datetime('%u', pickup_datetime) cast to INT64 minus 1.
# NOTE(review): '%u' presumably yields 1 (Monday) to 7 (Sunday), making
# weekday 0 Monday — confirm against the BigQuery format-element docs.
agg_query2 = """
WITH base_data AS
(
SELECT
nyc_taxi.*,
pickup.zip_code as pickup_zip_code,
pickup.internal_point_lat as pickup_zip_code_lat,
pickup.internal_point_lon as pickup_zip_code_lon,
dropoff.zip_code as dropoff_zip_code,
dropoff.internal_point_lat as dropoff_zip_code_lat,
dropoff.internal_point_lon as dropoff_zip_code_lon
FROM (
SELECT *
FROM `bigquery-public-data.new_york_taxi_trips.tlc_yellow_trips_2015`
WHERE
EXTRACT(MONTH from pickup_datetime) = 1
and pickup_latitude <= 90 and pickup_latitude >= -90
and dropoff_latitude <= 90 and dropoff_latitude >= -90
LIMIT 100000
) AS nyc_taxi
JOIN (
SELECT zip_code, state_code, state_name, city, county, zip_code_geom, internal_point_lat, internal_point_lon
FROM `bigquery-public-data.geo_us_boundaries.zip_codes`
WHERE state_code='NY'
) AS pickup
ON ST_CONTAINS(pickup.zip_code_geom, st_geogpoint(pickup_longitude, pickup_latitude))
JOIN (
SELECT zip_code, state_code, state_name, city, county, zip_code_geom, internal_point_lat, internal_point_lon
FROM `bigquery-public-data.geo_us_boundaries.zip_codes`
WHERE state_code='NY'
) AS dropoff
ON ST_CONTAINS(dropoff.zip_code_geom, st_geogpoint(dropoff_longitude, dropoff_latitude))
)
SELECT
CAST(format_datetime('%u', pickup_datetime) AS INT64) -1 AS weekday,
pickup_zip_code,
pickup_zip_code_lat,
pickup_zip_code_lon,
dropoff_zip_code,
dropoff_zip_code_lat,
dropoff_zip_code_lon,
COUNT(*) AS cnt
FROM base_data
GROUP BY 1,2,3,4,5,6,7
"""
# In[242]:
# Run the query under the %%time cell magic and keep the result in `agg_df2`.
get_ipython().run_cell_magic('time', '', "agg_df2 = pd.read_gbq(query=agg_query2, dialect='standard', project_id='geultto')\n")
# In[243]:
agg_df2.head()
# In[244]:
# Initial data for the layer: the rows for weekday 0, as a list of record dicts.
default_data = agg_df2[agg_df2['weekday'] == 0].to_dict(orient='records')
# In[250]:
# Arc layer drawn with the weekday-0 records to start with.
arc_layer = pdk.Layer(
'ArcLayer',
default_data,
get_source_position='[pickup_zip_code_lon, pickup_zip_code_lat]',
get_target_position='[dropoff_zip_code_lon, dropoff_zip_code_lat]',
get_source_color='[255, 255, 120]',
get_target_color='[255, 0, 0]',
width_units='meters',
get_width="1+10*cnt/500",
pickable=True,
auto_highlight=True,
)
nyc_center = [-73.9808, 40.7648]
view_state = pdk.ViewState(longitude=nyc_center[0], latitude=nyc_center[1], zoom=9)
# The tooltip template refers to the `cnt` field of the hovered record.
r = pdk.Deck(layers=[arc_layer], initial_view_state=view_state,
tooltip={
'html': 'count: {cnt}',
'style': {
'color': 'white'
}
}
)
r.show()
# In[246]:
# Widget 슬라이더 생성
import ipywidgets as widgets
from IPython.display import display
slider = widgets.IntSlider(0, min=0, max=6, step=1)


# Callback used by the widget
def on_change(v):
    """Show the arcs for the weekday currently selected on the slider.

    ``v`` is the change notification passed by the widget; it is not read,
    the current value is taken from ``slider`` directly.
    """
    weekday_rows = agg_df2[agg_df2['weekday'] == slider.value]
    arc_layer.data = weekday_rows.to_dict(orient='records')
    r.update()


# Connect the slider to the Deck
slider.observe(on_change, names='value')
display(slider)
# In[ ]: