#!/usr/bin/env python
# coding: utf-8
# # Airport Weather Data
#
# Grab weather data files from Weather Underground for airports near capital cities of the BRICS countries.
# In[ ]:
# pandas, a data analysis package, used to have external data source access built in.
# Recently, that data access has been moved to an external package (pandas-datareader),
# so we may as well go with the recommended external data source route.
# The %pip line magic is used rather than a shell "pip" call so that the package
# is installed into the environment of the running kernel.
get_ipython().run_line_magic('pip', 'install pandas-datareader')
# In[4]:
#Import the python packages we need
import pandas as pd
# In[2]:
# Which cities are the capitals? The World Bank country list can tell us.
from pandas_datareader import wb

countries = wb.get_countries()

# Country names of the five BRICS members, as they are looked up in the 'name' column
bricsCountries = ['Brazil', 'Russian Federation', 'India', 'China', 'South Africa']

# Keep only the rows of the country list whose name is one of the BRICS members
brics = countries.loc[countries['name'].isin(bricsCountries)]
brics
# In[3]:
# Limit the table to the country name, capital city and the two ISO code columns
brics = brics.loc[:, ['name', 'capitalCity', 'iso2c', 'iso3c']]
brics
# In[4]:
# Airport data, including IATA codes, loaded from a CSV file published on the web
iata = pd.read_csv(
    filepath_or_buffer='https://raw.githubusercontent.com/datasets/airport-codes/master/data/airport-codes.csv'
)
# Preview the first few rows
iata.head()
# In[5]:
# Columns of the airport table that we want to display
cols = ['iso_country', 'name', 'municipality', 'iata_code']

# Row filters, each a boolean mask over the airport table:
#  - the airport has an IATA code
airportHasIATAcode = iata['iata_code'].notnull()
#  - the airport is typed as a large airport
airportIsLargeAirport = iata['type'].eq('large_airport')
#  - the airport's municipality is one of the BRICS capital cities
airportIsInCapital = iata['municipality'].isin(brics['capitalCity'])

# Show the rows that pass all three filters, restricted to the columns of interest
iata.loc[airportHasIATAcode & airportIsLargeAirport & airportIsInCapital, cols]
# In[6]:
# The previous filter was not satisfactory, so change the criteria: look for
# large airports with an IATA code that are in a BRICS country and have
# "International" in their name.
# na=False makes an airport with a missing name count as "no match", so the
# mask is a plain boolean Series instead of one that can contain NaN.
internationalAirport = iata['name'].str.contains('International', na=False)
bricsCountry = iata['iso_country'].isin(brics['iso2c'])
iata.loc[airportHasIATAcode & airportIsLargeAirport & internationalAirport & bricsCountry, cols]
# In[25]:
# We're going to grab quite a few data files - and we want to play nice by sleeping between them...
import os
import time

# Look at the URL - the airport code, year and month are the only parts that
# vary, which lets us construct it automatically
url_pattern = 'https://www.wunderground.com/history/airport/{airport}/{year}/{month}/1/MonthlyHistory.html?format=1'

# List of airports from inspection above
capitalAirports = ['BSB', 'DME', 'DEL', 'PEK', 'CPT']

# Folder to put data files in
airportDataDir = 'airportData'
# Make sure that folder exists - and create it if it doesn't
os.makedirs(airportDataDir, exist_ok=True)

# Column order of the first monthly file downloaded; every saved file is
# written with its columns in this order
colorder = None

# Grab data for multiple years if necessary...
for year in [2014]:
    # Print out a message to keep track of progress...
    print('Getting data for {}...'.format(year))
    # Grab data for multiple airports, if necessary
    for airport in capitalAirports:
        print('\t...{}...: '.format(airport), end='')
        # Monthly dataframes for one airport for one year; joined once after
        # the month loop rather than growing a dataframe inside it
        monthlyFrames = []
        # Loop through months in a year - range(1,N) returns [1,2,3,..,N-1]
        for month in range(1, 12 + 1):
            # Play nice by waiting a second between each call to Wunderground
            time.sleep(1)
            # Construct a URL using specific loop values
            url = url_pattern.format(airport=airport, year=year, month=month)
            print('{} '.format(month), end='')
            tmp = pd.read_csv(url)
            # Rename to 'Date' any column whose label starts with '+' or '-'
            # or is one of CST / UTC / SAST.
            # NOTE(review): these look like per-airport timezone labels on the
            # date column; a label outside this list would not be renamed - confirm.
            tmp.columns = [
                'Date' if c.startswith(('+', '-')) or c in ('CST', 'UTC', 'SAST') else c
                for c in tmp.columns
            ]
            if colorder is None:
                colorder = tmp.columns
            # May also want to reorder so Date is the first column, e.g.:
            #   ['Date'] + [c for c in tmp.columns if c != 'Date']
            monthlyFrames.append(tmp)
        # pd.concat() takes a list of dataframes and joins them vertically
        df = pd.concat(monthlyFrames)
        # Create a filename to store the data in: one file per airport per year
        fn = "{datadir}/airport_weather_{iata}_{year}.csv".format(datadir=airportDataDir, year=year, iata=airport)
        df[colorder].to_csv(fn, index=False)
        print('...done {} {}'.format(airport, year))
# In[8]:
#List the files in the data dump directory
get_ipython().system('ls {airportDataDir}')
# In[27]:
#Preview the first lines of some of the saved files; they are written into the
#data directory, so every path is built from airportDataDir
get_ipython().system('head {airportDataDir}/airport_weather_PEK_2014.csv')
# In[11]:
get_ipython().system('head {airportDataDir}/airport_weather_BSB_2014.csv')
# In[10]:
get_ipython().system('head {airportDataDir}/airport_weather_DEL_2014.csv')
# In[9]:
#Cleaning example
# The saved files are written into the data directory, so read from there
dfdirty = pd.read_csv('{}/airport_weather_PEK_2014.csv'.format(airportDataDir))
# NOTE(review): the text being removed was lost from this cell (the string
# literal was broken across two lines). It is assumed to be the HTML line-break
# tag '<br />' trailing the last column's header and values - confirm against
# the head of the saved files.
dfdirty.columns = [c.replace('<br />', '') for c in dfdirty.columns]
dfdirty['WindDirDegrees'] = dfdirty['WindDirDegrees'].str.replace('<br />', '')
dfdirty.head()
# ## Simple Mapping
#
# Just because we can, here is an example of how to create a simple map using the airport locations...
# In[17]:
import folium

# Map that the airport markers are added to
airportMap = folium.Map()


def addMarker(row):
    """Add a marker for one airport to ``airportMap``.

    ``row`` is one row of the ``iata`` table. Its ``latitude_deg`` and
    ``longitude_deg`` values position the marker; its ``name``,
    ``municipality`` and ``iata_code`` values make up the popup text.
    """
    latlng = [row['latitude_deg'], row['longitude_deg']]
    popupText = '{}, {} ({})'.format(row['name'], row['municipality'], row['iata_code'])
    folium.Marker(latlng, popup=popupText).add_to(airportMap)


# Add a marker for each airport in capitalAirports, then display the map
iata[iata['iata_code'].isin(capitalAirports)].apply(addMarker, axis=1)
airportMap
# In[ ]: