#!/usr/bin/env python
# coding: utf-8

# # Airport Weather Data
#
# Grab weather data files from Weather Underground for airports near capital cities of the BRICS countries.

# In[ ]:

# pandas is a data analysis package that used to have access to external
# data sources built in. That access has since moved to a separate package,
# so go with the recommended external data-source route and install it.
get_ipython().system('pip install pandas-datareader')


# In[4]:

# Import the Python packages we need.
import pandas as pd


# In[2]:

# What are the capitals? The World Bank data source can help.
from pandas_datareader import wb

countries = wb.get_countries()
bricsCountries = ['Brazil', 'Russian Federation', 'India', 'China', 'South Africa']
# Keep only the World Bank rows whose country name is one of the BRICS names.
brics = countries.loc[countries['name'].isin(bricsCountries)]
brics


# In[3]:

# Limit the columns to the country name, its capital and the two ISO codes.
brics = brics.loc[:, ['name', 'capitalCity', 'iso2c', 'iso3c']]
brics


# In[4]:

# A list of IATA codes, published as a CSV file on the web.
iata = pd.read_csv('https://raw.githubusercontent.com/datasets/airport-codes/master/data/airport-codes.csv')
iata.head()


# In[5]:

# Limit the IATA data columns to the ones we're interested in.
cols = ['iso_country', 'name', 'municipality', 'iata_code']

# Row filters: each one is a boolean mask over the rows of `iata`.
airportHasIATAcode = iata['iata_code'].notna()
airportIsLargeAirport = (iata['type'] == 'large_airport')
airportIsInCapital = iata['municipality'].isin(brics['capitalCity'])

# Select the rows that satisfy all three filters, showing only `cols`.
iata.loc[airportHasIATAcode & airportIsLargeAirport & airportIsInCapital, cols]


# In[6]:

# Real-world data is always messy, so change the filter criteria: look for
# large airports with "International" in the name, anywhere in a BRICS
# country, instead of matching on the capital city's name.
internationalAirport = iata['name'].str.contains('International')
bricsCountry = iata['iso_country'].isin(brics['iso2c'])

iata.loc[
    airportHasIATAcode & airportIsLargeAirport & internationalAirport & bricsCountry,
    cols,
]


# In[25]:

# We're going to grab quite a few data files - and we want to play nice by sleeping between them...
import time

# Look at the URL - spot the pattern that lets us construct it automatically.
url_pattern = 'https://www.wunderground.com/history/airport/{airport}/{year}/{month}/1/MonthlyHistory.html?format=1'

# List of airports, chosen by inspecting the filtered IATA table.
capitalAirports = ['BSB', 'DME', 'DEL', 'PEK', 'CPT']

# Folder to put data files in.
airportDataDir = 'airportData'
# Make sure that folder exists - and create it if it doesn't.
get_ipython().system('mkdir -p {airportDataDir}')

# Column order of the first file downloaded; every output file is written
# with its columns in this order.
colorder = None

# Grab data for multiple years if necessary...
for year in [2014]:
    # Print out a message to keep track of progress...
    print('Getting data for {}...'.format(year))

    # Grab data for multiple airports, if necessary.
    for airport in capitalAirports:
        print('\t...{}...: '.format(airport), end='')

        # One dataframe per month; they are joined once after the month loop
        # rather than re-concatenating the growing frame on every iteration.
        monthlyFrames = []

        # Loop through months in a year - range(1,N) returns [1,2,3,..,N-1].
        for month in range(1, 12 + 1):
            # Play nice by waiting a second between each call to Wunderground.
            time.sleep(1)

            # Construct a URL using the specific loop values.
            url = url_pattern.format(airport=airport, year=year, month=month)
            print('{} '.format(month), end='')

            tmp = pd.read_csv(url)
            # The date column's header differs between files: rename it to
            # 'Date' when it starts with '+' or '-' or is one of the listed
            # codes. (Or do a "not in weathercols" test to identify Date?)
            tmp.columns = [
                'Date' if c.startswith(('+', '-')) or c in ['CST', 'UTC', 'SAST'] else c
                for c in tmp.columns
            ]
            if colorder is None:
                colorder = tmp.columns
            # May also want to reorder so Date is the first column, e.g.:
            # tmp = tmp[['Date'] + [c for c in tmp.columns if c != 'Date']]
            monthlyFrames.append(tmp)

        # pd.concat() takes a list of dataframes and joins them vertically,
        # giving a single dataframe for one airport for all months in one year.
        df = pd.concat(monthlyFrames)

        # Create a filename to store the data in.
        fn = "{datadir}/airport_weather_{iata}_{year}.csv".format(datadir=airportDataDir, year=year, iata=airport)
        df[colorder].to_csv(fn, index=False)
        print('...done {} {}'.format(airport, year))


# In[8]:

# List the files in the data dump directory.
get_ipython().system('ls {airportDataDir}')


# In[27]:

get_ipython().system('head {airportDataDir}/airport_weather_PEK_2014.csv')


# In[11]:

# The files are written into airportDataDir, so look for them there.
get_ipython().system('head {airportDataDir}/airport_weather_BSB_2014.csv')


# In[10]:

get_ipython().system('head {airportDataDir}/airport_weather_DEL_2014.csv')


# In[9]:

# Cleaning example: strip a stray trailing marker from the column names and
# from the values of the WindDirDegrees column.
# NOTE(review): in this copy of the notebook the marker appears as a raw line
# break inside the quotes; it may originally have been an HTML `<br />` tag
# lost in export - confirm against a downloaded file and adjust if needed.
trailingMarker = '\n'

dfdirty = pd.read_csv('{}/airport_weather_PEK_2014.csv'.format(airportDataDir))
dfdirty.columns = [c.replace(trailingMarker, '') for c in dfdirty.columns]
dfdirty['WindDirDegrees'] = dfdirty['WindDirDegrees'].str.replace(trailingMarker, '')
dfdirty.head()


# ## Simple Mapping
#
# Just because, an example of how to create a simple map using the airport locations...

# In[17]:

import folium

airportMap = folium.Map()


def addMarker(row):
    """Add a marker for one airport row to the global ``airportMap``.

    ``row`` is indexed by the keys ``latitude_deg``, ``longitude_deg``,
    ``name``, ``municipality`` and ``iata_code``. The marker is placed at the
    latitude/longitude pair and its popup reads "name, municipality (code)".
    Nothing is returned; the map is modified in place.
    """
    latlng = [row['latitude_deg'], row['longitude_deg']]
    popupText = '{}, {} ({})'.format(row['name'], row['municipality'], row['iata_code'])
    folium.Marker(latlng, popup=popupText).add_to(airportMap)


# Apply addMarker row by row to the airports whose code is in capitalAirports.
iata[iata['iata_code'].isin(capitalAirports)].apply(addMarker, axis=1)
airportMap


# In[ ]: