Grab weather data files from Weather Underground for airports near capital cities of the BRICS countries.
#I'm using pandas, a data anlays package, which used to have external data source access built in
#Recently, that data access has been moved to an external package
#May as well go with recommnded external datasource route
!pip install pandas-datareader
#Import the python packages we need
import pandas as pd
#What are the capitals? - The World Bank data source can help
from pandas_datareader import wb
countries=wb.get_countries()
bricsCountries = ['Brazil','Russian Federation','India','China','South Africa']
brics=countries[countries['name'].isin(bricsCountries)]
brics
adminregion | capitalCity | iso3c | incomeLevel | iso2c | latitude | lendingType | longitude | name | region | |
---|---|---|---|---|---|---|---|---|---|---|
34 | Latin America & Caribbean (excluding high income) | Brasilia | BRA | Upper middle income | BR | -15.7801 | IBRD | -47.9292 | Brazil | Latin America & Caribbean |
49 | East Asia & Pacific (excluding high income) | Beijing | CHN | Upper middle income | CN | 40.0495 | IBRD | 116.2860 | China | East Asia & Pacific |
134 | South Asia | New Delhi | IND | Lower middle income | IN | 28.6353 | IBRD | 77.2250 | India | South Asia |
236 | Europe & Central Asia (excluding high income) | Moscow | RUS | Upper middle income | RU | 55.7558 | IBRD | 37.6176 | Russian Federation | Europe & Central Asia |
301 | Sub-Saharan Africa (excluding high income) | Pretoria | ZAF | Upper middle income | ZA | -25.7460 | IBRD | 28.1871 | South Africa | Sub-Saharan Africa |
#Linit the columns
brics=brics[['name','capitalCity','iso2c', 'iso3c']]
brics
name | capitalCity | iso2c | iso3c | |
---|---|---|---|---|
34 | Brazil | Brasilia | BR | BRA |
49 | China | Beijing | CN | CHN |
134 | India | New Delhi | IN | IND |
236 | Russian Federation | Moscow | RU | RUS |
301 | South Africa | Pretoria | ZA | ZAF |
#I found a list of IATA code in a CSV file on the web..
iata=pd.read_csv('https://raw.githubusercontent.com/datasets/airport-codes/master/data/airport-codes.csv')
iata.head()
ident | type | name | latitude_deg | longitude_deg | elevation_ft | continent | iso_country | iso_region | municipality | gps_code | iata_code | local_code | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 00A | heliport | Total Rf Heliport | 40.070801 | -74.933601 | 11.0 | NaN | US | US-PA | Bensalem | 00A | NaN | 00A |
1 | 00AK | small_airport | Lowell Field | 59.949200 | -151.695999 | 450.0 | NaN | US | US-AK | Anchor Point | 00AK | NaN | 00AK |
2 | 00AL | small_airport | Epps Airpark | 34.864799 | -86.770302 | 820.0 | NaN | US | US-AL | Harvest | 00AL | NaN | 00AL |
3 | 00AR | heliport | Newport Hospital & Clinic Heliport | 35.608700 | -91.254898 | 237.0 | NaN | US | US-AR | Newport | 00AR | NaN | 00AR |
4 | 00AZ | small_airport | Cordes Airport | 34.305599 | -112.165001 | 3810.0 | NaN | US | US-AZ | Cordes | 00AZ | NaN | 00AZ |
#Limit the IATA data columns to ones we're interested in
cols=['iso_country','name','municipality','iata_code']
#Just look at rows that meet certain criteria:
airportHasIATAcode = pd.notnull(iata['iata_code'])
airportIsLargeAirport = (iata['type']=='large_airport')
airportIsInCapital = iata['municipality'].isin(brics['capitalCity'])
#Select rows on that basis
iata[ airportHasIATAcode & airportIsLargeAirport & airportIsInCapital ][cols]
iso_country | name | municipality | iata_code | |
---|---|---|---|---|
41779 | RU | Domodedovo International Airport | Moscow | DME |
41781 | RU | Sheremetyevo International Airport | Moscow | SVO |
41788 | RU | Chkalovskiy Airport | Moscow | CKL |
41802 | RU | Vnukovo International Airport | Moscow | VKO |
42569 | IN | Indira Gandhi International Airport | New Delhi | DEL |
46022 | CN | Beijing Capital International Airport | Beijing | PEK |
46033 | CN | Beijing Nanyuan Airport | Beijing | NAY |
#Data ALWAYS SUCKS - change the filter criteria
internationalAirport = iata['name'].str.contains('International')
bricsCountry = iata['iso_country'].isin(brics['iso2c'])
iata[ airportHasIATAcode & airportIsLargeAirport & internationalAirport & bricsCountry ][cols]
iso_country | name | municipality | iata_code | |
---|---|---|---|---|
17465 | ZA | Cape Town International Airport | Cape Town | CPT |
17550 | ZA | OR Tambo International Airport | Johannesburg | JNB |
17579 | ZA | King Shaka International Airport | Durban | DUR |
33920 | BR | Val de Cans/Júlio Cezar Ribeiro International ... | Belém | BEL |
33926 | BR | Presidente Juscelino Kubistschek International... | Brasília | BSB |
33936 | BR | Tancredo Neves International Airport | Belo Horizonte | CNF |
33959 | BR | Hercílio Luz International Airport | Florianópolis | FLN |
33964 | BR | RIOgaleão – Tom Jobim International Airport | Rio De Janeiro | GIG |
33968 | BR | Guarulhos - Governador André Franco Montoro In... | São Paulo | GRU |
34033 | BR | Governador Aluízio Alves International Airport | Natal | NAT |
34041 | BR | Deputado Luiz Eduardo Magalhães International ... | Salvador | SSA |
41282 | RU | Sochi International Airport | Sochi | AER |
41779 | RU | Domodedovo International Airport | Moscow | DME |
41781 | RU | Sheremetyevo International Airport | Moscow | SVO |
41802 | RU | Vnukovo International Airport | Moscow | VKO |
41842 | RU | Ufa International Airport | Ufa | UFA |
41848 | RU | Kurumoch International Airport | Samara | KUF |
42118 | IN | Chhatrapati Shivaji International Airport | Mumbai | BOM |
42409 | IN | Netaji Subhash Chandra Bose International Airport | Kolkata | CCU |
42554 | IN | Sri Guru Ram Dass Jee International Airport | Amritsar | ATQ |
42569 | IN | Indira Gandhi International Airport | New Delhi | DEL |
42674 | IN | Bengaluru International Airport | Bangalore | BLR |
42679 | IN | Cochin International Airport | Cochin | COK |
42680 | IN | Calicut International Airport | Calicut | CCJ |
42686 | IN | Rajiv Gandhi International Airport | Hyderabad | HYD |
42691 | IN | Chennai International Airport | Chennai | MAA |
42704 | IN | Trivandrum International Airport | Trivandrum | TRV |
46022 | CN | Beijing Capital International Airport | Beijing | PEK |
46037 | CN | Tianjin Binhai International Airport | Tianjin | TSN |
46050 | CN | Guangzhou Baiyun International Airport | Guangzhou | CAN |
46051 | CN | Changsha Huanghua International Airport | Changsha | CSX |
46053 | CN | Guilin Liangjiang International Airport | Guilin City | KWL |
46060 | CN | Shenzhen Bao'an International Airport | Shenzhen | SZX |
46066 | CN | Zhengzhou Xinzheng International Airport | Zhengzhou | CGO |
46069 | CN | Wuhan Tianhe International Airport | Wuhan | WUH |
46076 | CN | Haikou Meilan International Airport | Haikou | HAK |
46077 | CN | Sanya Phoenix International Airport | Sanya | SYX |
46094 | CN | Xi'an Xianyang International Airport | Xianyang | XIY |
46130 | CN | Kunming Wujiaba International Airport | Kunming | KMG |
46133 | CN | Xiamen Gaoqi International Airport | Xiamen | XMN |
46139 | CN | Fuzhou Changle International Airport | Fuzhou | FOC |
46141 | CN | Hangzhou Xiaoshan International Airport | Hangzhou | HGH |
46150 | CN | Ningbo Lishe International Airport | Ningbo | NGB |
46153 | CN | Shanghai Pudong International Airport | Shanghai | PVG |
46157 | CN | Shanghai Hongqiao International Airport | Shanghai | SHA |
46174 | CN | Chongqing Jiangbei International Airport | Chongqing | CKG |
46191 | CN | Chengdu Shuangliu International Airport | Chengdu | CTU |
46209 | CN | Ürümqi Diwopu International Airport | Ürümqi | URC |
#We're going to grab quite a few data files - and we want to play nice by sleeping between them...
import time
#Look at the URL - spot the pattern that lets us construct it automatically
url_pattern ='https://www.wunderground.com/history/airport/{airport}/{year}/{month}/1/MonthlyHistory.html?format=1'
#List of airports from inspection above
capitalAirports=['BSB','DME','DEL','PEK','CPT']
#Folder to put data files in
airportDataDir='airportData'
#Make sure that folder exists - and create it if it doesn't
!mkdir -p {airportDataDir}
colorder=None
#Grab data for multiple years if necessary...
for year in [2014]:
#Print out a message to keep track of progress...
print('Getting data for {}...'.format(year))
#Grab data for multiple airports, if necessary
for airport in capitalAirports:
print('\t...{}...: '.format(airport), end='')
#Create a blank dataframe to hold the data for one airport for all the months in one year
df=pd.DataFrame()
#Loop through months in a year - range(1,N) returns [1,2,3,..,N-1]
for month in range(1,12+1):
#play nice by waiting a second between each call to the Wunderground API
time.sleep(1)
#construct a URL using specific loop values
url=url_pattern.format(airport=airport, year=year, month=month)
print('{} '.format(month), end='')
#Build up a single dataframe containing data for one airport for all months in one year
#pd.concat() takes a list of dataframes and joins them vertically
tmp=pd.read_csv(url)
#Or do a "not in weathercols" to identify Date?
tmp.columns=['Date' if c.startswith('+') or c.startswith('-') or c in['CST','UTC','SAST'] else c for c in tmp]
if colorder is None: colorder=tmp.columns
#May also want to reorder so Date is first col? eg:
#tmp.columns=['Date']+[c if c!='Date' for c in tmp.columns]
df=pd.concat([df,tmp])
#create a filename to store the data in
fn="{datadir}/airport_weather_{iata}_{year}.csv".format(datadir=airportDataDir, year=year, iata=airport)
df[colorder].to_csv(fn,index=False)
print('...done {} {}'.format(airport, year))
Getting data for 2014... ...BSB...: 1 2 3 4 5 6 7 8 9 10 11 12 ...done BSB 2014 ...DME...: 1 2 3 4 5 6 7 8 9 10 11 12 ...done DME 2014 ...DEL...: 1 2 3 4 5 6 7 8 9 10 11 12 ...done DEL 2014 ...PEK...: 1 2 3 4 5 6 7 8 9 10 11 12 ...done PEK 2014 ...CPT...: 1 2 3 4 5 6 7 8 9 10 11 12 ...done CPT 2014
#List the files in the data dump directory
!ls {airportDataDir}
airport_weather_BSB_2014.csv airport_weather_DME_2014.csv airport_weather_CPT_2014.csv airport_weather_PEK_2014.csv airport_weather_DEL_2014.csv
!head {airportDataDir}/airport_weather_PEK_2014.csv
Date,Max TemperatureC,Mean TemperatureC,Min TemperatureC,Dew PointC,MeanDew PointC,Min DewpointC,Max Humidity, Mean Humidity, Min Humidity, Max Sea Level PressurehPa, Mean Sea Level PressurehPa, Min Sea Level PressurehPa, Max VisibilityKm, Mean VisibilityKm, Min VisibilitykM, Max Wind SpeedKm/h, Mean Wind SpeedKm/h, Max Gust SpeedKm/h,Precipitationmm, CloudCover, Events,WindDirDegrees<br /> 2014-1-1,12,4,-2,-12,-17,-20,44,19,7,1019,1015,1012,31,11,5,32,16,47.0,0.0,,,291<br /> 2014-1-2,7,0,-6,-6,-9,-13,74,50,28,1021,1018,1015,18,7,3,18,5,,0.0,,,20<br /> 2014-1-3,9,3,-2,-7,-13,-18,64,32,9,1026,1022,1018,31,13,5,18,8,,0.0,,,24<br /> 2014-1-4,2,-2,-6,-4,-7,-9,80,68,44,1026,1023,1020,10,5,2,11,5,,0.0,2.0,,56<br /> 2014-1-5,7,0,-7,-5,-11,-15,80,51,15,1028,1025,1021,31,7,3,18,10,,0.0,,,18<br /> 2014-1-6,0,-2,-3,-4,-5,-7,80,72,61,1028,1026,1024,5,3,2,14,5,,0.0,8.0,Snow,130<br /> 2014-1-7,5,0,-5,-4,-12,-25,86,45,6,1033,1027,1024,31,6,2,18,8,,0.0,8.0,,7<br /> 2014-1-8,1,-4,-8,-20,-23,-28,36,21,6,1036,1034,1032,31,30,26,32,14,47.0,0.0,,,344<br /> 2014-1-9,1,-6,-12,-21,-26,-35,41,19,4,1035,1032,1029,31,24,10,26,11,43.0,0.0,,,321<br />
!head airport_weather_BSB_2014.csv
CloudCover, Events, Max Gust SpeedKm/h, Max Sea Level PressurehPa, Max VisibilityKm, Max Wind SpeedKm/h, Mean Humidity, Mean Sea Level PressurehPa, Mean VisibilityKm, Mean Wind SpeedKm/h, Min Humidity, Min Sea Level PressurehPa, Min VisibilitykM,-02,-03,Dew PointC,Max Humidity,Max TemperatureC,Mean TemperatureC,MeanDew PointC,Min DewpointC,Min TemperatureC,Precipitationmm,WindDirDegrees<br /> 3.0,Thunderstorm,,1019,19,14,64,1017,11,5,33,1014,10,2014-1-1,,19,94,31,24,18,16,17,0.0,125<br /> 2.0,,,1018,19,16,65,1016,12,5,33,1013,10,2014-1-2,,19,94,30,23,17,14,17,0.0,102<br /> 2.0,,,1018,19,19,55,1016,13,6,19,1015,10,2014-1-3,,20,88,32,24,15,11,18,0.0,133<br /> 2.0,,,1020,19,24,47,1018,12,6,16,1015,10,2014-1-4,,17,77,32,25,13,8,18,0.0,106<br /> 3.0,Rain,,1020,19,24,48,1018,12,8,23,1016,10,2014-1-5,,16,83,30,24,13,9,19,0.0,106<br /> 3.0,Thunderstorm,,1019,19,19,62,1017,11,5,20,1016,10,2014-1-6,,19,94,30,24,16,11,18,1.02,14<br /> 2.0,Thunderstorm,,1020,19,24,58,1017,11,6,27,1015,10,2014-1-7,,17,88,29,23,15,12,18,0.0,85<br /> 2.0,,,1020,19,19,55,1017,12,8,22,1016,10,2014-1-8,,15,88,29,22,13,10,15,0.0,115<br /> 3.0,,,1019,19,21,50,1017,11,8,29,1014,10,2014-1-9,,16,73,29,24,13,10,18,0.0,110<br />
!head airport_weather_DEL_2014.csv
CloudCover, Events, Max Gust SpeedKm/h, Max Sea Level PressurehPa, Max VisibilityKm, Max Wind SpeedKm/h, Mean Humidity, Mean Sea Level PressurehPa, Mean VisibilityKm, Mean Wind SpeedKm/h, Min Humidity, Min Sea Level PressurehPa, Min VisibilitykM,+0330,+0430,Dew PointC,Max Humidity,Max TemperatureC,Mean TemperatureC,MeanDew PointC,Min DewpointC,Min TemperatureC,Precipitationmm,WindDirDegrees<br /> 8.0,,,1034,2,6,70,1033,2,2,58,1033,2,2014-1-1,,-9,82,-4,-7,-10,-11,-9,0.0,-1<br /> 8.0,,,1035,3,6,69,1033,2,0,53,1032,2,2014-1-2,,-6,75,-1,-4,-8,-11,-8,0.0,-1<br /> 8.0,,,1034,3,6,72,1033,2,0,54,1033,2,2014-1-3,,-4,84,1,-3,-6,-10,-8,0.0,-1<br /> 8.0,,,1034,8,14,74,1033,4,2,64,1032,2,2014-1-4,,-4,80,-1,-2,-5,-6,-3,0.0,-1<br /> 8.0,Snow,,1034,6,14,79,1033,3,2,71,1033,2,2014-1-5,,-4,93,-1,-3,-6,-8,-6,0.0,-1<br /> 8.0,,,1036,10,14,67,1035,6,3,45,1033,3,2014-1-6,,-7,80,-2,-4,-9,-12,-9,0.0,-1<br /> 7.0,Snow,,1036,10,14,72,1031,6,6,48,1025,2,2014-1-7,,-10,86,-4,-7,-11,-12,-9,0.0,30<br /> 8.0,,,1039,6,11,75,1034,4,5,65,1032,2,2014-1-8,,-9,87,-5,-7,-10,-12,-11,0.0,-1<br /> 5.0,Fog,,1042,6,8,88,1034,4,3,67,1022,1,2014-1-9,,-9,100,-6,-11,-12,-17,-16,0.0,70<br />
#Cleaning example
dfdirty=pd.read_csv('airport_weather_PEK_2014.csv')
dfdirty.columns=[c.replace('<br />','') for c in dfdirty.columns]
dfdirty['WindDirDegrees']=dfdirty['WindDirDegrees'].str.replace('<br />','')
dfdirty.head()
CST | Max TemperatureC | Mean TemperatureC | Min TemperatureC | Dew PointC | MeanDew PointC | Min DewpointC | Max Humidity | Mean Humidity | Min Humidity | ... | Max VisibilityKm | Mean VisibilityKm | Min VisibilitykM | Max Wind SpeedKm/h | Mean Wind SpeedKm/h | Max Gust SpeedKm/h | Precipitationmm | CloudCover | Events | WindDirDegrees | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 2014-1-1 | 12 | 4 | -2 | -12 | -17 | -20 | 44 | 19 | 7 | ... | 31 | 11 | 5 | 32 | 16 | 47.0 | 0.0 | NaN | NaN | 291 |
1 | 2014-1-2 | 7 | 0 | -6 | -6 | -9 | -13 | 74 | 50 | 28 | ... | 18 | 7 | 3 | 18 | 5 | NaN | 0.0 | NaN | NaN | 20 |
2 | 2014-1-3 | 9 | 3 | -2 | -7 | -13 | -18 | 64 | 32 | 9 | ... | 31 | 13 | 5 | 18 | 8 | NaN | 0.0 | NaN | NaN | 24 |
3 | 2014-1-4 | 2 | -2 | -6 | -4 | -7 | -9 | 80 | 68 | 44 | ... | 10 | 5 | 2 | 11 | 5 | NaN | 0.0 | 2.0 | NaN | 56 |
4 | 2014-1-5 | 7 | 0 | -7 | -5 | -11 | -15 | 80 | 51 | 15 | ... | 31 | 7 | 3 | 18 | 10 | NaN | 0.0 | NaN | NaN | 18 |
5 rows × 23 columns
Just becuase, an example of how to create a simple map using the airport locations...
import folium
airportMap = folium.Map()
def addMarker(row):
latlng=[row['latitude_deg'],row['longitude_deg']]
popupText='{}, {} ({})'.format(row['name'],row['municipality'],row['iata_code'])
folium.Marker(latlng, popup=popupText ).add_to(airportMap)
iata[ iata['iata_code'].isin(capitalAirports) ].apply(addMarker, axis=1)
airportMap