import pandas as pd, json, numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
# Load the first table of the Wikipedia airport list and index it by IATA code.
url = 'http://en.wikipedia.org/wiki/List_of_airports_in_Hungary'
wiki_tables = pd.read_html(url)
# First table on the page, restricted to the rows labelled 0..6.
head_rows = wiki_tables[0].loc[:6]
# Promote row 0 to the column header: transpose, index on it, transpose back.
with_header = head_rows.T.set_index(0).T
# Keep the rows from label 2 onward and use the IATA column as the index.
df = with_header.loc[2:].set_index('IATA')
df
Location served | County | ICAO | Airport name | Elev. | Runways | |
---|---|---|---|---|---|---|
IATA | ||||||
BUD | Budapest | (Capital) | LHBP | Budapest Ferenc Liszt International Airport | 151 m (495 ft) | 3010 m x 59 m 3707 x 59 m |
DEB | Debrecen | Hajdú-Bihar | LHDC | Debrecen International Airport | 109 m (359 ft) | 2498 m x 40 m |
SOB | Sármellék | Zala | LHSM | Hévíz-Balaton Airport | 124 m (408 ft) | 2500 x 60 m |
QGY | Győr-Pér | Győr-Moson-Sopron | LHPR | Győr-Pér International Airport | 129 m (424 ft) | 2030 x 30 m 1134 x 43 m |
QPJ | Pécs-Pogány | Baranya | LHPP | Pécs-Pogány International Airport | 305 m (1000 ft) | 1500 x 30 m |
from pygeocoder import Geocoder
# NOTE(review): the API key is committed in plain text; consider rotating it
# and loading it from the environment instead of the notebook.
apik = 'AIzaSyDybC2OroTE_XDJTuxjKruxFpby5VDhEGk'

# Geocode every airport in the table by its IATA code and keep the
# coordinates of the first hit.
geocoder = Geocoder(apik)  # built once instead of once per airport
locations = {}
for iata in df.index:
    results = geocoder.geocode(iata + ' airport Hungary')
    locations[iata] = results[0].coordinates
    print(iata)
BUD DEB SOB QGY QPJ
# Persist the geocoded airport coordinates, then read them straight back so
# that later cells work with the JSON round-tripped form of `locations`.
# `open` inside `with` replaces the Python-2-only `file()` builtin and
# guarantees the handles are closed.
with open("locations_hu.json", 'w') as out_file:
    out_file.write(json.dumps(locations))
with open('locations_hu.json', 'r') as in_file:
    locations = json.loads(in_file.read())
import requests
# Find a link for every airport through a Google custom search and store it
# in `airportialinks`, keyed by the airport code.
search_prefix = 'https://cse.google.com/cse?cx=partner-pub-6479063288582225%3A8064105798&cof=FORID%3A10&ie=UTF-8&q='
# Codes that are searched under a different term than their own key.
search_terms = {'QPJ': 'PEV'}
airportialinks = {}
for i in locations:
    url = search_prefix + search_terms.get(i, str(i)) + '+airport+hungary'
    m = requests.get(url).content
    # The hit is read from column 0, row 0 of the sixth table on the page.
    z = pd.read_html(m)[5][0][0]
    # Drop whatever precedes the URL in that cell.
    z = z[z.find('http'):]
    airportialinks[i] = z
    print(u'%s %s' % (i, z))
QPJ https://www.airportia.com/hungary/pécs_pogány-airport/map/ DEB https://www.airportia.com/hungary/debrecen-international-airport SOB https://www.airportia.com/hungary/sármellék...airport/arrivals BUD https://www.airportia.com/hungary/budapest-liszt-ferenc-international-airport QGY https://www.airportia.com/hungary/győr_pér...airport/photos
# reformat: reduce every search hit to the airport's base URL with a
# trailing '/', because the schedule cell appends 'arrivals/<date>' to it.
# Sub-pages a hit may point at instead of the airport's base page.
trailing_subpages = ('/map/', '/photos/')
for z in airportialinks:
    link = airportialinks[z].split('arrivals')[0].split('departures')[0]
    link = link.replace(' ', '').replace('...', '-international-')
    if not link.endswith('/'):
        link += '/'
    # Strip a trailing sub-page segment so the link is the base page again.
    for subpage in trailing_subpages:
        if link.endswith(subpage):
            link = link[:-len(subpage)] + '/'
    # manual fixes
    if z == 'QGY':
        link = u'https://www.airportia.com/hungary/győr_pér-international-airport/'
    airportialinks[z] = link
    print(link)
https://www.airportia.com/hungary/pécs_pogány-airport/map/ https://www.airportia.com/hungary/debrecen-international-airport/ https://www.airportia.com/hungary/sármellék-international-airport/ https://www.airportia.com/hungary/győr_pér-international-airport/ https://www.airportia.com/hungary/budapest-liszt-ferenc-international-airport/
# Schedule store: airport code -> {arrivals-page URL -> scraped table}.
sch={}
Record schedules for 2 weeks, then augment the count with weekly flight numbers. Seasonal and seasonal-charter flights count as once per week for 3 months, i.e. 12/52 per week. TGM is handled separately, since its history is in the past.
# Download the daily arrivals table of every airport into `sch`.
for i in locations:
    print(i)
    if i not in sch:
        sch[i] = {}
    if i not in airportialinks:
        # No link known for this airport: nothing can be fetched.
        continue
    base = airportialinks[i]
    # march 11-24 = 2 weeks
    for d in range(11, 25):
        full = base + 'arrivals/201703' + str(d)
        # sch[i] is keyed by URL, so the "already fetched" test must use the
        # URL as well (testing the day number never matched).
        if full in sch[i]:
            continue
        try:
            m = requests.get(full).content
            sch[i][full] = pd.read_html(m)[0]
        except Exception:
            # Best effort: days without a parsable table are skipped.
            # Unlike a bare `except:`, this lets Ctrl-C through.
            pass
QPJ DEB SOB BUD QGY
# Sanity check: number of scraped Budapest arrivals per day, followed by the
# Frankfurt arrivals of March 18 for comparison with the website.
bud_arrivals = u'https://www.airportia.com/hungary/budapest-liszt-ferenc-international-airport/arrivals/201703'
for i in range(11, 25):
    testurl = bud_arrivals + str(i)
    print('nr. of flights on March %d : %d' % (i, len(sch['BUD'][testurl])))
# `testurl` stays bound to March 18; the merged-table check reuses it.
testurl = bud_arrivals + '18'
k = sch['BUD'][testurl]
k[k['From'] == 'Frankfurt FRA']
nr. of flights on March 11 : 87 nr. of flights on March 12 : 117 nr. of flights on March 13 : 122 nr. of flights on March 14 : 102 nr. of flights on March 15 : 108 nr. of flights on March 16 : 109 nr. of flights on March 17 : 126 nr. of flights on March 18 : 86 nr. of flights on March 19 : 120 nr. of flights on March 20 : 126 nr. of flights on March 21 : 105 nr. of flights on March 22 : 110 nr. of flights on March 23 : 110 nr. of flights on March 24 : 124
Flight | From | Airline | Scheduled | Arrival | Status | Unnamed: 6 | |
---|---|---|---|---|---|---|---|
14 | LH1334 | Frankfurt FRA | Lufthansa | 10:05 | 09:57 | Landed | Track > |
41 | LH1338 | Frankfurt FRA | Lufthansa | 13:45 | 13:33 | Landed | Track > |
61 | LH1340 | Frankfurt FRA | Lufthansa | 18:00 | 17:59 | Landed | Track > |
79 | LH1342 | Frankfurt FRA | Lufthansa | 23:05 | 22:58 | Landed | Track > |
# Display the whole schedule store as the cell output.
sch
Checks out with the source.
# Flatten the per-airport, per-day schedule tables into one flight table.
# Frames are collected first and concatenated once; concatenating inside
# the loop re-copies the growing table on every step.
frames = []
for airport in sch:
    for day_url, table in sch[airport].items():
        # Keep only the second and third columns of the scraped table.
        flights = table.iloc[:, 1:3].copy()
        flights['To'] = airport
        flights['Date'] = day_url
        frames.append(flights)
mdf = pd.concat(frames) if frames else pd.DataFrame()

# Cells that read exactly 'Hahn' or 'Hahn HHN' are renamed to Frankfurt.
mdf = mdf.replace('Hahn', 'Frankfurt')
mdf = mdf.replace('Hahn HHN', 'Frankfurt HHN')

# Split 'From' at its last space: text before it is the city, text after it
# the airport code.
mdf['City'] = [origin[:origin.rfind(' ')] for origin in mdf['From']]
mdf['Airport'] = [origin[origin.rfind(' ') + 1:] for origin in mdf['From']]

# Same check as on the raw table: Frankfurt arrivals for the `testurl` day.
k = mdf[mdf['Date'] == testurl]
k[k['From'] == 'Frankfurt FRA']
From | Airline | To | Date | City | Airport | |
---|---|---|---|---|---|---|
14 | Frankfurt FRA | Lufthansa | BUD | https://www.airportia.com/hungary/budapest-lis... | Frankfurt | FRA |
41 | Frankfurt FRA | Lufthansa | BUD | https://www.airportia.com/hungary/budapest-lis... | Frankfurt | FRA |
61 | Frankfurt FRA | Lufthansa | BUD | https://www.airportia.com/hungary/budapest-lis... | Frankfurt | FRA |
79 | Frankfurt FRA | Lufthansa | BUD | https://www.airportia.com/hungary/budapest-lis... | Frankfurt | FRA |
# Display the merged flight table as the cell output.
mdf
Checks out with the source.
# Save the merged flight table.
# NOTE: to_json() already returns a JSON string and json.dumps() encodes it
# a second time, so the file holds one JSON string literal. Kept as is,
# because the readers of this file are not visible here.
# `with open` replaces the Python-2-only `file()` and closes the handle.
with open("mdf_hu_arrv.json", 'w') as out_file:
    out_file.write(json.dumps(mdf.reset_index().to_json()))
len(mdf)
1584
# Collect the distinct airlines and origin cities and save each as a JSON list.
airlines = set(mdf['Airline'])
cities = set(mdf['City'])
# `with open` replaces the Python-2-only `file()` and closes the handles.
with open("cities_hu_arrv.json", 'w') as out_file:
    out_file.write(json.dumps(list(cities)))
with open("airlines_hu_arrv.json", 'w') as out_file:
    out_file.write(json.dumps(list(airlines)))
# Geocode every origin city. Cities listed here are queried with a country
# suffix; every other city is queried under its own name.
geocode_queries = {
    u'Birmingham': 'Birmingham, UK',
    u'Valencia': 'Valencia, Spain',
    u'Naples': 'Naples, Italy',
    u'St. Petersburg': 'St. Petersburg, Russia',
    u'Bristol': 'Bristol, UK',
}
city_geocoder = Geocoder(apik)  # built once instead of once per city
citycoords = {}
for i in cities:
    # The guard only matters if the reset above is removed to resume a run.
    if i not in citycoords:
        citycoords[i] = city_geocoder.geocode(geocode_queries.get(i, i))
        print(i)
Manchester Lyon Sofia Oslo Kiev Istanbul Paris Bologna Riga Cairo Algiers St. Petersburg Gothenburg Nurnberg Barcelona Cologne Lanzarote Rotterdam Vienna Glasgow Nice Cluj-Napoca Edinburgh Liverpool Alicante Larnaca Dortmund Moscow Madrid Thessaloniki Munich Malmo Kutaisi Berlin Geneva Leeds Fuerteventura Catania Treviso Brussels Hong Kong Eilat Porto Dubai Eindhoven Malaga Helsinki Naples Basel East Midlands Hamburg Dublin Dusseldorf Tenerife Athens Stuttgart Zurich Minsk Pisa Stockholm Bristol Tel Aviv Venice Frankfurt Las Palmas Bucharest Reykjavik Belgrade Doha Billund Karlsruhe/Baden-Baden Prague Baku Birmingham Luqa Milan Rome London Lisbon Bari Amsterdam Copenhagen Hurghada Warsaw
# Reduce each geocoding result to the coordinates and country of its first
# hit, then save the mapping as JSON.
citysave = {
    city: {"coords": hits[0].coordinates,
           "country": hits[0].country}
    for city, hits in citycoords.items()
}
# `with open` replaces the Python-2-only `file()` and closes the handle.
with open("citysave_hu_arrv.json", 'w') as out_file:
    out_file.write(json.dumps(citysave))