#!/usr/bin/env python
# coding: utf-8

# # Part II : Extracting Foursquare Data

# > Jump to :  
# * [Part 1](https://github.com/Niladri-B/Coursera_Captstone/blob/master/wk4/Capstone_part1.ipynb) *Extracting Street Addresses & Coordinates* 
# * [Part 3](https://github.com/Niladri-B/Coursera_Captstone/blob/master/wk4/Capstone_part3.ipynb) , *Exploratory Data Analysis*
# * [Part 4](https://github.com/Niladri-B/Coursera_Captstone/blob/master/wk4/Capstone_part4.ipynb), *Clustering and Visualising*
# * [Part 5](https://github.com/Niladri-B/Coursera_Captstone/blob/master/wk4/Capstone_part5.ipynb), *Conclusion & Discussion*

# ## 1. Set up environment

# In[9]:


import pandas as pd
import numpy as np
import folium
import requests
import re


# In[2]:


# Street table; the code below reads its 'Street', 'MidLatitude' and
# 'MidLongitude' columns.
streetData = pd.read_csv('./streetData_Midcoordinates.csv')


# ## 2. Basic Folium Map visualisation

# In[3]:


streetData.head()


# In[5]:


#1.2.1 Draw every street as a small circle on a map
# Coordinates the map is initially centred on.
latitude, longitude = 59.9133301, 10.7389701
map_oslo = folium.Map(location=[latitude, longitude], zoom_start=10)

# One marker per street, placed at the street's mid-point coordinates.
street_points = zip(
    streetData['MidLatitude'],
    streetData['MidLongitude'],
    streetData['Street'],
)
for lat, lng, street in street_points:
    # The pop-up shown when a marker is clicked carries the street name.
    label = folium.Popup(str(street), parse_html=True)
    folium.CircleMarker(
        location=[lat, lng],
        radius=2,  # size of the circle marker
        popup=label,
        color='blue',
        fill=False,
        fill_opacity=0.7,
        parse_html=False,
    ).add_to(map_oslo)

map_oslo


# ## 3. Use Foursquare API to extract various transport information

# ### 3.1 Find Trikk

# #### Set up Foursquare developer credentials, including Client ID, Client Secret and Version (hidden here)

# In[13]:


#FULL VERSION: Trikk for street

def getTrikkNearby(post, bydelLat, bydelLon, radius, query):
    """Look up the first Foursquare venue matching ``query`` near each street.

    For every (street, latitude, longitude) triple one request is sent to the
    Foursquare ``venues/search`` endpoint, and the first venue of the response
    is recorded together with the distance reported by the API.

    Parameters
    ----------
    post : iterable
        Street names (one per location); printed as progress output.
    bydelLat, bydelLon : iterable
        Latitudes / longitudes, aligned element-wise with ``post``.
    radius : int
        Search radius forwarded to the API as ``radius``.
    query : str
        Search term forwarded to the API as ``query``.

    Returns
    -------
    pandas.DataFrame
        One row per input street with the columns ``Street``,
        ``Street Latitude``, ``Street Longitude``, ``Trikk`` and
        ``Trikk Distance``. Streets for which the response contains no usable
        venue get ``'NA'`` in the last two columns. Empty input yields an
        empty frame that still has these five columns.

    Notes
    -----
    Relies on the module-level ``CLIENT_ID``, ``CLIENT_SECRET`` and
    ``VERSION`` credentials being defined before the call.
    """
    column_names = ['Street',
                    'Street Latitude',
                    'Street Longitude',
                    'Trikk',
                    'Trikk Distance']

    rows = []
    # Iterate over the individual values; formatting a whole Series into the
    # request would produce a malformed query.
    for street, lat, lng in zip(post, bydelLat, bydelLon):
        print(street)

        # Let requests build and URL-encode the query string.
        results = requests.get(
            'https://api.foursquare.com/v2/venues/search',
            params={
                'client_id': CLIENT_ID,
                'client_secret': CLIENT_SECRET,
                'll': '{},{}'.format(lat, lng),
                'v': VERSION,
                'query': query,
                'radius': radius,
            },
        ).json()

        try:
            # Only the first venue of the response is kept.
            first_venue = results['response']['venues'][0]
            row = (street, lat, lng,
                   first_venue['name'],
                   first_venue['location']['distance'])
        except (KeyError, IndexError, TypeError):
            # No venue, or a response without the expected structure.
            row = (street, lat, lng, 'NA', 'NA')
        rows.append(row)

    # Passing the column names to the constructor keeps the schema intact
    # even when there are no rows at all.
    return pd.DataFrame(rows, columns=column_names)

#2.1. Query the nearest 'Trikk' venue for every street in the table
streetTrikk = getTrikkNearby(
    streetData['Street'],
    streetData['MidLatitude'],
    streetData['MidLongitude'],
    radius=400,
    query='Trikk',
)


# In[14]:


#Dimensions of the result (rows, columns)
streetTrikk.shape


# In[15]:


#Peek at the first rows
streetTrikk.head()


# In[66]:


#Persist the result next to the notebook
streetTrikk.to_csv('./streetTrikk.csv', index=False)


# ## Step 2: Obtain & clean Bus info

# In[48]:


#FULL VERSION; CHANGED CODE
# Extend to all postcodes

# Street name -> list of (venue name, distance) tuples, or '' when the search
# returned nothing usable for that street. Filled in by getBusNearby().
busPost = {}
def getBusNearby (post, bydelLat, bydelLon, query, radius, count):
    """Collect Foursquare venues near each street into the global ``busPost``.

    For every (street, latitude, longitude) triple one ``venues/search``
    request is made. When a street is not yet a key of ``busPost``, the first
    venue of the response is stored unconditionally and starts that street's
    list; every further venue is added only if its name matches the "buss"
    pattern below. Streets with an empty response, or a response lacking the
    expected fields, are stored as ``''``.

    Parameters
    ----------
    post : iterable
        Street names, used as keys of ``busPost``.
    bydelLat, bydelLon : iterable
        Latitudes / longitudes, aligned element-wise with ``post``.
    query : str
        Search term forwarded to the API.
    radius : int
        Search radius forwarded to the API.
    count : int
        Starting value of the progress counter printed for each street.

    Returns
    -------
    None
        Results are written to the module-level ``busPost`` dictionary.

    Notes
    -----
    Relies on the module-level ``CLIENT_ID``, ``CLIENT_SECRET`` and
    ``VERSION`` credentials being defined before the call.
    NOTE(review): if a street name occurs more than once in ``post``, venues
    found for a later occurrence are collected but not stored, because the
    key already exists — confirm the input has unique street names.
    """
    # Pattern kept exactly as before; the lazy `[the]*?` tail may match
    # nothing, so any name containing "Buss" or "buss" matches.
    bus_pattern = re.compile('(B|b)uss[the]*?')

    for street, lat, lng in zip(post, bydelLat, bydelLon):
        count = count + 1
        print('\n',street,'\t',count)

        # Let requests build and URL-encode the query string.
        results = requests.get(
            'https://api.foursquare.com/v2/venues/search',
            params={
                'client_id': CLIENT_ID,
                'client_secret': CLIENT_SECRET,
                'll': '{},{}'.format(lat, lng),
                'v': VERSION,
                'query': query,
                'radius': radius,
            },
        ).json()

        try:
            places = results['response']['venues']
            print('Number of entries in {} is {}'.format(street,len(places)))

            if len(places) > 0:
                busList = list()
                for place in places:
                    name = place['name']
                    is_bus = bus_pattern.search(name) is not None
                    if is_bus:
                        print(name)

                    if street not in busPost:
                        # First venue seen for this street: always kept, and
                        # it registers the list under the street's key.
                        busList.append((name, place['location']['distance']))
                        busPost[street] = busList
                    elif is_bus:
                        # Later venues are kept only when they look like a bus stop.
                        busList.append((name, place['location']['distance']))
            else:
                # The API answered but found nothing.
                busPost[street] = ''

        except (KeyError, TypeError, IndexError):
            # Response without the expected structure: record "nothing found".
            busPost[street] = ''

        
#Run the bus search for every street in the table.
#(Slice the three columns identically, e.g. [0:20], to try a subset first.)
getBusNearby(
    post=streetData['Street'],
    bydelLat=streetData['MidLatitude'],
    bydelLon=streetData['MidLongitude'],
    query='Bus Stops',
    radius=400,
    count=0,
)


# #### Clean the street bus dictionary

# In[49]:


#Number of streets recorded
len(busPost)


# In[50]:


#All (street, venues) pairs
list(busPost.items())


# In[54]:


#Check that streets have been properly ascribed buses (by checking against some of the ones known to have busses)
# Previously this loop was indented at module level, which is an
# IndentationError when the file is run as a script.
known_street = 'Hausmanns gate'
if known_street in busPost:
    print(known_street, busPost[known_street])


# In[55]:


#Make a clean dictionary containing only buses with route numbers on them
## This helps remove some miscellaneous places named like 'Bussola' which is a pizza place actually
# cleanBus maps street -> list of venue names that contain at least one digit.
# Streets without any such venue are left out entirely.
cleanBus = dict()
for street, venues in busPost.items():
    # `venues` is either a list of (name, distance) tuples or the '' placeholder;
    # iterating over '' simply yields nothing.
    numbered = [venue[0] for venue in venues if re.search(r'\d', venue[0])]
    if numbered:
        cleanBus[street] = numbered


# In[56]:


print('Length of Clean street Bus dictionary is:', len(cleanBus))


# In[40]:


#Stats
#Out of 100 streets, 46 have bus within 400m
#Out of 1000 streets, 481 have bus within 400m
#Out of 2460 streets, 1191 have bus within 400m


# In[57]:


#Convert to a dataframe: one row per street (the index), one column per bus venue name
streetBusesClean = pd.DataFrame.from_dict(cleanBus, orient = 'index')
streetBusesClean.head()


# In[59]:


#Move the street names out of the index into a regular column called 'Street'.
#Naming the index before resetting it avoids writing into `columns.values`,
#which mutates an Index that pandas treats as immutable.
streetBusesClean = streetBusesClean.rename_axis('Street').reset_index()

#Verify access to column
streetBusesClean[['Street']].head()


# In[60]:


#Save the cleaned up bus data frame locally
streetBusesClean.to_csv(path_or_buf= './streetBusesClean.csv', index = False)


# In[62]:


#Merge with Street Trikk dataframe (left join: keep every street of the Trikk table)
streetTrikkBussClean = pd.merge(streetTrikk, streetBusesClean, on = 'Street', how = 'left')
streetTrikkBussClean.head()


# In[65]:


#Check shape of merged Trikk + Buss
streetTrikkBussClean.shape


# ## Step 3: Obtain T-Bane info

# In[68]:


#Extend to run through all postcodes
# Street name -> list of (venue name, distance) tuples for metro venues.
# Filled in by getMetroNearby().
metroPost = dict()
def getMetroNearby (post, bydelLat, bydelLon, radius, query, count):
    """Collect nearby metro venues for each street into the global ``metroPost``.

    For every (street, latitude, longitude) triple one Foursquare
    ``venues/search`` request is made. A venue counts as a metro stop when the
    ``shortName`` of its first category equals ``'Metro'``; venues without any
    category are ignored.

    Parameters
    ----------
    post : iterable
        Street names, used as keys of ``metroPost``.
    bydelLat, bydelLon : iterable
        Latitudes / longitudes, aligned element-wise with ``post``.
    radius : int
        Search radius forwarded to the API.
    query : str
        Search term forwarded to the API.
    count : int
        Starting value of the progress counter printed for each street.

    Returns
    -------
    None
        Results are written to the module-level ``metroPost`` dictionary.
        Streets with an empty or malformed response get an empty list.

    Notes
    -----
    Relies on the module-level ``CLIENT_ID``, ``CLIENT_SECRET`` and
    ``VERSION`` credentials being defined before the call.
    NOTE(review): if a street name occurs more than once in ``post``, matches
    from a later occurrence are collected but not stored, because the key
    already exists — confirm the input has unique street names.
    """
    for street, lat, lng in zip(post, bydelLat, bydelLon):
        count = count + 1
        print('\n',street,'\t',count)

        # Let requests build and URL-encode the query string.
        results = requests.get(
            'https://api.foursquare.com/v2/venues/search',
            params={
                'client_id': CLIENT_ID,
                'client_secret': CLIENT_SECRET,
                'll': '{},{}'.format(lat, lng),
                'v': VERSION,
                'query': query,
                'radius': radius,
            },
        ).json()

        #Situation 1: the response has the expected structure
        try:
            places = results['response']['venues']
            print('Number of entries in {} is {}'.format(street,len(places)))

            #Situation 2: at least one venue came back
            if len(places) > 0:
                metroList = list()
                for venue in places:
                    categories = venue['categories']
                    if len(categories) == 0:
                        # Uncategorised venues are neither stored nor used
                        # to register the street.
                        continue

                    if categories[0]['shortName'] == 'Metro':
                        metroList.append((venue['name'], venue['location']['distance']))

                    # The first categorised venue registers the street; the
                    # stored list is the same object that later matches are
                    # appended to.
                    if street not in metroPost:
                        metroPost[street] = metroList

            #Situation 2: no venue came back
            else:
                print('<<We have this situation>>')
                # Always a fresh empty list. Reusing `metroList` here would
                # hand this street the previous street's results.
                metroPost[street] = []

        #Situation 1: the response lacks the expected fields
        except (KeyError, TypeError, IndexError):
            print('We have this situation')
            metroPost[street] = [] #Keeping blank list as value

        
#Run the metro search for every street in the table.
#(Slice the three columns identically, e.g. [0:20], to try a subset first.)
getMetroNearby(
    post=streetData['Street'],
    bydelLat=streetData['MidLatitude'],
    bydelLon=streetData['MidLongitude'],
    radius=400,
    query='T-bane',
    count=0,
)


# In[69]:


#Number of streets recorded
len(metroPost)


# In[70]:


#Convert the dictionary to a dataframe: one row per street (the index), one column per stored venue
streetMetro = pd.DataFrame.from_dict(metroPost, orient = 'Index')
streetMetro.head()


# In[71]:


#Reset index so the street names become an ordinary column
streetMetro.reset_index(inplace = True)

#Change column names from 0,1,2,3 to something better.
#The names are derived from the actual number of columns, so the assignment
#also works when a street has more (or fewer) than four venues.
streetMetro.columns = ['Street'] + ['T-bane_{}'.format(n) for n in range(1, streetMetro.shape[1])]
streetMetro.head()


# In[72]:


#Save to disk
streetMetro.to_csv(path_or_buf='streetMetro.csv', index = False)


# In[73]:


#Merge with street Trikk Buss (left join: keep every street of the Trikk/Buss table)
streetTrikkBussMetro = pd.merge(streetTrikkBussClean, streetMetro, on = 'Street', how = 'left')
streetTrikkBussMetro.head()


# In[74]:


streetTrikkBussMetro.shape


# ## Step 4: Obtain Train info

# In[75]:


#Extend to run through all postcodes
# Street name -> list of (venue name, distance) tuples for train stations.
# Filled in by getTrainsNearby().
trainPost = dict()
def getTrainsNearby (post, bydelLat, bydelLon, radius, query, count):
    """Collect nearby train stations for each street into the global ``trainPost``.

    For every (street, latitude, longitude) triple one Foursquare
    ``venues/search`` request is made. A venue counts as a train station when
    the ``shortName`` of its first category equals ``'Train Station'``; venues
    without any category are ignored.

    Parameters
    ----------
    post : iterable
        Street names, used as keys of ``trainPost``.
    bydelLat, bydelLon : iterable
        Latitudes / longitudes, aligned element-wise with ``post``.
    radius : int
        Search radius forwarded to the API.
    query : str
        Search term forwarded to the API.
    count : int
        Starting value of the progress counter printed for each street.

    Returns
    -------
    None
        Results are written to the module-level ``trainPost`` dictionary.
        Streets with an empty or malformed response get an empty list.

    Notes
    -----
    Relies on the module-level ``CLIENT_ID``, ``CLIENT_SECRET`` and
    ``VERSION`` credentials being defined before the call.
    NOTE(review): if a street name occurs more than once in ``post``, matches
    from a later occurrence are collected but not stored, because the key
    already exists — confirm the input has unique street names.
    """
    for street, lat, lng in zip(post, bydelLat, bydelLon):
        count = count + 1
        print('\n',street,'\t',count)

        # Let requests build and URL-encode the query string.
        results = requests.get(
            'https://api.foursquare.com/v2/venues/search',
            params={
                'client_id': CLIENT_ID,
                'client_secret': CLIENT_SECRET,
                'll': '{},{}'.format(lat, lng),
                'v': VERSION,
                'query': query,
                'radius': radius,
            },
        ).json()

        #Situation 1: the response has the expected structure
        try:
            places = results['response']['venues']
            print('Number of entries in {} is {}'.format(street,len(places)))
            print(places)

            #Situation 2: at least one venue came back
            if len(places) > 0:
                trainList = list()
                for venue in places:
                    categories = venue['categories']
                    if len(categories) == 0:
                        # Uncategorised venues are neither stored nor used
                        # to register the street.
                        continue

                    if categories[0]['shortName'] == 'Train Station':
                        trainList.append((venue['name'], venue['location']['distance']))

                    # The first categorised venue registers the street; the
                    # stored list is the same object that later matches are
                    # appended to.
                    if street not in trainPost:
                        trainPost[street] = trainList

            #Situation 2: no venue came back
            else:
                print('<<We have this situation>>')
                # Store a fresh empty list. The earlier `trainPostPost` typo
                # raised a NameError here that the bare except swallowed.
                trainPost[street] = []

        #Situation 1: the response lacks the expected fields
        except (KeyError, TypeError, IndexError):
            print('We have this situation')
            trainPost[street] = [] #Keeping blank list as value

        
#Run the train-station search for every street in the table.
#(Slice the three columns identically, e.g. [0:20], to try a subset first.)
getTrainsNearby(
    post=streetData['Street'],
    bydelLat=streetData['MidLatitude'],
    bydelLon=streetData['MidLongitude'],
    radius=400,
    query='Train Station',
    count=0,
)


# In[2747]:


#sorted( [(k,v) for k,v in trainPost.items()] )


# In[76]:


#Report how many streets were recorded
train_street_count = len(trainPost)
print('Length of dictionary is {}'.format(train_street_count))


# In[ ]:


#Manually add Nationaltheatret Stasjon to Ruseløkkveien


# In[2698]:


#for k,v in trainPost.items():
#    if k == 'Ruseløkkveien':
#        trainPost[k].append('Nationaltheatret stasjon')
#        print(k,v)

#See below for 1. Finding streets within 400m of Ruseløkkveien that has Nationaltheatret stasjon 2. Adding station info to those streets


# ### Important: Foursquare API completely misses the Nationaltheatret station

# I manually found it to be located on the street named Ruseløkkveien. This information needs to be added.  
# Additionally, to _approximately_ find other streets in the 400m vicinity of this station, I will try to find all streets within 400m of Ruseløkkveien and assign the station to those streets as well.

# ### Try to find streets that are within 400m of Ruseløkkveien

# In[77]:


#Import the library that allows one to estimate distance from two geo-coordinate sets.
import geopy.distance


# In[78]:


#We will work on a copy of the original Street Data df, just to ensure the original remains intact
#(.copy() is required: a plain assignment would only create a second name for the same dataframe)
streetDataModified = streetData.copy()
streetDataModified.head()


# In[79]:


#Find geo-coordinates of Ruseløkkveien
streetDataModified[streetDataModified.Street == 'Ruseløkkveien']#dataframe


# In[80]:


#Try to see how to access the latitude/longitude
streetDataModified[streetDataModified.Street == 'Ruseløkkveien'].iloc[0,2]#This accesses the longitude


# In[81]:


#Create a function that will estimate the distances
## Here we will estimate the distance of each street in the dataframe to Ruseløkkveien and output those that are within 400m (and skip Ruseløkkveien)

#Empty list to store the streets found within the radius
streetsNearby = list()
def distanceFromStreet (dataframe, fullData, radius=400):
    """Append to ``streetsNearby`` every street of ``fullData`` close to a reference street.

    Columns are read by position: position 0 is the value appended to
    ``streetsNearby`` and positions 1 and 2 are used as the coordinate pair.
    NOTE(review): presumably these are street name, latitude and longitude —
    confirm against the column order of the input CSV.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame whose first row holds the reference street.
    fullData : pandas.DataFrame
        Frame with all candidate streets, same column layout.
    radius : float, optional
        Maximum distance (compared against the ``.m`` value of the geopy
        distance) for a street to count as nearby. Defaults to 400.

    Returns
    -------
    None
        Matches are printed and appended to the module-level ``streetsNearby``.
    """
    #Coordinates of the reference street (first row only)
    coords_1 = dataframe.iloc[0,1], dataframe.iloc[0,2]

    #Compare every candidate row against the reference
    for i in range(fullData.shape[0]):
        # One-row slice, kept as a dataframe so the printout shows the full row.
        df = fullData.iloc[i:i+1,]
        coords_2 = df.iloc[0,1], df.iloc[0,2]
        distance = geopy.distance.distance(coords_1, coords_2).m
        # distance > 0 skips the reference street itself (and any row with
        # exactly the same coordinates).
        if 0 < distance <= radius:
            print(df, round(distance, 2) )
            streetsNearby.append(df.iloc[0,0])
        
    
    #return (coords_1, coords_2)

    
#Use the Ruseløkkveien row as the reference and scan the whole table
distanceFromStreet(
    streetDataModified[streetDataModified['Street'] == 'Ruseløkkveien'],
    fullData=streetDataModified,
)


# In[82]:


#Have a look at streets whose mid points are within 400m from Ruseløkkveien
streetsNearby


# In[83]:


#Attach Nationaltheatret stasjon to every recorded street that is in the nearby list
# NOTE(review): distanceFromStreet() only keeps streets with distance > 0, so
# Ruseløkkveien itself is presumably not in streetsNearby and does not get
# the station here — confirm whether that is intended.
for street, stations in trainPost.items():
    if street not in streetsNearby:
        continue
    stations.append('Nationaltheatret stasjon')
    print(street, stations)


# In[84]:


#Turn the updated dictionary into a dataframe: one row per street (the index)
streetTrain = pd.DataFrame.from_dict(trainPost, orient='index')
streetTrain.head()

#Street names become an ordinary column
streetTrain = streetTrain.reset_index()

#Name the columns
# NOTE(review): this assumes exactly one value column, i.e. no street has
# more than one entry in trainPost — confirm.
streetTrain.columns = ['Street', 'Train Station']
streetTrain.head()


# In[85]:


#Write the train table to disk
streetTrain.to_csv('./streetTrain.csv', index=False)


# In[86]:


#Left-join onto the combined Trikk, Buss, T-Bane table so every street is kept
streetTrikkBussMetroTog = streetTrikkBussMetro.merge(streetTrain, on='Street', how='left')
streetTrikkBussMetroTog.shape


# In[87]:


#View your accomplishment and hardwork!
streetTrikkBussMetroTog.head()


# In[2760]:


#Write the final combined table to disk
streetTrikkBussMetroTog.to_csv('./streetData_TrikkBusMetroTog.csv', index=False)