#!/usr/bin/env python
# coding: utf-8

# # Google Suggestions
#
# This is a demo with tooltips. It provides a set of Google queries starting with "What if %countryname% ...". The result will be shown on the map when you hover over the country.
#
# To try queries starting with other words change the START_OF_QUERY constant. In this case, you need to specify the path to your chromedriver (PATH_TO_CHROMEDRIVER), which is used to get data. It can be downloaded [here](https://chromedriver.chromium.org/downloads).

# In[1]:


import os
import pandas as pd
import geopandas as gpd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import WebDriverException

from lets_plot import *


# In[2]:


LetsPlot.setup_html()


# In[3]:


START_OF_QUERY = 'What if'
SUGGESTION_STUB = 'no data'
PATH_TO_CHROMEDRIVER = ''
PATH_TO_DATA = "https://raw.githubusercontent.com/JetBrains/lets-plot-docs/master/data/google_suggestions_2020.csv"


# In[4]:


def get_naturalearth_data(data_type="admin_0_countries", columns=None):
    """Load a Natural Earth shapefile into a GeoDataFrame.

    The shapefile is read from the lets-plot-docs repository; ``data_type``
    selects the sub-directory under ``data/naturalearth/``.

    Args:
        data_type: Name of the Natural Earth dataset directory.
        columns: Columns to keep, named as in the shapefile. Defaults to
            ``["NAME", "geometry"]`` when omitted.

    Returns:
        A GeoDataFrame restricted to ``columns``, with all column names
        lower-cased.
    """
    # Imported lazily so the rest of the file does not require these packages.
    import shapefile
    from shapely.geometry import shape

    # A fresh list per call; a list literal as the default would be shared.
    if columns is None:
        columns = ["NAME", "geometry"]

    naturalearth_url = (
        "https://raw.githubusercontent.com/JetBrains/lets-plot-docs/master/"
        + "data/naturalearth/{0}/data.shp?raw=true".format(data_type)
    )
    sf = shapefile.Reader(naturalearth_url)

    # The first entry of sf.fields is skipped (in pyshp it is the
    # DeletionFlag pseudo-field, which has no value in the records).
    # Computed once instead of once per record.
    field_names = [field[0] for field in sf.fields[1:]]

    gdf = gpd.GeoDataFrame(
        [dict(zip(field_names, record)) for record in sf.records()],
        geometry=[shape(s) for s in sf.shapes()],
    )[columns]
    gdf.columns = [col.lower() for col in gdf.columns]

    return gdf


# In[5]:


def split_list_to_bunches(l, *, bunch_size=1):
    """Split a sequence into consecutive lists of at most ``bunch_size`` items.

    Args:
        l: Any iterable; it is materialised into a list first, so items are
            taken in iteration order.
        bunch_size: Maximum number of items per bunch; must be at least 1.

    Returns:
        A list of lists. The last bunch may be shorter than ``bunch_size``;
        an empty input gives an empty list.

    Raises:
        ValueError: If ``bunch_size`` is smaller than 1.
    """
    if bunch_size < 1:
        raise ValueError('Wrong input')

    items = list(l)
    return [
        items[start:start + bunch_size]
        for start in range(0, len(items), bunch_size)
    ]


# In[6]:


def get_suggestions_data(countries, *, start_of_query='', driver_path='', data_path=''):
    """Collect the first Google suggestion for each country.

    If ``driver_path`` does not point to an existing file, nothing is
    scraped and the table read from ``data_path`` is returned instead.
    Otherwise Google is queried through Chrome with
    ``"<start_of_query> <country> "`` (lower-cased) for every country.

    Args:
        countries: Country names to query.
        start_of_query: Text that each query, and each accepted suggestion,
            starts with.
        driver_path: Path to the chromedriver executable.
        data_path: Location of a CSV to fall back to.

    Returns:
        A DataFrame. When scraping, it has the columns ``country`` and
        ``suggestion``; countries without a matching suggestion get
        ``SUGGESTION_STUB``.
    """
    BUNCH_SIZE = 20

    if not os.path.isfile(driver_path):
        return pd.read_csv(data_path)

    prefix = start_of_query.lower()
    suggestions = []
    # A new browser session is started for every bunch of countries.
    for countries_bunch in split_list_to_bunches(countries, bunch_size=BUNCH_SIZE):
        # NOTE(review): `executable_path` is kept as in the original; newer
        # Selenium releases may expect a Service object instead - confirm
        # against the installed version.
        with webdriver.Chrome(executable_path=driver_path) as driver:
            driver.get('http://www.google.com')
            for country in countries_bunch:
                query = '{0} {1} '.format(start_of_query, country).lower()
                suggestion = SUGGESTION_STUB
                try:
                    input_elem = WebDriverWait(driver, 2).until(
                        EC.presence_of_element_located((By.CSS_SELECTOR, 'input[role="combobox"]'))
                    )
                    input_elem.send_keys(query)
                    # Wait until at least one suggestion item is present.
                    WebDriverWait(driver, 1).until(
                        EC.presence_of_element_located((By.CSS_SELECTOR, 'ul[role="listbox"]>li'))
                    )
                    option_elems = driver.find_elements(
                        By.CSS_SELECTOR, 'ul[role="listbox"] div[role="option"]'
                    )
                    # Lazy: each text is read inside next(), so driver errors
                    # raised while reading stay within this try block.
                    option_texts = (
                        elem.get_attribute('textContent') for elem in option_elems
                    )
                    suggestion = next(
                        (
                            text for text in option_texts
                            if text is not None and text.startswith(prefix)
                        ),
                        SUGGESTION_STUB,
                    )
                except WebDriverException:
                    # Best effort: on a timeout or any other driver error the
                    # country keeps the stub value.
                    pass
                suggestions.append(suggestion)
                # Reload the page before typing the next query.
                driver.refresh()

    return pd.DataFrame(dict(country=countries, suggestion=suggestions))


# In[7]:


world_gdf = get_naturalearth_data(columns=["NAME", "ISO_A3", "CONTINENT", "POP_EST", "GDP_MD", "geometry"])
suggestions_df = get_suggestions_data(
    world_gdf['name'],
    start_of_query=START_OF_QUERY,
    driver_path=PATH_TO_CHROMEDRIVER,
    data_path=PATH_TO_DATA,
)
df = suggestions_df.merge(world_gdf, left_on='country', right_on='name')
gdf = gpd.GeoDataFrame(df, geometry='geometry')

# Split the countries by whether a suggestion was found.
is_stub = gdf.suggestion == SUGGESTION_STUB
suggestions_gdf = gdf[~is_stub]
no_data_gdf = gdf[is_stub]


# In[8]:


(
    ggplot()
    + geom_map(data=no_data_gdf, fill='gray', size=.2, alpha=.5,
               tooltips=layer_tooltips().line('@name'))
    + geom_map(aes(fill='suggestion'), data=suggestions_gdf, size=.2, alpha=.5,
               tooltips=layer_tooltips().line('@name').line('Google suggestion|@suggestion'))
    + scale_fill_discrete(name='Google suggestion')
    + ggtitle('First Google Query Suggestion Starting with \'%s \'' % START_OF_QUERY)
    + ggsize(700, 500)
    + theme_void()
    + theme(legend_position='none')
)


# Gray means there were no suggestions for that query.