'''
This script scrapes the Twitter accounts that my favourite funny Twitter
account (@AbbieEvansXO) herself follows. This is my hack to get a mostly
correct list of funny accounts (otherwise I'd have had to find each one
individually and build a list by hand — maybe later).
A later script will scrape the tweets from this 'following' list of
Twitter accounts.
'''
#Import required libraries
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from bs4 import BeautifulSoup
import requests
import time
import pandas as pd
# Base funny Twitter account. We first scrape the other accounts she follows.
twitter_username = "AbbieEvansXO"

# NOTE(review): passing the chromedriver path positionally is the Selenium 3
# calling convention; kept because the rest of the file uses the matching
# find_element_by_* API. Assumes chromedriver.exe sits in the working dir.
driver = webdriver.Chrome('chromedriver.exe')
time.sleep(1)
# Authenticate, then navigate to @AbbieEvansXO's "following" page.
driver.get("https://www.twitter.com/login")

email_field = driver.find_element_by_css_selector(".js-initial-focus")
email_field.clear()
email_field.send_keys('MY_EMAIL')

password_field = driver.find_element_by_css_selector(".js-password-field")
password_field.clear()
password_field.send_keys('MY_PASSWORD')
password_field.send_keys(Keys.RETURN)
time.sleep(2)

driver.get("https://twitter.com/" + twitter_username + "/following")

# The following list loads dynamically; scroll to the bottom repeatedly
# so more entries are fetched into the DOM before we grab the page source.
for _ in range(9):
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(2)
# Parse the fully-scrolled page with BeautifulSoup.
pagesrc = driver.page_source
soup = BeautifulSoup(pagesrc, "lxml")

# Handles (e.g. "AbbieEvansXO") live in <b class="u-linkComplex-target">.
username = [tag.text for tag in soup.find_all("b", class_="u-linkComplex-target")]

# Display names come from the profile-name anchor on each list card.
userprofile = [
    tag.text.strip()
    for tag in soup.find_all(
        "a", class_="fullname ProfileNameTruncated-link u-textInheritColor js-nav"
    )
]

# My own account shows up first in the following list, so drop it.
# NOTE(review): two leading handles are dropped but only one profile name —
# presumably the page markup repeats the first handle; confirm against the
# live page before trusting the row alignment below.
del username[:2]
del userprofile[0]
# Build the final frame: one row per followed account (display name, handle).
following = pd.DataFrame(
    list(zip(userprofile, username)), columns=["Profile", "Username"]
)

# BUG FIX: the original referenced an undefined name `followers` on the next
# two lines, which raised NameError at runtime; the frame is named `following`.
# Sort case-insensitively via a temporary uppercase key column.
following["username_upper"] = following["Username"].str.upper()
following = following.sort_values(["username_upper"])
del following["username_upper"]
following.head()

# Persist for the next script, which scrapes tweets from these accounts.
following.to_csv("Following list.csv")