'''
This script scrapes the Twitter accounts that my favourite funny Twitter
account (@AbbieEvansXO) herself follows. This is my hack to get a mostly
correct list of funny accounts (otherwise I'd have had to find each one
individually and build a list by hand — maybe later).
A later script will scrape the tweets from this 'following' list of
Twitter accounts.
'''
#Import required libraries
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from bs4 import BeautifulSoup
import requests
import time
import pandas as pd
# Base funny Twitter account. We first scrape the other accounts she follows.
twitter_username = "AbbieEvansXO"

# NOTE(review): passing the chromedriver path positionally is the Selenium 3
# calling convention; kept because the rest of the file uses the matching
# find_element_by_* API. Assumes chromedriver.exe sits in the working dir.
driver = webdriver.Chrome('chromedriver.exe')
time.sleep(1)
# Authenticate, then navigate to @AbbieEvansXO's "following" page.
driver.get("https://www.twitter.com/login")

email_field = driver.find_element_by_css_selector(".js-initial-focus")
email_field.clear()
email_field.send_keys('MY_EMAIL')

password_field = driver.find_element_by_css_selector(".js-password-field")
password_field.clear()
password_field.send_keys('MY_PASSWORD')
password_field.send_keys(Keys.RETURN)
time.sleep(2)

driver.get("https://twitter.com/" + twitter_username + "/following")

# The following list loads dynamically; scroll to the bottom repeatedly
# so more entries are fetched into the DOM before we grab the page source.
for _ in range(9):
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
    time.sleep(2)
# Parse the fully-scrolled page with BeautifulSoup.
pagesrc = driver.page_source
soup = BeautifulSoup(pagesrc, "lxml")

# Handles (e.g. "AbbieEvansXO") live in <b class="u-linkComplex-target">.
username = [tag.text for tag in soup.find_all("b", class_="u-linkComplex-target")]

# Display names come from the profile-name anchor on each list card.
userprofile = [
    tag.text.strip()
    for tag in soup.find_all(
        "a", class_="fullname ProfileNameTruncated-link u-textInheritColor js-nav"
    )
]

# My own account shows up first in the following list, so drop it.
# NOTE(review): two leading handles are dropped but only one profile name —
# presumably the page markup repeats the first handle; confirm against the
# live page before trusting the row alignment below.
del username[:2]
del userprofile[0]
# Build the final frame: one row per followed account (display name, handle).
following = pd.DataFrame(
    list(zip(userprofile, username)), columns=["Profile", "Username"]
)

# BUG FIX: the original referenced an undefined name `followers` on the next
# two lines, which raised NameError at runtime; the frame is named `following`.
# Sort case-insensitively via a temporary uppercase key column.
following["username_upper"] = following["Username"].str.upper()
following = following.sort_values(["username_upper"])
del following["username_upper"]
following.head()

# Persist for the next script, which scrapes tweets from these accounts.
following.to_csv("Following list.csv")