# Imports
from tqdm import tqdm
from colorama import Fore
import json
import pandas as pd
import re
# Load the tweet texts into memory. The input is read line by line, each
# non-blank line being parsed as one JSON object.
data = []
with open('../data/positive_tweets.json', 'r') as infile:
    for line in infile:
        # Skip blank lines (e.g. a trailing newline at the end of the file);
        # json.loads would raise on them.
        if not line.strip():
            continue
        text = json.loads(line).get('text')
        # A record without a 'text' field yields None, and None.strip()
        # would abort the whole load, so skip such records instead.
        if text is None:
            continue
        data.append(text.strip())
def get_all(text):
    """Return the tokens of *text* that begin with '@' or '#'.

    The text is split on whitespace and matching tokens are returned in
    their original order. Tokens are not cleaned up, so any punctuation
    attached to a mention or hashtag is kept as-is.
    """
    prefixes = ('@', '#')
    tagged = []
    for token in text.split():
        if token.startswith(prefixes):
            tagged.append(token)
    return tagged
# Collect, for every tweet, the list of its @mentions and #hashtags.
mentions = []
# bar_format lets you add color to each of the left, middle and right segments
# of the progress bar. Fore.RESET at the end restores the terminal's default
# color so the red of the right segment does not leak into later output.
pbar = tqdm(
    data,
    bar_format="%s{l_bar}%s{bar}%s{r_bar}%s"
    % (Fore.BLACK, Fore.GREEN, Fore.RED, Fore.RESET),
)
for i, text in enumerate(pbar):
    mentions.append(get_all(text))
    # Show the index of the tweet being processed in the bar's description.
    pbar.set_description("Processing %s" % i)
# Example output of the loop above:
# Processing 4999: 100%|██████████| 5000/5000 [00:06<00:00, 764.45it/s]
# Peek at the extracted tokens for the first four tweets. As a bare
# expression this only displays a value in an interactive session or
# notebook; it has no effect when the file is run as a script.
mentions[:4]
# Example output:
# [[u'#FollowFriday', u'@France_Inte', u'@PKuchly57', u'@Milipol_Paris'], [u'@Lamb2ja'], [u'@DespiteOfficial'], [u'@97sides']]