In [28]:

# Build a small vocabulary and draw a random sample of ten words from it
# (choices samples with replacement, so words can repeat)

from random import choices

_words = [
    'hello',
    'goodbye',
    'this',
    'th@t',
    'whenever',
    'wherever',
    'cogdog',
]
choices(_words, k=10)

Out[28]:

['th@t',
 'this',
 'goodbye',
 'cogdog',
 'th@t',
 'whenever',
 'goodbye',
 'whenever',
 'th@t',
 'wherever']

In [29]:

%%capture
try:
    import pandas as pd
except:
    !pip install pandas 

In [30]:

# Build a two-column dataframe, each column holding 100 randomly drawn words

import pandas as pd

df = pd.DataFrame(
    {
        'col1': choices(_words, k=100),
        # col2 draws from the same word list plus one extra word
        'col2': choices(_words + ['gotcha'], k=100),
    }
)

# Persist the frame as a csv file, leaving out the index column...
df.to_csv('mywords.csv', index=False)

# ...and show the first few rows
df.head()

Out[30]:

	col1	col2
0	th@t	hello
1	whenever	th@t
2	th@t	goodbye
3	cogdog	whenever
4	wherever	th@t

In [31]:

#Here's what it looks like as csv: print the first three lines of the raw file
#(the header row plus the first two data rows)
!head -n 3 mywords.csv

col1,col2
th@t,hello
whenever,th@t

In [32]:

# Round trip: read the saved csv file back into a second dataframe and preview it
df2 = pd.read_csv(filepath_or_buffer='mywords.csv')
df2.head(n=5)

Out[32]:

	col1	col2
0	th@t	hello
1	whenever	th@t
2	th@t	goodbye
3	cogdog	whenever
4	wherever	th@t

In [33]:

%%capture
#Install wordcloud package
try:
    import wordcloud
except:
    !pip install wordcloud

In [34]:

%%capture
try:
    import matplotlib
except:
    !pip install matplotlib

In [35]:

#Required graphics package
import matplotlib.pyplot as plt
#...and magic to display results inline in the notebook...
%matplotlib inline

In [36]:

from wordcloud import WordCloud

# Flatten col1 into one space-separated string and build a word cloud from it
wordcloud = WordCloud().generate(
    ' '.join(df2['col1'].tolist())
)

# The trailing semicolon keeps the returned image object's repr out of the output
plt.imshow(wordcloud, interpolation='bilinear');

In [37]:

# Same word cloud rendering, this time for the words in col2
wordcloud = WordCloud().generate(
    ' '.join(df2['col2'].tolist())
)

plt.imshow(wordcloud, interpolation='bilinear');

If you have sentences, they can be split...

It's particularly easy if the split is regular. For example:

In [38]:

# Combine the two word columns into one regularly structured "@<col1>/<col2>" string
df['col3a'] = ('@' + df['col1']) + ('/' + df['col2'])
df.head()

Out[38]:

	col1	col2	col3a
0	th@t	hello	@th@t/hello
1	whenever	th@t	@whenever/th@t
2	th@t	goodbye	@th@t/goodbye
3	cogdog	whenever	@cogdog/whenever
4	wherever	th@t	@wherever/th@t

In [39]:

#We can split a string in a column and then expand it over a couple of columns.
#n=1 limits this to a single split at the first '/', so we always get exactly two parts.
#n and expand are passed by keyword: pandas 2.0 and later only accept pat positionally.
df[['col3b','col3c']] = df['col3a'].str.split('/', n=1, expand=True)
df.head()

Out[39]:

	col1	col2	col3a	col3b	col3c
0	th@t	hello	@th@t/hello	@th@t	hello
1	whenever	th@t	@whenever/th@t	@whenever	th@t
2	th@t	goodbye	@th@t/goodbye	@th@t	goodbye
3	cogdog	whenever	@cogdog/whenever	@cogdog	whenever
4	wherever	th@t	@wherever/th@t	@wherever	th@t

Want the username without the leading @?

In [40]:

#We could do a trivial replace, but we can also regex to be more precise:
#'^@' only matches an @ at the very start of the string, so the @ inside 'th@t' is kept.
#regex=True must be given explicitly - pandas 2.0 and later treat the pattern as a
#literal string by default, in which case '^@' would never match.
#https://pandas.pydata.org/docs/reference/api/pandas.Series.str.replace.html
df['col3b'] = df['col3b'].str.replace('^@', '', regex=True)
df.head()

Out[40]:

	col1	col2	col3a	col3b	col3c
0	th@t	hello	@th@t/hello	th@t	hello
1	whenever	th@t	@whenever/th@t	whenever	th@t
2	th@t	goodbye	@th@t/goodbye	th@t	goodbye
3	cogdog	whenever	@cogdog/whenever	cogdog	whenever
4	wherever	th@t	@wherever/th@t	wherever	th@t

In [ ]: