import re
import string

import nltk
import pandas as pd

# Fetch the NLTK stop-word corpus used by the cleaning step.
nltk.download('stopwords')

# Show up to 200 characters per cell when a DataFrame is displayed.
pd.set_option('display.max_colwidth', 200)

# English stop words as provided by NLTK.
stopwords = nltk.corpus.stopwords.words('english')
# Two-row sample frame: a short label plus one piece of raw text per row.
data = pd.DataFrame(
    {
        'label': ['one', 'second'],
        'body_text': [
            'The Democratic Party has a more complicated relationship with Donald Trump than it likes to admit.',
            # Adjacent string literals are joined into a single string.
            'It wants voters to remember the nonstop chaos of his administration, his Twitter rants, '
            'how he debased the presidency on Jan. 6 and won’t stop lying about the 2020 election results.',
        ],
    }
)
data
[nltk_data] Downloading package stopwords to /root/nltk_data... [nltk_data] Package stopwords is already up-to-date!
label | body_text | |
---|---|---|
0 | one | The Democratic Party has a more complicated relationship with Donald Trump than it likes to admit. |
1 | second | It wants voters to remember the nonstop chaos of his administration, his Twitter rants, how he debased the presidency on Jan. 6 and won’t stop lying about the 2020 election res... |
def clean_text(text):
    """Strip punctuation from *text*, tokenize it, and drop stop words.

    Every character listed in ``string.punctuation`` is removed, the
    remainder is split on runs of non-word characters, and tokens found in
    the module-level ``stopwords`` collection are discarded. No case folding
    happens here, so stop-word matching is case-sensitive.

    Returns:
        A list of the remaining tokens in their original order. Empty
        tokens are never included.
    """
    # Delete all ASCII punctuation in a single pass.
    without_punctuation = text.translate(
        str.maketrans('', '', string.punctuation)
    )
    # Raw string: '\W' inside a plain literal is an invalid escape sequence.
    tokens = re.split(r'\W+', without_punctuation)
    # re.split yields '' at the edges when the text starts or ends with a
    # separator; skip those together with the stop words.
    return [word for word in tokens if word and word not in stopwords]
# Lower-case each document, then tokenize it and remove stop words.
data['body_text_nostop'] = data['body_text'].apply(
    lambda raw_text: clean_text(raw_text.lower())
)
data.head()
label | body_text | body_text_nostop | |
---|---|---|---|
0 | one | The Democratic Party has a more complicated relationship with Donald Trump than it likes to admit. | [democratic, party, complicated, relationship, donald, trump, likes, admit] |
1 | second | It wants voters to remember the nonstop chaos of his administration, his Twitter rants, how he debased the presidency on Jan. 6 and won’t stop lying about the 2020 election res... | [wants, voters, remember, nonstop, chaos, administration, twitter, rants, debased, presidency, jan, 6, stop, lying, 2020, election, results] |
# Porter stemmer instance; its stem() method is applied to one word at a time.
ps = nltk.PorterStemmer()


def stemming(tokenized_text):
    """Return a list holding the Porter stem of every token in *tokenized_text*."""
    return list(map(ps.stem, tokenized_text))
# Stem the stop-word-free token list of every row.
data['body_text_stemmed'] = data['body_text_nostop'].apply(stemming)
data.head()
label | body_text | body_text_nostop | body_text_stemmed | |
---|---|---|---|---|
0 | one | The Democratic Party has a more complicated relationship with Donald Trump than it likes to admit. | [democratic, party, complicated, relationship, donald, trump, likes, admit] | [democrat, parti, complic, relationship, donald, trump, like, admit] |
1 | second | It wants voters to remember the nonstop chaos of his administration, his Twitter rants, how he debased the presidency on Jan. 6 and won’t stop lying about the 2020 election res... | [wants, voters, remember, nonstop, chaos, administration, twitter, rants, debased, presidency, jan, 6, stop, lying, 2020, election, results] | [want, voter, rememb, nonstop, chao, administr, twitter, rant, debas, presid, jan, 6, stop, lie, 2020, elect, result] |
# A one-argument lambda bound to a name: adds ten to its input.
x = lambda a: a + 10
print(x(5))
15
# Bind a lambda that adds three to a name, then call it.
y = lambda x: x + 3
y(5)
8
# Define a lambda and call it immediately, without binding it to a name.
(lambda x: x+3)(5)
8
def add(x):
    """Return *x* increased by three."""
    result = x + 3
    return result


add(5)
8
import pandas as pd
import numpy as np
# 3x4 frame whose last row mixes NumPy results with plain integers.
dff = pd.DataFrame(
    [
        [1, 2, 3, 4],
        [5, 6, 7, 8],
        [np.square(6), np.sqrt(144), 22, 34],
    ],
    index=[0, 1, 2],
    columns=['A', 'B', 'C', 'D'],
)
dff
A | B | C | D | |
---|---|---|---|---|
0 | 1 | 2.0 | 3 | 4 |
1 | 5 | 6.0 | 7 | 8 |
2 | 36 | 12.0 | 22 | 34 |
import pandas as pd
import numpy as np
# 3x4 integer frame, specified column by column.
df = pd.DataFrame(
    {
        'A': [1, 5, 9],
        'B': [2, 6, 10],
        'C': [3, 7, 11],
        'D': [4, 8, 12],
    },
    index=[0, 1, 2],
)
df
A | B | C | D | |
---|---|---|---|---|
0 | 1 | 2 | 3 | 4 |
1 | 5 | 6 | 7 | 8 |
2 | 9 | 10 | 11 | 12 |
# Add 10 to every cell, first column by column through apply() ...
df2 = df.apply(lambda column: column + 10)
print(df2)

# ... then with plain arithmetic on the whole frame.
df3 = df + 10
print(df3)
A B C D 0 11 12 13 14 1 15 16 17 18 2 19 20 21 22 A B C D 0 11 12 13 14 1 15 16 17 18 2 19 20 21 22
# Overwrite two individual cells of column 'A', addressed by row and column label.
df.at[0, 'A'] = 20
df.at[1, 'A'] = np.square(df.at[1, 'A'])
df
A | B | C | D | |
---|---|---|---|---|
0 | 20 | 2 | 3 | 4 |
1 | 25 | 6 | 7 | 8 |
2 | 9 | 10 | 11 | 12 |
# Subtract 2 from each value of column 'A' only.
df2['A'] = df2['A'].apply(lambda value: value - 2)
df2
A | B | C | D | |
---|---|---|---|---|
0 | 9 | 12 | 13 | 14 |
1 | 13 | 16 | 17 | 18 |
2 | 17 | 30 | 32 | 44 |
# Square columns 'A' and 'B'; every other column passes through unchanged.
df2 = df.apply(
    lambda column: np.square(column) if column.name in ('A', 'B') else column
)
df2
A | B | C | D | |
---|---|---|---|---|
0 | 400 | 4 | 3 | 4 |
1 | 625 | 36 | 7 | 8 |
2 | 81 | 400 | 22 | 34 |
# Using Series.map() on a single column: halve every value of 'A'.
df['A'] = df['A'].map(lambda value: value / 2.)
df
A | B | C | D | |
---|---|---|---|---|
0 | 10.0 | 2 | 3 | 4 |
1 | 12.5 | 6 | 7 | 8 |
2 | 4.5 | 20 | 22 | 34 |
# Using DataFrame.assign() with a lambda: the result has column 'B' halved.
df2 = df.assign(B=lambda frame: frame['B'] / 2)
df2
A | B | C | D | |
---|---|---|---|---|
0 | 10.0 | 1.0 | 3 | 4 |
1 | 12.5 | 3.0 | 7 | 8 |
2 | 4.5 | 10.0 | 22 | 34 |
# importing pandas and numpy
import pandas as pd
import numpy as np
# create a sample dataframe
data = pd.DataFrame({
    'age': [10, 22, 13, 21, 12, 11, 17],
    'section': ['A', 'B', 'C', 'B', 'B', 'A', 'A'],
    'city': ['Gurgaon', 'Delhi', 'Mumbai', 'Delhi', 'Mumbai', 'Delhi', 'Mumbai'],
    'gender': ['M', 'F', 'F', 'M', 'M', 'M', 'F'],
    # np.nan marks a missing value; the np.NAN alias was removed in NumPy 2.0.
    'favourite_color': ['red', np.nan, 'yellow', np.nan, 'black', 'green', 'red'],
})
# view the data
data
age | section | city | gender | favourite_color | |
---|---|---|---|---|---|
0 | 10 | A | Gurgaon | M | red |
1 | 22 | B | Delhi | F | NaN |
2 | 13 | C | Mumbai | F | yellow |
3 | 21 | B | Delhi | M | NaN |
4 | 12 | B | Mumbai | M | black |
5 | 11 | A | Delhi | M | green |
6 | 17 | A | Mumbai | F | red |
# Number of rows in the frame.
len(data)
7
# Set each row's age to ten times its position with one column assignment
# instead of writing cell by cell through .at inside a Python loop.
data['age'] = range(0, 10 * len(data), 10)
data
age | section | city | gender | favourite_color | |
---|---|---|---|---|---|
0 | 0 | A | Gurgaon | M | red |
1 | 10 | B | Delhi | F | NaN |
2 | 20 | C | Mumbai | F | yellow |
3 | 30 | B | Delhi | M | NaN |
4 | 40 | B | Mumbai | M | black |
5 | 50 | A | Delhi | M | green |
6 | 60 | A | Mumbai | F | red |
# A lambda may take several arguments; this one multiplies two.
x = lambda a, b: a * b
print(x(5, 6))
30
# Three-argument lambda that sums its inputs.
x = lambda a, b, c: a + b + c
print(x(5, 6, 2))
13
def myfunc(n):
    """Return a one-argument function that multiplies its input by *n*."""
    multiplier = lambda a: a * n  # the lambda closes over n
    return multiplier


# Fix n = 2 to obtain a doubling function.
mydoubler = myfunc(2)
print(mydoubler(11))
22
# Immediately invoked lambda: defined and called in a single expression.
(lambda x: x + 1)(2)
3
# The same lambda, bound to a name so it can be called like a regular function.
add_one = lambda x: x + 1
add_one(2)
3
# Two-argument lambda that title-cases both parts and formats them into one string.
full_name = lambda first, last: f'Full name: {first.title()} {last.title()}'
full_name('guido', 'van rossum')
'Full name: Guido Van Rossum'
x = "Cyrus Hi"
# The lambda forwards its argument to print() and is invoked immediately.
(lambda text: print(text))(x)
Cyrus Hi
# Program to keep only the even items of a list using filter()
my_list = [1, 5, 4, 6, 8, 11, 3, 12, 0]
new_list = list(filter(lambda item: item % 2 == 0, my_list))
print(new_list)
[4, 6, 8, 12, 0]
# Program to double each item in a list using map()
my_list = [1, 5, 4, 6, 8, 11, 3, 12, 100]
new_list = list(map(lambda item: item * 2, my_list))
print(new_list)
[2, 10, 8, 12, 16, 22, 6, 24, 200]
sequences = [10, 2, 8, 7, 5, 4, 3, 11, 0, 1]
# filter() returns a lazy iterator; list() consumes it for printing.
filtered_result = filter(lambda value: value > 4, sequences)
print(list(filtered_result))
[10, 8, 7, 5, 11]
# Lambda that multiplies its argument by itself.
square = lambda x: x * x
square(5)
25
# A lambda used only for its side effect: it prints a greeting for the given name.
greet = lambda name: print('Hello', name, '?')
greet('Cyrus')
Hello Cyrus ?
Lambda functions are generally used when a function is needed only temporarily, often as an argument to another function such as filter, map, or reduce.
Using a lambda function, you can define a function and call it immediately at the end of its definition, all in a single expression. This cannot be done with a function defined using def.
# Immediately invoked lambda with a conditional expression: values not above 10 become 10.
print((lambda x: x if x > 10 else 10)(5))
10