import re
import string

import nltk
import numpy as np
import pandas as pd

# Download the NLTK stop-word corpus that clean_text() relies on.
nltk.download('stopwords')

pd.set_option('display.max_colwidth', 200)

stopwords = nltk.corpus.stopwords.words('english')

data = pd.DataFrame({
    'label': ['one', 'second'],
    'body_text': [
        'The Democratic Party has a more complicated relationship with '
        'Donald Trump than it likes to admit.',
        # Written with implicit string concatenation instead of a backslash
        # continuation inside the literal, so source indentation cannot leak
        # into the text.
        'It wants voters to remember the nonstop chaos of his administration, '
        'his Twitter rants, how he debased the presidency on Jan. 6 and '
        'won’t stop lying about the 2020 election results.',
    ],
})
data


def clean_text(text):
    """Remove punctuation, tokenize, and drop stop words.

    Args:
        text: The string to clean. Stop-word matching is an exact,
            case-sensitive comparison; the call site below lower-cases
            the text before passing it in.

    Returns:
        A list of non-empty tokens that are not in the module-level
        ``stopwords`` list.
    """
    # Only characters listed in string.punctuation are deleted; any other
    # non-word character (e.g. a typographic apostrophe) survives this step
    # and then acts as a token separator in the split below.
    without_punct = text.translate(str.maketrans('', '', string.punctuation))
    # Raw string: '\W' in a plain literal is an invalid escape sequence.
    tokens = re.split(r'\W+', without_punct)
    # Built per call so later edits to the global list are honoured, while
    # still giving O(1) membership tests per token.
    stop_set = set(stopwords)
    # re.split yields '' at the edges when the text starts or ends with a
    # separator; those empty tokens are dropped.
    return [token for token in tokens if token and token not in stop_set]


data['body_text_nostop'] = data['body_text'].apply(
    lambda x: clean_text(x.lower())
)
data.head()

ps = nltk.PorterStemmer()


def stemming(tokenized_text):
    """Return the Porter stem of every token in ``tokenized_text``.

    Args:
        tokenized_text: An iterable of word tokens.

    Returns:
        A list with ``ps.stem(word)`` applied to each token, in order.
    """
    return [ps.stem(word) for word in tokenized_text]


data['body_text_stemmed'] = data['body_text_nostop'].apply(stemming)
data.head()

# --- Lambda basics: four equivalent ways to add a constant ---------------
x = lambda a: a + 10
print(x(5))

y = lambda x: x + 3
y(5)

# A lambda can be called immediately without binding it to a name.
(lambda x: x + 3)(5)


def add(x):
    """Return ``x + 3``; the ``def`` equivalent of the lambdas above."""
    return x + 3


add(5)

# --- Building DataFrames --------------------------------------------------
# np.sqrt returns a float, so column 'B' ends up with a float dtype.
dff = pd.DataFrame(
    [[1, 2, 3, 4], [5, 6, 7, 8], [np.square(6), np.sqrt(144), 22, 34]],
    index=[0, 1, 2],
    columns=['A', 'B', 'C', 'D'],
)
dff

df = pd.DataFrame(
    [[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]],
    index=[0, 1, 2],
    columns=['A', 'B', 'C', 'D'],
)
df

# apply() passes each column to the lambda; the result matches the
# vectorized ``df + 10`` just below.
df2 = df.apply(lambda x: x + 10)
print(df2)

df3 = df + 10
print(df3)

# .at reads and writes a single cell addressed by (row label, column label).
df.at[1, 'A'] = np.square(df.at[1, 'A'])
df.at[0, 'A'] = 20
df

# Series.apply() calls the lambda once per element of column 'A'.
df2["A"] = df2["A"].apply(lambda x: x - 2)
df2

# Square only columns 'A' and 'B'; every other column passes through.
df2 = df.apply(lambda x: np.square(x) if x.name in ['A', 'B'] else x)
df2

# Using Series.map() on a single column
df['A'] = df['A'].map(lambda A: A / 2.)
df

# Using DataFrame.assign() with a lambda: the callable receives the frame
# and its result becomes the new 'B' column of the returned copy.
df2 = df.assign(B=lambda frame: frame.B / 2)
df2

# importing pandas and numpy
import pandas as pd
import numpy as np

# create a sample dataframe
data = pd.DataFrame({
    'age': [10, 22, 13, 21, 12, 11, 17],
    'section': ['A', 'B', 'C', 'B', 'B', 'A', 'A'],
    'city': ['Gurgaon', 'Delhi', 'Mumbai', 'Delhi', 'Mumbai', 'Delhi', 'Mumbai'],
    'gender': ['M', 'F', 'F', 'M', 'M', 'M', 'F'],
    # np.nan is the canonical spelling; the upper-case alias np.NAN is a
    # legacy name that is not available in NumPy 2.x.
    'favourite_color': ['red', np.nan, 'yellow', np.nan, 'black', 'green', 'red'],
})

# view the data
data

len(data)

# Overwrite 'age' with 0, 10, 20, ... by row position, in one column
# assignment instead of a per-cell .at loop.
data['age'] = [position * 10 for position in range(len(data))]
data

# --- Lambdas with several arguments ---------------------------------------
x = lambda a, b: a * b
print(x(5, 6))

x = lambda a, b, c: a + b + c
print(x(5, 6, 2))


def myfunc(n):
    """Return a one-argument function that multiplies its input by ``n``."""
    return lambda a: a * n


mydoubler = myfunc(2)
print(mydoubler(11))

# Immediately-invoked lambda versus a lambda bound to a name.
(lambda x: x + 1)(2)

add_one = lambda x: x + 1
add_one(2)

full_name = lambda first, last: f'Full name: {first.title()} {last.title()}'
full_name('guido', 'van rossum')

x = "Cyrus Hi"
# the lambda passes its argument on to print()
(lambda x: print(x))(x)

# Program to filter out only the even items from a list
my_list = [1, 5, 4, 6, 8, 11, 3, 12, 0]
new_list = list(filter(lambda x: x % 2 == 0, my_list))
print(new_list)

# Program to double each item in a list using map()
my_list = [1, 5, 4, 6, 8, 11, 3, 12, 100]
new_list = list(map(lambda x: x * 2, my_list))
print(new_list)

# filter() returns a lazy iterator; list() materializes it for printing.
sequences = [10, 2, 8, 7, 5, 4, 3, 11, 0, 1]
filtered_result = filter(lambda x: x > 4, sequences)
print(list(filtered_result))

square = lambda x: x * x
square(5)

greet = lambda name: print('Hello', name, '?')
greet('Cyrus')

# Conditional expression inside a lambda: yields x when x > 10, else 10.
print((lambda x: x if x > 10 else 10)(5))