import nltk
# Fetch the NLTK 'stopwords' resource so the corpus lookup below can find it.
# This runs every time the script is executed.
nltk.download('stopwords')

import pandas as pd
import re
import string
# Let pandas show up to 200 characters per column when displaying frames,
# so the long 'body_text' values are readable.
pd.set_option('display.max_colwidth', 200)

# English stop words from the NLTK corpus; clean_text() filters tokens
# against this collection.
stopwords = nltk.corpus.stopwords.words('english')

# Two-row sample corpus used by the text-cleaning steps below.
# Each document is written as adjacent string literals so that no source-code
# indentation leaks into the text: a backslash continuation *inside* a string
# literal keeps the leading spaces of the following line as part of the value.
data = pd.DataFrame({
    'label': ['one', 'second'],
    'body_text': [
        'The Democratic Party has a more complicated relationship with '
        'Donald Trump than it likes to admit.',
        'It wants voters to remember the nonstop chaos of his '
        'administration, his Twitter rants, '
        'how he debased the presidency on Jan. 6 and won’t stop lying '
        'about the 2020 election results.',
    ],
})
data

def clean_text(text, stop_words=None):
    """Strip punctuation from *text*, tokenize it and drop stop words.

    Args:
        text: The string to clean. It is compared against the stop words
            as-is, so lower-case it first if the match should be
            case-insensitive.
        stop_words: Optional iterable of words to discard. When omitted,
            the module-level ``stopwords`` collection is used.

    Returns:
        A list of the remaining tokens in their original order. Empty
        tokens are never included.
    """
    if stop_words is None:
        stop_words = stopwords
    # A set gives O(1) membership tests instead of scanning a list per token.
    excluded = frozenset(stop_words)

    # Delete every ASCII punctuation character in a single pass.
    without_punct = text.translate(str.maketrans('', '', string.punctuation))

    # Raw string: '\W' in a plain literal is an invalid escape sequence.
    tokens = re.split(r'\W+', without_punct)

    # re.split yields '' when the text is empty or starts/ends with a
    # separator; those are not real tokens, so skip them with the stop words.
    return [word for word in tokens if word and word not in excluded]

# Lower-case each document, run it through clean_text(), and keep the
# resulting token lists in a new 'body_text_nostop' column.
data['body_text_nostop'] = data['body_text'].apply(
    lambda body: clean_text(body.lower())
)

data.head()

# Porter stemmer instance used by stemming().
ps = nltk.PorterStemmer()

# `stem` is a method of NLTK's PorterStemmer
def stemming(tokenized_text):
    """Return a list holding the stem of every token in *tokenized_text*.

    Each token is passed to ``ps.stem``, where ``ps`` is the module-level
    ``nltk.PorterStemmer`` instance; the order of the tokens is kept.
    """
    return list(map(ps.stem, tokenized_text))

# Stem every token list from 'body_text_nostop' and store the result in a
# new 'body_text_stemmed' column.
data['body_text_stemmed'] = data['body_text_nostop'].apply(stemming)

data.head()


# A lambda bound to a name behaves like a one-expression function.
x = lambda a: a + 10
print(x(5))

# Same idea with a different offset; the result is computed but not printed.
y = lambda x: x + 3
y(5)

# A lambda can also be called on the spot, without giving it a name.
(lambda value: value + 3)(5)

def add(x):
    """Return *x* increased by 3 (the ``def`` twin of the lambdas above)."""
    total = x + 3
    return total


add(5)


import pandas as pd
import numpy as np

# Build the frame from raw rows first; the labels are attached afterwards.
dff = pd.DataFrame(
    [
        [1, 2, 3, 4],
        [5, 6, 7, 8],
        # NumPy results can sit next to plain numbers in a row.
        [np.square(6), np.sqrt(144), 22, 34],
    ]
)

# Replace the default row and column labels.
dff.columns = list('ABCD')
dff.index = [0, 1, 2]

dff

import pandas as pd
import numpy as np

# Same kind of frame, this time labelled directly in the constructor.
df = pd.DataFrame(
    [[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]],
    index=[0, 1, 2],
    columns=['A', 'B', 'C', 'D'],
)

df

# Two ways to add 10 to every cell: a column-wise lambda, or plain
# arithmetic on the whole frame.
df2 = df.apply(lambda column: column + 10)
print(df2)
df3 = df + 10
print(df3)

# Overwrite individual cells addressed by (row label, column label).
df.at[1, 'A'] = np.square(df.at[1, 'A'])
df.at[0, 'A'] = 20

df

# Series.apply on one column: subtract 2 from every value of 'A'.
df2["A"] = df2["A"].apply(lambda value: value - 2)
df2

# DataFrame.apply passes each column to the lambda; only the columns named
# 'A' and 'B' are squared, the others are returned unchanged.
df2 = df.apply(
    lambda column: np.square(column) if column.name in ['A', 'B'] else column
)
df2

# Series.map on a single column: halve every value of 'A'.
df['A'] = df['A'].map(lambda A: A / 2.)
df


# DataFrame.assign with a lambda: the new 'B' is derived from the frame itself.
df2 = df.assign(B=lambda frame: frame.B / 2)
df2


# importing pandas and numpy
import pandas as pd
import numpy as np

# create a sample dataframe
data = pd.DataFrame({
    'age': [10, 22, 13, 21, 12, 11, 17],
    'section': ['A', 'B', 'C', 'B', 'B', 'A', 'A'],
    'city': ['Gurgaon', 'Delhi', 'Mumbai', 'Delhi', 'Mumbai', 'Delhi', 'Mumbai'],
    'gender': ['M', 'F', 'F', 'M', 'M', 'M', 'F'],
    # np.nan is the documented constant for a missing value; upper-case
    # aliases such as np.NAN are not available in the NumPy 2.x namespace.
    'favourite_color': ['red', np.nan, 'yellow', np.nan, 'black', 'green', 'red'],
})

# view the data
data

len(data)

# Overwrite 'age' with 0, 10, 20, ... one cell at a time via .at.
# The loop walks the actual index labels instead of range(len(data)), so it
# does not rely on the frame using the default 0..n-1 labels (with other
# labels, .at on a missing label would append rows instead of updating).
for position, row_label in enumerate(data.index):
    data.at[row_label, 'age'] = position * 10

data


# A lambda may take several arguments; this one multiplies two values.
x = lambda a, b: a * b
print(x(5, 6))

# x is rebound here: the new lambda sums three arguments.
x = lambda a, b, c: a + b + c
print(x(5, 6, 2))

def myfunc(n):
    """Return a one-argument lambda that multiplies its input by *n*."""
    multiply_by_n = lambda a: a * n  # the lambda closes over n
    return multiply_by_n


# Fix n at 2 to obtain a doubling function.
mydoubler = myfunc(2)

print(mydoubler(11))

# Immediately-invoked lambda: defined and called in one expression.
(lambda number: number + 1)(2)

# The same function, bound to a name so it can be reused.
add_one = lambda x: x + 1
add_one(2)

# Lambdas can build strings too; both name parts are title-cased.
full_name = lambda first, last: f'Full name: {first.title()} {last.title()}'
full_name('guido', 'van rossum')

x = "Cyrus Hi"

# The lambda forwards its argument to print().
(lambda message: print(message))(x)


# Keep only the even items of a list using filter()
my_list = [1, 5, 4, 6, 8, 11, 3, 12, 0]

new_list = list(filter(lambda x: x % 2 == 0, my_list))

print(new_list)

# Double each item of a list using map()

my_list = [1, 5, 4, 6, 8, 11, 3, 12, 100]

new_list = list(map(lambda x: x * 2, my_list))

print(new_list)

# Keep the values greater than 4; the filter object is consumed by list()
# when it is printed.
sequences = [10, 2, 8, 7, 5, 4, 3, 11, 0, 1]
filtered_result = filter(lambda x: x > 4, sequences)
print(list(filtered_result))

# Squaring with a lambda.
square = lambda x: x * x
square(5)

# greet() is called for its printed output; print() itself returns None.
greet = lambda name: print('Hello', name, '?')
greet('Cyrus')

# Conditional expression inside a lambda: anything not above 10 yields 10.
print((lambda x: x if x > 10 else 10)(5))