# Big data examiner is a one stop place to learn data science.
print("Big data examiner")

# Every value in Python is an object with a type; type() reports it.
a = 'Big data'
print(type(a))

b = 'Examiner'
print(type(b))

c = 4.5

# isinstance() tests an object against a type, or against a tuple of types.
print(isinstance(a, str))
print(isinstance(a, int))
print(isinstance(c, (int, float)))

a = 'Bill gates'
# In IPython/Jupyter, type "a." and press the Tab key to list the string's
# attributes and methods. The placeholder below stays a comment because
# "a.<tab>" is not valid Python and would stop the whole file from parsing.
# a.<tab>

import numpy as np # importing numpy as np
data_new = [6, 7.5, 8, 0, 1]
# np.array() converts the Python list into a NumPy ndarray.
data = np.array(data_new)
data

x = [1, 2, 3, 4]
y = x          # y is a second name for the very same list object
z = list(x)    # list() builds a new list with equal contents
# "is" / "is not" compare object identity, not equality.
print(x is y)
print(x is not z)

# You can use the following operators:
# x // y -> floor division; it drops the fractional remainder
# x ** y -> raises x to the power y
# x <= y, x < y -> True if x is less than or equal to (respectively, strictly less than) y. The same holds for the greater-than operators.
# Other available operators include the bitwise &, |, ^ and the comparisons ==, !=

# Lists, dicts and arrays are mutable: their contents can be changed in place.
programming = ['Python', 'R', 'Java', 'Php']
programming[2] = 'c++'
print(programming)

# Strings and tuples are immutable.
z_tuple = (9, 10, 11, 23)
try:
    z_tuple[1] = 'twenty two'  # item assignment on a tuple raises TypeError
except TypeError as err:
    # Show the error instead of letting it abort the rest of the script.
    print(err)


# You can write multi-line strings using triple quotes ''' or """
"""
Hi! learn Python it is fun 
Data science and machine learning are amazing
"""

# As said before, Python strings are immutable.
x = ' This is big data examiner'
try:
    x[10] = 'f'  # item assignment on a str raises TypeError
except TypeError as err:
    # Show the error instead of letting it abort the rest of the script.
    print(err)

# String methods such as replace() leave the original untouched and return a new string.
x = 'Java is a powerful programming language'
y = x.replace('Java', 'Python')
y

# Many Python objects can be converted to a string using the str() function.
x = 56664
y = str(x)
print(y)
print(type(y))
# Strings act like other sequences, such as lists and tuples.
a = 'Python'
print(list(a))
print(a[:3])  # slicing works on strings: the first three characters
print(a[3:])  # everything from index 3 onwards

# String concatenation is very important.
p = "P is the best programming language"
q = ", I have ever seen"
z = p + q
z

# %-formatting: %.3f -> float with 3 decimals, %f -> float with the default
# 6 decimals, %d -> integer (0.987 is truncated to 0), %s -> str() of the value.
print("Hii space left is just %.3f gb, and  the data base is %s" % (0.987, 'mysql'))
print("Hii space left is just %f gb, and  the data base is %s" % (0.987, 'mysql'))
print("Hii space left is just %d gb, and  the data base is %s" % (0.987, 'mysql'))



# Boolean values in Python are written as True and False.
print(True and True)
print(True or False)
print(True and False)

# Empty iterables (list, dict, string, tuple, ...) are treated as False when
# used in a truth test (if, while, ...); non-empty ones are treated as True.
print('%s %s' % (bool([]), bool([1, 2, 3])))
print('%s %s' % (bool('Hello Python!'), bool('')))
bool(0), bool(1)

# A numeric string can be converted to float, and from there to int or bool.
x = '1729'
y = float(x)
print(type(y))
print(int(y))
print(bool(y))

# Python's datetime module provides the datetime, date and time types.
from datetime import datetime, date, time
# Write the fields without leading zeros: a literal such as 08 is an invalid
# token (and Python 3 rejects every leading-zero integer, 06 included).
td = datetime(1989, 6, 9, 5, 1, 30)
print(td.day)
print(td.minute)
print(td.date())
print(td.time())
td.strftime('%m/%d/%y %H:%M:%S')  # strftime() formats the datetime as a string

from datetime import datetime, date, time
# strptime() parses a string into a datetime according to the format codes.
datetime.strptime('1989911', '%Y%m%d')
td = datetime(year=1989, month=6, day=9, hour=5, minute=1, second=30)
# replace() builds a new datetime with the named fields swapped; td itself is
# left unchanged because the result is not assigned.
td.replace(second=30, minute=0, hour=0)

from datetime import datetime, date, time
td = datetime(1989, 6, 9, 5, 1, 30)
td1 = datetime(1988, 8, 31, 11, 2, 23)
new_time = td1 - td  # subtracting two datetimes gives the span between them
print(new_time)
print(type(new_time))  # the result is a datetime.timedelta, not a datetime
print(td + new_time)   # adding the timedelta back to td reproduces td1

print(float('7.968'))
try:
    float('Big data')  # not a numeric string, so float() raises ValueError
except ValueError as err:
    # Show the error instead of letting it abort the rest of the script.
    print(err)

# Suppose we want our float function to return the input value when the conversion fails; we can do this using the following code.
def return_float(x):
    """Convert x to a float, or give x back unchanged if that is impossible.

    Args:
        x: any object; typically a numeric string.

    Returns:
        float(x) when the conversion succeeds, otherwise x itself.
    """
    try:
        return float(x)
    except Exception:
        # Any conversion failure falls back to the input. "except Exception"
        # rather than a bare "except" so that KeyboardInterrupt and SystemExit
        # still propagate.
        return x

print(return_float('4.55'))
print(return_float('big data'))  # this time no ValueError is raised; the input comes back unchanged

#print float((9,8))  ->this will return a type error, remove the comment and check the output.
def return_float(x):
    """Return float(x), falling back to x itself when it cannot be converted.

    Only the two failures float() is expected to produce are handled:
    ValueError (e.g. a non-numeric string) and TypeError (e.g. a tuple).
    Any other exception propagates to the caller.
    """
    try:
        converted = float(x)
    except (ValueError, TypeError):
        # Not convertible: hand the original object back untouched.
        return x
    return converted
print(return_float((9, 8)))  # now the tuple (9, 8) is returned unchanged

# These are called ternary (conditional) expressions.
x = 'Life is short use python'
'This is my favourite quote' if x == 'Life is short use python' else 'I hate R'

# Tuples are one-dimensional, fixed-length, immutable sequences of Python objects.
machine_learning = 77, 45, 67
print(machine_learning)
pythonista = (87, 56, 98), (78, 45, 33)  # nested tuples
print(pythonista)

# You can convert any sequence to a tuple with the built-in tuple() function.
print(tuple([4, 0, 2]))
pythonista = tuple('Python')
print(pythonista)
pythonista[0]  # elements of a tuple are accessed by index

x = tuple(['Manu', [99, 88], 'Jeevan'])
# x[2] = 'Prakash'  # you can't rebind a slot of a tuple like this
x[1].append(77)  # but a mutable object stored inside the tuple can still be modified in place
x

# Tuples can be concatenated with the '+' operator.
y = ('Mean', 'Median', 'Mode') + ('Chisquare', 'Annova') + ('statistical significance',)
print(y)
('Mean', 'Median') * 4  # multiplying a tuple by an integer repeats its contents

deep_learning = ('Theano', 'Open cv', 'Torch')
x, y, z = deep_learning  # unpack the tuple into three names
print(x)
print(y)
print(z)

countries = 'Usa', 'India', ('Afghanistan', ' Pakistan'),
a, b, (c, d) = countries  # a nested tuple is unpacked with a matching nested target
print(a)
print(b)
print(c)
print(d)

countries = 'Usa', 'India', ('Afghanistan', ' Pakistan'), 'Usa', 'Usa'
countries.count('Usa')  # count() returns how many times a value occurs in the tuple

countries = ['Usa', 'India', 'Afghanistan', ' Pakistan']
# extend() and sort() modify the list in place and return None, so their
# results are deliberately not assigned to anything.
countries.extend(['Britian', 'Canada', 'Uzbekistan', 'Turkey'])
countries.sort(key=len)  # countries are sorted by number of characters
print(countries)
# extend() can be a handy feature when your lists are large.

import bisect
b = [9, 9, 9, 9, 5, 6, 3, 5, 3, 2, 1, 4, 7, 8]
b.sort()  # bisect only gives meaningful results on a sorted list
# bisect.bisect() returns the index at which a value should be inserted to keep
# the list sorted; for a value already present it is the slot after the last one.
x = bisect.bisect(b, 2)
y = bisect.bisect(b, 5)
print(x)
print(y)

# When iterating over a sequence, enumerate() keeps track of the index of the current element.
languages = ['Bigdata', 'Hadoop', 'mapreduce', 'Nosql']

for i, val in enumerate(languages):
    print('%s %s' % (i, val))

# sorted() returns a new sorted list from any sequence.
print(sorted([89, 99, 45, 63, 25, 53, 34, 56]))
print(sorted('Big data examiner'))

hot_job = ['Big_data', 'data science', 'data scientist', 'data base developer']
languages = ['c', 'c++', 'java', 'python']
statistics = ['Mean', 'Median', 'Mode', 'Chi square']
# zip() pairs up the elements of several sequences; list() makes the result
# printable whether zip() returns a list or a lazy iterator.
print(list(zip(hot_job, languages, statistics)))

for i, (x, y) in enumerate(zip(hot_job, languages)):  # zip and enumerate used together
    print('%d: %s, %s' % (i, x, y))

# A zipped sequence can be "unzipped" by passing it back to zip() with *.
rockers = [('Jame', 'Manu'), ('Govind', 'Dheepan'), ('Partha', 'Reddy')]
first_names, last_names = zip(*rockers)
print(first_names)
print(last_names)

# The built-in reversed() iterates over a sequence in reverse order.
list(reversed(range(20)))

# Two dictionaries can be combined with the update() method.
d1 = {'a': 'octave', 'b': 'Java'}
d1.update({'c': 'foo', 'd': 12})
print(d1)
d2 = {'a': 'octave', 'b': 'Java'}
d2.update({'b': 'foo', 'c': 12})  # a key already present ('b') is overwritten by the argument's value
print(d2)

# dict() accepts a sequence of (key, value) pairs, such as the output of zip().
data_science = dict(zip(range(10), reversed(range(10))))
data_science

# Dictionary keys must be hashable, which the built-in immutable types
# (int, float, str, tuple of hashable items) are.
print(hash('string'))
print(hash((1, 2, 3)))
try:
    print(hash([1, 2, 4]))  # raises TypeError: lists are mutable and therefore unhashable
except TypeError as err:
    # Show the error instead of letting it abort the rest of the script.
    print(err)

# An easy way to use a list as a key is to convert it to a tuple first.
fg = {}
fg[tuple([3, 4, 5])] = 45
fg

# A set is an unordered collection of unique elements.
set([3, 3, 4, 4, 4, 6, 7, 7, 7, 8])

# Sets support mathematical set operations like union, intersection, difference, and symmetric difference.
a = {1, 2, 3, 4, 5}
b = {3, 4, 5, 6, 7, 8}
print(a | b)  # union
print(a & b)  # intersection -> elements common to both sets
print(a - b)  # difference -> elements of a that are not in b
print(a ^ b)  # symmetric difference -> elements in exactly one of the two sets
print({1, 2, 3} == {3, 2, 1})  # sets with the same elements are equal regardless of order

football_clubs = ['Manchester', 'Liverpool', 'Arsenal', 'Chelsea', 'Mancity', 'Tottenham', 'Barcelona', 'Dortmund']

# Group the club names by their first letter.
football = {}
for clubs in football_clubs:
    club = clubs[0]  # first character of the current club name
    # Test against the dict being built: a new letter starts a fresh list,
    # a letter seen before appends to its existing list.
    if club not in football:
        football[club] = [clubs]
    else:
        football[club].append(clubs)
print(football)

# Usually, a Python dictionary throws a KeyError if you try to get an item with a key that is not currently in the dictionary. 
#The defaultdict in contrast will simply create any items that you try to access (provided of course they do not exist yet). To create such a "default" item, it calls the function object that you pass in the constructor 
#(more precisely, it's an arbitrary "callable" object, which includes function and type objects).

# The same grouping can be done with defaultdict.
from collections import defaultdict  # defaultdict lives in the collections module
soccer = defaultdict(list)

for clubs in football_clubs:
    # A missing key is created on first access with list() as its value,
    # so no membership test is needed before appending.
    soccer[clubs[0]].append(clubs)
print(soccer)

# a function can return multiple values
def b():
    """Return three values at once; the caller receives them packed in a tuple."""
    first, second, third = 34, 45, 89
    return (first, second, third)

# Example of a closure. The returned function returns True if an element has already been seen, i.e. it is repeated in the list.
def dict_funct():
    """Build a stateful "seen before?" checker.

    Returns:
        A function modifier(z) that returns False the first time it is called
        with a given value and True on every later call with an equal value.
        Each call to dict_funct() creates a checker with its own memory.
    """
    seen = set()  # values observed so far; kept alive by the closure below

    def modifier(z):
        already_seen = z in seen
        seen.add(z)  # no-op when z is already recorded
        return already_seen

    return modifier

x = dict_funct()
list_func = [5, 4, 6, 5, 3, 4, 6, 2, 1, 5]
y = [x(i) for i in list_func]  # True marks values that already appeared earlier in the list
print(y)

# When cleaning data we often start from a messy data set like this one:
# stray whitespace, punctuation noise and inconsistent capitalisation.
import re

states = [
    '       Kerala',
    'Gujarat!',
    'Delhi',
    'Telengana',
    'TriPUra',
    'Tamil Nadu##',
    'West Bengal?',
]


def remove_functions(strp):
    """Return strp with every '!', '#' and '?' character removed."""
    noise = re.compile('[!#?]')
    return noise.sub('', strp)

# Cleaning steps, applied in this order: trim surrounding whitespace, remove
# the '!', '#', '?' characters, then title-case the result.
ooops = [str.strip, remove_functions, str.title] # create a list of functions

def clean_data(oops, funky):
    """Run every item of oops through the functions in funky, in order.

    Args:
        oops: iterable of values to clean (e.g. raw strings).
        funky: iterable of one-argument callables; the output of each one is
            fed to the next.

    Returns:
        A new list holding the fully processed value for each input item.
    """
    def run_pipeline(value):
        # Feed the value through each cleaning step in turn.
        for step in funky:
            value = step(value)
        return value

    return [run_pipeline(item) for item in oops]
    
# Apply the whole cleaning pipeline to every state name.
x = clean_data(states, ooops)
print(x)


# Lambda is short form of writing a function. 
def f(x):
    """Return x raised to the power 2."""
    squared = pow(x, 2)
    return squared
print(f(8))
# The same function written as a lambda (an anonymous, single-expression function).
y = lambda x: x**2
print(y(9))


def new_objjj():
    """Generate the squares of 0..99, one value at a time.

    Yields:
        x**2 for each x from 0 to 99, computed lazily as the caller iterates.
    """
    # range() rather than the Python 2-only xrange(), so this runs on both versions.
    for x in range(100):
        yield x**2  # a generator function hands values out with yield instead of return
some_variable = new_objjj()

# The generator function above can also be written as a generator expression.
new_obj = (x**2 for x in range(100))

# Generator expressions can be passed to any function that accepts an iterable.
y = sum(x**2 for x in range(100))
print(y)

dict((i, i**2) for i in range(5))


rkeys = [1, 2, 3]
rvals = ['South', 'Sardinia', 'North']
# zip() pairs each key with its value; unpacking each pair names both halves.
rmap = {key: val for key, val in zip(rkeys, rvals)}
rmap