#!/usr/bin/env python
# coding: utf-8
# ---
#
#
#
# Department of Data Science
# Course: Tools and Techniques for Data Science
#
# ---
# Instructor: Muhammad Arif Butt, Ph.D.
# Lecture 3.2 (NumPy-02)
#
# # _Array vs List.ipynb_
#
# In[ ]:
# # Learning agenda of this notebook
# 1. A Comparison
# - Python Lists
# - Python Arrays
# - NumPy Arrays
# 2. Memory Consumption of Python List and Numpy Array
# 3. Operation cost on Python List and Numpy Array
# In[ ]:
# ### a. Python Lists
# - Python List is a numerically ordered sequence of elements that can store elements of heterogeneous types, is iterable, mutable and allows duplicate elements.
# - A Python List is a built-in type in Python and can be created by placing comma-separated values in square brackets; you don't have to specify the element type while creating a Python List.
# - A Python list is one-dimensional by default. You can build an N-dimensional list, but it is still a 1-D list whose elements are themselves 1-D lists.
# - A list stores references to its items; the item objects themselves are not stored contiguously in memory.
# - More memory hungry.
# - Operations on Lists are typically slower; however, the append operation takes amortized O(1) time.
# In[ ]:
# One Python list can hold an int, a string, a nested list, a bool and a float side by side
mylist = [
    1,
    "Data Science",
    ["a", "e"],
    False,
    5.72,
]
# Show the contents first, then the type of the container itself
print(mylist, type(mylist), sep="\n")
# In[ ]:
# ### b. Python Arrays
# - A simple Python array is a sequence of objects of the same data type. The Python array module requires all array elements to be of the same type. Moreover, to create an array, you'll need to specify a value type.
#
# ```
# array(typecode [, initializer])
# ```
#
# - Return a new array whose items are restricted by typecode, and initialized from the optional initializer value, which must be a list, string or iterable over elements of the appropriate type.
#
# - Arrays represent basic values and behave very much like lists, except the type of objects stored in them is constrained. The type is specified at object creation time by using a type code, which is a single character.
# - The following type codes are defined:
#
#
# | Type code | C Type            | Minimum size in bytes |
# |-----------|-------------------|-----------------------|
# | 'b'       | signed integer    | 1                     |
# | 'B'       | unsigned integer  | 1                     |
# | 'u'       | Unicode character | 2 (see note)          |
# | 'h'       | signed integer    | 2                     |
# | 'H'       | unsigned integer  | 2                     |
# | 'i'       | signed integer    | 2                     |
# | 'I'       | unsigned integer  | 2                     |
# | 'l'       | signed integer    | 4                     |
# | 'L'       | unsigned integer  | 4                     |
# | 'q'       | signed integer    | 8 (see note)          |
# | 'Q'       | unsigned integer  | 8 (see note)          |
# | 'f'       | floating point    | 4                     |
# | 'd'       | floating point    | 8                     |
# In[ ]:
# Typed arrays live in Python's built-in array module, which must be imported first
import array

# Typecode 'i' restricts the array to signed integers
arr1 = array.array('i', (3, 6, 9, 2))
print(arr1, type(arr1), sep="\n")

# Typecode 'f' restricts the array to floating point values (the int 2 is stored as a float)
arr2 = array.array('f', (3.4, 6.7, 9.5, 2))
print(arr2, type(arr2), sep="\n")

# A Python array is resizable in place: grow it by one element
arr2.extend([999])
print(arr2)
# In[ ]:
# ### c. Numpy Arrays
# - A NumPy array is a numerically ordered sequence of elements stored contiguously in memory, that can store elements of homogeneous types (usually numbers but can be booleans, strings, or other objects), is iterable, mutable, non-growable/shrinkable and allows duplicate elements.
# - NumPy arrays have a fixed size at creation, unlike Python lists/arrays (which can grow dynamically). If you change the size of a NumPy array, it will create a new array and delete the original.
# - NumPy arrays are less memory hungry and offer better performance than Python Lists.
#
#
#
# **Differences between Python List and NumPy Arrays:**
# 1. Lists are part of core Python. Arrays are not part of core Python
# 2. Lists can contain elements of different types. An Array’s elements must all be of the same type
# 3. Lists don’t need to be declared. Arrays need to be declared before use.
# 4. Arrays (in Numpy) are optimized for fast mathematical operations. Lists are not.
# 5. Arrays are optimized for storage (which is why you need to declare them before use). Lists are not.
# 6. Lists can grow/shrink and are more flexible (they allow easy extension or reduction by adding/deleting elements). Arrays are not flexible.
#
#
# - In general if you are going to make heavy use of mathematical operations, or need to store and process a large amount of numerical data, you should go with arrays rather than lists. If you are also particular about efficient memory storage, you should use arrays.
# In[2]:
# Mixed bool/int/float input: NumPy promotes every element to one common, wider dtype
import numpy as np
array1 = np.array([3.5, True, 9, 2.7, False])
# Print the array, the container type, and the type of a single (promoted) element
print(array1, type(array1), type(array1[1]), sep="\n")
# In[3]:
# As soon as one element is a string, NumPy promotes every element to a string dtype
import numpy as np
array1 = np.array([3.5, 9, 2.7, 'arif', False])
# Report the array, its container type, and the type of one converted element
print("\n".join(str(shown) for shown in (array1, type(array1), type(array1[1]))))
# In[1]:
# An explicit dtype forces the conversion: here every value is cast to an unsigned 16-bit integer,
# so the floats lose their fractional part and the booleans become 0/1
import numpy as np
array1 = np.array([3.5, False, 9.8, 2.7, True], dtype=np.uint16)
# Show the array, the container type, and the type of a single element
print(array1, type(array1), type(array1[1]), sep="\n")
# In[ ]:
# If you mention the data type, the elements are automatically typecasted to the mentioned type.
# Here dtype=str converts every element to its string form.
# NOTE: the former alias np.str was deprecated in NumPy 1.20 and removed in NumPy 1.24
# (it was only another name for the builtin str), so the builtin is used directly.
import numpy as np
array1 = np.array([3.5, False, 9.8, 2.7, True], dtype=str)
print(array1)
print(type(array1))
print(type(array1[1]))
# In[ ]:
# ## 2. Memory Consumption of NumPy Array and Python List
# - Python Lists consume more memory than NumPy arrays
# In[ ]:
import numpy as np
import sys
# declaring a list of 1000 elements
# (range(1000) alone is a lazy range object, not a list, so it must be materialised)
list1 = list(range(1000))
# sys.getsizeof is shallow: on a list it counts only the list object and its
# references, not the int objects they point to. So measure one int object for
# the per-element figure, and add the container to the sum of all elements
# for the total footprint.
element_size = sys.getsizeof(list1[-1])
list1_size = sys.getsizeof(list1) + sum(sys.getsizeof(item) for item in list1)
print("Size of each element = {} and Size of list1 = {} bytes".format(element_size, list1_size))
# declaring a Numpy array of 1000 elements
# uint16 is the smallest unsigned dtype able to hold 0..999 (uint8 cannot hold values above 255)
array1 = np.arange(1000, dtype=np.uint16)
print("\nSize of each element = {} and Size of array1 = {} bytes".format(array1.itemsize, array1.nbytes))
# In[ ]:
# ## 3. Operations on NumPy Arrays vs Python Lists
# - NumPy arrays are stored in one contiguous block of memory, unlike lists, so processes can access and manipulate them very efficiently.
# - This behavior is called **locality of reference** in computer science.
# - This is the main reason why NumPy is faster than lists.
# - As a proof of concept, we can multiply two lists and then two arrays, and compare their multiplication times.
# ### Effect of * operator on NumPy Array and Python List
# In[ ]:
# Element-wise product of two NumPy arrays (what the * operator does for arrays)
import numpy as np
myarray1 = np.array([1, 2, 3, 4, 5, 6])
# Second operand: an independent array holding the same values
myarray2 = myarray1.copy()
# np.multiply is the function form of the * operator
myarray3 = np.multiply(myarray1, myarray2)
myarray3
# In[ ]:
# You can't multiply two lists with the * operator (list * list raises a TypeError),
# so the product has to be computed one pair of elements at a time.
mylist1 = [1, 2, 3, 4, 5, 6]
mylist2 = [1, 2, 3, 4, 5, 6]
# zip walks both lists in lockstep; the comprehension collects each pairwise product
mylist3 = [left * right for left, right in zip(mylist1, mylist2)]
mylist3
# **Let us calculate time to multiply two numPy arrays of 1 million elements**
# In[ ]:
import time
size = 1000000
# Request int64 explicitly: the largest product here is 999999 * 999999 (about 1e12),
# which does not fit in a 32-bit integer, and the default integer width is platform dependent.
array1 = np.arange(size, dtype=np.int64)
array2 = np.arange(size, dtype=np.int64)
# capturing time before the multiplication of Numpy arrays
# (perf_counter is the high-resolution clock intended for timing short durations)
initialTime = time.perf_counter()
# multiplying elements of both the Numpy arrays and stored in another Numpy array
array3 = array1 * array2
# capturing time again after the multiplication is done
finishTime = time.perf_counter()
print("\nTime taken by NumPy Arrays to perform multiplication:", finishTime - initialTime, "seconds")
# **Let us calculate time to multiply two Python Lists of 1 million elements**
# In[ ]:
import time
# Creating two large size Lists and multiplying them element by element
# (size is the element count defined in the NumPy timing cell above)
list1 = list(range(size))
list2 = list(range(size))
# pre-sized result list; every slot is overwritten inside the timed loop
list3 = [0] * size
# capturing time before the multiplication of Python Lists
# (perf_counter is the high-resolution clock intended for timing short durations)
initialTime = time.perf_counter()
# multiplying elements of both the lists and stored in another list
# simply run a loop and overwrite the elements of the new list with resulting value
for i, (left, right) in enumerate(zip(list1, list2)):
    list3[i] = left * right
# capturing time again after the multiplication is done
finishTime = time.perf_counter()
print("\nTime taken by Lists to perform multiplication:", finishTime - initialTime, "seconds")
# In[ ]: