#!/usr/bin/env python
# coding: utf-8

# ---
# Department of Data Science
#
# Course: Tools and Techniques for Data Science
#
# ---
#
# Instructor: Muhammad Arif Butt, Ph.D.
#
# Lecture 3.2 (NumPy-02)

# Open In Colab
#
# _Array vs List.ipynb_

# In[ ]:


# # Learning agenda of this notebook
# 1. A Comparison
#     - Python Lists
#     - Python Arrays
#     - NumPy Arrays
# 2. Memory Consumption of Python List and Numpy Array
# 3. Operation cost on Python List and Numpy Array

# In[ ]:


# ### a. Python Lists
# - A Python List is a numerically ordered sequence of elements that can store
#   elements of heterogeneous types, is iterable, mutable and allows duplicate
#   elements.
# - A Python List is a built-in type in Python and can be created by placing
#   comma separated values in square brackets; you don't have to specify the
#   type while creating a Python List.
# - A Python list is by default 1-dimensional. We can create an N-dimensional
#   list, but even then it is a 1-D list storing other 1-D lists.
# - Items are stored non-contiguously in memory.
# - More memory hungry.
# - Operations on Lists are typically slower; however, the append operation
#   takes (amortized) O(1) time.

# In[ ]:


# Creating a list containing elements belonging to different data types.
# A single list can hold an int, a str, a nested list, a bool and a float.
mylist = [1, "Data Science", ['a', 'e'], False, 5.72]
print(mylist)
print(type(mylist))


# In[ ]:


# ### b. Python Arrays
# - A simple Python array is a sequence of objects of similar data type. The
#   Python array module requires all array elements to be of the same type.
#   Moreover, to create an array, you'll need to specify a value type.
#
# ```
# array(typecode [, initializer])
# ```
#
# - Return a new array whose items are restricted by typecode, and initialized
#   from the optional initializer value, which must be a list, string or
#   iterable over elements of the appropriate type.
#
# - Arrays represent basic values and behave very much like lists, except the
#   type of objects stored in them is constrained. The type is specified at
#   object creation time by using a type code, which is a single character.
# - The following type codes are defined:
#
#
#     Type code   C Type               Minimum size in bytes
#     'b'         signed integer       1
#     'B'         unsigned integer     1
#     'u'         Unicode character    2 (see note)
#     'h'         signed integer       2
#     'H'         unsigned integer     2
#     'i'         signed integer       2
#     'I'         unsigned integer     2
#     'l'         signed integer       4
#     'L'         unsigned integer     4
#     'q'         signed integer       8 (see note)
#     'Q'         unsigned integer     8 (see note)
#     'f'         floating point       4
#     'd'         floating point       8

# In[ ]:


# To use Python arrays, you have to import Python's built-in array module
import array

# declaring array of integers
arr1 = array.array('i', [3, 6, 9, 2])
print(arr1)
print(type(arr1))

# declaring array of floats
arr2 = array.array("f", [3.4, 6.7, 9.5, 2])
print(arr2)
print(type(arr2))

# Python arrays can grow/shrink dynamically
arr2.append(999)
print(arr2)


# In[ ]:


# ### c. Numpy Arrays
# - A NumPy array is a numerically ordered sequence of elements stored
#   contiguously in memory, that can store elements of homogeneous types
#   (usually numbers but can be booleans, strings, or other objects), is
#   iterable, mutable, non-growable/shrinkable and allows duplicate elements.
# - NumPy arrays have a fixed size at creation, unlike Python lists/arrays
#   (which can grow dynamically). If you change the size of a NumPy array, it
#   will create a new array and delete the original.
# - NumPy arrays are less memory hungry and offer better performance than
#   Python Lists.
#
#
#
# **Differences between Python List and NumPy Arrays:**
# 1. Lists are part of core Python. Arrays are not part of core Python.
# 2. Lists can contain elements of different types. An Array's elements must
#    all be of the same type.
# 3. Lists don't need to be declared. Arrays need to be declared before use.
# 4. Arrays (in Numpy) are optimized for fast mathematical operations. Lists
#    are not.
# 5. Arrays are optimized for storage (which is why you need to declare them
#    before use). Lists are not.
# 6. Lists can grow/shrink and are more flexible (they allow easy extension or
#    reduction by adding/deleting elements). Arrays are not flexible.
#
#
# - In general if you are going to make heavy use of mathematical operations,
#   or need to store and process a large amount of numerical data, you should
#   go with arrays rather than lists. If you are also particular about
#   efficient memory storage, you should use arrays.

# In[2]:


# A NumPy array upcasts all elements to the widest data type when the input
# mixes types: here bools and ints are promoted to float.
import numpy as np

array1 = np.array([3.5, True, 9, 2.7, False])
print(array1)
print(type(array1))
print(type(array1[1]))


# In[3]:


# With a string in the mix, every element is upcast to a string.
array1 = np.array([3.5, 9, 2.7, 'arif', False])
print(array1)
print(type(array1))
print(type(array1[1]))


# In[1]:


# If you mention the data type, the elements are automatically typecast to the
# mentioned type (floats are truncated, bools become 0/1).
array1 = np.array([3.5, False, 9.8, 2.7, True], dtype=np.uint16)
print(array1)
print(type(array1))
print(type(array1[1]))


# In[ ]:


# If you mention the data type, the elements are automatically typecast to the
# mentioned type.
# NOTE: the `np.str` alias was deprecated in NumPy 1.20 and removed in 1.24;
# the builtin `str` requests the same unicode string dtype on every version.
array1 = np.array([3.5, False, 9.8, 2.7, True], dtype=str)
print(array1)
print(type(array1))
print(type(array1[1]))


# In[ ]:


# ## 2. Memory Consumption of NumPy Array and Python List
# - Python Lists consume more memory than NumPy arrays

# In[ ]:


import sys

# declaring a list of 1000 elements
# (a real list: a bare `range(1000)` is a lazy range object, not a list)
list1 = list(range(1000))
# size of one representative int object stored in the list
element_size = sys.getsizeof(list1[-1])
# sys.getsizeof is shallow, so add the list's own reference table to the
# sizes of the int objects it refers to
list1_size = sys.getsizeof(list1) + sum(sys.getsizeof(item) for item in list1)
print("Size of each element = {} and Size of list1 = {} bytes".format(element_size, list1_size))

# declaring a Numpy array of 1000 elements
# uint16 is the smallest unsigned type that can hold 0..999; with uint8 the
# values above 255 would silently wrap around.
array1 = np.arange(1000, dtype=np.uint16)
print("\nSize of each element = {} and Size of array1 = {} bytes".format(array1.itemsize, array1.nbytes))


# In[ ]:


# ## 3.
# Operations on NumPy Arrays vs Python Lists
# - NumPy arrays are stored at one contiguous place in memory unlike lists, so
#   processes can access and manipulate them very efficiently.
# - This behavior is called **locality of reference** in computer science.
# - This is the main reason why NumPy is faster than lists.
# - As a proof of concept, we can multiply two lists and then two arrays, and
#   compare their multiplication time.

# ### Effect of * operator on NumPy Array and Python List

# In[ ]:


# You can multiply two NumPy arrays element-wise using the * operator
import numpy as np

myarray1 = np.array([1, 2, 3, 4, 5, 6])
myarray2 = np.array([1, 2, 3, 4, 5, 6])
myarray3 = myarray1 * myarray2
# printed explicitly: a bare expression shows nothing when run as a script
print(myarray3)


# In[ ]:


# you can't multiply two lists using a * operator, you have to loop over the
# paired elements
mylist1 = [1, 2, 3, 4, 5, 6]
mylist2 = [1, 2, 3, 4, 5, 6]
mylist3 = [a * b for a, b in zip(mylist1, mylist2)]
print(mylist3)


# **Let us calculate time to multiply two numPy arrays of 1 million elements**

# In[ ]:


import time

size = 1000000
# int64 is requested explicitly: where the default integer is 32-bit, the
# largest product (999999 ** 2) would silently overflow.
array1 = np.arange(size, dtype=np.int64)
array2 = np.arange(size, dtype=np.int64)

# capturing time before the multiplication of Numpy arrays
# (perf_counter is the monotonic, high-resolution clock meant for timing)
initialTime = time.perf_counter()

# multiplying elements of both the Numpy arrays and stored in another Numpy array
array3 = array1 * array2

# capturing time again after the multiplication is done
finishTime = time.perf_counter()
print("\nTime taken by NumPy Arrays to perform multiplication:", finishTime - initialTime, "seconds")


# **Let us calculate time to multiply two Python Lists of 1 million elements**

# In[ ]:


# Creating two large size Lists and multiplying them element by element
list1 = list(range(size))
list2 = list(range(size))
# pre-sized result list whose slots are overwritten by the loop below
list3 = [0] * size

# capturing time before the multiplication of Python Lists
initialTime = time.perf_counter()

# multiplying elements of both the lists and stored in another list
# simply run a loop and overwrite the elements of the new list with resulting value
for i, (left, right) in enumerate(zip(list1, list2)):
    list3[i] = left * right

# capturing time again after the multiplication is done
finishTime = time.perf_counter()
print("\nTime taken by Lists to perform multiplication:", finishTime - initialTime, "seconds")


# In[ ]: