Department of Data Science

Course: Tools and Techniques for Data Science

# # --- #

Instructor: Muhammad Arif Butt, Ph.D.

Lecture 3.7 (NumPy-07)

# In[ ]: # # Learning agenda of this notebook # ## 1. Concatenating NumPy Arrays #

# ## 2. Stacking NumPy Arrays #

# In[ ]: # In[ ]: # ## 3. Splitting NumPy Arrays #

# In[ ]:


# To install this library in Jupyter notebook
#import sys
#!{sys.executable} -m pip install numpy


# In[ ]:


# Import NumPy under its conventional alias. The bare tuple expression below is
# only echoed when the cell is run inside a notebook; it shows the installed
# version and the package location.
import numpy as np
np.__version__ , np.__path__


# In[ ]:


# ## 1. Concatenating NumPy Arrays
#

# #

#
# The `np.concatenate()` method is used to join arrays with respect to a given axis.
#
# ```
# np.concatenate(tup, axis=0)
# ```
#
# - Where `tup` is a comma-separated sequence of ndarrays
# - If axis is 0, it will join the arrays row-wise (vertically). For 2-D arrays, the number of columns must match.
# - If axis is 1, it will join the arrays column-wise (horizontally). For 2-D arrays, the number of rows must match.
# - For 1-D arrays, the arrays can be of any size/length.
# - The original arrays remain unchanged, as the operation does not occur in-place.

# In[ ]:


# **Example:** Concatenate two 1-D Arrays along axis = 0 (row wise). The 1-D arrays can be of any size/length.

# In[2]:


import numpy as np
arr1 = np.random.randint(low=1, high=100, size=5)
arr2 = np.random.randint(low=1, high=100, size=3)
print("arr1 = ", arr1)
print("arr2 = ", arr2)
# axis defaults to 0, so the two calls below are equivalent
arr3 = np.concatenate((arr1, arr2))
arr3 = np.concatenate((arr1, arr2), axis=0)
print("\nnp.concatenate((arr1,arr2)) = ", arr3)


# You cannot concatenate 1-D arrays on `axis=1`, as it does not exist :)

# In[ ]:


# **Example:** Concatenate two 2-D Arrays along `axis=0` (vertically/row-wise). The number of columns of two arrays must match.

# In[7]:


arr1 = np.random.randint(low=1, high=10, size=(2, 3))
arr2 = np.random.randint(low=1, high=10, size=(3, 3))
print("arr1 = \n", arr1)
print("arr2 = \n", arr2)
arr3 = np.concatenate((arr1, arr2), axis=0)
print("\nnp.concatenate((arr1,arr2)) = \n", arr3)


# In[ ]:


# In[ ]:


# **Example:** Concatenate two 2-D Arrays along `axis=1` (horizontally/column-wise). The number of rows of two arrays must match.

# In[8]:


arr1 = np.random.randint(low=1, high=10, size=(2, 2))
arr2 = np.random.randint(low=1, high=10, size=(2, 3))
print("arr1 = \n", arr1)
print("arr2 = \n", arr2)
arr3 = np.concatenate((arr1, arr2), axis=1)
print("\nnp.concatenate((arr1,arr2)) = \n", arr3)


# In[ ]:


# ## 2. Stacking NumPy Arrays
# - Concatenating joins a sequence of arrays along an existing axis, and stacking joins a sequence of arrays along an existing as well as along a new axis.
# - We can perform stacking along three dimensions:
#     - `np.vstack()` : it performs vertical stacking along the rows.
#     - `np.hstack()` : it performs horizontal stacking along the columns.
#     - `np.dstack()` : it performs in-depth stacking along a new third axis (depth).
#
# **Note:**
# - `numpy.stack()` is the most general of the three methods, offering an axis parameter for specifying which way to put the arrays together.
# - `np.column_stack()` is used to stack 1-D arrays as columns into a 2-D array.
# - `np.row_stack()` is used to stack 1-D arrays as rows into a 2-D array. It is a deprecated alias of `np.vstack()` since NumPy 2.0, so the example below calls `np.vstack()`.

# **Example: `np.row_stack()`** (shown with its replacement `np.vstack()`)

# In[1]:


#ROW STACK
import numpy as np
arr1 = np.array([2, 5, 1])
arr2 = np.array([3, 6, 2])
print("arr1 = ", arr1)
print("arr2 = ", arr2)
# np.vstack() gives the same result as the deprecated np.row_stack() alias
arr3 = np.vstack((arr1, arr2))
print("\nnp.vstack((arr1, arr2)):\n ", arr3)


# **Example: `np.column_stack()`**

# In[2]:


#COLUMN STACK
import numpy as np
arr1 = np.array([2, 5, 1])
arr2 = np.array([3, 6, 2])
print("arr1 = ", arr1)
print("arr2 = ", arr2)
arr3 = np.column_stack((arr1, arr2))
print("\nnp.column_stack((arr1, arr2)):\n ", arr3)


# In[ ]:


# In[ ]:


# In[ ]:


# In[ ]:


#

#
# ### a. Using `np.vstack()` for Row-Wise Concatenation
# The `np.vstack()` method is used to stack arrays vertically or row-wise.
#
# ```
# np.vstack(tup)
# ```
#
# - Where `tup` is a comma-separated sequence of ndarrays
# - 1-D arrays must have the same size/length, while for 2-D arrays, the number of columns must match.
# - It returns an ndarray formed by stacking the given arrays, which will be at least 2-D.
# - The original arrays remain unchanged, as the operation does not occur in-place.

# In[ ]:


# **Example:** Perform vertical stacking of two 1-D Arrays, which must have the same size/length.

# In[11]:


import numpy as np
arr1 = np.random.randint(low=1, high=10, size=4)
arr2 = np.random.randint(low=1, high=10, size=4)
print("arr1 = ", arr1)
print("arr2 = ", arr2)
arr3 = np.vstack((arr1, arr2))
print("\nnp.vstack((arr1, arr2)):\n ", arr3)


# Note: The output array is a 2-D array

# In[ ]:


# **Example:** Perform vertical stacking of two 2-D Arrays. The number of columns of two arrays must match

# In[14]:


arr1 = np.random.randint(low=1, high=10, size=(2, 3))
arr2 = np.random.randint(low=1, high=10, size=(3, 3))
print("arr1 = \n", arr1)
print("arr2 = \n", arr2)
arr3 = np.vstack((arr1, arr2))
print("\n np.vstack((arr1, arr2)):\n ", arr3)


# In[ ]:


#

#
# ### b. Using `np.hstack()` for Column-Wise Concatenation
# The `np.hstack()` method is used to stack arrays horizontally or column-wise.
#
# ```
# np.hstack(tup)
# ```
#
# - Where `tup` is a comma-separated sequence of ndarrays
# - 1-D arrays can have any size/length, while for 2-D arrays, the number of rows must match.
# - It returns an ndarray formed by stacking the given arrays.
# - The original arrays remain unchanged, as the operation does not occur in-place.

# In[ ]:


# **Example:** Perform horizontal stacking of two 1-D Arrays, which can be of different size/length

# In[15]:


# re-imported so that this section also runs on its own
import numpy as np
arr1 = np.random.randint(low=1, high=10, size=5)
arr2 = np.random.randint(low=1, high=10, size=4)
print("arr1 = ", arr1)
print("arr2 = ", arr2)
arr3 = np.hstack((arr1, arr2))
print("\n np.hstack((arr1, arr2)):\n ", arr3)


# Note: The output array is a 1-D array

# In[ ]:


# **Example:** Perform horizontal stacking of two 2-D Arrays. The number of rows of two arrays must match

# In[18]:


arr1 = np.random.randint(low=1, high=10, size=(2, 2))
arr2 = np.random.randint(low=1, high=10, size=(2, 3))
print("arr1 = \n", arr1)
print("arr2 = \n", arr2)
arr3 = np.hstack((arr1, arr2))
print("\n np.hstack((arr1, arr2)):\n ", arr3)


# In[ ]:


# In[ ]:


# ### c. Using `np.stack()`
#
# - The `np.stack()` method is used to join a sequence of same-shaped arrays along a new axis.
# - The axis parameter specifies the index of the new axis in the dimensions of the result.
# - For example, if axis=0 it will be the first dimension and if axis=-1 it will be the last dimension.
# ```
# np.stack(tup, axis=0)
# ```
#
# - Where `tup` is a comma-separated sequence of ndarrays
# - All input arrays must have exactly the same shape, whatever their number of dimensions.
# - It returns the array formed by stacking the given arrays, which has one more dimension than the input arrays.
# - A typical use is stacking same-shaped 2-D arrays into a 3-D array with `axis=-1`. For instance, for pixel-data with a height (first axis), width (second axis), and r/g/b channels (third axis).
#
#
#
# **Note:** Concatenating joins a sequence of tensors along an existing axis, and stacking joins a sequence of tensors along a new axis

# **Example:** Perform stacking of two 1-D Arrays, which must have the same size/shape.

# In[20]:


import numpy as np
arr1 = np.random.randint(low=1, high=10, size=4)
arr2 = np.random.randint(low=1, high=10, size=4)
print("arr1 = ", arr1)
print("arr2 = ", arr2)

# Stacking two 1-D arrays along axis 0 using stack()
arr3 = np.stack((arr1, arr2), axis=0)
print("\n np.stack((arr1, arr2), axis=0):\n ", arr3)

# Stacking the two 1-D arrays as rows; np.vstack() replaces the
# np.row_stack() alias, which is deprecated since NumPy 2.0
arr4 = np.vstack((arr1, arr2))
print("\n np.vstack((arr1, arr2)):\n ", arr4)

# Stacking two 1-D arrays along axis 1 using stack()
arr5 = np.stack((arr1, arr2), axis=1)
print("\n np.stack((arr1, arr2), axis=1):\n ", arr5)

# Stacking the two 1-D arrays using column_stack()
arr6 = np.column_stack((arr1, arr2))
print("\n np.column_stack((arr1, arr2)):\n ", arr6)


# In[ ]:


# **Example:** Perform stacking of two 2-D Arrays, which must have the same size/shape.
# In[21]:


import numpy as np
arr1 = np.array([[4, 3, 1], [5, 6, 2]])
arr2 = np.array([[5, 1, 8], [3, 9, 1]])
print("arr1 = \n", arr1)
print("arr2 = \n", arr2)

# Stacking the two arrays along axis 0
arr3 = np.stack((arr1, arr2), axis=0)
print("\n np.stack((arr1, arr2), axis=0): \n", arr3)
print("shape of arr3:", arr3.shape)


# In[22]:


import numpy as np
arr1 = np.array([[4, 3, 1], [5, 6, 2]])
arr2 = np.array([[5, 1, 8], [3, 9, 1]])
print("arr1 = \n", arr1)
print("arr2 = \n", arr2)

# Stacking the two arrays along axis 1
arr3 = np.stack((arr1, arr2), axis=1)
print("\n np.stack((arr1, arr2), axis=1): \n", arr3)
print("shape of arr3:", arr3.shape)


# In[ ]:


import numpy as np
arr1 = np.array([[4, 3, 1], [5, 6, 2]])
arr2 = np.array([[5, 1, 8], [3, 9, 1]])
print("arr1 = \n", arr1)
print("arr2 = \n", arr2)

# Stacking the two arrays along last axis
arr3 = np.stack((arr1, arr2), axis=-1)
print("\n np.stack((arr1, arr2), axis=-1):\n ", arr3)
print("shape of arr3:", arr3.shape)


# In[ ]:


# ## 3. Splitting NumPy Arrays
# - Splitting is the reverse operation of joining, and is used to split one array into multiple arrays.
# - We can perform splitting along three dimensions:
#     - `np.split()` : Split array into a list of multiple sub-arrays of equal size.
#     - `np.hsplit()` : Split array into multiple sub-arrays horizontally (column wise).
#     - `np.vsplit()` : Split array into multiple sub-arrays vertically (row wise).

# In[ ]:


# ### a. The `np.split()` and `np.array_split()` Methods
#
#

# #

# - The `np.split()` method splits an array into multiple sub-arrays of equal sizes.
#
# ```
# np.split(arr, size, axis=0)
# ```
# - Where,
#     - `arr` is the array to be divided into sub-arrays.
#     - `size` is the number of equal-sized sub-arrays into which `arr` will be divided along the axis. (NumPy names this parameter `indices_or_sections`; it also accepts a sorted sequence of indices at which to split.)
#     - `axis` is the axis along which to split, default is 0.
# - If such a split is not possible, an error is raised. To avoid the error you can use `np.array_split()`
# - It returns a list of sub-arrays as views into `arr`

# **Example:** Use of `split()`

# In[ ]:


# re-imported so that this section also runs on its own
import numpy as np
arr1 = np.random.randint(low=1, high=10, size=20)
print("arr1:\n", arr1)

# The number of sections must be a factor of the array size (can be 1, 2, 4, 5, 10, 20)
print("\nSub-arrays: \n", np.split(arr1, 4))


# In[ ]:


# In[ ]:


# **Example:** Use of `array_split()`

# In[ ]:


# create an array of 13 random integers
arr1 = np.random.randint(low=1, high=10, size=13)
print("arr1:\n", arr1)

# array_split() does not raise an error when the number of sections is not a
# factor of the array size; the leading sub-arrays simply get one extra element
print("\nSub-arrays: \n", np.array_split(arr1, 4))


# In[ ]:


#

#
# ### b. The `np.hsplit()` Method
# - The `np.hsplit()` method is used to split an array into multiple sub-arrays horizontally (column-wise).
# - The `np.hsplit()` is equivalent to split with axis=1: the array is always split along the second axis, except for 1-D arrays, which are split along axis 0.
#
# ```
# np.hsplit(arr, size)
# ```
# - Where,
#     - `arr` is the array to be divided into sub-arrays.
#     - `size` is the number of equal-sized sub-arrays into which `arr` will be divided along the axis. For `hsplit()`, the size argument should be a factor of the number of columns, else it flags an error
# - It returns a list of sub-arrays as views into `arr`

# **Example:**

# In[ ]:


# re-imported so that this section also runs on its own
import numpy as np
# create an array of float type with 4 rows and 4 columns with sequential values from 0 to 15
arr1 = np.arange(16.0).reshape(4, 4)
# print array
print("arr1:\n", arr1)
print("shape: ", arr1.shape)


# In[ ]:


# horizontally split array into 2 subarrays
print("\nSub-arrays: \n", np.hsplit(arr1, 2))


# In[ ]:


#

#
# ### c. The `np.vsplit()` Method
# - The `np.vsplit()` method is used to split an array into multiple sub-arrays vertically (row-wise). Not applicable for 1-D array.
# - The `np.vsplit()` is equivalent to split with axis=0, the array is always split along the first axis regardless of the array dimension.
#
# ```
# np.vsplit(arr, size)
# ```
# - Where,
#     - `arr` is the array to be divided into sub-arrays.
#     - `size` is the number of equal-sized sub-arrays into which `arr` will be divided along the axis. For `vsplit()`, the size argument should be a factor of the number of rows, else it flags an error
# - It returns a list of sub-arrays as views into `arr`

# **Example:**

# In[ ]:


# re-imported so that this section also runs on its own
import numpy as np
# create an array of float type with 4 rows and 4 columns with sequential values from 0 to 15
arr1 = np.arange(16.0).reshape(4, 4)
print("arr1:\n", arr1)
print("shape: ", arr1.shape)


# In[ ]:


# vertically split array into 2 subarrays (remember size argument must be a factor of number of rows )
print("\nSub-arrays: \n", np.vsplit(arr1, 2))


# In[ ]:


# In[ ]:


# In[ ]:


# In[ ]:


# In[ ]:


# In[ ]:


# In[10]:


# Side-by-side comparison of the stacking functions on the same two 1-D arrays
import numpy as np
arr1 = np.array([2, 5, 1])
arr2 = np.array([3, 6, 2])
arr3 = np.column_stack((arr1, arr2))
arr4 = np.vstack((arr1, arr2))
arr5 = np.hstack((arr1, arr2))
arr6 = np.stack((arr1, arr2), axis=1)
print("arr3: ", arr3)
print("arr4: ", arr4)
print("arr5: ", arr5)
print("arr6: ", arr6)


# In[ ]: