# Upload file to Google Drive, then save filepath # NOTE: you'll need to change this variable to match your own filepath filepath = 'drive/My Drive/OCEAN 215 - data/nino34.long.data.txt' # Give Colab access to Google Drive from google.colab import drive drive.mount('/content/drive') # Load file into Colab import numpy as np years = np.genfromtxt(filepath,skip_header=1,skip_footer=5,usecols=0,dtype=int,delimiter=None) print(years.shape) # Check dimensions of the years NumPy array data = np.genfromtxt(filepath,skip_header=1,skip_footer=5,usecols=range(1,13),dtype=float,delimiter=None) print(data.shape) # Check dimensions of the data NumPy array print(data) # Get a preview of the data by printing # Mask out the missing data using np.NaN (a placeholder) data[data == -99.99] = np.NaN # Check updated array print(data) # Reshape from 2D to 1D, because the data is a time series data_1d = np.reshape(data,(data.size,)) print(data_1d) # Alternate way of reshaping a 2D array to a 1D array data_1d = data.flatten() print(data_1d) # Construct 1-D time array (for x-values, because we plot x-values vs. y-values) # We want it to look like this: # [ January 15, 1870, # February 15, 1870, # March 15, 1870, # ... # November 15, 2020, # December 15, 2020 ] from datetime import datetime all_months = np.tile(range(1,13),len(years)) # print(all_months) all_years = np.repeat(range(1870,2021),12) # print(all_years) datetimes = [datetime(all_years[idx],all_months[idx],15) for idx in range(data.size)] datetimes = np.array(datetimes) # Because we prefer arrays, not lists print(datetimes) # Plot the El Niño index import matplotlib.pyplot as plt plt.subplots(figsize=(16,4)) plt.plot(datetimes,data_1d,color='k',lw=1) # color options: https://matplotlib.org/3.3.2/gallery/color/named_colors.html plt.scatter(datetimes,data_1d,s=4,c='darkorange') plt.title('Niño 3.4 index') plt.xlabel('Time') plt.ylabel('Average sea surface temperature (°C)') plt.grid() # Goal: zoom into just a portion of the x-axis (years 1980-2020) fig, ax = plt.subplots(figsize=(16,4)) plt.plot(datetimes,data_1d,c='k',lw=1) # color options: https://matplotlib.org/3.3.2/gallery/color/named_colors.html plt.scatter(datetimes,data_1d,s=4,c='darkorange') plt.title('Niño 3.4 index') plt.xlabel('Time') plt.ylabel('Average sea surface temperature (°C)') plt.grid() # plt.xlim(datetime(1980,1,1),datetime(2020,12,31)) # Option 1 (call xlim() on the plt module) # ax.set_xlim(datetime(1980,1,1),datetime(2020,12,31)) # Option 2 (call set_xlim() on the axes object saved from plt.subplots()) plt.gca().set_xlim(datetime(1980,1,1),datetime(2020,12,31)) # Option 3 (call set_xlim() on the current axes) # This is how we test for NaNs # np.isnan() returns a boolean (True or False) np.isnan(50) # Returns False np.isnan(np.nan) # Returns True # Calculate average value of El Niño index, ignoring NaN values sum = 0.0 nan_counter = 0 for value in data_1d: if np.isnan(value): # Notice the if-statement is inside the for-loop print('We found a NaN') nan_counter += 1 else: # The else statement will be entered when "value" is not a NaN sum += value print(sum) average = sum / (len(data_1d) - nan_counter) # Exclude NaN values from average (nan_counter is 4 here) print(average) # Round to one decimal place print(round(average,1)) # Shift El Niño index values down by the average temperature, so they're centered at y = 0 data_1d_shifted = data_1d.copy() for index in range(len(data_1d)): data_1d_shifted[index] = data_1d_shifted[index] - average # Alternate way: # data_1d_shifted[index] -= average # Add a horizontal line at y = 0 fig, ax = plt.subplots(figsize=(16,4)) plt.plot(datetimes,data_1d_shifted,c='k',lw=1) plt.plot([datetime(1865,1,1),datetime(2025,1,1)],[0,0],ls='--',c='k') # This is a line between two points at (1865,0) and (2025,0) plt.scatter(datetimes,data_1d_shifted,s=4,c='darkorange') plt.title('Niño 3.4 index') plt.xlabel('Time') plt.ylabel('Average sea surface temperature (°C)') plt.grid() # Calculate average value of El Niño index using NumPy # This won't work, because it will give the answer "NaN"... average = np.mean(data_1d) print('Option 1:',average) # ... so we have slice out the NaNs average = np.mean(data_1d[:-4]) print('Option 2:',average) # Or we can get rid of the NaNs using conditional indexing average = np.mean(data_1d[~np.isnan(data_1d)]) # Here, np.isnan(data_1d) returns a Boolean array, # which we reverse using the tilde (~) to turn True to False, and False to True print('Option 3:',average) # Or we can ignore NaNs using the alternate NaN-excluding version of np.mean() average = np.nanmean(data_1d) print('Option 4:',average)