OCEAN 215 slides:
# print() function: writes its argument as text to the cell's output
print('Python is great!')
# Remember: run cells using play button or Shift-Return / Shift-Enter
Python is great!
# Creating variables
# Each assignment below produces a different built-in type of value.
day_of_month = 24                 # int (integer)
hour_of_day = 9 + 45 / 60         # float (floating-point, or decimal, number): 9.75
month = 'January'                 # string (can use single or double quotes)
date = 'January' + ' ' + '24th'   # multiple strings concatenated together
cold = True                       # boolean (True or False)
# Print every variable, one per line, in the order it was defined
for example_value in (day_of_month, hour_of_day, month, date, cold):
    print(example_value)
# Remember: write variable names that are meaningful
24 9.75 January January 24th True
# Note that variables persist in Colab's memory across cells
# (day_of_month is not defined in this cell; it must already exist from an earlier one)
# Modifying variables
day_of_month = day_of_month + 1 # adds 1 to the existing value
print(day_of_month)
day_of_month += 1 # a more compact way of writing the line above (adds 1 again)
print(day_of_month)
25 26
# Another useful type of variable: datetime object
from datetime import datetime # first, import the datetime class from the datetime module
# Keyword arguments spell out which number is which part of the timestamp
now = datetime(year=2023, month=1, day=24, hour=9, minute=45, second=0)
print(now)
print(now.month) # extract month from datetime
2023-01-24 09:45:00 1
# An unfortunately common object: NaN (not-a-number)
import numpy as np
# Use the lowercase spelling: the np.NaN alias was removed in NumPy 2.0,
# while np.nan is available in every NumPy version
today_data = np.nan
print(today_data)
# NaN never compares equal to anything (not even itself), so test for it with np.isnan()
print(np.isnan(today_data))
nan True
# Creating lists
months = [
    'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
    'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec',
]
# Month numbers 1 through 12, one per entry in months
mon_nums = list(range(1, len(months) + 1))
for month_list in (months, mon_nums):
    print(month_list)
# Check length of a list using len()
print(len(months))
# Remember: lists can have any type of object in them, including different types of objects
# (e.g., numbers and strings, and even lists inside the list)
['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'] [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] 12
# List indexing (indices start at 0; negative indices count back from the end)
print(months[0]) # the 1st element
print(months[11]) # the 12th element
print(months[-2]) # the second-to-last element
print(months[0:3]) # the first 3 elements (the stop index, 3, is not included)
print(months[5::2]) # every 2nd element starting at the 6th element (Jun)
# Modifying a list
months[11] = 'December' # change the 12th element (index 11) to 'December'
print(months)
# Append to a list
months.append('January of the next year') # adds one element to the end of the same list
print(months)
Jan Dec Nov ['Jan', 'Feb', 'Mar'] ['Jun', 'Aug', 'Oct', 'Dec'] ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'December'] ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'December', 'January of the next year']
# Which months in Seattle have comfortable weather?
# Let's Google "Seattle temperature by month" and transcribe the table of high temps
# ASK: try to write a loop to tell us which months in Seattle are at least 66°F
months = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']
temp = [47,49,52,57,63,66,72,72,67,59,51,46]  # one high temp (°F) per entry in months
# Let's start by reviewing for-loops
# enumerate() hands back (index, element) pairs, so the number of months
# is never hard-coded and the loop body must be indented under the for line
for mon_idx, month in enumerate(months):
    print(mon_idx, month)
0 Jan 1 Feb 2 Mar 3 Apr 4 May 5 Jun 6 Jul 7 Aug 8 Sep 9 Oct 10 Nov 11 Dec
# Here we add an if-statement to check each month's temp against the threshold
temp_threshold = 66
# zip() pairs each month name with the temperature at the same position,
# so no index variable is needed
for month, month_temp in zip(months, temp):
    if month_temp >= temp_threshold:
        print('This month is comfortable: ' + month)
This month is comfortable: Jun This month is comfortable: Jul This month is comfortable: Aug This month is comfortable: Sep
# What about a loop to check which month is exactly 57°F?
# Walk the month names and temperatures in lockstep; print only exact matches
for month, month_temp in zip(months, temp):
    if month_temp == 57:
        print("This is the month we're looking for: " + month)
This is the month we're looking for: Apr
# Add an else statement for months that don't meet the criteria
# Every month prints exactly one line: the if-branch on a match, the else-branch otherwise
for month, month_temp in zip(months, temp):
    if month_temp == 57:
        print("This is the month we're looking for: " + month)
    else:
        print("This is not the month we're looking for: " + month)
This is not the month we're looking for: Jan This is not the month we're looking for: Feb This is not the month we're looking for: Mar This is the month we're looking for: Apr This is not the month we're looking for: May This is not the month we're looking for: Jun This is not the month we're looking for: Jul This is not the month we're looking for: Aug This is not the month we're looking for: Sep This is not the month we're looking for: Oct This is not the month we're looking for: Nov This is not the month we're looking for: Dec
# Now let's convert the Seattle temperatures from Fahrenheit to Celsius
# Formula: subtract 32 and multiply by 5/9
# This won't work, because you can't do element-wise math with lists
# new_temp = (temp - 32) * (5/9)
# Instead, we have to import NumPy and use NumPy arrays
import numpy as np
example_array = np.array([1,2,3,4,5,6]) # array() is a function in the np package
print(4 * example_array) # multiplies every element by 4
example_list = [1,2,3,4,5,6] # for comparison
print(4 * example_list) # with a list, * repeats the list 4 times instead
# Now the math will apply to each NumPy array element
temp = np.array(temp) # convert temp from list to array (temp is a NumPy array from here on)
new_temp = (temp - 32) * (5/9)
print(new_temp)
[ 4 8 12 16 20 24] [1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6] [ 8.33333333 9.44444444 11.11111111 13.88888889 17.22222222 18.88888889 22.22222222 22.22222222 19.44444444 15. 10.55555556 7.77777778]
# ASK: try to do the same temp threshold exercise, but WITHOUT a loop
# (Hint: use boolean indexing!)
# Requires temp to already be a NumPy array (converted in the cell above)
months = np.array(months) # convert from list to array
comfy_temp_mask = temp >= temp_threshold # this creates a Boolean mask (one True/False per month)
print(comfy_temp_mask)
print(months[comfy_temp_mask]) # index into months using the Boolean mask (keeps the True positions)
[False False False False False True True True True False False False] ['Jun' 'Jul' 'Aug' 'Sep']
# Some common NumPy functions you should know...
# Each entry pairs the text to print with the NumPy function that computes the value
summary_functions = [
    ('The average temp is:', np.mean),
    ('The median temp is:', np.median),
    ('The highest temp is:', np.max),
    ('The lowest temp is:', np.min),
    ('The sum of all the temps is:', np.sum),
    ('The standard deviation of all the temps is:', np.std),
]
for description, summary_function in summary_functions:
    print(description, summary_function(temp))
The average temp is: 58.416666666666664 The median temp is: 58.0 The highest temp is: 72 The lowest temp is: 46 The sum of all the temps is: 701 The standard deviation of all the temps is: 9.096320990134174
See this slide deck for more common NumPy functions: https://ethan-campbell.github.io/OCEAN_215/materials/lessons/lesson_5.pdf
Download TAO mooring SST data file from 0°N, 170°W: https://tinyurl.com/OCEAN443-data-TAO
OCEAN 215 slides:
Two ways to upload data files into Colab:
Either way, you can obtain the filepath of a file by going to the Files sidebar of Colab, navigating to the file, clicking the "...", and clicking "Copy path".
# Mount Google Drive at /content/drive (this triggers an authorization request;
# if it is not granted, the call raises MessageError)
# NOTE(review): auth is imported but not used in this cell
from google.colab import drive, auth
drive.mount('/content/drive',force_remount=True)
--------------------------------------------------------------------------- MessageError Traceback (most recent call last) <ipython-input-19-04fea3e52a65> in <module> 1 from google.colab import drive, auth ----> 2 drive.mount('/content/drive',force_remount=True) /usr/local/lib/python3.8/dist-packages/google/colab/drive.py in mount(mountpoint, force_remount, timeout_ms, readonly) 99 def mount(mountpoint, force_remount=False, timeout_ms=120000, readonly=False): 100 """Mount your Google Drive at the specified mountpoint path.""" --> 101 return _mount( 102 mountpoint, 103 force_remount=force_remount, /usr/local/lib/python3.8/dist-packages/google/colab/drive.py in _mount(mountpoint, force_remount, timeout_ms, ephemeral, readonly) 122 'TBE_EPHEM_CREDS_ADDR'] if ephemeral else _os.environ['TBE_CREDS_ADDR'] 123 if ephemeral: --> 124 _message.blocking_request( 125 'request_auth', request={'authType': 'dfs_ephemeral'}, timeout_sec=None) 126 /usr/local/lib/python3.8/dist-packages/google/colab/_message.py in blocking_request(request_type, request, timeout_sec, parent) 169 request_id = send_request( 170 request_type, request, parent=parent, expect_reply=True) --> 171 return read_reply_from_input(request_id, timeout_sec) /usr/local/lib/python3.8/dist-packages/google/colab/_message.py in read_reply_from_input(message_id, timeout_sec) 100 reply.get('colab_msg_id') == message_id): 101 if 'error' in reply: --> 102 raise MessageError(reply['error']) 103 return reply.get('data', None) 104 MessageError: Error: credential propagation was unsuccessful
Two ways to load CSV files in Python:
Not recommended: NumPy's np.genfromtxt() function: https://numpy.org/doc/stable/reference/generated/numpy.genfromtxt.html
Recommended: Pandas' pd.read_csv() function: https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html
If loading an Excel file, use Pandas' pd.read_excel() function: https://pandas.pydata.org/docs/reference/api/pandas.read_excel.html
import numpy as np
import pandas as pd
from datetime import datetime
filepath = '/content/sst0n170w_mon.txt'
# Load data into the variable "data"
# See the API link above for function argument info
#   delimiter=' '         -> columns in the file are separated by single spaces
#   index_col=0           -> use the first column as the row labels (index)
#   parse_dates=['Date']  -> convert the 'Date' column from text to dates
data = pd.read_csv(filepath,delimiter=' ',index_col=0,parse_dates=['Date'])
display(data) # display() is provided by the notebook environment; renders the table
Time | SST | Quality | |
---|---|---|---|
Date | |||
1988-05-16 | 1200 | 27.37 | 1 |
1988-06-16 | 1200 | 27.01 | 1 |
1988-07-16 | 1200 | 27.03 | 1 |
1988-08-16 | 1200 | 26.71 | 1 |
1988-09-16 | 1200 | 26.41 | 1 |
... | ... | ... | ... |
2020-10-16 | 1200 | 26.52 | 2 |
2022-10-16 | 1200 | 26.59 | 2 |
2022-11-16 | 1200 | 26.87 | 2 |
2022-12-16 | 1200 | 26.90 | 2 |
2023-01-16 | 1200 | 26.60 | 2 |
341 rows × 3 columns
# Select the index column (dates)
print(data.index) # display all items (pandas abbreviates long indexes with "...")
print(data.index.values[:10]) # display the first 10 elements (as NumPy datetime64 values)
# Note: if you ever need to convert between Datetime and datetime64 objects,
# use this chart: https://stackoverflow.com/a/21916253
DatetimeIndex(['1988-05-16', '1988-06-16', '1988-07-16', '1988-08-16', '1988-09-16', '1988-10-16', '1988-11-16', '1988-12-16', '1989-07-16', '1989-08-16', ... '2017-11-16', '2018-08-16', '2018-09-16', '2018-10-16', '2018-11-16', '2020-10-16', '2022-10-16', '2022-11-16', '2022-12-16', '2023-01-16'], dtype='datetime64[ns]', name='Date', length=341, freq=None) ['1988-05-16T00:00:00.000000000' '1988-06-16T00:00:00.000000000' '1988-07-16T00:00:00.000000000' '1988-08-16T00:00:00.000000000' '1988-09-16T00:00:00.000000000' '1988-10-16T00:00:00.000000000' '1988-11-16T00:00:00.000000000' '1988-12-16T00:00:00.000000000' '1989-07-16T00:00:00.000000000' '1989-08-16T00:00:00.000000000']
# Select one column of data using its label
# (the result keeps the Date index alongside the SST values)
data['SST']
Date 1988-05-16 27.37 1988-06-16 27.01 1988-07-16 27.03 1988-08-16 26.71 1988-09-16 26.41 ... 2020-10-16 26.52 2022-10-16 26.59 2022-11-16 26.87 2022-12-16 26.90 2023-01-16 26.60 Name: SST, Length: 341, dtype: float64
# Extract just the values as a NumPy array (the Date index is dropped)
data['SST'].values[:10] # just the first 10 elements
array([27.37, 27.01, 27.03, 26.71, 26.41, 24.9 , 24.83, 25.3 , 27.42, 27.3 ])
# Select a single row by its index label using .loc[]
# (returns that row's Time, SST, and Quality values)
data.loc['2010-01-16']
Time 1200.00 SST 30.05 Quality 1.00 Name: 2010-01-16 00:00:00, dtype: float64
# Select multiple rows with a label slice
# (unlike list slicing, both the start and end labels are included)
data.loc['2010-01-16':'2010-04-16']
Time | SST | Quality | |
---|---|---|---|
Date | |||
2010-01-16 | 1200 | 30.05 | 1 |
2010-02-15 | 1200 | 29.60 | 1 |
2010-03-16 | 1200 | 29.22 | 1 |
2010-04-16 | 1200 | 28.97 | 1 |
# Select one year of data
# (a year-only label matches every row whose date falls in that year)
data.loc['2010']
Time | SST | Quality | |
---|---|---|---|
Date | |||
2010-01-16 | 1200 | 30.05 | 1 |
2010-02-15 | 1200 | 29.60 | 1 |
2010-03-16 | 1200 | 29.22 | 1 |
2010-04-16 | 1200 | 28.97 | 1 |
2010-05-16 | 1200 | 27.95 | 1 |
2010-06-16 | 1200 | 27.83 | 1 |
2010-07-16 | 1200 | 27.35 | 1 |
2010-08-16 | 1200 | 26.19 | 1 |
2010-09-16 | 1200 | 26.14 | 1 |
2010-10-16 | 1200 | 25.91 | 1 |
2010-11-16 | 1200 | 26.09 | 1 |
2010-12-16 | 1200 | 25.63 | 1 |
# Select a single value by combining a row label and a column label
data.loc['2010-01-16','SST'] # use: .loc[row label, column label]
30.05
# You can also select values using NumPy-style integer indexing
# (.iloc[] takes 0-based positions; the index column is not counted as a column)
data.iloc[99,1] # the 100th row and 2nd column
29.12
# Apply NumPy functions such as mean() directly to the Pandas object
print(data['SST'].mean()) # average of all SST data
print() # blank line to separate the outputs
print(data['SST'].loc['2010'].mean()) # average of all SST data from 2010
print()
print(data.mean(axis=0)) # average of each column (axis=0 averages down the rows)
27.101994134897364 27.5775 Time 1200.000000 SST 27.101994 Quality 1.653959 dtype: float64
# If needed, you can convert from Pandas into a NumPy 2D array
# (only the column values are kept; the Date index and column names are not)
data_as_array = data.values
print(data_as_array)
[[1.200e+03 2.737e+01 1.000e+00] [1.200e+03 2.701e+01 1.000e+00] [1.200e+03 2.703e+01 1.000e+00] ... [1.200e+03 2.687e+01 2.000e+00] [1.200e+03 2.690e+01 2.000e+00] [1.200e+03 2.660e+01 2.000e+00]]
# When indexing into NumPy 2D arrays, use [row, column] order
# (indices are 0-based, so [0,1] is the 1st row and 2nd column)
print('Row #1, Column #2 =',data_as_array[0,1])
# Check shape of 2D array
print('Array shape in (rows, columns):',data_as_array.shape)
Row #1, Column #2 = 27.37 Array shape in (rows, columns): (341, 3)
OCEAN 215 slides:
plt.plot(): https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.plot.html
plt.scatter(): https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.scatter.html
plt.hist(): https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.hist.html
import matplotlib.pyplot as plt
# With a Pandas Series, plotting is as simple as specifying a column of data
# (plt.plot() returns a list of the line objects it drew, which the notebook echoes)
plt.plot(data['SST'])
[<matplotlib.lines.Line2D at 0x7ff010b36d60>]
# But generally, you should specify the (x, y) data
# Here x = the dates from the index and y = the SST values, both as NumPy arrays
plt.plot(data.index.values,data['SST'].values)
[<matplotlib.lines.Line2D at 0x7ff010ac20d0>]
# Let's get rid of the bad data (SST = -9.99)
# by keeping only the rows where SST is NOT -9.99
# (note: this overwrites "data", so the removed rows are gone for later cells)
data = data[data['SST'] != -9.99]
# Now let's create a nicely formatted plot and save it
plt.figure(figsize=(10,4))
# Small black markers at each data point...
plt.scatter(data.index.values,data['SST'].values,
c='k',s=5)
# ...joined by a thin black line; only the line gets a legend entry
plt.plot(data.index.values,data['SST'].values,
c='k',lw=1.0,label='SST')
plt.xlabel('Year')
plt.ylabel('SST (°C)')
plt.title('Sea surface temperatures from TAO mooring at 0°N, 170°W')
plt.grid(alpha=0.5)
plt.legend()
# Save after all formatting calls so the file includes labels, grid, and legend
plt.savefig('/content/sst_tao_monthly.pdf')
# Add a second line to the plot
# You can download Niño 3.4 index data here: https://tinyurl.com/OCEAN443-data-nino
# index_col=0 uses the file's first column as the index; parse_dates=True parses that index as dates
# NOTE(review): the table shown for "nino" below has an integer index and a separate
# date column, which does not look like the result of this call — confirm the file layout
nino = pd.read_csv('/content/nino34.csv',index_col=0,parse_dates=True)
plt.figure(figsize=(10,4))
plt.scatter(data.index.values,data['SST'].values,
c='k',s=5)
plt.plot(data.index.values,data['SST'].values,
c='k',lw=1.0,label='SST')
# Second line (red): the Niño 3.4 values
plt.plot(nino.index,nino.values,c='r',lw=1.0,label='El Niño index')
# Restrict the x-axis to 1990-2023 (the Niño table starts in 1870)
plt.xlim([datetime(1990,1,1),datetime(2023,1,1)])
plt.xlabel('Year')
plt.ylabel('SST (°C)')
plt.title('Sea surface temperatures from TAO mooring at 0°N, 170°W')
plt.grid(alpha=0.5)
plt.legend()
plt.savefig('/content/sst_tao_monthly_with_nino.pdf')
# Display the Niño 3.4 table (a bare variable name on a cell's last line is rendered as output)
nino
Unnamed: 0 | 0 | |
---|---|---|
0 | 1870-01-15 | 25.58 |
1 | 1870-02-15 | 25.57 |
2 | 1870-03-15 | 26.43 |
3 | 1870-04-15 | 26.96 |
4 | 1870-05-15 | 26.59 |
... | ... | ... |
1807 | 2020-08-15 | 26.46 |
1808 | 2020-09-15 | NaN |
1809 | 2020-10-15 | NaN |
1810 | 2020-11-15 | NaN |
1811 | 2020-12-15 | NaN |
1812 rows × 2 columns
Download OISST monthly SST data file: https://tinyurl.com/OCEAN443-data-OISST
OCEAN 215 slides:
Use xarray to work with netCDF files in Python:
xr.open_dataset(): https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html
import numpy as np
import xarray as xr
from datetime import datetime
filepath = '/content/sst.mon.ltm.1991-2020.nc'
# Load data into the variable "data"
# (note: this replaces the Pandas table previously stored in "data")
# See the API link above for function argument info
data = xr.open_dataset(filepath)
display(data) # display() is provided by the notebook environment; shows dimensions, coordinates, variables, attributes
/usr/local/lib/python3.8/dist-packages/xarray/coding/times.py:699: SerializationWarning: Unable to decode time axis into full numpy.datetime64 objects, continuing using cftime.datetime objects instead, reason: dates out of range dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime) /usr/local/lib/python3.8/dist-packages/xarray/core/indexing.py:524: SerializationWarning: Unable to decode time axis into full numpy.datetime64 objects, continuing using cftime.datetime objects instead, reason: dates out of range return np.asarray(array[self.key], dtype=None)
<xarray.Dataset> Dimensions: (lat: 720, lon: 1440, time: 12, nbnds: 2) Coordinates: * lat (lat) float32 -89.88 -89.62 -89.38 ... 89.38 89.62 89.88 * lon (lon) float32 0.125 0.375 0.625 ... 359.4 359.6 359.9 * time (time) object 0001-01-01 00:00:00 ... 0001-12-01 00:0... Dimensions without coordinates: nbnds Data variables: climatology_bounds (time, nbnds) datetime64[ns] ... sst (time, lat, lon) float32 ... valid_yr_count (time, lat, lon) float32 ... Attributes: Conventions: CF-1.5 title: NOAA/NCEI 1/4 Degree Daily Optimum Interp... institution: NOAA/National Centers for Environmental I... source: NOAA/NCEI https://www.ncei.noaa.gov/data/... References: https://www.psl.noaa.gov/data/gridded/dat... dataset_title: NOAA Daily Optimum Interpolation Sea Surf... version: Version 2.1 comment: Reynolds, et al.(2007) Daily High-Resolut... not_missing_threshold_percent: minimum 3% values input to have non-missi...
# Select a single variable from the netCDF dataset
# (the result carries its own coordinates and metadata attributes)
data['sst']
<xarray.DataArray 'sst' (time: 12, lat: 720, lon: 1440)> [12441600 values with dtype=float32] Coordinates: * lat (lat) float32 -89.88 -89.62 -89.38 -89.12 ... 89.38 89.62 89.88 * lon (lon) float32 0.125 0.375 0.625 0.875 ... 359.1 359.4 359.6 359.9 * time (time) object 0001-01-01 00:00:00 ... 0001-12-01 00:00:00 Attributes: long_name: Long Term Mean Monthly Mean of Sea Surface Temperature units: degC valid_range: [-3. 45.] precision: 2.0 dataset: NOAA High-resolution Blended Analysis var_desc: Sea Surface Temperature level_desc: Surface statistic: Long Term Mean parent_stat: Monthly Mean standard_name: sea_surface_temperature actual_range: [-1.7969891 34.213223 ]
# The metadata attributes are a dict (dictionary) that can be accessed using:
display(data['sst'].attrs)
print() # blank line to separate the outputs
print(data['sst'].attrs['units']) # select just units (look up one entry by its key)
{'long_name': 'Long Term Mean Monthly Mean of Sea Surface Temperature', 'units': 'degC', 'valid_range': array([-3., 45.], dtype=float32), 'precision': 2.0, 'dataset': 'NOAA High-resolution Blended Analysis', 'var_desc': 'Sea Surface Temperature', 'level_desc': 'Surface', 'statistic': 'Long Term Mean', 'parent_stat': 'Monthly Mean', 'standard_name': 'sea_surface_temperature', 'actual_range': array([-1.7969891, 34.213223 ], dtype=float32)}
degC
# To subset the data, use .sel() or .isel() for selection by label or by indices
# Here: a range of latitudes, one longitude value, and one month
# (the month label keeps time as a dimension of length 1)
data['sst'].sel(lat=slice(-10,10),lon=179.625,time='0001-06')
# Note that the line above uses slice(lower,upper) to select a range of values
<xarray.DataArray 'sst' (time: 1, lat: 80)> [80 values with dtype=float32] Coordinates: * lat (lat) float32 -9.875 -9.625 -9.375 -9.125 ... 9.375 9.625 9.875 lon float32 179.6 * time (time) object 0001-06-01 00:00:00 Attributes: long_name: Long Term Mean Monthly Mean of Sea Surface Temperature units: degC valid_range: [-3. 45.] precision: 2.0 dataset: NOAA High-resolution Blended Analysis var_desc: Sea Surface Temperature level_desc: Surface statistic: Long Term Mean parent_stat: Monthly Mean standard_name: sea_surface_temperature actual_range: [-1.7969891 34.213223 ]
# .isel() selects by 0-based integer position: time=5 is the 6th time step (June)
data['sst'].isel(time=5)
<xarray.DataArray 'sst' (lat: 720, lon: 1440)> [1036800 values with dtype=float32] Coordinates: * lat (lat) float32 -89.88 -89.62 -89.38 -89.12 ... 89.38 89.62 89.88 * lon (lon) float32 0.125 0.375 0.625 0.875 ... 359.1 359.4 359.6 359.9 time object 0001-06-01 00:00:00 Attributes: long_name: Long Term Mean Monthly Mean of Sea Surface Temperature units: degC valid_range: [-3. 45.] precision: 2.0 dataset: NOAA High-resolution Blended Analysis var_desc: Sea Surface Temperature level_desc: Surface statistic: Long Term Mean parent_stat: Monthly Mean standard_name: sea_surface_temperature actual_range: [-1.7969891 34.213223 ]
# Similar to Pandas, you can apply NumPy functions such as mean() to Xarray objects
# Calculate the average SST in June at 179.625°E between 10°S-10°N
# First make the selection once and name it...
june_equatorial_sst = data['sst'].sel(lat=slice(-10,10),lon=179.625,time='0001-06')
# ...then average it and show the Xarray result
june_mean_sst = june_equatorial_sst.mean()
display(june_mean_sst)
# Extract the value from Xarray into NumPy
june_mean_sst.values
<xarray.DataArray 'sst' ()> array(28.989391, dtype=float32) Coordinates: lon float32 179.6
array(28.989391, dtype=float32)
OCEAN 215 slides:
plt.pcolormesh(): https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.pcolormesh.html
plt.contourf(): https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.contourf.html
First, it's worth knowing that Pandas and Xarray have cool built-in plotting capabilities that you can access using .plot(). These are great for quick exploration of data, but it's harder to customize plots afterwards:
# Load and plot CSV data in just 2 lines!
# (re-reads the file, so "data" is the full table again, including any SST = -9.99 rows)
data = pd.read_csv('/content/sst0n170w_mon.txt',delimiter=' ',index_col=0,parse_dates=['Date'])
data['SST'].plot()
<matplotlib.axes._subplots.AxesSubplot at 0x7ff002b06520>
# Load and plot netCDF data in just 2 lines!
# (note: "data" now refers to the Xarray dataset instead of the Pandas table)
data = xr.open_dataset('/content/sst.mon.ltm.1991-2020.nc')
data['sst'].sel(time='0001-01').plot() # January climatology
/usr/local/lib/python3.8/dist-packages/xarray/coding/times.py:699: SerializationWarning: Unable to decode time axis into full numpy.datetime64 objects, continuing using cftime.datetime objects instead, reason: dates out of range dtype = _decode_cf_datetime_dtype(data, units, calendar, self.use_cftime) /usr/local/lib/python3.8/dist-packages/xarray/core/indexing.py:524: SerializationWarning: Unable to decode time axis into full numpy.datetime64 objects, continuing using cftime.datetime objects instead, reason: dates out of range return np.asarray(array[self.key], dtype=None)
<matplotlib.collections.QuadMesh at 0x7ff002abda00>
# Create a pseudocolor plot using Matplotlib
# First, subset data to the region and time of interest
data_subset = data['sst'].sel(lat=slice(-25,25),lon=slice(120,280),time='0001-03')
# "Squeeze" out the length-1 time dimension, leaving a 2D (lat, lon) array
data_subset = data_subset.squeeze()
plt.figure(figsize=(10,4))
# Arguments are (x coordinates, y coordinates, values to color by)
plt.pcolormesh(data_subset['lon'],data_subset['lat'],data_subset,
cmap='RdBu_r')
plt.colorbar(label='SST (°C)')
plt.xlabel('Longitude (°E)')
plt.ylabel('Latitude (°N)')
plt.title('OISST climatology for March');
# Add scatter points for Honolulu and Fiji
# The dataset's longitudes run 0-360°E, so a western longitude must be converted:
# 180+180-157.9 = 360-157.9 = 202.1°E (i.e., 157.9°W)
plt.scatter(180+180-157.9,21.3,marker='o',s=100,color='g',label='Honolulu')
plt.scatter(178.1,-17.7,marker='o',s=100,color='gold',label='Fiji')
# The trailing semicolon keeps the notebook from echoing the returned object
plt.legend(loc='lower right');
At the moment, Cartopy doesn't always play nice with Google Colab. To create maps using Cartopy, you must run the code below once per notebook to install the package. Your session may "crash" during installation but that is part of the install process. Afterwards, you can comment out the code and you shouldn't need to run it again.
# One-time Cartopy install for Colab: uncomment, run once per notebook, then re-comment
# !pip install -q condacolab
# import condacolab
# condacolab.install()
# !mamba install -q -c conda-forge cartopy
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import cartopy.feature as cfeature
from cartopy.mpl.gridliner import LONGITUDE_FORMATTER, LATITUDE_FORMATTER

fig = plt.figure(figsize=(14,4))
# Map axes using a Plate Carrée projection with the central longitude set to 180°
ax = plt.axes(projection=ccrs.PlateCarree(central_longitude=180.0))
# transform= tells Cartopy which coordinate system the data's lon/lat values are in
pcm = ax.pcolormesh(data_subset['lon'],data_subset['lat'],data_subset,
                    transform=ccrs.PlateCarree())
# White contour lines (12 levels requested) drawn over the colors
contours = ax.contour(data_subset['lon'],data_subset['lat'],data_subset,
                      levels=12,colors='w',transform=ccrs.PlateCarree())
# Label every other contour level to reduce clutter
ax.clabel(contours,levels=contours.levels[::2],colors='w',fontsize=8)
c = plt.colorbar(pcm,ax=ax,label='SST (°C)')
ax.add_feature(cfeature.LAND,color='k',alpha=0.8)
gl = ax.gridlines(crs=ccrs.PlateCarree(),draw_labels=True,
                  linewidth=0.5,color='w',alpha=0.7,linestyle='--')
# Hide the top and right tick labels. Use .top_labels / .right_labels:
# the older .xlabels_top / .ylabels_right attributes are deprecated in Cartopy
gl.top_labels = False
gl.right_labels = False
gl.xformatter = LONGITUDE_FORMATTER
gl.yformatter = LATITUDE_FORMATTER
/usr/local/lib/python3.8/site-packages/cartopy/mpl/gridliner.py:451: UserWarning: The .xlabels_top attribute is deprecated. Please use .top_labels to toggle visibility instead. warnings.warn('The .xlabels_top attribute is deprecated. Please ' /usr/local/lib/python3.8/site-packages/cartopy/mpl/gridliner.py:487: UserWarning: The .ylabels_right attribute is deprecated. Please use .right_labels to toggle visibility instead. warnings.warn('The .ylabels_right attribute is deprecated. Please ' /usr/local/lib/python3.8/site-packages/cartopy/io/__init__.py:241: DownloadWarning: Downloading: https://naturalearth.s3.amazonaws.com/50m_physical/ne_50m_land.zip warnings.warn(f'Downloading: {url}', DownloadWarning)