This notebook accesses the EFIxIDM and EFIxLP products from the Swarm TII cross-track flow data and plot the electron temperature of two different collections against each other for a same time period.
VirES Variable | Description |
---|---|
SPACECRAFT |
Spacecraft options which user can change manually. Available 3 options "A", "B", "C" |
START_TIME/END_TIME |
Time for which to request data. This should be in ISO format (e.g. "2016-01-02T00:00:00Z") |
SPACECRAFT = "B"
START_TIME = '2022-06-30T00:00:00Z'
END_TIME = '2022-07-30T00:00:00Z'
from viresclient import SwarmRequest
SERVER_URL = "https://vires.services/ows"
request = SwarmRequest(SERVER_URL)
collection_efi_idm = f'SW_PREL_EFI{SPACECRAFT}IDM_2_'
collection_efi_lp =f'SW_OPER_EFI{SPACECRAFT}_LP_1B'
start_orbit_number = request.get_orbit_number(SPACECRAFT, START_TIME)
end_orbit_number = request.get_orbit_number(SPACECRAFT, END_TIME)
start_time, end_time = request.get_times_for_orbits(start_orbit_number, end_orbit_number, mission = 'Swarm', spacecraft=SPACECRAFT)
def request_data(url, collection, measurements,start_time, end_time):
"""
Make a request for data from the Swarm API.
"""
request = SwarmRequest(url)
request.set_collection(collection)
request.set_products(
measurements=measurements,
auxiliaries=['OrbitDirection', 'OrbitNumber', 'QDLat', 'QDOrbitDirection'],
)
data = request.get_between(
start_time=start_time,
end_time=end_time,
asynchronous=True,
).as_xarray()
return data
data_idm = request_data(SERVER_URL, collection_efi_idm, ['T_e'],start_time, end_time)
data_lp = request_data(SERVER_URL, collection_efi_lp, ['Te'],start_time, end_time)
| [ Elapsed: 00:00, Remaining: ?]
Processing: 0%| | [ Elapsed: 00:00, Remaining: ? ] [1/2]
Downloading: 0%| | [ Elapsed: 00:00, Remaining: ? ] (235.741MB)
Processing: 0%| | [ Elapsed: 00:00, Remaining: ? ] [2/2]
Downloading: 0%| | [ Elapsed: 00:00, Remaining: ? ] (47.866MB)
| [ Elapsed: 00:00, Remaining: ?]
Processing: 0%| | [ Elapsed: 00:00, Remaining: ? ] [1/2]
Downloading: 0%| | [ Elapsed: 00:00, Remaining: ? ] (237.584MB)
Processing: 0%| | [ Elapsed: 00:00, Remaining: ? ] [2/2]
Downloading: 0%| | [ Elapsed: 00:00, Remaining: ? ] (48.238MB)
The plots show how the electron temperature varies over time and QD latitude for the two different data collections. By comparing the two plots, we can see if there are any notable differences in the electron temperature between the two collections.
import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm
import numpy as np
def sc_plot(ax,data, measurements,vmin, vmax):
mask = (data[measurements].values>=0) & (~np.isnan(data[measurements].values))
timestamps = data["Timestamp"].values[mask]
latitudes = data["QDLat"].values[mask]
efi_te = data[measurements].values[mask]
efi_te = efi_te[~np.isnan(efi_te)]
norm=LogNorm(vmin=vmin, vmax=vmax)
sc = ax.scatter(timestamps, latitudes, c=efi_te, cmap='jet', norm=norm)
ax.set_xlabel('Timestamp')
ax.set_ylabel('Latitude_QD')
cbar = fig.colorbar(sc, ax=ax)
cbar.set_label(measurements)
te_max = max(data_idm['T_e'].values.max(), data_lp['Te'].values.max())
fig, ax = plt.subplots(figsize=(25, 15), nrows =2, ncols=1)
sc_plot(ax[0],data_idm, "T_e",1,te_max )
sc_plot(ax[1], data_lp, "Te",1, te_max)
Some special functions do not like to time data types and require the time to be converted to a number.
import numpy as np
EPOCH = np.datetime64("2000-01-01")
def times_to_seconds(times, epoch=None, precision="ns", scale=1e-9):
""" Convert times (datetime64) to seconds (float).
The epoch (zero value) is set by default to 2000-01-01T00:00:00.
"""
if epoch is None:
epoch = EPOCH
return dtimes_to_seconds(np.asarray(times, "datetime64[ns]") - epoch, precision, scale)
def seconds_to_times(seconds, epoch=None, precision="ns", scale=1e9):
""" Convert seconds (float) to times (datetime64).
The epoch (zero value) is set by default to 2000-01-01T00:00:00.
"""
if epoch is None:
epoch = EPOCH
return seconds_to_dtimes(seconds, precision, scale) + epoch
def dtimes_to_seconds(dtimes, precision="ns", scale=1e-9):
""" Convert time differences (timedelta64) to seconds (float)
with nanosecond precision.
"""
return scale * np.asarray(dtimes, f"timedelta64[{precision}]").astype("int64")
def seconds_to_dtimes(seconds, precision="ns", scale=1e9):
""" Convert seconds to time differences (timedelta64)
with nanosecond precision.
"""
return (scale * np.asarray(seconds)).astype("int64").astype(f"timedelta64[{precision}]")
The match_times()
function below finds corresponding times from two time-series which are not guaraneed to be sampled equally.
from viresclient._wps.time_util import parse_duration
from scipy.interpolate import interp1d
import numpy as np
def match_times(times1, times2, atol):
assert times1.ndim == 1 # assert 1D array
assert times2.ndim == 1 # assert 1D array
epoch = times1[0] if times1.size > 0 else None
seconds1 = times_to_seconds(times1, epoch=epoch)
seconds2 = times_to_seconds(times2, epoch=epoch)
seconds_atol = dtimes_to_seconds(atol)
# find the nearest neigbour
assert ((seconds2[1:] - seconds2[:-1]) >= 0).all() # assert sorted values
idx2 = interp1d(
seconds2, np.arange(seconds2.size),
kind='nearest',
copy=False,
fill_value="extrapolate",
assume_sorted=True,
)(seconds1).astype("int64")
# pick values within the given tolerance
idx1 = (abs(seconds2[idx2] - seconds1) <= seconds_atol).nonzero()[0]
idx2 = idx2[idx1]
return idx1, idx2
idx_lp, idx_idm = match_times(
data_lp["Timestamp"].values,
data_idm["Timestamp"].values,
atol=parse_duration("PT0.5S"),
)
Here, we are creating scatter plots that visualize the correlation between electron temperature measurements from two different data collections, EFI LP and EFI IDM. The red line on each plot represents the line of perfect correlation, where the electron temperature values from both collections are exactly the same. The scatter points represent the individual measurements and their position on the plot indicates their relative electron temperature values between the two data collections. A tight clustering of points around the red line indicates a strong correlation between the two data sets, while a scattered distribution of points indicates a weak correlation. The scatter plot is useful for identifying individual outliers or patterns in the data.
te_lp = data_lp["Te"].values[idx_lp]
te_idm = data_idm["T_e"].values[idx_idm]
def _get_lim(values1, values2):
return (
min(values1.min(), values2.min()),
max(values1.max(), values2.max()),
)
from matplotlib.pyplot import figure
def plot_scatter(ax, lim):
ax.plot(lim, lim, "-", color="red", zorder=0)
ax.scatter(te_lp, te_idm, s=2)
ax.grid()
ax.set_xlabel("Te (EFI LP) / K")
ax.set_ylabel("T_e (EFI IDM) / K")
ax.set_xlim(lim)
ax.set_ylim(lim)
ax.set_aspect("equal")
fig = figure(figsize=(15, 10), dpi=100)
ax = fig.subplots()
plot_scatter(ax, lim=_get_lim(te_lp, te_idm))
fig = figure(figsize=(15, 10), dpi=100)
ax = fig.subplots()
plot_scatter(ax, lim=[0, 6e4])
fig = figure(figsize=(15, 10), dpi=100)
ax = fig.subplots()
plot_scatter(ax, lim=[0, 1e4])
Three scatter plots differ in their zoom levels, which affects the range of values visible on the x and y axes.
Those plots visualize the distribution of data across two variables, which are the electron temperature values obtained from two different collections and identify any areas of high or low density in the data.The advantage of the 2D histogram is that it provides a better visual representation of the distribution of the data and provide more accurate representation of the data density, especially when the number of data points is large.
import numpy as np
from scipy.stats import binned_statistic_2d
def bin_data_2d(x, y, v, x_step, y_step, x_extent=None, y_extent=None, statistic='mean'):
""" Conventience wrapper around the scipy.stats.binned_statistic_2d.
See also https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.binned_statistic_2d.html
Returns:
result ... the calculated binned statictics (Ny x Nx array)
x_edge ... edges of the bins in the x coordinate (Nx + 1 array)
y_edge ... edges of the bins in the y coordinate (Ny + 1 array)
Args:
x ... 1st coordinate values
y ... 2nd coordinate values
v ... binned values
x_step ... x proposed binning step (adjusted to fit the x_extent)
y_step ... y proposed binning step (adjusted to fit the y_extent)
x_extent ... optional pair of min. and max. boulds for the x coordinate
y_extent ... optional pait of min. and max. boulds for the y coordinate
staticstics ... type of the statistic to be calculated. For the possible
options see the scipy.stats.binned_statistic_2d documentation.
"""
def _get_bins(start, end, step):
nstep = abs(int(np.ceil((end - start)/step))) + 1
return np.linspace(start, end, nstep)
if x_extent is None:
x_extent = [x.min(), x.max()]
if y_extent is None:
y_extent = [y.min(), y.max()]
result, x_edges, y_edges, _ = binned_statistic_2d(
x, y, v, statistic=statistic, bins=(
_get_bins(x_extent[0], x_extent[1], x_step),
_get_bins(y_extent[0], y_extent[1], y_step),
),
)
return result.T, x_edges, y_edges
import matplotlib.pyplot as plt
import matplotlib as mpl
def plot_scatter(ax, lim, nbins=100):
# 2D binning
step = (lim[1] - lim[0]) / nbins
v, x, y = bin_data_2d(
x=te_lp,
y=te_idm,
v=np.ones(te_lp.shape),
x_step=step,
y_step=step,
x_extent=lim,
y_extent=lim,
statistic='count'
)
# plotting
ax.plot(lim, lim, "-", color="red", zorder=0)
norm = mpl.colors.LogNorm(vmin=1, vmax=v.max())
im = ax.pcolor(x, y, v, norm=norm)
cax = plt.colorbar(im, ax=ax)
cax.ax.set_ylabel('N', rotation=270)
ax.grid()
ax.set_xlabel("Te (EFI LP) / K")
ax.set_ylabel("T_e (EFI IDM) / K")
ax.set_xlim(lim)
ax.set_ylim(lim)
ax.set_aspect("equal")
fig = plt.figure(figsize=(15, 10), dpi=100)
ax = fig.subplots()
plot_scatter(ax, lim=_get_lim(te_lp, te_idm))
fig = figure(figsize=(15, 10), dpi=100)
ax = fig.subplots()
plot_scatter(ax, lim=[0, 6e4])
fig = figure(figsize=(15, 10), dpi=100)
ax = fig.subplots()
plot_scatter(ax, lim=[0, 1e4])
Difference between three 2D binning and heatmap plot is the zoom level, which changes the visible range of values on the x and y axes.