import requests
import datetime
import xml.etree.ElementTree as ET
import pandas as pd
from pandas.api.types import is_string_dtype
from pandas.api.types import is_numeric_dtype
import geopandas as gpd
import fiona
from fiona.crs import from_epsg
import numpy as np
from shapely.geometry import Point
import matplotlib.pyplot as plt
%matplotlib inline
req = requests.get("http://www.ilmateenistus.ee/ilma_andmed/xml/observations.php")
print(req.encoding)
print(req.headers['content-type'])
tree = ET.fromstring(req.content.decode(req.encoding) )
print(tree.tag)
print(tree.attrib)
ts = tree.attrib['timestamp']
print(datetime.datetime.fromtimestamp(int(ts)))
ISO-8859-1 text/xml observations {'timestamp': '1620898765'} 2021-05-13 12:39:25
data = {'stations' : [],
'wmocode': [],
'precipitations': [],
'airtemperature': [],
'windspeed': [],
'waterlevel': [],
'watertemperature': [],
'geometry': []
}
counter = 0
for station in tree.findall('station'):
counter += 1
# print(station.tag, child.attrib)
# < name > Virtsu < /name > – jaama nimi.
name = station.find('name').text
data['stations'].append(name)
# < wmocode > 26128 < /wmocode > – jaama WMO kood.
wmocode = station.find('wmocode').text
data['wmocode'].append(wmocode)
try:
# < longitude > 23.51355555534363 < /longitude > – jaama asukoha koordinaat.
lon = station.find('longitude').text
# < latitude > 58.572674999100215 < /latitude > – jaama asukoha koordinaat.
lat = station.find('latitude').text
coords = Point(float(lon), float(lat))
data['geometry'].append(coords)
except ValueError as ve:
pass
# < phenomenon > Light snowfall < /phenomenon > – jaamas esinev ilmastikunähtus, selle puudumisel pilvisuse aste (kui jaamas tehakse manuaalseid pilvisuse mõõtmisi). Täielik nimekiri nähtustest on allpool olevas tabelis.
# < visibility > 34.0 < /visibility > – nähtavus (km).
# < precipitations > 0 < /precipitations > – sademed (mm) viimase tunni jooksul. Lume, lörtsi, rahe ja teiste taoliste sademete hulk on samuti esitatud vee millimeetritena. 1 cm lund ~ 1 mm vett.
precip = station.find('precipitations').text
data['precipitations'].append(precip)
# < airpressure > 1005.4 < /airpressure > – õhurõhk (hPa). Normaalrõhk on 1013.25 hPa.
# < relativehumidity > 57 < /relativehumidity > – suhteline õhuniiskus (%).
# < airtemperature > -3.6 < /airtemperature > – õhutemperatuur (°C).
temp = station.find('airtemperature').text
data['airtemperature'].append(temp)
# < winddirection > 101 < /winddirection > – tuule suund (°).
# < windspeed > 3.2 < /windspeed > – keskmine tuule kiirus (m/s).
wind = station.find('windspeed').text
data['windspeed'].append(wind)
# < windspeedmax > 5.1 < /windspeedmax > – maksimaalne tuule kiirus ehk puhangud (m/s).
# < waterlevel > -49 < /waterlevel > – veetase (cm Kroonlinna nulli suhtes)
waterlevel = station.find('waterlevel').text
data['waterlevel'].append(waterlevel)
# < waterlevel_eh2000 > -28 < waterlevel_eh2000/ > – veetase (cm Amsterdami nulli suhtes)
# waterlevel_eh2000 = station.find('waterlevel_eh2000').text
# < watertemperature > -0.2 < /watertemperature > – veetemperatuur (°C)
watertemp = station.find('watertemperature').text
data['watertemperature'].append(watertemp)
print(counter)
df = pd.DataFrame(data)
for field in ['precipitations','airtemperature','windspeed','waterlevel','watertemperature']:
if field in df.columns:
if is_string_dtype(df[field]):
df[field] = df[field].astype(float)
display(df.head(5))
geo_df = gpd.GeoDataFrame(df, crs=from_epsg(4326), geometry='geometry')
geo_df.plot()
air_df = geo_df.dropna(subset=['airtemperature'])
air_df.plot(column='airtemperature', legend=True)
134
stations | wmocode | precipitations | airtemperature | windspeed | waterlevel | watertemperature | geometry | |
---|---|---|---|---|---|---|---|---|
0 | Kuressaare linn | None | NaN | 22.1 | NaN | NaN | NaN | POINT (22.48944444411111 58.26416666666667) |
1 | Tallinn-Harku | 26038 | 0.0 | 24.2 | 3.7 | NaN | NaN | POINT (24.60289166662428 59.39812222235513) |
2 | Pakri | 26029 | 0.0 | 13.1 | 2.7 | NaN | NaN | POINT (24.04008055547654 59.38950277719013) |
3 | Kunda | 26045 | 0.0 | 21.5 | 4.9 | NaN | 8.0 | POINT (26.54139722207962 59.52141111042325) |
4 | Jõhvi | 26046 | 0.0 | 22.5 | 6.4 | NaN | NaN | POINT (27.39827499992098 59.32902499979958) |
C:\dev\conda3\envs\geopy2020\lib\site-packages\pyproj\crs\crs.py:53: FutureWarning: '+init=<authority>:<code>' syntax is deprecated. '<authority>:<code>' is the preferred initialization method. When making the change, be mindful of axis order changes: https://pyproj4.github.io/pyproj/stable/gotchas.html#axis-order-changes-in-proj-6 return _prepare_from_string(" ".join(pjargs))
<AxesSubplot:>
geo_df_3301 = geo_df.dropna(subset=['airtemperature']).to_crs(epsg=3301)
geo_df_3301['x'] = geo_df_3301['geometry'].apply(lambda p: p.x)
geo_df_3301['y'] = geo_df_3301['geometry'].apply(lambda p: p.y)
display(geo_df_3301.head(5))
geo_df_3301.to_file('ilmateenistus_airtemp_stations.shp', encoding='utf-8')
stations | wmocode | precipitations | airtemperature | windspeed | waterlevel | watertemperature | geometry | x | y | |
---|---|---|---|---|---|---|---|---|---|---|
0 | Kuressaare linn | None | NaN | 22.1 | NaN | NaN | NaN | POINT (411346.597 6459155.342) | 411346.597496 | 6.459155e+06 |
1 | Tallinn-Harku | 26038 | 0.0 | 24.2 | 3.7 | NaN | NaN | POINT (534250.544 6584618.823) | 534250.544367 | 6.584619e+06 |
2 | Pakri | 26029 | 0.0 | 13.1 | 2.7 | NaN | NaN | POINT (502277.599 6583505.345) | 502277.598516 | 6.583505e+06 |
3 | Kunda | 26045 | 0.0 | 21.5 | 4.9 | NaN | 8.0 | POINT (643825.046 6600924.969) | 643825.045924 | 6.600925e+06 |
4 | Jõhvi | 26046 | 0.0 | 22.5 | 6.4 | NaN | NaN | POINT (693367.350 6581666.569) | 693367.350112 | 6.581667e+06 |
https://www.geodose.com/2019/09/creating-idw-interpolation-from-scratch-python.html
IDW Algorithm Implementation in Python
geo_df_3301.dtypes
stations object wmocode object precipitations float64 airtemperature float64 windspeed float64 waterlevel float64 watertemperature float64 geometry geometry x float64 y float64 dtype: object
from idw_basic import idw_rblock, idw_npoint
x_idw_list1, y_idw_list1, z_head1 = idw_rblock(x=geo_df_3301['x'].astype(float).values.tolist(),
y=geo_df_3301['y'].astype(float).values.tolist(),
z=geo_df_3301['airtemperature'].values.tolist(),
grid_side_length=200,
search_radius=50000,
p=1.5)
display(len(x_idw_list1))
display(len(y_idw_list1))
display(len(z_head1))
display(np.array(z_head1).shape)
C:\dev\build\py_interpol_demo\idw_basic.py:46: RuntimeWarning: invalid value encountered in double_scalars z_idw=np.dot(z_block,wt)/sum(w_list) # idw calculation using dot product
200
200
200
(200, 200)
plt.matshow(z_head1, origin='lower')
plt.colorbar()
plt.show()
idw_npoint might take very long, due to ierative search radius increase to find at least n nearest neighbours
x_idw_list2, y_idw_list2, z_head2 = idw_npoint(x=geo_df_3301['x'].astype(float).values.tolist(),
y=geo_df_3301['y'].astype(float).values.tolist(),
z=geo_df_3301['airtemperature'].values.tolist(),
grid_side_length=100,
n_points=3,
p=1.5,
rblock_iter_distance=50000)
display(len(x_idw_list2))
display(len(y_idw_list2))
display(len(z_head2))
display(np.array(z_head2).shape)
--------------------------------------------------------------------------- KeyboardInterrupt Traceback (most recent call last) <ipython-input-8-9327b0f9f8a5> in <module> 5 n_points=3, 6 p=1.5, ----> 7 rblock_iter_distance=50000) 8 display(len(x_idw_list2)) 9 display(len(y_idw_list2)) C:\dev\build\py_interpol_demo\idw_basic.py in idw_npoint(x, y, z, grid_side_length, n_points, p, rblock_iter_distance) 176 xz=x_init+wn*j 177 # min. number of search points=5, inv. power value p=1.5 --> 178 z_idw=run_npoint(x, y, z, xz, yz, n_points, p) 179 z_idw_list.append(z_idw) 180 z_head.append(z_idw_list) C:\dev\build\py_interpol_demo\idw_basic.py in run_npoint(x, y, z, xz, yz, n_point, p, rblock_iter_distance) 109 for i in range(len(x)): 110 # condition to test if a point is within the block --> 111 if ((x[i]>=xr_min and x[i]<=xr_max) and (y[i]>=yr_min and y[i]<=yr_max)): 112 x_block.append(x[i]) 113 y_block.append(y[i]) KeyboardInterrupt:
plt.matshow(z_head2, origin='lower')
plt.colorbar()
plt.show()
By Copyright (C) 2016 Paul Brodersen paulbrodersen+idw@gmail.com under GPL-3.0
code: https://github.com/paulbrodersen/inverse_distance_weighting
Inverse distance weighting is an interpolation method that computes the score of query points based on the scores of their k-nearest neighbours, weighted by the inverse of their distances.
As each query point is evaluated using the same number of data points, this method allows for strong gradient changes in regions of high sample density while imposing smoothness in data sparse regions.
uses:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import idw_knn
XY_obs_coords = np.vstack([geo_df_3301['x'].values, geo_df_3301['y'].values]).T
z_arr = geo_df_3301['airtemperature'].values
display(XY_obs_coords.shape)
display(z_arr.shape)
# returns a function that is trained (the tree setup) for the interpolation on the grid
idw_tree = idw_knn.tree(XY_obs_coords, z_arr)
(94, 2)
(94,)
all_dist_m = geo_df_3301['x'].max() - geo_df_3301['x'].min()
dist_km_x = all_dist_m / 1000
display(dist_km_x)
all_dist_m_y = geo_df_3301['y'].max() - geo_df_3301['y'].min()
dist_km_y = all_dist_m_y / 1000
display(dist_km_y)
366.77577143797305
219.85687205510027
# prepare grids
# number of target interpolation grid shape along x and y axis, e.g. 150*100 raster pixels
nx=int(dist_km_x)
ny=int(dist_km_y)
# preparing the "output" grid
x_spacing = np.linspace(geo_df_3301['x'].min(), geo_df_3301['x'].max(), nx)
y_spacing = np.linspace(geo_df_3301['y'].min(), geo_df_3301['y'].max(), ny)
# preparing the target grid
x_y_grid_pairs = np.meshgrid(x_spacing, y_spacing)
x_y_grid_pairs_list = np.reshape(x_y_grid_pairs, (2, -1)).T
display(f"x_y_grid_pairs {len(x_y_grid_pairs)}")
display(f"x_y_grid_pairs_list reshaped {x_y_grid_pairs_list.shape}")
'x_y_grid_pairs 2'
'x_y_grid_pairs_list reshaped (80154, 2)'
# now interpolating onto the target grid
z_arr_interp = idw_tree(x_y_grid_pairs_list)
display(f"z_arr_interp {z_arr_interp.shape}")
'z_arr_interp (80154,)'
# plot
fig, (ax1, ax2) = plt.subplots(1,2, sharex=True, sharey=True, figsize=(10,3))
ax1.scatter(XY_obs_coords[:,0], XY_obs_coords[:,1], c=geo_df_3301['airtemperature'], linewidths=0)
ax1.set_title('Observation samples')
ax2.contourf(x_spacing, y_spacing, z_arr_interp.reshape((ny,nx)))
ax2.set_title('Interpolation')
plt.show()
z_arr_interp.shape
(80154,)
plt.matshow(z_arr_interp.reshape((ny,nx)), origin='lower')
plt.colorbar()
plt.show()
display(f"x_spacing {x_spacing.shape}")
display(f"y_spacing {y_spacing.shape}")
# is a x_y_grid_pair a list of two ndarrays, each is fully spatial 100x150 fields, one holds the x coords the other the y coords
x_mg = np.meshgrid(x_spacing, y_spacing)
display(f"x_mg {type(x_mg)} {len(x_mg)} len0 {type(x_mg[0])} {len(x_mg[0])} {x_mg[0].shape} len1 {type(x_mg[1])} {len(x_mg[1])} {x_mg[0].shape}")
# the yget reshaped into two long flattened arrays the joint full list of target x y pairs representing all grid locations
x_mg_interp_prep = np.reshape(x_mg, (2, -1)).T
display(f"x_mg_interp_prep {type(x_mg_interp_prep)} {len(x_mg_interp_prep)} {x_mg_interp_prep.shape}")
'x_spacing (366,)'
'y_spacing (219,)'
"x_mg <class 'list'> 2 len0 <class 'numpy.ndarray'> 219 (219, 366) len1 <class 'numpy.ndarray'> 219 (219, 366)"
"x_mg_interp_prep <class 'numpy.ndarray'> 80154 (80154, 2)"
from scipy.interpolate import Rbf
def scipy_idw(x, y, z, xi, yi):
interp = Rbf(x, y, z, function='linear')
return interp(xi, yi)
def plot(x,y,z,grid):
plt.figure()
grid_flipped = np.flipud(grid)
plt.imshow(grid, extent=(x.min(), x.max(), y.min(), y.max()), origin='lower')
# plt.hold(True)
plt.scatter(x,y,c=z)
plt.colorbar()
# nx, ny = 50, 50
x=geo_df_3301['x'].astype(float).values
y=geo_df_3301['y'].astype(float).values
z=geo_df_3301['airtemperature'].values
xi = np.linspace(x.min(), x.max(), nx)
yi = np.linspace(y.min(), y.max(), ny)
xi, yi = np.meshgrid(xi, yi)
xi, yi = xi.flatten(), yi.flatten()
grid2 = scipy_idw(x,y,z,xi,yi)
grid2 = grid2.reshape((ny, nx))
C:\dev\conda3\envs\geopy2020\lib\site-packages\scipy\interpolate\rbf.py:257: LinAlgWarning: Ill-conditioned matrix (rcond=1.70585e-32): result may not be accurate. self.nodes = linalg.solve(self.A, self.di)
plot(x,y,z,grid2)
plt.title("Scipy's Rbf with function=linear")
Text(0.5, 1.0, "Scipy's Rbf with function=linear")
# plot
fig, (ax1, ax2, ax3) = plt.subplots(1,3, sharex=True, sharey=True, figsize=(10,3))
ax1.scatter(x,y, c=z, linewidths=0)
ax1.set_title('Observation samples')
ax2.contourf(np.linspace(x.min(), x.max(), nx), np.linspace(y.min(), y.max(), ny), grid2)
ax2.set_title('Interpolation contours')
ax3.imshow(np.flipud(grid2), extent=(x.min(), x.max(), y.min(), y.max()))
ax3.set_title('RBF pixels')
plt.show()
import pykrige.kriging_tools as kt
from pykrige.ok import OrdinaryKriging
OK = OrdinaryKriging(geo_df_3301['x'].astype(float).values,
geo_df_3301['y'].astype(float).values,
geo_df_3301['airtemperature'].values,
variogram_model='linear',
verbose=True,
enable_plotting=True)
Plotting Enabled Adjusting data for anisotropy... Initializing variogram model... Coordinates type: 'euclidean' Using 'linear' Variogram Model Slope: 4.1348109771786575e-05 Nugget: 1.1611810077364633