Taher Chegini
ESIP IT&I 2021
from IPython.display import IFrame
IFrame(src="https://hyriver.readthedocs.io/en/latest", width=950, height=600)
Census TIGER (Topologically Integrated Geographic Encoding and Referencing database)
import warnings
from pathlib import Path
warnings.filterwarnings("ignore", message=".*initial implementation of Parquet.*")
root = Path("..", "data")
root.mkdir(parents=True, exist_ok=True)
BASE_PLOT = {"facecolor": "k", "edgecolor": "b", "alpha": 0.2, "figsize": (18, 9)}
CRS = "esri:102008"
import geopandas as gpd
from shapely.geometry import box
conus_bounds = box(-125, 24, -65, 50)
cfile = Path(root, "conus.feather")
if cfile.exists():
conus = gpd.read_feather(cfile)
else:
tiger_url = (
lambda x: f"https://www2.census.gov/geo/tiger/TIGER2020/{x.upper()}/tl_2020_us_{x}.zip"
)
coastline = gpd.read_file(tiger_url("coastline"))
state = gpd.read_file(tiger_url("state"))
conus = state[state.intersects(conus_bounds)].copy()
conus_coastline = coastline[coastline.within(conus_bounds)]
conus["coastal"] = conus.intersects(conus_coastline.unary_union)
conus.to_feather(cfile)
ax = conus.to_crs(CRS).plot(column="coastal", **BASE_PLOT)
ax.axis("off")
ax.margins(0)
We need to look at the Water Services API.
import pygeohydro as gh
nwis = gh.NWIS()
cfile = Path(root, "coast_stations.feather")
if cfile.exists():
coast_stations = gpd.read_feather(cfile)
else:
queries = [
{
"stateCd": s.lower(),
"siteType": "ST-TS,ES",
"hasDataTypeCd": "dv",
"outputDataTypeCd": "dv",
}
for s in conus.loc[conus.coastal, "STUSPS"]
]
sites = nwis.get_info(queries, False)
coast_stations = gpd.GeoDataFrame(
sites,
geometry=gpd.points_from_xy(sites.dec_long_va, sites.dec_lat_va),
crs="epsg:4269",
)
coast_stations.to_feather(cfile)
st = coast_stations[["site_no", "site_tp_cd", "geometry"]].to_crs(CRS)
ts = st[st.site_tp_cd == "ST-TS"].drop_duplicates()
es = st[st.site_tp_cd == "ES"].drop_duplicates()
station_ids = ts.site_no.tolist() + es.site_no.tolist()
ax = conus.to_crs(CRS).plot(**BASE_PLOT)
ts.plot(ax=ax, lw=3, c="r")
es.plot(ax=ax, lw=3, c="g")
ax.legend([f"ST-TS ({ts.shape[0]})", f"ES ({es.shape[0]})"], loc="best")
ax.axis("off")
ax.margins(0)
import numpy as np
import pandas as pd
cfile = Path(root, "discharge.parquet")
dates = ("2000-01-01", "2015-12-31")
if cfile.exists():
discharge = pd.read_parquet(cfile)
else:
nwis = gh.NWIS()
discharge = nwis.get_streamflow(
station_ids,
dates,
)
discharge[discharge < 0] = np.nan
discharge.to_parquet(cfile)
ax = discharge.plot(legend=False, lw=0.8, figsize=(15, 6))
ax.set_ylabel("Q (cms)")
ax.set_xlabel("")
ax.margins(x=0)
import pynhd as nhd
nldi = nhd.NLDI()
from pygeoogc import ZeroMatched
from rich.progress import Progress
cfiles = list(Path(root).glob("flowline_main_*.feather"))
if all(c.exists() for c in cfiles):
flws_main = {f.stem.rsplit("_", 1)[-1]: gpd.read_feather(f) for f in cfiles}
else:
with Progress() as progress:
task = progress.add_task("Main Flowlines", total=len(station_ids))
for s in station_ids:
cfile = Path(root, f"flowline_main_{s}.feather")
if cfile.exists():
progress.update(task, description=f"ID: {s:<19}", advance=1)
continue
try:
flws_main = nldi.navigate_byid(
fsource="nwissite",
fid=f"USGS-{s}",
navigation="upstreamMain",
source="flowlines",
distance=2000,
)
flws_main.to_feather(cfile)
except (ConnectionError, ZeroMatched):
pass
progress.update(task, description=f"{s:<15}", advance=1)
flws_main = (
pd.concat(flws_main)
.reset_index()
.drop(columns="level_1")
.rename(columns={"level_0": "station"})
)
print(
"\n".join(
[
f"No. of missing stations: {len(station_ids) - len(cfiles)}/{len(station_ids)}",
f"No. of flowlines: {len(flws_main)}",
]
)
)
No. of missing stations: 105/405 No. of flowlines: 21293
ax = conus.to_crs(CRS).plot(**BASE_PLOT)
ts.plot(ax=ax, lw=1, c="r")
es.plot(ax=ax, lw=1, c="g")
flws_main.to_crs(CRS).plot(ax=ax, lw=2, color="b")
ax.axis("off")
ax.margins(0)