Skip to content

Instantly share code, notes, and snippets.

@originalsouth
Created September 5, 2025 11:31
Show Gist options
  • Select an option

  • Save originalsouth/a1e66b8ec4e972d9b381c6c07a474dd1 to your computer and use it in GitHub Desktop.

Select an option

Save originalsouth/a1e66b8ec4e972d9b381c6c07a474dd1 to your computer and use it in GitHub Desktop.
Get daily weather data for stations throughout the Netherlands from 2023 onward
#!/usr/bin/env python3
# pip install meteostat pandas tqdm
from datetime import datetime, timezone
import time
import pandas as pd
from meteostat import Stations, Daily
from tqdm import tqdm
# Lower bound of the download window (timezone-naive).
START = datetime(year=2023, month=1, day=1)
# Upper bound: the current UTC time with tzinfo stripped, so it is naive like START.
END = datetime.now(tz=timezone.utc).replace(tzinfo=None)
# Path of the CSV file that main() writes.
OUT_CSV = "nl_daily_temps_2023_2025.csv"
def get_nl_stations() -> pd.DataFrame:
    """Load the Meteostat station list for region "NL".

    Returns:
        A frame indexed by station id (cast to ``str``) with the columns
        ``name``, ``country``, ``region``, ``latitude``, ``longitude`` and
        ``elevation``, plus a ``station`` column that mirrors the index.

    Raises:
        RuntimeError: If the fetched station list is empty.
    """
    fetched = Stations().region("NL").fetch()
    if fetched.empty:
        raise RuntimeError("Could not load Netherlands stations from Meteostat.")

    # Keep only the geo metadata columns; copy so the fetched frame is not mutated.
    wanted = ["name", "country", "region", "latitude", "longitude", "elevation"]
    meta = fetched[wanted].copy()

    # Station ids become strings, both as the index and as a regular column.
    meta.index = meta.index.astype(str)
    meta["station"] = meta.index
    return meta
def fetch_daily_for_station(station_id: str) -> pd.DataFrame:
    """Fetch the daily temperature series of one station for START..END.

    Args:
        station_id: Station identifier passed to ``Daily``.

    Returns:
        A frame with the columns ``station``, ``time``, ``tmin``, ``tmax`` and
        ``tavg``. It has no rows when the fetch yields ``None`` or an empty
        frame. A temperature column missing from the fetched data is filled
        with ``pd.NA``.
    """
    temp_cols = ("tmin", "tmax", "tavg")
    fetched = Daily(station_id, start=START, end=END).fetch()

    if fetched is None or fetched.empty:
        return pd.DataFrame(columns=["station", "time", *temp_cols])

    # Guarantee all three temperature columns exist before selecting them.
    for col in temp_cols:
        if col not in fetched.columns:
            fetched[col] = pd.NA

    # reset_index() turns the index into a regular column so "time" can be selected.
    result = fetched.reset_index()[["time", *temp_cols]]
    result.insert(0, "station", str(station_id))  # station id is always stored as str
    return result
def main():
    """Download daily temperatures for every NL station and write one CSV.

    Each station is fetched in turn. A station whose fetch raises is reported
    with a warning and skipped. The collected rows are joined with the station
    metadata, sorted by station and time, and written to ``OUT_CSV``.

    Raises:
        RuntimeError: If no station produced any rows.
    """
    stations = get_nl_stations()  # carries a 'station' column to merge on
    ids = stations["station"].tolist()

    collected = []
    with tqdm(total=len(ids), desc="Downloading daily data") as progress:
        for station_id in ids:
            try:
                station_daily = fetch_daily_for_station(station_id)
                if not station_daily.empty:
                    collected.append(station_daily)
            except Exception as err:
                # Best effort: one failing station must not abort the whole run.
                tqdm.write(f"Warning: {station_id} failed: {err}")
            finally:
                progress.update(1)
                time.sleep(0.03)  # short pause between consecutive requests

    if not collected:
        raise RuntimeError("No data retrieved from Meteostat for NL stations.")

    ordered_columns = [
        "station", "name", "country", "region", "latitude", "longitude", "elevation",
        "time", "tmin", "tmax", "tavg",
    ]
    merged = pd.concat(collected, ignore_index=True).merge(
        stations, on="station", how="left"
    )
    out = merged[ordered_columns].sort_values(["station", "time"])
    out.to_csv(OUT_CSV, index=False)
    print(f"✅ Wrote {len(out):,} rows across {out['station'].nunique()} stations → {OUT_CSV}")
# Run the download only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment