Skip to content
Merged
29 changes: 28 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ START = date(2018, 1, 1)
END = date(2018, 12, 31)

# Get nearby weather stations
stations = ms.nearby(POINT, limit=4)
stations = ms.stations.nearby(POINT, limit=4)

# Get daily data & perform interpolation
ts = ms.daily(stations, START, END)
Expand All @@ -73,6 +73,33 @@ Take a look at the expected output:

![2018 temperature data for Vancouver, BC][product-screenshot]

### 🎯 Spatial Interpolation

Meteostat supports multiple interpolation methods for estimating weather data at specific points:

```python
from datetime import datetime
import meteostat as ms

point = ms.Point(50.1155, 8.6842, 113) # Frankfurt, Germany
stations = ms.stations.nearby(point, limit=5)
ts = ms.hourly(stations, datetime(2020, 1, 1, 6), datetime(2020, 1, 1, 18))

# Auto method (default) - intelligently selects best method
df = ms.interpolate(ts, point, method="auto")

# Or choose a specific method:
# - "nearest": Nearest neighbor interpolation
# - "idw": Inverse Distance Weighting
df = ms.interpolate(ts, point, method="idw")
```

Each method has different characteristics:

- **Auto**: Adaptively selects between nearest neighbor and IDW based on station proximity
- **Nearest**: Fast, uses closest station's data (best for very close stations)
- **IDW**: Weighted average considering distance and elevation (good general-purpose method)

## 🤝 Contributing

Instructions on building and testing the Meteostat Python package can be found in the [documentation](https://dev.meteostat.net/python/contributing.html). More information about the Meteostat bulk data interface is available [here](https://dev.meteostat.net/bulk/).
Expand Down
195 changes: 195 additions & 0 deletions benchmarks/interpolation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,195 @@
"""
Benchmark script for interpolation methods

This script compares the performance of different interpolation methods
using real-world data from multiple locations.
"""

import time
from datetime import datetime
import pandas as pd
import meteostat as ms


def benchmark_location(point, location_name, start, end):
"""Benchmark interpolation methods for a specific location"""
print(f"\n{'=' * 60}")
print(f"Benchmarking: {location_name}")
print(
f"Point: Lat {point.latitude}, Lon {point.longitude}, Elev {point.elevation}m"
)
print(f"Period: {start} to {end}")
print(f"{'=' * 60}")

# Get nearby stations
stations = ms.stations.nearby(point, limit=10)
print(f"\nFound {len(stations)} nearby stations")
if len(stations) > 0:
print(f"Closest station: {stations.iloc[0]['distance']:.0f}m away")

# Fetch time series data
ts = ms.hourly(stations, start, end)
print(f"Time series data points: {len(ts)}")

if len(ts) == 0:
print("⚠️ No data available for this location/period")
return None

methods = {
"nearest": "Nearest Neighbor",
"idw": "Inverse Distance Weighting",
"auto": "Auto (Hybrid)",
}

results = {}

for method_key, method_name in methods.items():
print(f"\n{method_name}:")
try:
# Measure execution time
start_time = time.time()
df = ms.interpolate(ts, point, method=method_key)
end_time = time.time()

execution_time = (end_time - start_time) * 1000 # Convert to milliseconds

if df is not None and not df.empty:
# Calculate some statistics
temp_data = df["temp"] if "temp" in df.columns else None
prcp_data = df["prcp"] if "prcp" in df.columns else None

results[method_key] = {
"execution_time": execution_time,
"data_points": len(df),
"temp_mean": temp_data.mean() if temp_data is not None else None,
"temp_std": temp_data.std() if temp_data is not None else None,
"prcp_sum": prcp_data.sum() if prcp_data is not None else None,
}

print(f" ✓ Execution time: {execution_time:.2f} ms")
print(f" ✓ Data points: {len(df)}")
if temp_data is not None:
print(
f" ✓ Temp: mean={temp_data.mean():.1f}°C, std={temp_data.std():.1f}°C"
)
if prcp_data is not None:
print(f" ✓ Precipitation sum: {prcp_data.sum():.1f}mm")
else:
print(" ✗ No data returned")
results[method_key] = None

except Exception as e:
print(f" ✗ Error: {e}")
results[method_key] = None

return results


def compare_methods(results):
"""Compare results from different methods"""
print(f"\n{'=' * 60}")
print("Method Comparison")
print(f"{'=' * 60}")

# Filter out methods that returned no results
valid_results = {k: v for k, v in results.items() if v is not None}

if not valid_results:
print("No valid results to compare")
return

# Performance comparison
print("\n📊 Performance (Execution Time):")
times = [(k, v["execution_time"]) for k, v in valid_results.items()]
times.sort(key=lambda x: x[1])
for method, time_ms in times:
print(f" {method:15s}: {time_ms:6.2f} ms")

# Temperature comparison (if available)
temp_results = {
k: v for k, v in valid_results.items() if v["temp_mean"] is not None
}
if temp_results:
print("\n🌡️ Temperature Comparison:")
for method, data in temp_results.items():
print(
f" {method:15s}: mean={data['temp_mean']:6.2f}°C, std={data['temp_std']:5.2f}°C"
)

# Calculate variance between methods
temp_means = [v["temp_mean"] for v in temp_results.values()]
temp_variance = pd.Series(temp_means).std()
print(f" Variance between methods: {temp_variance:.2f}°C")


def main():
"""Run benchmarks for multiple locations"""
print("=" * 60)
print("Meteostat Interpolation Methods Benchmark")
print("=" * 60)

# Test locations with different characteristics
test_cases = [
{
"name": "Frankfurt, Germany (Flat terrain)",
"point": ms.Point(50.1155, 8.6842, 113),
"start": datetime(2020, 1, 1),
"end": datetime(2020, 1, 2),
},
{
"name": "Denver, USA (Mountainous)",
"point": ms.Point(39.7392, -104.9903, 1609),
"start": datetime(2020, 6, 1),
"end": datetime(2020, 6, 2),
},
{
"name": "Singapore (Tropical coastal)",
"point": ms.Point(1.3521, 103.8198, 15),
"start": datetime(2020, 7, 1),
"end": datetime(2020, 7, 2),
},
]

all_results = {}

for test_case in test_cases:
results = benchmark_location(
test_case["point"],
test_case["name"],
test_case["start"],
test_case["end"],
)

if results:
all_results[test_case["name"]] = results
compare_methods(results)

# Overall summary
print(f"\n{'=' * 60}")
print("Overall Summary")
print(f"{'=' * 60}")

# Average execution times across all locations
methods = ["nearest", "idw", "auto"]
avg_times = {}

for method in methods:
times = [
results[method]["execution_time"]
for results in all_results.values()
if results.get(method) is not None
]
if times:
avg_times[method] = sum(times) / len(times)

if avg_times:
print("\n📈 Average Execution Time Across All Locations:")
sorted_avg = sorted(avg_times.items(), key=lambda x: x[1])
for method, avg_time in sorted_avg:
print(f" {method:15s}: {avg_time:6.2f} ms")

print("\n✅ Benchmark complete!")


if __name__ == "__main__":
main()
40 changes: 36 additions & 4 deletions examples/point.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,41 @@
"""
Example: Point data interpolation using different methods

This example demonstrates how to interpolate weather data for a specific
geographic point using various interpolation methods.
"""

from datetime import datetime
import meteostat as ms

point = ms.Point(50, 8)
# Define the point of interest: Neu-Anspach, Germany
point = ms.Point(50.3167, 8.5, 320)

# Get nearby weather stations
stations = ms.stations.nearby(point, limit=5)
print(f"Found {len(stations)} nearby stations")

# Fetch hourly data for a specific time range
ts = ms.hourly(stations, datetime(2020, 1, 1, 6), datetime(2020, 1, 1, 18))

# Method 1: Auto (default) - Intelligently selects between nearest and IDW
print("\n=== Auto Method (Default) ===")
df_auto = ms.interpolate(ts, point, method="auto")
print(df_auto.head())

# Method 2: Nearest Neighbor - Uses closest station
print("\n=== Nearest Neighbor Method ===")
df_nearest = ms.interpolate(ts, point, method="nearest")
print(df_nearest.head())

ts = ms.hourly(point, datetime(2020, 1, 1, 6), datetime(2020, 1, 1, 18))
df = ms.interpolate(ts, point, method="nearets")
# Method 3: Inverse Distance Weighting (IDW) - Weighted average
print("\n=== IDW Method ===")
df_idw = ms.interpolate(ts, point, method="idw")
print(df_idw.head())

print(df)
# Compare temperature values from different methods
print("\n=== Temperature Comparison ===")
if "temp" in df_auto.columns:
print(f"Auto: {df_auto['temp'].mean():.2f}°C")
print(f"Nearest: {df_nearest['temp'].mean():.2f}°C")
print(f"IDW: {df_idw['temp'].mean():.2f}°C")
69 changes: 61 additions & 8 deletions meteostat/api/interpolate.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,29 @@
from typing import Callable, Optional
from typing import Callable, Optional, Union
import pandas as pd

from meteostat.api.point import Point
from meteostat.api.timeseries import TimeSeries
from meteostat.interpolation.lapserate import apply_lapse_rate
from meteostat.interpolation.nearest import nearest_neighbor
from meteostat.interpolation.idw import idw
from meteostat.interpolation.auto import auto_interpolate
from meteostat.utils.helpers import get_distance

# Mapping of method names to functions
METHOD_MAP = {
"nearest": nearest_neighbor,
"idw": idw,
"auto": auto_interpolate,
}


def interpolate(
ts: TimeSeries,
point: Point,
method: Callable[
[pd.DataFrame, TimeSeries, Point], Optional[pd.DataFrame]
] = nearest_neighbor,
lapse_rate=6.5,
method: Union[
str, Callable[[pd.DataFrame, TimeSeries, Point], Optional[pd.DataFrame]]
] = "auto",
lapse_rate: float = 6.5,
) -> Optional[pd.DataFrame]:
"""
Interpolate time series data spatially to a specific point.
Expand All @@ -25,15 +34,43 @@ def interpolate(
The time series to interpolate.
point : Point
The point to interpolate the data for.
method : str or callable, optional
Interpolation method to use. Can be a string name or a custom function.
Built-in methods:
- "auto" (default): Automatically select between nearest and IDW based on
spatial context. Uses nearest neighbor if closest station is within 5km
and 50m elevation difference, otherwise uses IDW.
- "nearest": Use the value from the nearest weather station.
- "idw": Inverse Distance Weighting - weighted average based on distance.
Custom functions should have signature:
func(df: pd.DataFrame, ts: TimeSeries, point: Point) -> pd.DataFrame
lapse_rate : float, optional
The lapse rate (temperature gradient) in degrees Celsius per
1000 meters of elevation gain. Default is 6.5.
Set to 0 or None to disable lapse rate adjustment.

Returns
-------
pd.DataFrame or None
A DataFrame containing the interpolated data for the specified point,
or None if no data is available.

Examples
--------
>>> from datetime import datetime
>>> import meteostat as ms
>>> point = ms.Point(50.1155, 8.6842, 113)
>>> stations = ms.stations.nearby(point, limit=5)
>>> ts = ms.hourly(stations, datetime(2020, 1, 1, 6), datetime(2020, 1, 1, 18))
>>> df = ms.interpolate(ts, point, method="auto")

>>> # Using IDW method
>>> df = ms.interpolate(ts, point, method="idw")

>>> # Using custom interpolation function
>>> def custom_method(df, ts, point):
... return df.groupby(pd.Grouper(level="time", freq=ts.freq)).mean()
>>> df = ms.interpolate(ts, point, method=custom_method)
"""
# Fetch DataFrame, filling missing values and adding location data
df = ts.fetch(fill=True, location=True)
Expand All @@ -51,12 +88,28 @@ def interpolate(
point.latitude, point.longitude, df["latitude"], df["longitude"]
)

# Resolve method to a callable
method_func: Callable[[pd.DataFrame, TimeSeries, Point], Optional[pd.DataFrame]]
if isinstance(method, str):
method_lower = method.lower()

resolved = METHOD_MAP.get(method_lower)
if resolved is None:
valid_methods = list(METHOD_MAP.keys())
raise ValueError(
f"Unknown method '{method}'. "
f"Valid methods are: {', '.join(valid_methods)}"
)
method_func = resolved # type: ignore[assignment]
else:
method_func = method # type: ignore[assignment]

# Interpolate
df = method(df, ts, point)
result = method_func(df, ts, point)

# If no data is returned, return None
if df is None:
if result is None or result.empty:
return None

# Drop location-related columns & return
return df.drop(["latitude", "longitude", "elevation", "distance"], axis=1)
return result.drop(["latitude", "longitude", "elevation", "distance"], axis=1)
Loading