Skip to content

Instantly share code, notes, and snippets.

@boxfrommars
Created January 31, 2020 01:21
Show Gist options
  • Select an option

  • Save boxfrommars/efdb3bfc2b9787db60508c8e0885ac25 to your computer and use it in GitHub Desktop.

Select an option

Save boxfrommars/efdb3bfc2b9787db60508c8e0885ac25 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from math import radians, sin, cos, asin, acos, sqrt, pi, atan, atan2, fabs\n",
"from time import time\n",
"from geopy.distance import vincenty, geodesic, EARTH_RADIUS\n",
"import pyproj\n",
"from geographiclib.geodesic import Geodesic, Constants\n",
"from pygeodesy.ellipsoidalVincenty import LatLon\n",
"\n",
"import pandas as pd\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"EARTH_MEAN_RADIUS = EARTH_RADIUS * 1000"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Haversine"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"def haversine_np(df: pd.DataFrame):\n",
" lon1 = np.radians(df[['src_lon']].values)\n",
" lat1 = np.radians(df[['src_lat']].values)\n",
" lon2 = np.radians(df[['dst_lon']].values)\n",
" lat2 = np.radians(df[['dst_lat']].values)\n",
"\n",
" dlon = np.abs(lon1 - lon2)\n",
" dlat = np.abs(lat1 - lat2)\n",
"\n",
" numerator = np.sqrt(\n",
" (np.cos(lat2) * np.sin(dlon))**2 +\n",
" (np.cos(lat1) * np.sin(lat2) - np.sin(lat1) * np.cos(lat2) * np.cos(dlon))**2)\n",
" \n",
"\n",
" denominator = np.sin(lat1) * np.sin(lat2) + np.cos(lat1) * np.cos(lat2) * np.cos(dlon)\n",
"\n",
" c = np.arctan2(numerator, denominator)\n",
" \n",
" return np.reshape(c, -1) * EARTH_MEAN_RADIUS"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"def haversine_2(lat1, lon1, lat2, lon2):\n",
" lat1 = radians(lat1)\n",
" lon1 = radians(lon1)\n",
" lat2 = radians(lat2) \n",
" lon2 = radians(lon2)\n",
"\n",
" dlon = fabs(lon1 - lon2)\n",
" dlat = fabs(lat1 - lat2)\n",
"\n",
" numerator = sqrt(\n",
" (cos(lat2)*sin(dlon))**2 +\n",
" ((cos(lat1)*sin(lat2)) - (sin(lat1)*cos(lat2)*cos(dlon)))**2)\n",
"\n",
" denominator = (\n",
" (sin(lat1)*sin(lat2)) +\n",
" (cos(lat1)*cos(lat2)*cos(dlon)))\n",
"\n",
" c = atan2(numerator, denominator)\n",
" \n",
" return EARTH_MEAN_RADIUS * c\n",
"\n",
"def haversine_v(df):\n",
" columns = ['src_lat', 'src_lon', 'dst_lat', 'dst_lon']\n",
" \n",
" return np.array([haversine_2(x[0], x[1], x[2], x[3]) for x in df[columns].values])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Proj"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"def proj_geodesic(df): \n",
" # https://pyproj4.github.io/pyproj/dev/examples.html#geodesic-calculations\n",
" # https://proj.org/geodesic.html#background\n",
" # https://pyproj4.github.io/pyproj/dev/_modules/pyproj/geod.html#Geod.inv\n",
" _az12, _az21, distance = pyproj.Geod(ellps='WGS84').inv(\n",
" df[['src_lon']].values, \n",
" df[['src_lat']].values, \n",
" df[['dst_lon']].values, \n",
" df[['dst_lat']].values\n",
" )\n",
" \n",
" return np.reshape(distance, -1)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Geopy Karney"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"def geopy_geodesic(df: pd.DataFrame) -> list: \n",
" columns = ['src_lat', 'src_lon', 'dst_lat', 'dst_lon']\n",
" v_dist = [geodesic((x[0], x[1]), (x[2], x[3])).m\n",
" for x in df[columns].values]\n",
" return np.array(v_dist)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Geopy Vincenty (current)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"def geopy_vincenty(df: pd.DataFrame) -> list:\n",
" columns = ['src_lat', 'src_lon', 'dst_lat', 'dst_lon']\n",
" v_dist = [vincenty((x[0], x[1]), (x[2], x[3])).m\n",
" for x in df[columns].values]\n",
" return np.array(v_dist)\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Tests"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"df_size = 100000\n",
"\n",
"# Moscow boundary box\n",
"west_lon = 37.37\n",
"east_lon = 37.84\n",
"north_lat = 55.91\n",
"south_lat = 55.57\n",
"\n",
"test_df = pd.DataFrame({\n",
" 'id': range(df_size),\n",
" 'src_lon': west_lon + (east_lon - west_lon) * np.random.rand(df_size),\n",
" 'src_lat': south_lat + (north_lat - south_lat) * np.random.rand(df_size),\n",
" 'dst_lon': west_lon + (east_lon - west_lon) * np.random.rand(df_size),\n",
" 'dst_lat': south_lat + (north_lat - south_lat) * np.random.rand(df_size),\n",
"})\n",
"\n",
"actual_dist = geopy_geodesic(test_df)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"def test_wrap(method, method_name):\n",
" t = time()\n",
" result = method(test_df)\n",
" time_ms = ((time() - t) * 1e6) / test_df.shape[0]\n",
" \n",
" diffs = np.abs(actual_dist - result) * 100 / actual_dist\n",
" \n",
" return {\n",
" 'name': method_name,\n",
" 'time_ms': round(time_ms, 3),\n",
" 'err_mean': str(np.round(np.mean(diffs), 3)) + '%',\n",
" 'err_min': str(np.round(np.min(diffs), 3)) + '%',\n",
" 'err_max': str(np.round(np.max(diffs), 3)) + '%',\n",
" 'op/s': round(1e6 / time_ms)\n",
" }"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/fasten806/miniconda3/envs/geo/lib/python3.7/site-packages/ipykernel_launcher.py:4: DeprecationWarning: Vincenty is deprecated and is going to be removed in geopy 2.0. Use `geopy.distance.geodesic` (or the default `geopy.distance.distance`) instead, which is more accurate and always converges.\n",
" after removing the cwd from sys.path.\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>name</th>\n",
" <th>time_ms</th>\n",
" <th>err_mean</th>\n",
" <th>err_min</th>\n",
" <th>err_max</th>\n",
" <th>op/s</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Haversine</td>\n",
" <td>3.346</td>\n",
" <td>0.221%</td>\n",
" <td>0.125%</td>\n",
" <td>0.341%</td>\n",
" <td>298899</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Haversine Numpy</td>\n",
" <td>0.169</td>\n",
" <td>0.221%</td>\n",
" <td>0.125%</td>\n",
" <td>0.341%</td>\n",
" <td>5927256</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Proj Karney</td>\n",
" <td>0.923</td>\n",
" <td>0.0%</td>\n",
" <td>0.0%</td>\n",
" <td>0.0%</td>\n",
" <td>1083139</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Geopy Vincenty (Current)</td>\n",
" <td>35.573</td>\n",
" <td>0.0%</td>\n",
" <td>0.0%</td>\n",
" <td>0.0%</td>\n",
" <td>28111</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Geopy Karney</td>\n",
" <td>252.295</td>\n",
" <td>0.0%</td>\n",
" <td>0.0%</td>\n",
" <td>0.0%</td>\n",
" <td>3964</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" name time_ms err_mean err_min err_max op/s\n",
"0 Haversine 3.346 0.221% 0.125% 0.341% 298899\n",
"1 Haversine Numpy 0.169 0.221% 0.125% 0.341% 5927256\n",
"2 Proj Karney 0.923 0.0% 0.0% 0.0% 1083139\n",
"3 Geopy Vincenty (Current) 35.573 0.0% 0.0% 0.0% 28111\n",
"4 Geopy Karney 252.295 0.0% 0.0% 0.0% 3964"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"metrics = []\n",
"\n",
"metrics.append(test_wrap(haversine_v, 'Haversine'))\n",
"metrics.append(test_wrap(haversine_np, 'Haversine Numpy'))\n",
"metrics.append(test_wrap(proj_geodesic, 'Proj Karney'))\n",
"metrics.append(test_wrap(geopy_vincenty, 'Geopy Vincenty (Current)'))\n",
"metrics.append(test_wrap(geopy_geodesic, 'Geopy Karney'))\n",
"\n",
"pd.DataFrame(metrics)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment