Created
January 31, 2020 01:21
-
-
Save boxfrommars/efdb3bfc2b9787db60508c8e0885ac25 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "code", | |
| "execution_count": 1, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "from math import radians, sin, cos, asin, acos, sqrt, pi, atan, atan2, fabs\n", | |
| "from time import time\n", | |
| "from geopy.distance import vincenty, geodesic, EARTH_RADIUS\n", | |
| "import pyproj\n", | |
| "from geographiclib.geodesic import Geodesic, Constants\n", | |
| "from pygeodesy.ellipsoidalVincenty import LatLon\n", | |
| "\n", | |
| "import pandas as pd\n", | |
| "import numpy as np" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 2, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "EARTH_MEAN_RADIUS = EARTH_RADIUS * 1000" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "### Haversine" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 3, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "def haversine_np(df: pd.DataFrame):\n", | |
| " lon1 = np.radians(df[['src_lon']].values)\n", | |
| " lat1 = np.radians(df[['src_lat']].values)\n", | |
| " lon2 = np.radians(df[['dst_lon']].values)\n", | |
| " lat2 = np.radians(df[['dst_lat']].values)\n", | |
| "\n", | |
| " dlon = np.abs(lon1 - lon2)\n", | |
| " dlat = np.abs(lat1 - lat2)\n", | |
| "\n", | |
| " numerator = np.sqrt(\n", | |
| " (np.cos(lat2) * np.sin(dlon))**2 +\n", | |
| " (np.cos(lat1) * np.sin(lat2) - np.sin(lat1) * np.cos(lat2) * np.cos(dlon))**2)\n", | |
| " \n", | |
| "\n", | |
| " denominator = np.sin(lat1) * np.sin(lat2) + np.cos(lat1) * np.cos(lat2) * np.cos(dlon)\n", | |
| "\n", | |
| " c = np.arctan2(numerator, denominator)\n", | |
| " \n", | |
| " return np.reshape(c, -1) * EARTH_MEAN_RADIUS" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 4, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "def haversine_2(lat1, lon1, lat2, lon2):\n", | |
| " lat1 = radians(lat1)\n", | |
| " lon1 = radians(lon1)\n", | |
| " lat2 = radians(lat2) \n", | |
| " lon2 = radians(lon2)\n", | |
| "\n", | |
| " dlon = fabs(lon1 - lon2)\n", | |
| " dlat = fabs(lat1 - lat2)\n", | |
| "\n", | |
| " numerator = sqrt(\n", | |
| " (cos(lat2)*sin(dlon))**2 +\n", | |
| " ((cos(lat1)*sin(lat2)) - (sin(lat1)*cos(lat2)*cos(dlon)))**2)\n", | |
| "\n", | |
| " denominator = (\n", | |
| " (sin(lat1)*sin(lat2)) +\n", | |
| " (cos(lat1)*cos(lat2)*cos(dlon)))\n", | |
| "\n", | |
| " c = atan2(numerator, denominator)\n", | |
| " \n", | |
| " return EARTH_MEAN_RADIUS * c\n", | |
| "\n", | |
| "def haversine_v(df):\n", | |
| " columns = ['src_lat', 'src_lon', 'dst_lat', 'dst_lon']\n", | |
| " \n", | |
| " return np.array([haversine_2(x[0], x[1], x[2], x[3]) for x in df[columns].values])" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "### Proj" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 5, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "def proj_geodesic(df): \n", | |
| " # https://pyproj4.github.io/pyproj/dev/examples.html#geodesic-calculations\n", | |
| " # https://proj.org/geodesic.html#background\n", | |
| " # https://pyproj4.github.io/pyproj/dev/_modules/pyproj/geod.html#Geod.inv\n", | |
| " _az12, _az21, distance = pyproj.Geod(ellps='WGS84').inv(\n", | |
| " df[['src_lon']].values, \n", | |
| " df[['src_lat']].values, \n", | |
| " df[['dst_lon']].values, \n", | |
| " df[['dst_lat']].values\n", | |
| " )\n", | |
| " \n", | |
| " return np.reshape(distance, -1)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "### Geopy Karney" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 6, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "def geopy_geodesic(df: pd.DataFrame) -> list: \n", | |
| " columns = ['src_lat', 'src_lon', 'dst_lat', 'dst_lon']\n", | |
| " v_dist = [geodesic((x[0], x[1]), (x[2], x[3])).m\n", | |
| " for x in df[columns].values]\n", | |
| " return np.array(v_dist)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "### Geopy Vincenty (current)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 7, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "def geopy_vincenty(df: pd.DataFrame) -> list:\n", | |
| " columns = ['src_lat', 'src_lon', 'dst_lat', 'dst_lon']\n", | |
| " v_dist = [vincenty((x[0], x[1]), (x[2], x[3])).m\n", | |
| " for x in df[columns].values]\n", | |
| " return np.array(v_dist)\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "### Tests" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 8, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "df_size = 100000\n", | |
| "\n", | |
| "# Moscow boundary box\n", | |
| "west_lon = 37.37\n", | |
| "east_lon = 37.84\n", | |
| "north_lat = 55.91\n", | |
| "south_lat = 55.57\n", | |
| "\n", | |
| "test_df = pd.DataFrame({\n", | |
| " 'id': range(df_size),\n", | |
| " 'src_lon': west_lon + (east_lon - west_lon) * np.random.rand(df_size),\n", | |
| " 'src_lat': south_lat + (north_lat - south_lat) * np.random.rand(df_size),\n", | |
| " 'dst_lon': west_lon + (east_lon - west_lon) * np.random.rand(df_size),\n", | |
| " 'dst_lat': south_lat + (north_lat - south_lat) * np.random.rand(df_size),\n", | |
| "})\n", | |
| "\n", | |
| "actual_dist = geopy_geodesic(test_df)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 9, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "def test_wrap(method, method_name):\n", | |
| " t = time()\n", | |
| " result = method(test_df)\n", | |
| " time_ms = ((time() - t) * 1e6) / test_df.shape[0]\n", | |
| " \n", | |
| " diffs = np.abs(actual_dist - result) * 100 / actual_dist\n", | |
| " \n", | |
| " return {\n", | |
| " 'name': method_name,\n", | |
| " 'time_ms': round(time_ms, 3),\n", | |
| " 'err_mean': str(np.round(np.mean(diffs), 3)) + '%',\n", | |
| " 'err_min': str(np.round(np.min(diffs), 3)) + '%',\n", | |
| " 'err_max': str(np.round(np.max(diffs), 3)) + '%',\n", | |
| " 'op/s': round(1e6 / time_ms)\n", | |
| " }" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 10, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "/Users/fasten806/miniconda3/envs/geo/lib/python3.7/site-packages/ipykernel_launcher.py:4: DeprecationWarning: Vincenty is deprecated and is going to be removed in geopy 2.0. Use `geopy.distance.geodesic` (or the default `geopy.distance.distance`) instead, which is more accurate and always converges.\n", | |
| " after removing the cwd from sys.path.\n" | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>name</th>\n", | |
| " <th>time_ms</th>\n", | |
| " <th>err_mean</th>\n", | |
| " <th>err_min</th>\n", | |
| " <th>err_max</th>\n", | |
| " <th>op/s</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>Haversine</td>\n", | |
| " <td>3.346</td>\n", | |
| " <td>0.221%</td>\n", | |
| " <td>0.125%</td>\n", | |
| " <td>0.341%</td>\n", | |
| " <td>298899</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>Haversine Numpy</td>\n", | |
| " <td>0.169</td>\n", | |
| " <td>0.221%</td>\n", | |
| " <td>0.125%</td>\n", | |
| " <td>0.341%</td>\n", | |
| " <td>5927256</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>Proj Karney</td>\n", | |
| " <td>0.923</td>\n", | |
| " <td>0.0%</td>\n", | |
| " <td>0.0%</td>\n", | |
| " <td>0.0%</td>\n", | |
| " <td>1083139</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>Geopy Vincenty (Current)</td>\n", | |
| " <td>35.573</td>\n", | |
| " <td>0.0%</td>\n", | |
| " <td>0.0%</td>\n", | |
| " <td>0.0%</td>\n", | |
| " <td>28111</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>Geopy Karney</td>\n", | |
| " <td>252.295</td>\n", | |
| " <td>0.0%</td>\n", | |
| " <td>0.0%</td>\n", | |
| " <td>0.0%</td>\n", | |
| " <td>3964</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " name time_ms err_mean err_min err_max op/s\n", | |
| "0 Haversine 3.346 0.221% 0.125% 0.341% 298899\n", | |
| "1 Haversine Numpy 0.169 0.221% 0.125% 0.341% 5927256\n", | |
| "2 Proj Karney 0.923 0.0% 0.0% 0.0% 1083139\n", | |
| "3 Geopy Vincenty (Current) 35.573 0.0% 0.0% 0.0% 28111\n", | |
| "4 Geopy Karney 252.295 0.0% 0.0% 0.0% 3964" | |
| ] | |
| }, | |
| "execution_count": 10, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "metrics = []\n", | |
| "\n", | |
| "metrics.append(test_wrap(haversine_v, 'Haversine'))\n", | |
| "metrics.append(test_wrap(haversine_np, 'Haversine Numpy'))\n", | |
| "metrics.append(test_wrap(proj_geodesic, 'Proj Karney'))\n", | |
| "metrics.append(test_wrap(geopy_vincenty, 'Geopy Vincenty (Current)'))\n", | |
| "metrics.append(test_wrap(geopy_geodesic, 'Geopy Karney'))\n", | |
| "\n", | |
| "pd.DataFrame(metrics)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 3", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.7.6" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 2 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment