Source code for pymove.utils.distances

"""
Distances operations.

haversine,
euclidean_distance_in_meters,
nearest_points,
medp,
medt

"""
from __future__ import annotations

import numpy as np
import pandas as pd
from numpy import ndarray
from pandas.core.frame import DataFrame
from scipy.spatial import distance

from pymove import utils
from pymove.utils.constants import DATETIME, EARTH_RADIUS, LATITUDE, LONGITUDE


def haversine(
    lat1: float | ndarray,
    lon1: float | ndarray,
    lat2: float | ndarray,
    lon2: float | ndarray,
    to_radians: bool = True,
    earth_radius: float = EARTH_RADIUS
) -> float | ndarray:
    """
    Calculates the great circle distance between two points on the earth.

    Coordinates are given in decimal degrees (default) or in radians.
    All (lat, lon) coordinates must have numeric dtypes. When
    `to_radians` is True, scalars and arrays may be mixed freely as long
    as their shapes broadcast together, e.g. one reference point against
    an array of points.

    The value returned is `earth_radius` times the central angle,
    multiplied by 1000: a radius expressed in kilometers gives a result
    in meters. The factor of 1000 is always applied, so a radius in
    miles (3956) gives thousandths of a mile.

    Parameters
    ----------
    lat1 : float or array
        latitude of point 1
    lon1 : float or array
        longitude of point 1
    lat2 : float or array
        latitude of point 2
    lon2 : float or array
        longitude of point 2
    to_radians : boolean
        Whether to convert the values from degrees to radians,
        by default True
    earth_radius : float
        Radius of sphere, by default EARTH_RADIUS

    Returns
    -------
    float or ndarray
        Represents distance between points in meters

    Example
    -------
    >>> from pymove.utils.distances import haversine
    >>> lat_fortaleza, lon_fortaleza = [-3.71839 ,-38.5434]
    >>> lat_quixada, lon_quixada = [-4.979224744401671, -39.056434302570665]
    >>> haversine(lat_fortaleza, lon_fortaleza, lat_quixada, lon_quixada)
    151298.02548428564

    References
    ----------
    Vectorized haversine function:
        https://stackoverflow.com/questions/43577086/pandas-calculate-haversine-distance-within-each-group-of-rows
    About distance between two points:
        https://janakiev.com/blog/gps-points-distance-python/

    """
    if to_radians:
        # Convert every coordinate on its own: stacking the four inputs
        # into a single array fails when scalars and arrays are mixed.
        lat1, lon1, lat2, lon2 = (
            np.radians(np.asarray(coord))
            for coord in (lat1, lon1, lat2, lon2)
        )

    a = (
        np.sin((lat2 - lat1) / 2.0) ** 2
        + np.cos(lat1) * np.cos(lat2) * np.sin((lon2 - lon1) / 2.0) ** 2
    )
    # Rounding can push `a` marginally above 1 for (near-)antipodal
    # points, which would make the square root of (1 - a) NaN.
    a = np.minimum(a, 1.0)

    return (earth_radius * 2 * np.arctan2(a ** 0.5, (1 - a) ** 0.5)) * 1000
def euclidean_distance_in_meters(
    lat1: float | ndarray,
    lon1: float | ndarray,
    lat2: float | ndarray,
    lon2: float | ndarray
) -> float | ndarray:
    """
    Calculate the euclidean distance in meters between two points.

    Latitudes and longitudes are first mapped to planar y/x coordinates
    with `utils.conversions.lat_to_y_spherical` and
    `utils.conversions.lon_to_x_spherical`; the result is the straight
    line distance between the two projected points.

    Parameters
    ----------
    lat1 : float or array
        latitude of point 1
    lon1 : float or array
        longitude of point 1
    lat2 : float or array
        latitude of point 2
    lon2 : float or array
        longitude of point 2

    Returns
    -------
    float or ndarray
        euclidean distance in meters between the two points.

    Example
    -------
    >>> from pymove.utils.distances import euclidean_distance_in_meters
    >>> lat_fortaleza, lon_fortaleza = [-3.71839 ,-38.5434]
    >>> lat_quixada, lon_quixada = [-4.979224744401671, -39.056434302570665]
    >>> euclidean_distance_in_meters(
    ...     lat_fortaleza, lon_fortaleza, lat_quixada, lon_quixada
    ... )
    151907.9670136588

    """
    project_lat = utils.conversions.lat_to_y_spherical
    project_lon = utils.conversions.lon_to_x_spherical

    # Differences along each projected axis.
    delta_y = project_lat(lat=lat1) - project_lat(lat=lat2)
    delta_x = project_lon(lon=lon1) - project_lon(lon=lon2)

    return (delta_y**2 + delta_x**2)**0.5
def nearest_points(
    traj1: DataFrame,
    traj2: DataFrame,
    latitude: str = LATITUDE,
    longitude: str = LONGITUDE,
) -> DataFrame:
    """
    Returns, for each point of traj1, the closest point of traj2.

    Closeness is the Euclidean distance computed directly on the
    latitude and longitude values. On ties the first point of traj2
    (in row order) wins. A candidate whose distance is NaN or infinite
    is never selected; a point of traj1 without any valid candidate
    contributes no row to the result.

    Parameters
    ----------
    traj1: dataframe
        The input of one trajectory.
    traj2: dataframe
        The input of another trajectory.
    latitude: str, optional
        Label of the trajectories dataframe referring to the latitude,
        by default LATITUDE
    longitude: str, optional
        Label of the trajectories dataframe referring to the longitude,
        by default LONGITUDE

    Returns
    -------
    DataFrame
        dataframe with closest points: rows of traj2 (keeping their
        index labels), one per matched point of traj1, in traj1 order.
        Columns are those of traj1 followed by any column only present
        in traj2. If either trajectory has no rows, an empty dataframe
        with the columns of traj1 is returned.

    Example
    -------
    >>> from pymove.utils.distances import nearest_points
    >>> df_a
             lat         lon             datetime  id
    0  39.984198  116.319322  2008-10-23 05:53:06   1
    1  39.984224  116.319402  2008-10-23 05:53:11   1
    >>> df_b
             lat         lon             datetime  id
    0  39.984211  116.319389  2008-10-23 05:53:16   1
    1  39.984217  116.319422  2008-10-23 05:53:21   1
    >>> nearest_points(df_a,df_b)
             lat         lon             datetime  id
    0  39.984211  116.319389  2008-10-23 05:53:16   1
    0  39.984211  116.319389  2008-10-23 05:53:16   1

    """
    if len(traj1) == 0 or len(traj2) == 0:
        return pd.DataFrame(columns=traj1.columns)

    candidate_lat = traj2[latitude].to_numpy(dtype=float)
    candidate_lon = traj2[longitude].to_numpy(dtype=float)

    # Positional index in traj2 of the nearest neighbour of each traj1 point.
    closest = []
    for lat, lon in zip(
        traj1[latitude].to_numpy(dtype=float),
        traj1[longitude].to_numpy(dtype=float),
    ):
        # One vectorised pass over traj2 instead of a nested row loop.
        dists = np.hypot(candidate_lat - lat, candidate_lon - lon)
        # An undefined distance can never be the minimum.
        dists[np.isnan(dists)] = np.inf
        best = int(dists.argmin())  # argmin keeps the first of tied minima
        if np.isfinite(dists[best]):
            closest.append(best)

    # Column layout: traj1's columns first, then the ones only traj2 has.
    only_in_traj2 = [
        col for col in traj2.columns if col not in traj1.columns
    ]
    columns = list(traj1.columns) + only_in_traj2

    # A single positional selection replaces the row-by-row
    # `DataFrame.append`, which no longer exists in pandas >= 2.0.
    return traj2.iloc[closest].reindex(columns=columns)
def medp(
    traj1: DataFrame,
    traj2: DataFrame,
    latitude: str = LATITUDE,
    longitude: str = LONGITUDE
) -> float:
    """
    Returns the Mean Euclidian Distance Predictive between two trajectories.

    Considers only the spatial dimension for the similarity measure.
    Every point of traj1 is paired with its nearest point of traj2
    (see `nearest_points`) and the Euclidean distances of all pairs are
    added up. Note that the value returned is this sum; it is not
    divided by the number of points.

    Parameters
    ----------
    traj1: dataframe
        The input of one trajectory.
    traj2: dataframe
        The input of another trajectory.
    latitude: str, optional
        Label of the trajectories dataframe referring to the latitude,
        by default LATITUDE
    longitude: str, optional
        Label of the trajectories dataframe referring to the longitude,
        by default LONGITUDE

    Returns
    -------
    float
        total distance

    Example
    -------
    >>> from pymove.utils.distances import medp
    >>> traj_1
            lat          lon              datetime  id
    0  39.98471   116.319865   2008-10-23 05:53:23   1
    >>> traj_2
            lat          lon              datetime  id
    0  39.984674   116.31981   2008-10-23 05:53:28   1
    >>> medp(traj_1, traj_2)
    6.573431370981577e-05

    """
    neighbours = nearest_points(traj1, traj2, latitude, longitude)

    total = 0
    # Walk both frames in lockstep: the n-th row of `neighbours` is the
    # match found for the n-th row of traj1.
    paired_rows = zip(traj1.iterrows(), neighbours.iterrows())
    for (_, point), (_, neighbour) in paired_rows:
        origin = (point[latitude], point[longitude])
        target = (neighbour[latitude], neighbour[longitude])
        total = total + distance.euclidean(origin, target)

    return total
def medt(
    traj1: DataFrame,
    traj2: DataFrame,
    latitude: str = LATITUDE,
    longitude: str = LONGITUDE,
    datetime: str = DATETIME
) -> float:
    """
    Returns the Mean Euclidian Distance Trajectory between two trajectories.

    Considers the spatial dimension and the temporal dimension
    when measuring similarity.

    Points are paired by position. Each pair contributes the Euclidean
    distance between its (latitude, longitude, scaled time) triples,
    where the scaled time is the value of
    `utils.datetime.timestamp_to_millis` divided by 1e9. Every point of
    the longer trajectory that has no partner contributes its scaled
    time alone. Note that the value returned is the sum of these
    contributions; it is not divided by the number of points.

    Parameters
    ----------
    traj1: dataframe
        The input of one trajectory.
    traj2: dataframe
        The input of another trajectory.
    latitude: str, optional
        Label of the trajectories dataframe referring to the latitude,
        by default LATITUDE
    longitude: str, optional
        Label of the trajectories dataframe referring to the longitude,
        by default LONGITUDE
    datetime: str, optional
        Label of the trajectories dataframe referring to the timestamp,
        by default DATETIME

    Returns
    -------
    float
        total distance

    Example
    -------
    >>> from pymove.utils.distances import medt
    >>> traj_1
            lat          lon              datetime  id
    0  39.98471   116.319865   2008-10-23 05:53:23   1
    >>> traj_2
            lat          lon              datetime  id
    0  39.984674   116.31981   2008-10-23 05:53:28   1
    >>> medt(traj_1, traj_2)
    6.592419887747872e-05

    """
    # Scales the time axis down so it is comparable to the coordinates.
    proportion = 1000000000

    def _scaled_time(traj: DataFrame, position: int) -> float:
        """Time of the point at `position`, divided by `proportion`."""
        millis = utils.datetime.timestamp_to_millis(
            traj[datetime].iloc[position]
        )
        return float(millis) / proportion

    # Make traj1 the shorter trajectory so that the unmatched tail,
    # if any, always belongs to traj2.
    if len(traj2) < len(traj1):
        traj1, traj2 = traj2, traj1
    n_paired = len(traj1)

    soma = 0.
    for i in range(n_paired):
        soma = soma + distance.euclidean(
            (
                traj1[latitude].iloc[i],
                traj1[longitude].iloc[i],
                _scaled_time(traj1, i),
            ),
            (
                traj2[latitude].iloc[i],
                traj2[longitude].iloc[i],
                _scaled_time(traj2, i),
            ),
        )

    # The tail starts at index `n_paired`: starting one later would
    # silently drop the first unmatched point of traj2.
    for j in range(n_paired, len(traj2)):
        soma = soma + _scaled_time(traj2, j)

    return soma