Source code for pymove.utils.geoutils

"""
Geo operations.

v_color,
create_geohash_df,
create_bin_geohash_df,
decode_geohash_to_latlon,

"""
from __future__ import annotations

import geohash2 as gh
import numpy as np
from numpy import ndarray
from pandas import DataFrame
from shapely.geometry.base import BaseGeometry

from pymove.utils.constants import (
    BASE_32,
    BIN_GEOHASH,
    COLORS,
    GEOHASH,
    LATITUDE,
    LATITUDE_DECODE,
    LONGITUDE,
    LONGITUDE_DECODE,
)
from pymove.utils.log import progress_bar

# One bit array per position in BASE_32: entry ``i`` holds the binary digits of
# ``i``, zero-padded to five places, most significant bit first.
BINARY = [
    np.array(list(format(position, '05b')), dtype=int)
    for position in range(len(BASE_32))
]
# Lookup table from each BASE_32 symbol to its bit array in BINARY.
BASE_32_TO_BIN = {symbol: bits for symbol, bits in zip(BASE_32, BINARY)}


def v_color(ob: BaseGeometry) -> str:
    """
    Pick a color for a geometry depending on whether it is simple.

    The color is read from ``COLORS`` at key ``ob.is_simple + 33``. According
    to the examples below, a geometry that crosses itself yields '#ffcc33'
    and any other geometry yields '#6699cc'.

    Parameters
    ----------
    ob : geometry object
        Any geometric object exposing ``is_simple``

    Return
    ------
    str
        Geometric object color

    Example
    -------
    >>> from pymove.utils.geoutils import v_color
    >>> from shapely.geometry import LineString
    >>> case_1 = LineString([(3,3),(4,4), (3,4)])
    >>> case_2 = LineString([(3,3),(4,4), (4,3)])
    >>> case_3 = LineString([(3,3),(4,4), (3,4), (4,3)])
    >>> print(v_color(case_1), type(v_color(case_1)))
    #6699cc <class 'str'>
    >>> print(v_color(case_2), type(v_color(case_2)))
    #6699cc <class 'str'>
    >>> print(v_color(case_3), type(v_color(case_3)))
    #ffcc33 <class 'str'>
    """
    # The boolean is promoted to 0/1 by the addition, selecting one of two
    # adjacent entries of COLORS.
    color_key = ob.is_simple + 33
    return COLORS[color_key]


def _encode(lat: float, lon: float, precision: float = 15) -> str:
    """
    Turn a latitude/longitude pair into its geohash string.

    Thin wrapper that forwards its arguments to ``geohash2.encode``.

    Parameters
    ----------
    lat : float
        Latitude in degrees.
    lon : float
        Longitude in degrees.
    precision : float, optional
        Number of characters in resulting geohash, by default 15

    Return
    ------
    str
        Geohash of supplied latitude/longitude.

    Example
    -------
    >>> from pymove.utils.geoutils import _encode
    >>> lat1, lon1 = -3.777736, -38.547792
    >>> lat2, lon2 = -3.793388, -38.517722
    >>> print(_encode(lat1,lon1), type(_encode(lat1,lon1)))
    7pkddb6356fyzxq <class 'str'>
    >>> print(_encode(lat2,lon2), type(_encode(lat2,lon2)))
    7pkd7t2mbj0z1v7 <class 'str'>
    """
    encoded = gh.encode(lat, lon, precision)
    return encoded


def _decode(geohash: str) -> tuple[float, float]:
    """
    Turn a geohash string back into a latitude/longitude pair.

    Thin wrapper around ``geohash2.decode``; the location is the approximate
    centre of the geohash cell, to reasonable precision.

    Parameters
    ----------
    geohash : str
        Geohash str to be converted to latitude/longitude.

    Return
    ------
    (lat, lon)
        Geohashed location, exactly as returned by ``geohash2.decode``.
        NOTE(review): the example below prints string values although the
        annotation says float -- confirm against the geohash2 version in use.

    Example
    -------
    >>> from pymove.utils.geoutils import _decode
    >>> geoHash_1 = '7pkddb6356fyzxq'
    >>> geoHash_2 = '7pkd7t2mbj0z1v7'
    >>> print(_decode(geoHash_1), type(_decode(geoHash_1)))
    ('-3.777736', '-38.547792') <class 'tuple'>
    >>> print(_decode(geoHash_2), type(_decode(geoHash_2)))
    ('-3.793388', '-38.517722') <class 'tuple'>
    """
    decoded = gh.decode(geohash)
    return decoded


def _bin_geohash(lat: float, lon: float, precision: float = 15) -> ndarray:
    """
    Build the binary representation of a point's geohash.

    The point is geohashed with ``_encode`` and every character of the hash
    is replaced by its bit array from ``BASE_32_TO_BIN``; the pieces are then
    joined into one flat array.

    Parameters
    ----------
    lat : float
        Latitude in degrees
    lon : float
        Longitude in degrees
    precision : float, optional
        Number of characters in resulting geohash, by default 15

    Return
    ------
    array
        Returns a binary geohash array

    Example
    -------
    >>> from pymove.utils.geoutils import _bin_geohash
    >>> lat1, lon1 = -3.777736, -38.547792
    >>> lat2, lon2 = -3.793388, -38.517722
    >>> print(_bin_geohash(lat1,lon1), type(_bin_geohash(lat1,lon1)))
    [0 0 1 1 1 1 1 0 0 1 1 0 1 0 0 0 1 1 0 1 0 1 1 0 1 0 1 0 1 1 0 0 1 1 0 0 0
     0 1 1 0 0 1 0 1 0 0 1 1 0 0 1 1 1 1 1 0 0 0 1 0 1 0 0 0 1 1 1 0 0 0 0 1 1
     1 0 1 0] <class 'numpy.ndarray'>
    >>> print(_bin_geohash(lat2,lon2), type(_bin_geohash(lat2,lon2)))
    [0 0 1 1 1 1 1 0 0 1 1 0 1 0 0 0 1 1 0 1 0 0 1 1 1 1 1 1 0 1 0 0 0 1 0 1 0
     1 1 0 0 1 0 1 1 1 0 0 1 1 0 0 0 0 0 1 0 0 0 1 1 0 0 0 0 1 1 1 1 1 1 0 0 1
     1 1] <class 'numpy.ndarray'>
    """
    geohash_chars = _encode(lat, lon, precision)
    bit_groups = [BASE_32_TO_BIN[char] for char in geohash_chars]
    return np.concatenate(bit_groups)


def _reset_and_create_arrays_none(
    data: DataFrame, reset_index: bool = True
) -> tuple[ndarray, ndarray, ndarray, ndarray]:
    """
    Optionally reset the df index and allocate four placeholder arrays.

    Each array has one slot per row of ``data``. The two float arrays start
    filled with nan and the two object arrays start filled with None.

    Parameters
    ----------
    data : dataframe
        The input trajectories data; its index is reset in place when
        ``reset_index`` is True
    reset_index : boolean, optional
        Condition to reset the df index, by default True

    Return
    ------
    arrays
        Placeholder arrays, of the size of the df, in the order
        (latitudes, longitudes, geohash, bin_geohash).

    Example
    -------
    >>> from pymove.utils.geoutils import _reset_and_create_arrays_none
    >>> print(type(_reset_and_create_arrays_none(geoLife_df)))
    <class 'tuple'>
    >>> _reset_and_create_arrays_none(geoLife_df)
    (array([nan, nan, nan, nan, nan]),
     array([nan, nan, nan, nan, nan]),
     array([None, None, None, None, None], dtype=object),
     array([None, None, None, None, None], dtype=object))
    """
    if reset_index:
        data.reset_index(drop=True, inplace=True)

    n_rows = data.shape[0]

    # Filling a float64 array with None produces nan placeholders.
    latitudes = np.full(n_rows, None, dtype=np.float64)
    longitudes = np.full(n_rows, None, dtype=np.float64)
    # Object-typed arrays keep the None placeholders as they are.
    geohash = np.full(n_rows, None, dtype='object_')
    bin_geohash = np.full(n_rows, None, dtype=np.ndarray)

    return latitudes, longitudes, geohash, bin_geohash


def create_geohash_df(data: DataFrame, precision: float = 15):
    """
    Compute the geohash of every point and store it in the df.

    The operation is done in place: the df index is reset, a 'geohash'
    column is added to ``data`` and nothing is returned.

    Parameters
    ----------
    data : dataframe
        The input trajectories data
    precision : float, optional
        Number of characters in resulting geohash, by default 15

    Return
    ------
    None
        ``data`` itself receives the additional column 'geohash'

    Example
    -------
    >>> from pymove.utils.geoutils import create_geohash_df, _reset_and_create_arrays_none
    >>> geoLife_df
             lat          lon
    0  39.984094   116.319236
    1  39.984198   116.319322
    2  39.984224   116.319402
    3  39.984211   116.319389
    4  39.984217   116.319422
    >>> print(type (create_geohash_df(geoLife_df)))
    <class 'NoneType'>
    >>> geoLife_df
             lat          lon           geohash
    0  39.984094   116.319236   wx4eqyvh4xkg0xs
    1  39.984198   116.319322   wx4eqyvhudszsev
    2  39.984224   116.319402   wx4eqyvhyx8d9wc
    3  39.984211   116.319389   wx4eqyvhyjnv5m7
    4  39.984217   116.319422   wx4eqyvhyyr2yy8
    """
    # Only the third placeholder (the object array for hashes) is needed; the
    # helper also resets the index, so the row labels below are 0..n-1.
    hashes = _reset_and_create_arrays_none(data)[2]

    coordinates = data[[LATITUDE, LONGITUDE]]
    for position, point in progress_bar(
        coordinates.iterrows(), total=data.shape[0]
    ):
        hashes[position] = _encode(point[LATITUDE], point[LONGITUDE], precision)

    data[GEOHASH] = hashes


def create_bin_geohash_df(data: DataFrame, precision: float = 15):
    """
    Compute the binary geohash of every point and store it in the df.

    The operation is done in place: the df index is reset, a 'bin_geohash'
    column is added to ``data`` and nothing is returned.

    Parameters
    ----------
    data : dataframe
        The input trajectories data
    precision : float, optional
        Number of characters in resulting geohash, by default 15

    Return
    ------
    None
        ``data`` itself receives the additional column 'bin_geohash'

    Example
    -------
    >>> from pymove.utils.geoutils import create_bin_geohash_df
    >>> geoLife_df
             lat          lon
    0  39.984094   116.319236
    1  39.984198   116.319322
    2  39.984224   116.319402
    3  39.984211   116.319389
    4  39.984217   116.319422
    >>> print(type(create_bin_geohash_df(geoLife_df)))
    <class 'NoneType'>
    >>> geoLife_df
             lat          lon                                        bin_geohash
    0  39.984094   116.319236  [1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, ...
    1  39.984198   116.319322  [1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, ...
    2  39.984224   116.319402  [1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, ...
    3  39.984211   116.319389  [1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, ...
    4  39.984217   116.319422  [1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, ...
    """
    # Only the last placeholder (the object array for bit arrays) is needed;
    # the helper also resets the index, so the row labels below are 0..n-1.
    bit_arrays = _reset_and_create_arrays_none(data)[-1]

    coordinates = data[[LATITUDE, LONGITUDE]]
    for position, point in progress_bar(
        coordinates.iterrows(), total=data.shape[0]
    ):
        bit_arrays[position] = _bin_geohash(
            point[LATITUDE], point[LONGITUDE], precision
        )

    data[BIN_GEOHASH] = bit_arrays


def decode_geohash_to_latlon(
    data: DataFrame,
    label_geohash: str = GEOHASH,
    reset_index: bool = True
):
    """
    Decode feature with hash of trajectories back to geographic coordinates.

    The operation is done in place: the decoded coordinates are written to
    two new columns of ``data`` and nothing is returned. When ``reset_index``
    is True the df index is also reset in place.

    Parameters
    ----------
    data : dataframe
        The input trajectories data
    label_geohash : str, optional
        The name of the feature with hashed trajectories, by default GEOHASH
    reset_index : boolean, optional
        Condition to reset the df index, by default True

    Return
    ------
    None
        ``data`` itself receives the additional columns 'lat_decode' and
        'lon_decode'

    Raises
    ------
    ValueError
        If ``label_geohash`` is not a column of ``data``

    Example
    -------
    >>> from pymove.utils.geoutils import decode_geohash_to_latlon
    >>> geoLife_df
             lat          lon           geohash
    0  39.984094   116.319236   wx4eqyvh4xkg0xs
    1  39.984198   116.319322   wx4eqyvhudszsev
    2  39.984224   116.319402   wx4eqyvhyx8d9wc
    3  39.984211   116.319389   wx4eqyvhyjnv5m7
    4  39.984217   116.319422   wx4eqyvhyyr2yy8
    >>> print(type(decode_geohash_to_latlon(geoLife_df)))
    <class 'NoneType'>
    >>> geoLife_df
             lat          lon           geohash  lat_decode  lon_decode
    0  39.984094   116.319236   wx4eqyvh4xkg0xs   39.984094  116.319236
    1  39.984198   116.319322   wx4eqyvhudszsev   39.984198  116.319322
    2  39.984224   116.319402   wx4eqyvhyx8d9wc   39.984224  116.319402
    3  39.984211   116.319389   wx4eqyvhyjnv5m7   39.984211  116.319389
    4  39.984217   116.319422   wx4eqyvhyyr2yy8   39.984217  116.319422
    """
    if label_geohash not in data:
        raise ValueError(f'feature {label_geohash} not in df')

    lat, lon, _, _ = _reset_and_create_arrays_none(data, reset_index=reset_index)

    # Fill the arrays by row position, not by index label: with
    # reset_index=False the labels may be non-contiguous, unordered or
    # non-integer, which would raise IndexError or misplace values. The final
    # column assignment from a plain array is positional as well.
    for position, geohash in progress_bar(
        enumerate(data[label_geohash]), total=data.shape[0]
    ):
        lat_lon = _decode(geohash)
        lat[position] = lat_lon[0]
        lon[position] = lat_lon[1]

    data[LATITUDE_DECODE] = lat
    data[LONGITUDE_DECODE] = lon