"""MoveDataFrame class."""
from __future__ import annotations
from typing import TYPE_CHECKING
from dateutil.parser._parser import ParserError
from pandas.core.frame import DataFrame
from pymove.utils.constants import (
DATETIME,
LATITUDE,
LONGITUDE,
TRAJ_ID,
TYPE_DASK,
TYPE_PANDAS,
)
if TYPE_CHECKING:
from pymove.core.dask import DaskMoveDataFrame
from pymove.core.pandas import PandasMoveDataFrame
class MoveDataFrame:
    """
    Auxiliary class to check and transform data into Pymove Dataframes.

    ``__new__`` dispatches on ``type_`` and returns whatever
    ``PandasMoveDataFrame`` or ``DaskMoveDataFrame`` constructs.
    """

    # ``__new__`` is always called with the class as its first positional
    # argument; here that argument is named ``self`` and is not used.
    @staticmethod
    def __new__(  # type: ignore[misc]
        self,
        data: DataFrame | dict | list,
        latitude: str = LATITUDE,
        longitude: str = LONGITUDE,
        datetime: str = DATETIME,
        traj_id: str = TRAJ_ID,
        type_: str = TYPE_PANDAS,
        n_partitions: int = 1,
    ) -> 'PandasMoveDataFrame' | 'DaskMoveDataFrame':
        """
        Creates the PyMove dataframe, which must contain latitude, longitude and datetime.

        The dataframe can be a pandas or dask dataframe.

        Parameters
        ----------
        data : DataFrame or PandasMoveDataFrame or dict or list
            Input trajectory data.
        latitude : str, optional
            Represents column name latitude, by default LATITUDE
        longitude : str, optional
            Represents column name longitude, by default LONGITUDE
        datetime : str, optional
            Represents column name datetime, by default DATETIME
        traj_id : str, optional
            Represents column name trajectory id, by default TRAJ_ID
        type_ : str, optional
            Which dataframe implementation to build, either TYPE_PANDAS
            or TYPE_DASK, by default TYPE_PANDAS
        n_partitions : int, optional
            Amount of partitions for dask dataframe, by default 1.
            Only forwarded when ``type_`` is TYPE_DASK.

        Returns
        -------
        PandasMoveDataFrame or DaskMoveDataFrame
            The object built by the implementation selected by ``type_``.

        Raises
        ------
        KeyError
            If missing one of lat, lon, datetime columns
        ValueError, ParserError
            If the data types can't be converted.
        TypeError
            If ``type_`` is neither TYPE_PANDAS nor TYPE_DASK.

        """
        # NOTE(review): the implementations are imported inside each branch
        # rather than at module level, presumably to avoid a circular
        # import with pymove.core.pandas / pymove.core.dask -- confirm.
        if type_ == TYPE_PANDAS:
            from pymove.core.pandas import PandasMoveDataFrame
            return PandasMoveDataFrame(
                data, latitude, longitude, datetime, traj_id
            )
        if type_ == TYPE_DASK:
            from pymove.core.dask import DaskMoveDataFrame
            return DaskMoveDataFrame(
                data, latitude, longitude, datetime, traj_id, n_partitions
            )
        raise TypeError(
            f'Unknown MoveDataFrame type {type_}, use {TYPE_PANDAS} or {TYPE_DASK}'
        )

    @staticmethod
    def has_columns(data: DataFrame) -> bool:
        """
        Checks whether the dataset has the LATITUDE, LONGITUDE and DATETIME columns.

        Only the default column names from the constants are checked.

        Parameters
        ----------
        data : DataFrame
            Input trajectory data

        Returns
        -------
        bool
            True when all three required columns are present, False otherwise

        """
        cols = data.columns
        return LATITUDE in cols and LONGITUDE in cols and DATETIME in cols

    @staticmethod
    def validate_move_data_frame(data: DataFrame) -> None:
        """
        Converts the column type to the default type used by PyMove lib.

        LATITUDE and LONGITUDE are cast to ``float64`` and DATETIME to
        ``datetime64[ns]``. Columns that already have the target dtype are
        left alone. The conversion is written back into ``data`` itself, so
        columns converted before a failure stay converted.

        Parameters
        ----------
        data : DataFrame
            Input trajectory data

        Raises
        ------
        KeyError
            If missing one of lat, lon, datetime columns
        ValueError, ParserError
            If the data types can't be converted

        """
        # Order matters for which error surfaces first: lat, lon, datetime.
        expected_dtypes = (
            (LATITUDE, 'float64'),
            (LONGITUDE, 'float64'),
            (DATETIME, 'datetime64[ns]'),
        )
        try:
            for column, dtype in expected_dtypes:
                if data.dtypes[column] != dtype:
                    data[column] = data[column].astype(dtype)
        except KeyError as err:
            raise KeyError(
                'dataframe missing one of lat, lon, datetime columns.'
            ) from err
        # ParserError is handled before ValueError so that it keeps its own
        # message when it is a ValueError subclass.
        except ParserError as err:
            raise ParserError('datetime column cannot be parsed') from err
        except ValueError as err:
            raise ValueError('dtypes cannot be converted.') from err