07 - Exploring Query

1. Imports

import pandas as pd
import pymove as pm
from pymove import folium, MoveDataFrame
from pymove.query import query

2. Load Data

Dataset - Hurricanes and Typhoons: The National Hurricane Center (NHC) publishes its historical tropical cyclone database in a format known as HURDAT, short for HURricane DATabase.

# Read the HURDAT Atlantic records from a local CSV file and display the resulting DataFrame
hurricanes_pandas_df = pd.read_csv('atlantic.csv')
hurricanes_pandas_df
ID Name Date Time Event Status Latitude Longitude Maximum Wind Minimum Pressure ... Low Wind SW Low Wind NW Moderate Wind NE Moderate Wind SE Moderate Wind SW Moderate Wind NW High Wind NE High Wind SE High Wind SW High Wind NW
0 AL011851 UNNAMED 18510625 0 HU 28.0N 94.8W 80 -999 ... -999 -999 -999 -999 -999 -999 -999 -999 -999 -999
1 AL011851 UNNAMED 18510625 600 HU 28.0N 95.4W 80 -999 ... -999 -999 -999 -999 -999 -999 -999 -999 -999 -999
2 AL011851 UNNAMED 18510625 1200 HU 28.0N 96.0W 80 -999 ... -999 -999 -999 -999 -999 -999 -999 -999 -999 -999
3 AL011851 UNNAMED 18510625 1800 HU 28.1N 96.5W 80 -999 ... -999 -999 -999 -999 -999 -999 -999 -999 -999 -999
4 AL011851 UNNAMED 18510625 2100 L HU 28.2N 96.8W 80 -999 ... -999 -999 -999 -999 -999 -999 -999 -999 -999 -999
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
49100 AL122015 KATE 20151112 1200 EX 41.3N 50.4W 55 981 ... 180 120 120 120 60 0 0 0 0 0
49101 AL122015 KATE 20151112 1800 EX 41.9N 49.9W 55 983 ... 180 120 120 120 60 0 0 0 0 0
49102 AL122015 KATE 20151113 0 EX 41.5N 49.2W 50 985 ... 200 220 120 120 60 0 0 0 0 0
49103 AL122015 KATE 20151113 600 EX 40.8N 47.5W 45 985 ... 180 220 0 0 0 0 0 0 0 0
49104 AL122015 KATE 20151113 1200 EX 40.7N 45.4W 45 987 ... 150 220 0 0 0 0 0 0 0 0

49105 rows × 22 columns

# Select hurricanes from 2012 to 2015 ('Date' holds YYYYMMDD integers).
# An explicit copy is taken so that the columns reassigned/added further down
# are written to an independent frame instead of a slice of the full dataset,
# which would otherwise raise pandas' SettingWithCopyWarning.
in_period = (hurricanes_pandas_df['Date'] >= 20120000) & (hurricanes_pandas_df['Date'] < 20160000)
hurricanes_pandas_df = hurricanes_pandas_df.loc[in_period].copy()
hurricanes_pandas_df.shape
(1639, 22)
# Preview the id, name, position and date/time columns of the first rows
hurricanes_pandas_df[['ID', 'Name', 'Latitude', 'Longitude', 'Date', 'Time']].head()
ID Name Latitude Longitude Date Time
47466 AL012012 ALBERTO 33.1N 77.0W 20120519 0
47467 AL012012 ALBERTO 32.8N 77.1W 20120519 600
47468 AL012012 ALBERTO 32.5N 77.3W 20120519 1200
47469 AL012012 ALBERTO 32.3N 77.6W 20120519 1800
47470 AL012012 ALBERTO 32.1N 78.1W 20120520 0
# Convert the hemisphere-suffixed coordinates (e.g. '33.1N', '77.0W')
# into signed decimal values (e.g. 33.1, -77.0)
hurricanes_pandas_df = pm.conversions.lat_and_lon_decimal_degrees_to_decimal(
    hurricanes_pandas_df, latitude='Latitude', longitude='Longitude'
)

def convert_to_datetime(row):
    """Build a 'YYYY-MM-DD HH:MM:00' string from a row's date and time fields.

    Parameters
    ----------
    row : mapping (e.g. pandas.Series)
        Must provide 'Date' as a YYYYMMDD number and 'Time' as an HHMM
        number without leading zeros (600 means 06:00, 0 means 00:00).

    Returns
    -------
    str
        Timestamp text such as '2012-05-19 06:00:00'.
    """
    # Normalise to int first: values may arrive as floats (e.g. 600.0) when a
    # row is extracted from an all-numeric frame, and slicing their string
    # form ('600.0'[-2:] == '.0') would fail or yield a wrong day/minute.
    date_digits = str(int(row['Date']))
    hours, minutes = divmod(int(row['Time']), 100)
    this_date = '{}-{}-{}'.format(date_digits[0:4], date_digits[4:6], date_digits[6:])
    this_time = '{:02d}:{:02d}:00'.format(hours, minutes)
    return '{} {}'.format(this_date, this_time)
# Build the 'Datetime' column by applying convert_to_datetime to every row (axis=1)
hurricanes_pandas_df['Datetime'] = hurricanes_pandas_df.apply(convert_to_datetime, axis=1)

# Preview the converted coordinates together with the new 'Datetime' column
hurricanes_pandas_df[['ID', 'Name', 'Latitude', 'Longitude', 'Datetime']].head()
ID Name Latitude Longitude Datetime
47466 AL012012 ALBERTO 33.1 -77.0 2012-05-19 00:00:00
47467 AL012012 ALBERTO 32.8 -77.1 2012-05-19 06:00:00
47468 AL012012 ALBERTO 32.5 -77.3 2012-05-19 12:00:00
47469 AL012012 ALBERTO 32.3 -77.6 2012-05-19 18:00:00
47470 AL012012 ALBERTO 32.1 -78.1 2012-05-20 00:00:00
# Wrap the pandas DataFrame in pymove's MoveDataFrame, telling it which
# source columns hold the latitude, longitude, timestamp and trajectory id.
hurricanes_2012 = MoveDataFrame(
    data=hurricanes_pandas_df,
    latitude='Latitude',
    longitude='Longitude',
    datetime='Datetime',
    traj_id='Name',
)
print(type(hurricanes_2012))
hurricanes_2012.head()
<class 'pymove.core.pandas.PandasMoveDataFrame'>
ID id Date Time Event Status lat lon Maximum Wind Minimum Pressure ... Low Wind NW Moderate Wind NE Moderate Wind SE Moderate Wind SW Moderate Wind NW High Wind NE High Wind SE High Wind SW High Wind NW datetime
47466 AL012012 ALBERTO 20120519 0 LO 33.1 -77.0 25 1010 ... 0 0 0 0 0 0 0 0 0 2012-05-19 00:00:00
47467 AL012012 ALBERTO 20120519 600 TD 32.8 -77.1 30 1008 ... 0 0 0 0 0 0 0 0 0 2012-05-19 06:00:00
47468 AL012012 ALBERTO 20120519 1200 TS 32.5 -77.3 40 1005 ... 30 0 0 0 0 0 0 0 0 2012-05-19 12:00:00
47469 AL012012 ALBERTO 20120519 1800 TS 32.3 -77.6 45 997 ... 40 0 0 0 0 0 0 0 0 2012-05-19 18:00:00
47470 AL012012 ALBERTO 20120520 0 TS 32.1 -78.1 50 995 ... 40 0 0 0 20 0 0 0 0 2012-05-20 00:00:00

5 rows × 23 columns

Visualization

# Plot every trajectory in hurricanes_2012 with an initial zoom level of 2
folium.plot_trajectories(hurricanes_2012, zoom_start=2)
Make this Notebook Trusted to load map: File -> Trust Notebook
# Total number of distinct hurricane ids between 2012 and 2015
this_ex = hurricanes_2012
len(this_ex['id'].unique())
55
# Pick one hurricane for the demonstration; ids are stripped of surrounding
# whitespace before being compared with the name.
is_gonzalo = hurricanes_2012['id'].str.strip() == 'GONZALO'
gonzalo = hurricanes_2012.loc[is_gonzalo]
gonzalo.shape
(39, 23)
# Use the median position of Gonzalo's points as the map origin
gonzalo_lat_median = gonzalo['lat'].median()
gonzalo_lon_median = gonzalo['lon'].median()
folium.plot_trajectories(
    gonzalo,
    lat_origin=gonzalo_lat_median,
    lon_origin=gonzalo_lon_median,
    zoom_start=2,
)
Make this Notebook Trusted to load map: File -> Trust Notebook

2. Range Query

Using distance MEDP (Mean Euclidean Distance Predictive)

# Range query of hurricanes_2012 against Gonzalo using the MEDP distance.
# NOTE(review): presumably keeps the trajectories whose distance to `gonzalo`
# is at most min_dist (200) - confirm against the pymove.query documentation.
prox_Gonzalo = query.range_query(gonzalo, hurricanes_2012, min_dist=200, distance='MEDP')
VBox(children=(HTML(value=''), IntProgress(value=0, max=55)))
# Plot the result of the MEDP range query
folium.plot_trajectories(prox_Gonzalo, zoom_start=3)
Make this Notebook Trusted to load map: File -> Trust Notebook

Using distance MEDT (Mean Euclidean Distance Trajectory)

# Range query of hurricanes_2012 against Gonzalo using the MEDT distance.
# NOTE(review): presumably keeps the trajectories whose distance to `gonzalo`
# is at most min_dist (1000) - confirm against the pymove.query documentation.
prox_Gonzalo = query.range_query(gonzalo, hurricanes_2012, min_dist=1000, distance='MEDT')
VBox(children=(HTML(value=''), IntProgress(value=0, max=55)))
# Plot the result of the MEDT range query
folium.plot_trajectories(prox_Gonzalo, zoom_start=3)
Make this Notebook Trusted to load map: File -> Trust Notebook

3. KNN (K-Nearest-Neighbor)

Using distance MEDP (Mean Euclidean Distance Predictive)

# KNN query of hurricanes_2012 against Gonzalo using the MEDP distance.
# NOTE(review): presumably returns the k=5 trajectories closest to `gonzalo`,
# grouped by the 'id' column - confirm against the pymove.query documentation.
prox_Gonzalo = query.knn_query(gonzalo, hurricanes_2012, id_='id', k=5, distance='MEDP')
VBox(children=(HTML(value=''), IntProgress(value=0, max=55)))
# Plot the result of the MEDP KNN query
folium.plot_trajectories(prox_Gonzalo, zoom_start=3)
Make this Notebook Trusted to load map: File -> Trust Notebook

Using distance MEDT (Mean Euclidean Distance Trajectory)

# KNN query of hurricanes_2012 against Gonzalo using the MEDT distance.
# NOTE(review): presumably returns the k=5 trajectories closest to `gonzalo`,
# grouped by the 'id' column - confirm against the pymove.query documentation.
prox_Gonzalo = query.knn_query(gonzalo, hurricanes_2012, id_='id', k=5, distance='MEDT')
VBox(children=(HTML(value=''), IntProgress(value=0, max=55)))
# Plot the result of the MEDT KNN query
folium.plot_trajectories(prox_Gonzalo, zoom_start=3)
Make this Notebook Trusted to load map: File -> Trust Notebook