07 - Exploring Query¶
1. Imports¶
import pandas as pd
import pymove as pm
from pymove import folium, MoveDataFrame
from pymove.query import query
2. Load Data¶
Dataset - Hurricanes and Typhoons: The National Hurricane Center (NHC) publishes its tropical cyclone historical database in a format known as HURDAT, short for HURricane DATabase.
# Load the Atlantic hurricane records from the local CSV file into a pandas DataFrame
hurricanes_pandas_df = pd.read_csv('atlantic.csv')
# Bare expression so the notebook renders the table
hurricanes_pandas_df
ID | Name | Date | Time | Event | Status | Latitude | Longitude | Maximum Wind | Minimum Pressure | ... | Low Wind SW | Low Wind NW | Moderate Wind NE | Moderate Wind SE | Moderate Wind SW | Moderate Wind NW | High Wind NE | High Wind SE | High Wind SW | High Wind NW | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | AL011851 | UNNAMED | 18510625 | 0 | HU | 28.0N | 94.8W | 80 | -999 | ... | -999 | -999 | -999 | -999 | -999 | -999 | -999 | -999 | -999 | -999 | |
1 | AL011851 | UNNAMED | 18510625 | 600 | HU | 28.0N | 95.4W | 80 | -999 | ... | -999 | -999 | -999 | -999 | -999 | -999 | -999 | -999 | -999 | -999 | |
2 | AL011851 | UNNAMED | 18510625 | 1200 | HU | 28.0N | 96.0W | 80 | -999 | ... | -999 | -999 | -999 | -999 | -999 | -999 | -999 | -999 | -999 | -999 | |
3 | AL011851 | UNNAMED | 18510625 | 1800 | HU | 28.1N | 96.5W | 80 | -999 | ... | -999 | -999 | -999 | -999 | -999 | -999 | -999 | -999 | -999 | -999 | |
4 | AL011851 | UNNAMED | 18510625 | 2100 | L | HU | 28.2N | 96.8W | 80 | -999 | ... | -999 | -999 | -999 | -999 | -999 | -999 | -999 | -999 | -999 | -999 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
49100 | AL122015 | KATE | 20151112 | 1200 | EX | 41.3N | 50.4W | 55 | 981 | ... | 180 | 120 | 120 | 120 | 60 | 0 | 0 | 0 | 0 | 0 | |
49101 | AL122015 | KATE | 20151112 | 1800 | EX | 41.9N | 49.9W | 55 | 983 | ... | 180 | 120 | 120 | 120 | 60 | 0 | 0 | 0 | 0 | 0 | |
49102 | AL122015 | KATE | 20151113 | 0 | EX | 41.5N | 49.2W | 50 | 985 | ... | 200 | 220 | 120 | 120 | 60 | 0 | 0 | 0 | 0 | 0 | |
49103 | AL122015 | KATE | 20151113 | 600 | EX | 40.8N | 47.5W | 45 | 985 | ... | 180 | 220 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | |
49104 | AL122015 | KATE | 20151113 | 1200 | EX | 40.7N | 45.4W | 45 | 987 | ... | 150 | 220 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
49105 rows × 22 columns
# Keep only the records dated from 2012 through 2015 ('Date' is encoded as the number YYYYMMDD)
hurricanes_pandas_df = hurricanes_pandas_df.loc[
    (hurricanes_pandas_df['Date'] >= 20120000) & (hurricanes_pandas_df['Date'] < 20160000)
]
hurricanes_pandas_df.shape
(1639, 22)
# Preview the first rows of the identifier, name, position and date/time columns
hurricanes_pandas_df[['ID', 'Name', 'Latitude', 'Longitude', 'Date', 'Time']].head()
ID | Name | Latitude | Longitude | Date | Time | |
---|---|---|---|---|---|---|
47466 | AL012012 | ALBERTO | 33.1N | 77.0W | 20120519 | 0 |
47467 | AL012012 | ALBERTO | 32.8N | 77.1W | 20120519 | 600 |
47468 | AL012012 | ALBERTO | 32.5N | 77.3W | 20120519 | 1200 |
47469 | AL012012 | ALBERTO | 32.3N | 77.6W | 20120519 | 1800 |
47470 | AL012012 | ALBERTO | 32.1N | 78.1W | 20120520 | 0 |
# Turn the hemisphere-suffixed coordinate strings (e.g. '33.1N', '77.0W')
# into signed decimal numbers (33.1, -77.0)
hurricanes_pandas_df = pm.conversions.lat_and_lon_decimal_degrees_to_decimal(
hurricanes_pandas_df, latitude='Latitude', longitude='Longitude'
)
def convert_to_datetime(row):
    """Build a ``'YYYY-MM-DD HH:MM:00'`` string from a row's date and time fields.

    Args:
        row: Mapping-like record (for example a pandas Series) holding a
            ``'Date'`` entry encoded as the number YYYYMMDD and a ``'Time'``
            entry encoded as the number HHMM (so ``600`` means 06:00).

    Returns:
        The combined timestamp as a string, with the seconds fixed at ``00``.

    Raises:
        ValueError: If ``'Date'`` or ``'Time'`` cannot be converted to an integer.
    """
    # Use integer arithmetic rather than slicing str(value): slicing breaks as
    # soon as pandas hands the value over as a float (str(600.0)[-2:] == '.0').
    date_value = int(row['Date'])
    time_value = int(row['Time'])

    year, month_and_day = divmod(date_value, 10000)
    month, day = divmod(month_and_day, 100)
    hour, minute = divmod(time_value, 100)

    return '{:04d}-{:02d}-{:02d} {:02d}:{:02d}:00'.format(year, month, day, hour, minute)
# Run convert_to_datetime on every row (axis=1) and store the result in a new 'Datetime' column
hurricanes_pandas_df['Datetime'] = hurricanes_pandas_df.apply(convert_to_datetime, axis=1)
# Preview the converted coordinates next to the new timestamp column
hurricanes_pandas_df[['ID', 'Name', 'Latitude', 'Longitude', 'Datetime']].head()
ID | Name | Latitude | Longitude | Datetime | |
---|---|---|---|---|---|
47466 | AL012012 | ALBERTO | 33.1 | -77.0 | 2012-05-19 00:00:00 |
47467 | AL012012 | ALBERTO | 32.8 | -77.1 | 2012-05-19 06:00:00 |
47468 | AL012012 | ALBERTO | 32.5 | -77.3 | 2012-05-19 12:00:00 |
47469 | AL012012 | ALBERTO | 32.3 | -77.6 | 2012-05-19 18:00:00 |
47470 | AL012012 | ALBERTO | 32.1 | -78.1 | 2012-05-20 00:00:00 |
# Wrap the pandas DataFrame in pymove's MoveDataFrame, naming the columns that hold the
# latitude, longitude, timestamp and trajectory id (here the hurricane name).
# NOTE: despite the variable name, the data covers 2012 through 2015 (see the date filter above).
hurricanes_2012 = MoveDataFrame(
data=hurricanes_pandas_df, latitude='Latitude', longitude='Longitude',datetime='Datetime', traj_id='Name'
)
print(type(hurricanes_2012))
hurricanes_2012.head()
<class 'pymove.core.pandas.PandasMoveDataFrame'>
ID | id | Date | Time | Event | Status | lat | lon | Maximum Wind | Minimum Pressure | ... | Low Wind NW | Moderate Wind NE | Moderate Wind SE | Moderate Wind SW | Moderate Wind NW | High Wind NE | High Wind SE | High Wind SW | High Wind NW | datetime | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
47466 | AL012012 | ALBERTO | 20120519 | 0 | LO | 33.1 | -77.0 | 25 | 1010 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2012-05-19 00:00:00 | |
47467 | AL012012 | ALBERTO | 20120519 | 600 | TD | 32.8 | -77.1 | 30 | 1008 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2012-05-19 06:00:00 | |
47468 | AL012012 | ALBERTO | 20120519 | 1200 | TS | 32.5 | -77.3 | 40 | 1005 | ... | 30 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2012-05-19 12:00:00 | |
47469 | AL012012 | ALBERTO | 20120519 | 1800 | TS | 32.3 | -77.6 | 45 | 997 | ... | 40 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 2012-05-19 18:00:00 | |
47470 | AL012012 | ALBERTO | 20120520 | 0 | TS | 32.1 | -78.1 | 50 | 995 | ... | 40 | 0 | 0 | 0 | 20 | 0 | 0 | 0 | 0 | 2012-05-20 00:00:00 |
5 rows × 23 columns
Visualization¶
# Plot every hurricane trajectory on a map, starting zoomed far out
folium.plot_trajectories(hurricanes_2012, zoom_start=2)
Make this Notebook Trusted to load map: File -> Trust Notebook
# Number of distinct trajectory ids (hurricane names) between 2012 and 2015
this_ex = hurricanes_2012
len(this_ex['id'].unique())
55
# Select a single hurricane for the demonstration. The ids are stripped before comparing
# (presumably the name column is padded with whitespace in the CSV; verify against the data).
gonzalo = hurricanes_2012.loc[hurricanes_2012['id'].str.strip() == 'GONZALO']
gonzalo.shape
(39, 23)
# Plot only Gonzalo, passing the median latitude/longitude of its points as the map origin
folium.plot_trajectories(
gonzalo, lat_origin=gonzalo['lat'].median(), lon_origin=gonzalo['lon'].median(), zoom_start=2
)
Make this Notebook Trusted to load map: File -> Trust Notebook
2. Range Query¶
Using distance MEDP (Mean Euclidean Distance Predictive)¶
# Range query against all hurricanes using the MEDP distance with a threshold of 200
# (presumably keeps trajectories closer to `gonzalo` than min_dist; units not shown here, check pymove docs)
prox_Gonzalo = query.range_query(gonzalo, hurricanes_2012, min_dist=200, distance='MEDP')
VBox(children=(HTML(value=''), IntProgress(value=0, max=55)))
# Plot the trajectories returned by the MEDP range query
folium.plot_trajectories(prox_Gonzalo, zoom_start=3)
Make this Notebook Trusted to load map: File -> Trust Notebook
Using Distance MEDT (Mean Euclidean Distance Trajectory)¶
# Same range query, now with the MEDT distance and a threshold of 1000
# (presumably keeps trajectories closer to `gonzalo` than min_dist; units not shown here, check pymove docs)
prox_Gonzalo = query.range_query(gonzalo, hurricanes_2012, min_dist=1000, distance='MEDT')
VBox(children=(HTML(value=''), IntProgress(value=0, max=55)))
# Plot the trajectories returned by the MEDT range query
folium.plot_trajectories(prox_Gonzalo, zoom_start=3)
Make this Notebook Trusted to load map: File -> Trust Notebook
3. KNN (K-Nearest-Neighbor)¶
Using distance MEDP (Mean Euclidean Distance Predictive)¶
# k-nearest-neighbour query with k=5 using the MEDP distance, grouping points by the 'id' column
# (presumably returns the 5 trajectories closest to `gonzalo`; confirm against pymove docs)
prox_Gonzalo = query.knn_query(gonzalo, hurricanes_2012, id_='id', k=5, distance='MEDP')
VBox(children=(HTML(value=''), IntProgress(value=0, max=55)))
# Plot the trajectories returned by the MEDP k-nearest-neighbour query
folium.plot_trajectories(prox_Gonzalo, zoom_start=3)
Make this Notebook Trusted to load map: File -> Trust Notebook
Using Distance MEDT (Mean Euclidean Distance Trajectory)¶
# Same k-nearest-neighbour query (k=5), now using the MEDT distance
# (presumably returns the 5 trajectories closest to `gonzalo`; confirm against pymove docs)
prox_Gonzalo = query.knn_query(gonzalo, hurricanes_2012, id_='id', k=5, distance='MEDT')
VBox(children=(HTML(value=''), IntProgress(value=0, max=55)))
# Plot the trajectories returned by the MEDT k-nearest-neighbour query
folium.plot_trajectories(prox_Gonzalo, zoom_start=3)
Make this Notebook Trusted to load map: File -> Trust Notebook