import pandas as pd
import numpy as np
# Read the ZebraBotswana.txt table into a DataFrame and print its
# (rows, columns) size.
df = pd.read_csv(filepath_or_buffer="ZebraBotswana.txt")
print("(%d, %d)" % df.shape)
# The dataset contains 53776 records and 4 columns.
# Show the column labels and the first five rows of the table.
print(df.columns)
print(df.head(n=5))

# Sorted array of the distinct values found in the "Animal" column.
animals = np.unique(df["Animal"].values)
print(animals)
# We compute the integer day index of each record by dividing its Unix timestamp by the number of seconds in a day (24 * 60 * 60 = 86400) and truncating the result to an integer.
# Integer day index of every record: the timestamp divided by the length of
# a day and truncated by int().
# NOTE(review): presumes "UnixTime" is expressed in seconds - confirm.
seconds_per_day = 24. * 60. * 60.
df["day"] = df["UnixTime"].map(lambda stamp: int(stamp / seconds_per_day))
print(df.head())
# Approximate planar conversion factors from degrees to kilometres.
# 104.6 is roughly 111.3 * cos(20 deg), i.e. the length of one degree of
# longitude at about 20 degrees of latitude.
lat_to_dist = 111.3  # distance in km of a degree in latitude
lng_to_dist = 104.6  # distance in km of a degree in longitude

# For each animal: measure the straight-line step between consecutive
# position records, total the steps per day, and print summary statistics
# of those daily totals.
for animal in animals:
    # Work on a copy so the added "dist" column never touches the shared df.
    df_animal = df[df["Animal"] == animal].copy()

    # NOTE(review): steps are taken between consecutive rows, so the result
    # is only a path length if rows are in chronological order per animal -
    # confirm against the input file.
    lng = np.array(df_animal["Lng"])
    lat = np.array(df_animal["Lat"])
    dlng = np.diff(lng) * lng_to_dist
    dlat = np.diff(lat) * lat_to_dist
    dist = np.hypot(dlng, dlat)

    # The first record has no predecessor, so its step length is 0; this
    # also restores the one element lost by differencing. A step that
    # crosses midnight is counted on the day of its later record.
    df_animal["dist"] = np.concatenate(([0.0], dist))

    # Select "dist" before aggregating so the non-numeric columns (e.g. the
    # "Animal" strings) are not summed as well.
    daily_distance = df_animal.groupby("day")["dist"].sum()

    print(animal)
    print("Avg. daily distance: %5.2f+/-%5.2f km"%(daily_distance.mean(),daily_distance.std()))
    print("Min/Max daily distance: %5.2f km / %5.2f km"%(daily_distance.min(),daily_distance.max()))