NYPD Transportation Data Research Poster
Code used for visualizations
Average Number per Hour and Daily Total
#Imports Libraries
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
# Load the collision export into a DataFrame.
file_path = '/content/drive/MyDrive/Motor_Vehicle_Collisions_-_Crashes_20250124.csv'
data = pd.read_csv(file_path)

# Parse CRASH DATE and CRASH TIME so the .dt accessors below are available.
data['CRASH DATE'] = pd.to_datetime(data['CRASH DATE'])
data['CRASH TIME'] = pd.to_datetime(data['CRASH TIME'], format='%H:%M')

# Derive the weekday name and the hour (0-23) of each crash.
data['Day of Week'] = data['CRASH DATE'].dt.day_name()
data['Hour of Day'] = data['CRASH TIME'].dt.hour

# Make the weekday an ordered categorical so groupby/plots run Monday -> Sunday
# instead of alphabetically.
day_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
data['Day of Week'] = pd.Categorical(data['Day of Week'], categories=day_order, ordered=True)

# Total number of pedestrians injured or killed per weekday (a sum over the
# whole dataset, not an average). observed=False keeps all seven weekdays in
# the result and states explicitly the default pandas is deprecating.
pedestrian_crashes_by_day = (
    data.groupby('Day of Week', observed=False)[
        ['NUMBER OF PEDESTRIANS INJURED', 'NUMBER OF PEDESTRIANS KILLED']
    ]
    .sum()
    .sum(axis=1)
)

# Keep only crashes in which at least one pedestrian was injured or killed.
pedestrian_crashes = data[(data['NUMBER OF PEDESTRIANS INJURED'] > 0) | (data['NUMBER OF PEDESTRIANS KILLED'] > 0)]

# Average number of pedestrian crashes per day for each hour of the day.
# The denominator is the number of distinct calendar dates in the dataset;
# dividing by the number of distinct hours (as before) only rescaled the raw
# counts by a constant and was not an average of anything.
# NOTE(review): dates with no recorded collision at all are not counted here.
days_observed = data['CRASH DATE'].nunique()
# Reindex to all 24 hours so an hour with no pedestrian crashes shows up as 0
# instead of being silently dropped from the series.
average_pedestrian_crashes_per_hour = (
    pedestrian_crashes.groupby('Hour of Day')
    .size()
    .reindex(pd.RangeIndex(24, name='Hour of Day'), fill_value=0)
    / days_observed
)
# Apply seaborn's "whitegrid" theme (a global setting, so it also applies to
# the figures created later in this script).
sns.set(style="whitegrid")
# Bar chart of the per-weekday pedestrian totals held in
# pedestrian_crashes_by_day (index = weekday, values = totals).
plt.figure(figsize=(15,6))
sns.barplot(x= pedestrian_crashes_by_day.index, y= pedestrian_crashes_by_day.values, palette= 'mako')
plt.title('Daily Total of Pedestrian Involved Crashes in NYC')
plt.xlabel('Day of Week')
plt.ylabel('Number of Pedestrians Involved Crashes')
# No plt.legend() here: none of the bars carries a label, so the call only
# produced a "no artists with labels" warning and no useful legend.
plt.show()
# Bar chart of average_pedestrian_crashes_per_hour: one bar per hour of day.
plt.figure(figsize=(15, 6))
# sns.barplot returns the Axes it drew on, so title, axis labels and tick
# positions are all applied to that Axes in a single call.
sns.barplot(
    x=average_pedestrian_crashes_per_hour.index,
    y=average_pedestrian_crashes_per_hour.values,
).set(
    title='Average Number of Pedestrian Involved Crashes per Hour of Day',
    xlabel='Hour of Day',
    ylabel='Average Number of Crashes',
    xticks=range(0, 24),
)
plt.show()
Top 10 Contributing Factors Visual
# Ten most frequent values of the first vehicle's contributing-factor column
# (value_counts already sorts by descending frequency).
top_factors = data['CONTRIBUTING FACTOR VEHICLE 1'].value_counts().iloc[:10]

# Vertical bar chart of those counts, styled through the Axes returned by seaborn.
plt.figure(figsize=(12, 7))
factor_axes = sns.barplot(
    x=top_factors.index,
    y=top_factors.values,
    palette="magma",
)
factor_axes.set_title('Top 10 Contributing Factors to crashes', fontsize=16)
factor_axes.set_xlabel('Contributing Factor', fontsize=14)
factor_axes.set_ylabel('Number of Crashes', fontsize=14)
# Tilt the category labels so the long factor names do not overlap.
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.show()
Types of Crashes and Their Frequencies Visual
import matplotlib.pyplot as plt
import seaborn as sns
# Sum each injury / death column over the whole dataset, keyed by the label
# that will appear on the chart (pedestrians, cyclists, motorists).
types_of_crashes = {
    label: data[column].sum()
    for label, column in (
        ('Pedestrian Injuries', 'NUMBER OF PEDESTRIANS INJURED'),
        ('Cyclist Injuries', 'NUMBER OF CYCLIST INJURED'),
        ('Motorist Injuries', 'NUMBER OF MOTORIST INJURED'),
        ('Pedestrian Deaths', 'NUMBER OF PEDESTRIANS KILLED'),
        ('Cyclist Deaths', 'NUMBER OF CYCLIST KILLED'),
        ('Motorist Deaths', 'NUMBER OF MOTORIST KILLED'),
    )
}

# Two-column frame (label, total) so seaborn can address the columns by name.
crash_types_df = pd.DataFrame({
    'Crash Type': list(types_of_crashes.keys()),
    'Count': list(types_of_crashes.values()),
})

# Horizontal bar chart: one bar per category, bar length = total count.
plt.figure(figsize=(12, 7))
crash_type_axes = sns.barplot(
    x='Count',
    y='Crash Type',
    data=crash_types_df,
    palette="mako",
)
crash_type_axes.set_title('Types of Crashes and Their Frequencies')
crash_type_axes.set_xlabel('Count')
crash_type_axes.set_ylabel('Type of Crash')
plt.tight_layout()
plt.show()