NYPD Transportation Data Research Poster

Code used for visualizations

Average Number of Pedestrian-Involved Crashes per Hour and Daily Total

#Imports Libraries

import matplotlib.pyplot as plt

import seaborn as sns

import pandas as pd

# Load the collision records into a DataFrame.
# NOTE(review): the path looks like a Colab Google Drive mount - adjust it when
# running anywhere else.
file_path = '/content/drive/MyDrive/Motor_Vehicle_Collisions_-_Crashes_20250124.csv'
data = pd.read_csv(file_path)

# Parse CRASH DATE and CRASH TIME so the .dt accessors below are available.
data['CRASH DATE'] = pd.to_datetime(data['CRASH DATE'])
data['CRASH TIME'] = pd.to_datetime(data['CRASH TIME'], format='%H:%M')

# Derive the weekday name and the hour of day for every crash record.
data['Day of Week'] = data['CRASH DATE'].dt.day_name()
data['Hour of Day'] = data['CRASH TIME'].dt.hour

# Make the weekday an ordered categorical so groupings and plots run
# Monday -> Sunday instead of alphabetically.
day_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
data['Day of Week'] = pd.Categorical(data['Day of Week'], categories=day_order, ordered=True)

# Total number of pedestrians injured or killed on each weekday (a sum of
# victims, not an average and not a count of crash records).
# observed=False is spelled out so all seven weekdays are always present and
# pandas does not warn about the changing default for categorical groupers.
pedestrian_crashes_by_day = (
    data.groupby('Day of Week', observed=False)[
        ['NUMBER OF PEDESTRIANS INJURED', 'NUMBER OF PEDESTRIANS KILLED']
    ]
    .sum()
    .sum(axis=1)
)

# Keep only the crash records in which at least one pedestrian was injured or killed.
pedestrian_crashes = data[(data['NUMBER OF PEDESTRIANS INJURED'] > 0) | (data['NUMBER OF PEDESTRIANS KILLED'] > 0)]

# Average number of pedestrian crashes per day for each hour of the day:
# crashes recorded in that hour divided by the number of distinct calendar
# days in the data set. (Dividing by the number of distinct hours, as before,
# only rescaled every bar by the same constant.)
days_observed = data['CRASH DATE'].dt.normalize().nunique()
all_hours = pd.Index(range(24), name='Hour of Day')
average_pedestrian_crashes_per_hour = (
    pedestrian_crashes.groupby('Hour of Day').size()
    # Hours with no pedestrian crashes still get an explicit 0 entry.
    .reindex(all_hours, fill_value=0)
    / days_observed
)

# Use seaborn's white-grid theme for the figures drawn from here on.
sns.set(style="whitegrid")

# Bar chart of the per-weekday pedestrian totals.
plt.figure(figsize=(15, 6))
sns.barplot(x=pedestrian_crashes_by_day.index, y=pedestrian_crashes_by_day.values, palette='mako')
plt.title('Daily Total of Pedestrian Involved Crashes in NYC')
plt.xlabel('Day of Week')
# The plotted values are the summed injured + killed pedestrian counts, so the
# axis is labelled with that quantity.
plt.ylabel('Number of Pedestrians Injured or Killed')
# No plt.legend() here: the bars carry no labels, so a legend call would only
# emit a "no artists with labels" warning and draw nothing.
plt.show()

# Bar chart of the per-hour pedestrian crash averages.
plt.figure(figsize=(15, 6))
sns.barplot(x=average_pedestrian_crashes_per_hour.index, y=average_pedestrian_crashes_per_hour.values)
plt.title('Average Number of Pedestrian Involved Crashes per Hour of Day')
plt.xlabel('Hour of Day')
plt.ylabel('Average Number of Crashes')
# One tick per hour, 0 through 23.
plt.xticks(range(0, 24))
plt.show()

Top 10 Contributing Factors Visual

# Count how often each primary contributing factor appears and keep the
# ten most frequent ones (value_counts sorts by descending frequency).
factor_counts = data['CONTRIBUTING FACTOR VEHICLE 1'].value_counts()
top_factors = factor_counts.head(10)

# Draw the ranking as a bar chart on an explicitly created Axes.
fig, ax = plt.subplots(figsize=(12, 7))
sns.barplot(x=top_factors.index, y=top_factors.values, palette="magma", ax=ax)

ax.set_title('Top 10 Contributing Factors to crashes', fontsize=16)
ax.set_xlabel('Contributing Factor', fontsize=14)
ax.set_ylabel('Number of Crashes', fontsize=14)

# The factor names are long; tilt them and right-align so they do not overlap.
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')

fig.tight_layout()
plt.show()

Types of Crashes and Their Frequencies Visual

import matplotlib.pyplot as plt

import seaborn as sns

# Map each display label to the column holding that count, in plotting order.
crash_type_columns = {
    'Pedestrian Injuries': 'NUMBER OF PEDESTRIANS INJURED',
    'Cyclist Injuries': 'NUMBER OF CYCLIST INJURED',
    'Motorist Injuries': 'NUMBER OF MOTORIST INJURED',
    'Pedestrian Deaths': 'NUMBER OF PEDESTRIANS KILLED',
    'Cyclist Deaths': 'NUMBER OF CYCLIST KILLED',
    'Motorist Deaths': 'NUMBER OF MOTORIST KILLED',
}

# Sum every column over the whole data set to get one total per category.
types_of_crashes = {
    label: data[column].sum()
    for label, column in crash_type_columns.items()
}

# Turn the label -> total mapping into a two-column table for seaborn.
crash_types_df = (
    pd.Series(types_of_crashes, name='Count')
    .rename_axis('Crash Type')
    .reset_index()
)

# Horizontal bar chart: one bar per injury/death category.
fig, ax = plt.subplots(figsize=(12, 7))
sns.barplot(data=crash_types_df, x='Count', y='Crash Type', palette="mako", ax=ax)

ax.set_title('Types of Crashes and Their Frequencies')
ax.set_xlabel('Count')
ax.set_ylabel('Type of Crash')

fig.tight_layout()
plt.show()

Previous
Previous

Hotel Dashboard