import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Load the Metro Interstate Traffic Volume data set from the local data/ folder.
traffic = pd.read_csv('data/Metro_Interstate_Traffic_Volume.csv')
# Preview the first rows.
traffic.head()


# Preview the last rows.
traffic.tail()


# Print column dtypes and non-null counts.
traffic.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 48204 entries, 0 to 48203
Data columns (total 9 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   holiday              48204 non-null  object 
 1   temp                 48204 non-null  float64
 2   rain_1h              48204 non-null  float64
 3   snow_1h              48204 non-null  float64
 4   clouds_all           48204 non-null  int64  
 5   weather_main         48204 non-null  object 
 6   weather_description  48204 non-null  object 
 7   date_time            48204 non-null  object 
 8   traffic_volume       48204 non-null  int64  
dtypes: float64(3), int64(2), object(4)
memory usage: 3.3+ MB


plt.style.use('fivethirtyeight')
#___________________________________________________________
#Initial Plot
#Histogram of hourly traffic volume: each reading is rounded to the nearest 10
#and the number of readings per rounded value is drawn as a bar.
fig, ax = plt.subplots(figsize=(12, 8))
#Series.round is vectorised, so no per-row lambda around np.round is needed
plot_data = traffic['traffic_volume'].round(-1).value_counts()
ax.bar(plot_data.index, plot_data.values, width=10, alpha=.5)
#____________________________________________________________
#Format grid, ticks, etc
ax.set_xticks([1000, 3000, 5000, 7000])
ax.set_yticks([100, 200, 300, 400])
ax.axhline(y=0, color='black', linewidth=2, alpha=1)
ax.tick_params(colors='grey', which='both')
#___________________________________________________________
#create title,subtitle,signature bar
#(ax.text positions are given in data coordinates)
ax.text(-900, 470, 'Traffic Volume Distribution is Right-Skewed', weight='bold', size=26, alpha=.75)
ax.text(-900, 445, 'Histogram of traffic volume', size=19, alpha=.85)
ax.text(x=-900, y=-60,
    s='   J. Wilson Peoples                                                                                                                  Source: UCI Machine Learning Repository ',
    fontsize=12, color='#f0f0f0', backgroundcolor='grey')
#_____________________________________________________________
#on figure annotations (none for this figure)

Text(-900, -60, '   J. Wilson Peoples                                                                                                                  Source: UCI Machine Learning Repository ')


# Parse the timestamp strings so the .dt accessor can be used.
traffic['date_time'] = pd.to_datetime(traffic['date_time'])
hour_of_day = traffic['date_time'].dt.hour
# "Day" covers hours 8 through 19 inclusive; every other hour counts as night.
day_mask = (7 < hour_of_day) & (hour_of_day <= 19)
night_mask = ~day_mask
# Filter first and copy the result: this yields independent frames that can
# take new columns later without duplicating the full data set twice.
day = traffic[day_mask].copy()
night = traffic[night_mask].copy()


#___________________________________________________________
#Initial Plot
#Overlaid histograms of daytime and nighttime traffic volume, each rounded
#to the nearest 10 vehicles.
fig, ax = plt.subplots(figsize=(12, 8))
#Series.round is vectorised, so no per-row lambda around np.round is needed
day_data = day['traffic_volume'].round(-1).value_counts()
night_data = night['traffic_volume'].round(-1).value_counts()
ax.bar(day_data.index, day_data.values, width=10, alpha=.5, color='orangered')
ax.bar(night_data.index, night_data.values, width=10, alpha=.5, color='indigo')
#____________________________________________________________
#Format grid, ticks, etc
#No ax.legend() here: the bars carry no labels (the series are identified by
#the coloured on-figure text below), so the call only produced a
#"No artists with labels found" warning.
ax.set_xticks([1000, 3000, 5000, 7000])
ax.set_xticklabels(['1000', '3000', '5000', '7000 vehicles/hour'])
ax.set_yticks([100, 200, 300, 400])
ax.axhline(y=0, color='grey', linewidth=5, alpha=.5)
ax.tick_params(colors='grey', which='both')
#___________________________________________________________
#create title,subtitle,signature bar
ax.text(-900, 470, 'Night Traffic Contributes to Right-Skewed Distribution', weight='bold', size=26, alpha=.75)
ax.text(-900, 445, 'Histogram of nighttime traffic (purple) and daytime traffic (orange)', size=19, alpha=.85)
ax.text(x=-900, y=-60,
    s='   J. Wilson Peoples                                                                                                                        Source: UCI Machine Learning Repository ',
    fontsize=12, color='#f0f0f0', backgroundcolor='grey')
#_____________________________________________________________
#on figure annotations
#Each label is drawn twice at the same anchor: the grey text is padded with
#leading spaces so the coloured keyword can be drawn in the gap.
ax.text(x=1100, y=300, s='         traffic: typically', color='grey', size=19)
ax.text(x=1100, y=300, s='night', size=19, color='indigo')
ax.text(x=1100, y=280, s='lower volume', color='grey', size=19)
ax.vlines(x=1000, ymin=270, ymax=325, color='black', linewidth=1.5)

ax.text(x=4450, y=200, s='       traffic: typically', color='grey', size=19)
ax.text(x=4450, y=200, s='day', size=19, color='orangered')
ax.text(x=4450, y=180, s='higher volume', color='grey', size=19)
ax.vlines(x=4350, ymin=170, ymax=225, color='black', linewidth=1.5)

No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.

<matplotlib.collections.LineCollection at 0x7f2595762490>


day['month'] = day['date_time'].dt.month
#numeric_only=True keeps the text columns (holiday, weather_*) out of the mean;
#relying on the default emits a FutureWarning and stops working once the
#default changes.
by_month = day.groupby('month').mean(numeric_only=True)
#__________________________________________
#main plot
fig, ax = plt.subplots(figsize=(12, 8))
#create x axis
x_axis = by_month.index.to_list()
#move the last month (12, December) to the front as position 0 so that the
#winter months Dec-Jan-Feb sit next to each other
x_axis.insert(0, x_axis.pop() - 12)
#create y axis, rotated the same way so values stay aligned with x_axis
y_axis = by_month['traffic_volume'].to_list()
y_axis.insert(0, y_axis.pop())
#one line segment per season, three months each
#winter months
ax.plot(x_axis[0:3], y_axis[0:3], color='deepskyblue', alpha=.75)
#spring months
ax.plot(x_axis[3:6], y_axis[3:6], color='mediumseagreen', alpha=.75)
#summer months
ax.plot(x_axis[6:9], y_axis[6:9], color='orangered', alpha=.75)
#fall months
ax.plot(x_axis[9:12], y_axis[9:12], color='darkorange', alpha=.75)
#____________________________________________________________
#Format grid, ticks, etc
ax.set_xticks([0, 3, 6, 9])
ax.set_ylim([4300, 4850])
ax.set_xticklabels(['Dec', 'Mar', 'June', 'Sept'])
ax.axhline(y=4300, color='black', linewidth=5, alpha=.5)
#No ax.legend() here: the lines carry no labels (seasons are named by the
#coloured on-figure text below), so the call only produced a warning.
ax.tick_params(colors='grey', which='both')
#___________________________________________________________
#create title,subtitle,signature bar
ax.text(-1.3, 4905, 'Winter and Spring: Sharp Difference in Traffic Volume', weight='bold', size=26, alpha=.75)
ax.text(-1.3, 4880, 'Mean hourly traffic volume vs. month', size=19, alpha=.85)
ax.text(x=-1.3, y=4200,
    s='   J. Wilson Peoples                                                                                                                  Source: UCI Machine Learning Repository ',
    fontsize=12, color='#f0f0f0', backgroundcolor='grey')
#___________________________________________________________
#on figure annotations
ax.text(x=1.5, y=4400, s='Winter', size=19, color='deepskyblue')
ax.text(x=3.5, y=4700, s='Spring', size=19, color='mediumseagreen')
ax.text(x=6.5, y=4450, s='Summer', size=19, color='orangered')
ax.text(x=9.5, y=4700, s='Fall', size=19, color='darkorange')

/tmp/ipykernel_729/1835113481.py:2: FutureWarning: The default value of numeric_only in DataFrameGroupBy.mean is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.
  by_month = day.groupby('month').mean()
No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.

Text(9.5, 4700, 'Fall')


#Label every daytime reading with its season (Dec-Feb winter, Mar-May spring,
#Jun-Aug summer, Sep-Nov fall) via a month -> season lookup.
season_by_month = {
    12: 'winter', 1: 'winter', 2: 'winter',
    3: 'spring', 4: 'spring', 5: 'spring',
    6: 'summer', 7: 'summer', 8: 'summer',
    9: 'fall', 10: 'fall', 11: 'fall',
}
day['season'] = day['month'].map(season_by_month)
#season names ordered by number of readings (not used by the plot below)
seasons = day['season'].value_counts().index
#_________________________________________________________
#main plot
#Overlaid histograms of summer and winter daytime traffic, rounded to the
#nearest 100 vehicles.
fig, ax = plt.subplots(figsize=(12, 8))
colors = ['orangered', 'deepskyblue']
for season, color in zip(['summer', 'winter'], colors):
    mask = (day['season'] == season)
    #Series.round is vectorised, so no per-row lambda around np.round is needed
    plot_data = day.loc[mask, 'traffic_volume'].round(-2).value_counts()
    ax.bar(plot_data.index, plot_data.values, width=50, alpha=.4, color=color)
#____________________________________________________________
#Format grid, ticks, etc
ax.grid(visible=False)
ax.set_yticks([])
ax.set_xticks([300, 6900])
ax.set_xticklabels(['Low traffic volume', 'High traffic volume'])
ax.axhline(y=0, color='black', linewidth=5, alpha=.5)
#No ax.legend() here: the bars carry no labels (the seasons are identified by
#the coloured on-figure text below), so the call only produced a warning.
ax.tick_params(colors='grey', which='both')
#___________________________________________________________
#create title,subtitle,signature bar
ax.text(-400, 365, 'Mean of Traffic Volume Distribution Shifts by Season', weight='bold', size=26, alpha=.75)
ax.text(-400, 348, 'Distribution of daytime traffic volume for Winter traffic (blue) and Summer traffic (red)', size=19, alpha=.85)
ax.text(x=-400, y=-50,
    s='   J. Wilson Peoples                                                                                                                               Source: UCI Machine Learning Repository ',
    fontsize=12, color='#f0f0f0', backgroundcolor='grey')
#_____________________________________________________________
#on figure annotations
#Each label is drawn twice at the same anchor: the grey text is padded with
#leading spaces so the coloured keyword can be drawn in the gap.
ax.text(x=1050, y=150, s='           traffic:', color='grey', size=19)
ax.text(x=1050, y=150, s='Winter', size=19, color='deepskyblue')
ax.text(x=1050, y=138, s='mean shifted left', color='grey', size=19)
ax.vlines(x=1000, ymin=130, ymax=165, color='black', linewidth=1.5)

ax.text(x=5550, y=250, s='              traffic:', color='grey', size=19)
ax.text(x=5550, y=250, s='Summer', size=19, color='orangered')
ax.text(x=5550, y=238, s='mean shifted right', color='grey', size=19)
ax.vlines(x=5500, ymin=230, ymax=265, color='black', linewidth=1.5)

No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.

<matplotlib.collections.LineCollection at 0x7f25978096a0>


#dayofweek numbers the days 0 (Monday) through 6 (Sunday)
day['week_day'] = day['date_time'].dt.dayofweek
#numeric_only=True keeps the text columns out of the mean; relying on the
#default emits a FutureWarning and stops working once the default changes.
by_weekday = day.groupby('week_day').mean(numeric_only=True)
#____________________________________________________________
#main plot
fig, ax = plt.subplots(figsize=(12, 8))
x_axis = by_weekday.index.to_list()
y_axis = by_weekday['traffic_volume'].to_list()
#faint full-week line first, then the Mon-Fri and Sat-Sun stretches on top
ax.plot(x_axis, y_axis, alpha=.1, color='grey')
ax.plot(x_axis[0:5], y_axis[0:5], alpha=.5, color='mediumseagreen')
ax.plot(x_axis[5:], y_axis[5:], alpha=.5, color='orangered')
#____________________________________________________________
#Format grid, ticks, etc
#No ax.legend() here: the lines carry no labels (they are named by the
#coloured on-figure text below), so the call only produced a warning.
ax.tick_params(colors='grey', which='both')
ax.set_xticks([0, 2, 4, 6])
ax.set_xticklabels(['Mon', 'Wed', 'Fri', 'Sun'])
ax.set_yticks([3600, 4100, 4600, 5100])
#___________________________________________________________
#create title,subtitle,signature bar
ax.text(-.75, 5400, 'Traffic is Light on the Weekends', weight='bold', size=26, alpha=.75)
ax.text(-.75, 5300, 'Mean of hourly traffic volume vs. day of week', size=19, alpha=.85)
ax.text(x=-.75, y=3300,
    s='   J. Wilson Peoples                                                                                                                       Source: UCI Machine Learning Repository ',
    fontsize=12, color='#f0f0f0', backgroundcolor='grey')
#_____________________________________________________________
#on figure annotations
ax.text(x=1.5, y=4800, s='Workdays', size=19, color='mediumseagreen')
ax.text(x=4.5, y=3800, s='Weekend', size=19, color='orangered')

/tmp/ipykernel_729/2860564296.py:2: FutureWarning: The default value of numeric_only in DataFrameGroupBy.mean is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.
  by_weekday = day.groupby('week_day').mean()
No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.

Text(4.5, 3800, 'Weekend')


day['hour'] = day['date_time'].dt.hour
# week_day 0-4 is Monday-Friday, 5-6 is Saturday-Sunday.
# Filter first and copy the result rather than copying the whole frame twice.
workdays = day[day['week_day'] <= 4].copy()
weekend = day[day['week_day'] > 4].copy()
# numeric_only=True keeps the text columns out of the mean; relying on the
# default emits a FutureWarning and stops working once the default changes.
by_hour_workday = workdays.groupby('hour').mean(numeric_only=True)
by_hour_weekend = weekend.groupby('hour').mean(numeric_only=True)

/tmp/ipykernel_729/3815138057.py:4: FutureWarning: The default value of numeric_only in DataFrameGroupBy.mean is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.
  by_hour_workday = workdays.groupby('hour').mean()
/tmp/ipykernel_729/3815138057.py:5: FutureWarning: The default value of numeric_only in DataFrameGroupBy.mean is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.
  by_hour_weekend = weekend.groupby('hour').mean()


#______________________________________
#main plot
#Mean traffic volume per hour of day, workdays vs. weekend.
fig, ax = plt.subplots(figsize=(12, 8))
#the workday hours are used as the x axis for both lines
x_axis = by_hour_workday.index.to_list()
y_axis_1 = by_hour_workday['traffic_volume'].to_list()
y_axis_2 = by_hour_weekend['traffic_volume'].to_list()
ax.plot(x_axis, y_axis_1, alpha=.5, color='mediumseagreen')
ax.plot(x_axis, y_axis_2, alpha=.5, color='orangered')
#____________________________________________________________
#Format grid, ticks, etc
ax.set_xticks([10, 13, 16, 19])
ax.set_xticklabels(['10:00am', '1:00pm', '4:00pm', '7:00pm'])
ax.set_yticks([2500, 3750, 5000, 6250])
#No ax.legend() here: the lines carry no labels (they are named by the
#coloured on-figure text below), so the call only produced a warning.
ax.tick_params(colors='grey', which='both')
#___________________________________________________________
#create title,subtitle,signature bar
ax.text(6.75, 7300, 'Rush Hour Traffic: Source of Differing Traffic Volumes', weight='bold', size=26, alpha=.75)
#the comparison is Mon-Fri against Sat-Sun, i.e. workdays vs. weekends
ax.text(6.75, 7050, 'Between Workdays and Weekends', weight='bold', size=26, alpha=.75)
ax.text(6.75, 6800, 'Mean of hourly traffic volume vs. hour of day for Mon - Fri (green) and Sat - Sun (red)', size=19, alpha=.85)
ax.text(x=6.75, y=1300,
    s='   J. Wilson Peoples                                                                                                                       Source: UCI Machine Learning Repository ',
    fontsize=12, color='#f0f0f0', backgroundcolor='grey')
#_____________________________________________________________
#on figure annotations
#vertical markers at 8:00 and 16:00 with "Rush Hour" captions beside them
ax.vlines(x=8, ymin=2370, ymax=5480, color='grey', linewidth=2)
ax.vlines(x=16, ymin=4350, ymax=6200, color='grey', linewidth=2)
ax.text(x=13.5, y=5100, s='Workdays', size=19, color='mediumseagreen', rotation=35)
ax.text(x=10, y=3780, s='Weekend', size=19, color='orangered', rotation=33)
ax.text(x=8.1, y=4200, s='Rush', size=19, color='grey', alpha=.8)
ax.text(x=8.1, y=4020, s='Hour', size=19, color='grey', alpha=.8)
ax.text(x=16.1, y=4820, s='Rush', size=19, color='grey', alpha=.8)
ax.text(x=16.1, y=4680, s='Hour', size=19, color='grey', alpha=.8)

No artists with labels found to put in legend.  Note that artists whose label start with an underscore are ignored when legend() is called with no argument.

Text(16.1, 4680, 'Hour')


# Daytime traffic volume together with every weather-related column.
weather_and_traffic = day.loc[
    :,
    [
        'traffic_volume',
        'temp',
        'rain_1h',
        'snow_1h',
        'clouds_all',
        'weather_main',
        'weather_description',
    ],
]
weather_and_traffic.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 23759 entries, 0 to 48199
Data columns (total 7 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   traffic_volume       23759 non-null  int64  
 1   temp                 23759 non-null  float64
 2   rain_1h              23759 non-null  float64
 3   snow_1h              23759 non-null  float64
 4   clouds_all           23759 non-null  int64  
 5   weather_main         23759 non-null  object 
 6   weather_description  23759 non-null  object 
dtypes: float64(3), int64(2), object(2)
memory usage: 1.5+ MB


# Numeric columns only, then the correlation of each with traffic volume.
weather_numeric = weather_and_traffic.loc[
    :, ['traffic_volume', 'temp', 'rain_1h', 'snow_1h', 'clouds_all']
]
weather_numeric.corr().loc[:, 'traffic_volume']

traffic_volume    1.000000
temp              0.133283
rain_1h           0.005001
snow_1h           0.005186
clouds_all       -0.037828
Name: traffic_volume, dtype: float64


#______________________________________
#main plot
#Scatter of temperature (y) against hourly traffic volume (x).
fig, ax = plt.subplots(figsize=(12, 8))
x_axis = weather_numeric['traffic_volume']
y_axis = weather_numeric['temp']
ax.scatter(x_axis, y_axis, alpha=.5, color='orangered')
#____________________________________________________________
#Format grid, ticks, etc
#readings below 200 K are cut off by the lower y limit
ax.set_ylim(bottom=200)
#Pin the tick positions before labelling them. Labelling auto-generated ticks
#raises "FixedFormatter should only be used together with FixedLocator" and
#makes the label placement depend on whichever ticks the locator picks.
#NOTE(review): 0..7000 in steps of 1000 is assumed to match the ticks the
#auto-locator chose for this data - confirm against the rendered figure.
ax.set_xticks(list(range(0, 8000, 1000)))
ax.set_xticklabels(['     Low Volume'] + [''] * 6 + ['      High Volume'])
ax.set_yticks([220, 240, 260, 280, 300, 320])
ax.set_yticklabels(['220 ', '240 ', '260 ', '280 ', '300 ', '320K'])
ax.tick_params(colors='grey', which='both')
ax.axhline(y=200, color='grey', linewidth=7)
#___________________________________________________________
#create title,subtitle,signature bar
ax.text(-800, 340, 'Temperature and Traffic Volume are Not Correlated', weight='bold', size=26, alpha=.75)
ax.text(-800, 333, 'Temperature (Kelvin) vs. hourly traffic volume', size=19, alpha=.85)
ax.text(x=-800, y=180,
    s='   J. Wilson Peoples                                                                                                                       Source: UCI Machine Learning Repository ',
    fontsize=12, color='#f0f0f0', backgroundcolor='grey')
#_____________________________________________________________
#___________________________________________________________
#_____________________________________________________________

/tmp/ipykernel_729/1535728484.py:10: UserWarning: FixedFormatter should only be used together with FixedLocator
  ax.set_xticklabels(['','     Low Volume','','','','','','','      High Volume'])

Text(-800, 180, '   J. Wilson Peoples                                                                                                                       Source: UCI Machine Learning Repository ')


# Daytime traffic volume alongside the two categorical weather columns.
weather = day.loc[:, ['traffic_volume', 'weather_main', 'weather_description']]
weather.head()


# Mean traffic volume per weather category, at both levels of detail.
by_weather_main, by_weather_description = (
    weather.groupby(column).mean(numeric_only=True)
    for column in ('weather_main', 'weather_description')
)


from wordcloud import WordCloud
# Word cloud of the weather_main categories, sized by mean traffic volume.
# create word frequency dictionary: weather category -> mean traffic volume
word_freq = by_weather_main['traffic_volume'].to_dict()

# Generate a word cloud image
# The weights are handed to WordCloud directly; there is no need to build a
# text that repeats each word thousands of times just to have it re-counted.
wordcloud = WordCloud().generate_from_frequencies(word_freq)
fig, ax = plt.subplots(figsize=(12, 8))
ax.imshow(wordcloud, interpolation='bilinear')
ax.axis('off')

(-0.5, 399.5, 199.5, -0.5)


#keep only the weather descriptions whose text contains "snow"
snow_rows = by_weather_description.index.to_series().str.contains('snow')
plot_data = by_weather_description[snow_rows]
x_axis = plot_data.index.tolist()
y_axis = plot_data['traffic_volume'].tolist()
#____________________________________________________________
#main plot
#horizontal bars: one per snow-related description, length = mean traffic volume
fig, ax = plt.subplots(figsize=(12, 8))
ax.barh(x_axis, y_axis, alpha=.5, color='deepskyblue')
#____________________________________________________________
#Format grid, ticks, etc
ax.grid(visible=False)
ax.tick_params(colors='grey', which='both')
#___________________________________________________________
#create title,subtitle,signature bar
ax.text(-1100, 5.8, 'Light Rain and Snow More Indicative of High Traffic', weight='bold', size=26, alpha=.75)
ax.text(-1100, 5.5, 'Volume Than Heavy Snow', weight='bold', size=26, alpha=.75)
ax.text(-1100, 5.2, 'weather descriptions containing "snow" vs. mean traffic volume', size=19, alpha=.85)
ax.text(
    x=-1100,
    y=-1.5,
    s='   J. Wilson Peoples                                                                                                                                            Source: UCI Machine Learning Repository ',
    fontsize=12,
    color='#f0f0f0',
    backgroundcolor='grey',
)
#___________________________________________________________

Text(-1100, -1.5, '   J. Wilson Peoples                                                                                                                                            Source: UCI Machine Learning Repository ')

Column Name	Description	Data Type
holiday	US National holidays plus regional holiday, Minnesota State Fair	Categorical
temp	Average temp in kelvin	Numeric
rain_1h	Amount in mm of rain that occurred in the hour	Numeric
snow_1h	Amount in mm of snow that occurred in the hour	Numeric
clouds_all	Percentage of cloud cover	Numeric
weather_main	Short textual description of the current weather	Categorical
weather_description	Longer textual description of the current weather	Categorical
date_time	Hour of the data collected in local CST time	DateTime
traffic_volume	Hourly I-94 ATR 301 reported westbound traffic volume	Numeric

	holiday	temp	clouds_all	weather_main	weather_description	date_time	traffic_volume
0	None	288.28	40	Clouds	scattered clouds	2012-10-02 09:00:00	5545
1	None	289.36	75	Clouds	broken clouds	2012-10-02 10:00:00	4516
2	None	289.58	90	Clouds	overcast clouds	2012-10-02 11:00:00	4767
3	None	290.13	90	Clouds	overcast clouds	2012-10-02 12:00:00	5026
4	None	291.14	75	Clouds	broken clouds	2012-10-02 13:00:00	4918

	holiday	temp	clouds_all	weather_main	weather_description	date_time	traffic_volume
48199	None	283.45	75	Clouds	broken clouds	2018-09-30 19:00:00	3543
48200	None	282.76	90	Clouds	overcast clouds	2018-09-30 20:00:00	2781
48201	None	282.73	90	Thunderstorm	proximity thunderstorm	2018-09-30 21:00:00	2159
48202	None	282.09	90	Clouds	overcast clouds	2018-09-30 22:00:00	1450
48203	None	282.12	90	Clouds	overcast clouds	2018-09-30 23:00:00	954

	traffic_volume	weather_main	weather_description
0	5545	Clouds	scattered clouds
1	4516	Clouds	broken clouds
2	4767	Clouds	overcast clouds
3	5026	Clouds	overcast clouds
4	4918	Clouds	broken clouds

Finding Indicators of Heavy Traffic on I-94¶

Summary¶

About the Data¶

Initial Data Exploration¶

Initial Observations¶

Traffic Indicators by Day vs. Night¶

Traffic Indicators by Season¶

Traffic Indicators by Day of Week¶

The Impact of Weather on Traffic¶

Conclusion¶