2.8 Visualisation (Python)
2.8.1 Sankey diagram
import plotly.graph_objects as go
# Assemble the trip table from the per-column `*_credit.npz` archives; each
# archive stores its values under the 'data' key.
df = pd.DataFrame({
    'PULocationID': np.load('D:/DS/0_ASS1/dataset/PULocationID_credit.npz')['data'],
    'DOLocationID': np.load('D:/DS/0_ASS1/dataset/DOLocationID_credit.npz')['data'],
    'total_amount': np.load('D:/DS/0_ASS1/dataset/total_amount_credit.npz')['data'],
    'pickup_hour': np.load('D:/DS/0_ASS1/dataset/pickup_hour_credit.npz')['data'],
})
# Cast the location IDs and the pickup hour to integers.
df['PULocationID'] = df['PULocationID'].astype(int)
df['DOLocationID'] = df['DOLocationID'].astype(int)
df['pickup_hour'] = df['pickup_hour'].astype(int)
# Work on a fixed-seed sample of 10,000 rows so the figure is reproducible.
df = df.sample(10000, random_state=26)

# Create area mapping
# `manhattan` is defined outside this section; each of its rows is unpacked as
# a (key, value) pair -- presumably (location ID, zone name); TODO confirm.
mapping = manhattan.to_dict('split')['data']
mapping = {k: v for k, v in mapping}
# Location IDs that are absent from the mapping are labelled 'Other'.
df['PUZone'] = df['PULocationID'].map(mapping).fillna('Other')
df['DOZone'] = df['DOLocationID'].map(mapping).fillna('Other')
def hex_to_rgb(hex_color, opacity):
    """Convert a hex colour string into an ``(r, g, b, opacity)`` tuple.

    Args:
        hex_color: Colour as ``"RRGGBB"`` or shorthand ``"RGB"``, with or
            without a leading ``#``.
        opacity: Value passed through unchanged as the fourth tuple element.

    Returns:
        A 4-tuple of three ints (parsed base-16 channel values) and
        ``opacity``.

    Raises:
        ValueError: If a channel substring is not valid hexadecimal.
    """
    hex_color = hex_color.lstrip("#")
    if len(hex_color) == 3:
        # Shorthand "abc" means "aabbcc": double each digit, not the whole
        # string (which would give "abcabc" and the wrong channels).
        hex_color = "".join(digit * 2 for digit in hex_color)
    return (
        int(hex_color[0:2], 16),
        int(hex_color[2:4], 16),
        int(hex_color[4:6], 16),
        opacity,
    )
# Zone name -> node index used by the Sankey diagram.
area_mapping = {'Downtown': 0,
                'Midtown': 1,
                'Uptown': 2,
                'JFK': 3,
                'LGA': 4,
                'Other': 5}
# Node index -> hex colour code (without the leading '#').
color_mapping = {0: '1380A1',
                 1: '588300',
                 2: '1ce4c1',
                 3: 'FAAB18',
                 4: '990000',
                 5: 'bf55fb'}
# Count rows per (PUZone, DOZone) pair. After reset_index() the first three
# columns are the two group keys plus the first counted column, so that third
# column holds the per-pair row count.
data = df.groupby(['PUZone', 'DOZone']).count().reset_index().iloc[:, :3]
# Replace zone names with their node indices.
data['PUZone'] = data['PUZone'].map(area_mapping)
data['DOZone'] = data['DOZone'].map(area_mapping)
# Colour each link after its PUZone node, as an 'rgba(r, g, b, 0.5)' string.
data['Color'] = data['PUZone'].map(color_mapping).apply(lambda x: 'rgba'+str(hex_to_rgb(x, 0.5)))

data.head()
# Plot
# Build the Sankey figure: one node per entry of `area_mapping`, one link per
# row of `data`.
fig = go.Figure(data=[go.Sankey(
    node=dict(
        pad=15,
        thickness=20,
        line=dict(width=0),
        label=list(area_mapping.keys()),
        color=['#' + str(x) for x in color_mapping.values()],
    ),
    link=dict(
        source=data['PUZone'],  # indices correspond to labels, eg A1, A2, A2, B1, ...
        target=data['DOZone'],
        # Used as the link weight; this column is read from `data` as built
        # by the groupby/count step.
        value=data['PULocationID'],
        color=data['Color'],
    ))])
fig.show()
2.8.2 Profitability
# Plot `Zones` coloured by its 'log_profitability' column.
fig, ax = plt.subplots(1, 1, figsize=(20, 20))

# Reserve a thin strip below the map for the horizontal colour bar.
cax = make_axes_locatable(ax).append_axes("bottom", size="1%", pad=0.1)
ax = Zones.plot(alpha=0.9, ax=ax,
                edgecolor=None, column='log_profitability', cmap=Matter_20.mpl_colormap,
                cax=cax, legend=True, legend_kwds={'orientation': "horizontal"})
# Draw the CartoDB Positron basemap on the same axes.
ctx.add_basemap(ax, source=ctx.providers.CartoDB.Positron)
ax.set_title('Average log profitability ($) per taxi zone in 2019', fontsize=24)
ax.set_axis_off()
2.8.3 Airport stripe
from palettable.cmocean.sequential import Matter_19
from sklearn.preprocessing import minmax_scale
# Tag every row of `lga_pu` with 1 so that summing per group yields the number
# of rows for each (DOLocationID, pickup_hour) pair.
lga_pu['num_of_trips'] = 1
agg = lga_pu.groupby(['DOLocationID', 'pickup_hour']).sum().reset_index()

########## CREATING AGGREGATED DF
# Collect a zero-trip row for every hour 0-23 that a DOLocationID is missing,
# so each DOLocationID ends up with one row per hour.
to_append = {'DOLocationID': [],
             'pickup_hour': [],
             'num_of_trips': []}
for i in agg['DOLocationID'].unique():
    avail = agg[agg['DOLocationID'] == i]['pickup_hour'].unique()
    if len(avail) != 24:
        for j in range(24):
            if j not in avail:
                to_append['DOLocationID'].append(i)
                to_append['pickup_hour'].append(j)
                to_append['num_of_trips'].append(0)
# Append the filler rows once, after the scan, then order by zone and hour.
agg = pd.concat([agg, pd.DataFrame(to_append)], ignore_index=True)
agg = agg.sort_values(by=['DOLocationID', 'pickup_hour']).reset_index(drop=True)
######## CREATING HEATMAP
# One row per DOLocationID, one column per pickup hour. Start from zeros
# rather than an uninitialised np.ndarray so that any cell the loop does not
# write holds a defined value.
heatmap = np.zeros((len(agg['DOLocationID'].unique()),
                    len(agg['pickup_hour'].unique())))
for n in range(agg.shape[0]):
    row = agg.iloc[n, :]
    # `agg` is read in row order and each block of 24 consecutive rows is
    # treated as one DOLocationID.
    i = n // 24
    heatmap[i, int(row['pickup_hour'])] = row['num_of_trips']
# Rescale to [0, 1]; with its default axis=0, minmax_scale scales each column
# (pickup hour) independently.
heatmap = minmax_scale(heatmap)
####### PLOT
fig = plt.figure(figsize=(60, 20))
ax = fig.add_subplot(111)
# Transpose so DOLocationID runs along the x-axis and pickup hour along the y-axis.
plt.imshow(heatmap.T, cmap=Matter_19.mpl_colormap)
ax.set_aspect('equal')

# We want to show all ticks...
ax.set_xticks(np.arange(len(agg['DOLocationID'].unique())))
ax.set_yticks(np.arange(len(agg['pickup_hour'].unique())))
# ... and label them with the respective list entries
ax.set_xticklabels(agg['DOLocationID'].astype(int).unique())
ax.set_yticklabels(agg['pickup_hour'].astype(int).unique())
ax.grid(False)

# Rotate the tick labels and set their alignment.
plt.setp(ax.get_xticklabels(), rotation=90, ha="right",
         rotation_mode="anchor")

plt.show()