2.8 Visualisation (Python)
2.8.1 Sankey diagram
import plotly.graph_objects as go
# Load the saved `*_credit` trip columns (each .npz archive keeps its values
# under the 'data' key) and assemble them into a single frame.
credit_files = {
    'PULocationID': 'D:/DS/0_ASS1/dataset/PULocationID_credit.npz',
    'DOLocationID': 'D:/DS/0_ASS1/dataset/DOLocationID_credit.npz',
    'total_amount': 'D:/DS/0_ASS1/dataset/total_amount_credit.npz',
    'pickup_hour': 'D:/DS/0_ASS1/dataset/pickup_hour_credit.npz',
}
df = pd.DataFrame({column: np.load(path)['data']
                   for column, path in credit_files.items()})

# Location IDs and the pickup hour are cast to integers; total_amount is left as loaded.
df = df.astype({'PULocationID': int, 'DOLocationID': int, 'pickup_hour': int})

# Work on a reproducible 10,000-row sample rather than the full frame.
df = df.sample(n=10000, random_state=26)

# Create area mapping: first column of `manhattan` -> second column.
# NOTE(review): assumes `manhattan` has exactly two columns
# (location ID, area name) - confirm against where it is built.
mapping = dict(manhattan.to_dict('split')['data'])

# Any location ID without an entry in the mapping is labelled 'Other'.
for zone_col, id_col in (('PUZone', 'PULocationID'), ('DOZone', 'DOLocationID')):
    df[zone_col] = df[id_col].map(mapping).fillna('Other')
def hex_to_rgb(hex_color, opacity):
    """Convert a hex colour string into an ``(r, g, b, opacity)`` tuple.

    Args:
        hex_color: Colour as 6 hex digits ("1380A1") or 3-digit shorthand
            ("abc"), with or without leading "#" characters.
        opacity: Value passed through unchanged as the fourth element.

    Returns:
        A 4-tuple ``(red, green, blue, opacity)`` where the three colour
        channels are ints parsed from consecutive pairs of hex digits.

    Raises:
        ValueError: If a channel cannot be parsed as hexadecimal (for
            example when the string has too few digits).
    """
    hex_color = hex_color.lstrip("#")
    if len(hex_color) == 3:
        # Shorthand "abc" stands for "aabbcc": every digit is doubled in
        # place. Repeating the whole string ("abcabc") gives a different
        # colour.
        hex_color = "".join(digit * 2 for digit in hex_color)
    red, green, blue = (int(hex_color[i:i + 2], 16) for i in (0, 2, 4))
    return red, green, blue, opacity
# Area name -> Sankey node index, numbered in the order listed here.
area_mapping = {
    area: index
    for index, area in enumerate(
        ['Downtown', 'Midtown', 'Uptown', 'JFK', 'LGA', 'Other'])
}

# Node index -> hex colour (no leading '#') for that area.
color_mapping = dict(enumerate(
    ['1380A1', '588300', '1ce4c1', 'FAAB18', '990000', 'bf55fb']))

# Count rows per (pickup area, drop-off area) pair. Only the first three
# columns are kept, so the third column ('PULocationID') holds the count.
# Kept as one chained expression so no intermediate frame stays referenced.
data = (
    df.groupby(['PUZone', 'DOZone'])
    .count()
    .reset_index()
    .iloc[:, :3]
)

# Swap area names for their node indices.
for zone_col in ('PUZone', 'DOZone'):
    data[zone_col] = data[zone_col].map(area_mapping)

# Each link takes the colour of its pickup area at 50% opacity, formatted
# as an 'rgba(r, g, b, a)' string.
data['Color'] = data['PUZone'].map(color_mapping).apply(
    lambda hex_code: 'rgba' + str(hex_to_rgb(hex_code, 0.5)))
data.head()
# Plot
# Nodes: one per area, labelled and coloured in area_mapping/color_mapping order.
node_style = {
    'pad': 15,
    'thickness': 20,
    'line': {'width': 0},
    'label': list(area_mapping.keys()),
    'color': ['#' + str(hex_code) for hex_code in color_mapping.values()],
}

# Links: source/target indices correspond to positions in the label list,
# eg A1, A2, A2, B1, ...; 'PULocationID' carries the per-pair count
# produced by the groupby that built `data`.
link_style = {
    'source': data['PUZone'],
    'target': data['DOZone'],
    'value': data['PULocationID'],
    'color': data['Color'],
}

fig = go.Figure(data=[go.Sankey(node=node_style, link=link_style)])
fig.show()
2.8.2 Profitability
# Map of the 'log_profitability' column of `Zones`, drawn over a basemap.
# NOTE(review): `Zones` is presumably a GeoDataFrame of taxi zones - confirm.
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(20, 20))

# Reserve a thin strip below the map for the horizontal colour bar.
divider = make_axes_locatable(ax)
cax = divider.append_axes("bottom", size="1%", pad=0.1)

ax = Zones.plot(
    column='log_profitability',
    cmap=Matter_20.mpl_colormap,
    alpha=0.9,
    edgecolor=None,
    ax=ax,
    cax=cax,
    legend=True,
    legend_kwds={'orientation': "horizontal"},
)

# Background tiles go underneath the coloured zones.
ctx.add_basemap(ax, source=ctx.providers.CartoDB.Positron)
ax.set_title(
    'Average log profitability ($) per taxi zone in 2019',
    fontsize=24,
)
ax.set_axis_off()
2.8.3 Airport stripe
from palettable.cmocean.sequential import Matter_19
from sklearn.preprocessing import minmax_scale
# Tag every row as one trip so that summing per group yields a trip count.
lga_pu['num_of_trips'] = 1
agg = (
    lga_pu
    .groupby(['DOLocationID', 'pickup_hour'])
    .sum()
    .reset_index()
)

########## CREATING AGGREGATED DF
# Collect every (drop-off zone, hour) pair that has no rows, so each zone
# ends up with an entry for all 24 hours.
missing_pairs = []
for zone in agg['DOLocationID'].unique():
    hours_present = agg.loc[agg['DOLocationID'] == zone, 'pickup_hour'].unique()
    if len(hours_present) == 24:
        continue  # already has 24 distinct hours, nothing to pad
    missing_pairs.extend(
        (zone, hour) for hour in range(24) if hour not in hours_present)

# Padding rows carry zero trips.
to_append = {
    'DOLocationID': [zone for zone, _ in missing_pairs],
    'pickup_hour': [hour for _, hour in missing_pairs],
    'num_of_trips': [0] * len(missing_pairs),
}

# Append the padding, then order by zone and hour with a fresh 0..n-1 index.
agg = pd.concat([agg, pd.DataFrame(to_append)], ignore_index=True)
agg = (
    agg
    .sort_values(by=['DOLocationID', 'pickup_hour'])
    .reset_index(drop=True)
)
######## CREATING HEATMAP
# Matrix with one row per drop-off zone and one column per pickup hour.
# Start from zeros: np.ndarray(shape) returns uninitialised memory, so any
# cell the fill below did not reach would hold arbitrary values.
heatmap = np.zeros((len(agg['DOLocationID'].unique()),
                    len(agg['pickup_hour'].unique())))

# agg is sorted by (DOLocationID, pickup_hour) and padded to 24 rows per
# zone, so integer-dividing a row's position by 24 gives its zone index.
zone_rows = np.arange(agg.shape[0]) // 24
hour_cols = agg['pickup_hour'].to_numpy().astype(int)

# Fill all cells in one indexed assignment instead of a per-row loop.
heatmap[zone_rows, hour_cols] = agg['num_of_trips'].to_numpy()

# Rescale to [0, 1]. With its default axis, minmax_scale treats each column
# (here: each pickup hour) independently.
heatmap = minmax_scale(heatmap)
####### PLOT
# Tick positions and labels: drop-off zones along x, pickup hours along y.
zone_labels = agg['DOLocationID'].astype(int).unique()
hour_labels = agg['pickup_hour'].astype(int).unique()
zone_count = len(agg['DOLocationID'].unique())
hour_count = len(agg['pickup_hour'].unique())

fig = plt.figure(figsize=(60, 20))
ax = fig.add_subplot(111)

# Transpose so that zones run horizontally and hours vertically.
plt.imshow(heatmap.T, cmap=Matter_19.mpl_colormap)
ax.set_aspect('equal')

# Show every tick and label it with the matching zone ID / hour.
ax.set_xticks(np.arange(zone_count))
ax.set_xticklabels(zone_labels)
ax.set_yticks(np.arange(hour_count))
ax.set_yticklabels(hour_labels)
ax.grid(False)

# Rotate the zone labels to vertical and anchor them at their right end.
plt.setp(
    ax.get_xticklabels(),
    rotation=90,
    ha="right",
    rotation_mode="anchor",
)
plt.show()