2.8 Visualisation (Python)

2.8.1 Sankey diagram

import plotly.graph_objects as go
# Directory holding one ``<column>_credit.npz`` archive per trip attribute.
_DATASET_DIR = 'D:/DS/0_ASS1/dataset'


def _load_npz_column(column):
    """Return the ``'data'`` array stored in ``<column>_credit.npz``.

    ``np.load`` on an ``.npz`` archive returns an ``NpzFile`` that keeps the
    underlying file open until it is closed, so the archive is opened in a
    ``with`` block and the handle is released once the array has been read.
    """
    with np.load(f'{_DATASET_DIR}/{column}_credit.npz') as archive:
        return archive['data']


# Column order matters: the Sankey aggregation further down slices the
# grouped frame by position and expects PULocationID to come first.
df = pd.DataFrame({
    column: _load_npz_column(column)
    for column in ('PULocationID', 'DOLocationID', 'total_amount', 'pickup_hour')
})
df = df.astype({'PULocationID': int, 'DOLocationID': int, 'pickup_hour': int})
# Fixed seed keeps the 10,000-trip sample reproducible between runs.
df = df.sample(10000, random_state=26)

# Create area mapping: each row of the two-column `manhattan` frame becomes
# one key -> value entry, which is then used to translate location IDs.
mapping = dict(manhattan.to_dict('split')['data'])
for zone_col, id_col in (('PUZone', 'PULocationID'), ('DOZone', 'DOLocationID')):
    # IDs that are absent from the lookup are grouped under 'Other'.
    df[zone_col] = df[id_col].map(mapping).fillna('Other')

def hex_to_rgb(hex_color, opacity):
    """Convert a hex colour string into an ``(r, g, b, opacity)`` tuple.

    Args:
        hex_color: Colour written as ``'RRGGBB'`` or shorthand ``'RGB'``,
            with or without a leading ``'#'``.
        opacity: Alpha value, passed through unchanged as the fourth element.

    Returns:
        A 4-tuple: three ints in the range 0-255 followed by ``opacity``.

    Raises:
        ValueError: If a colour component is missing or is not hexadecimal.
    """
    hex_color = hex_color.lstrip("#")
    if len(hex_color) == 3:
        # Shorthand 'abc' means 'aabbcc': every digit is doubled in place.
        # Repeating the whole string ('abcabc') would give a different colour.
        hex_color = "".join(digit * 2 for digit in hex_color)
    red, green, blue = (int(hex_color[i:i + 2], 16) for i in (0, 2, 4))
    return red, green, blue, opacity

# Sankey node index for every zone group.
area_mapping = {
    zone: node_id
    for node_id, zone in enumerate(
        ['Downtown', 'Midtown', 'Uptown', 'JFK', 'LGA', 'Other'])
}
# Hex colour (without the leading '#') for each node index.
color_mapping = dict(enumerate(
    ['1380A1', '588300', '1ce4c1', 'FAAB18', '990000', 'bf55fb']))

# Count rows per (pickup zone, drop-off zone) pair.  After reset_index the two
# group keys come first, so the positional slice keeps them plus the first
# count column (PULocationID), which therefore holds the number of trips.
pair_counts = df.groupby(['PUZone', 'DOZone']).count().reset_index()
data = pair_counts.iloc[:, :3]
for zone_col in ('PUZone', 'DOZone'):
    data[zone_col] = data[zone_col].map(area_mapping)
# Links take the colour of their pickup node, at 50% opacity.
data['Color'] = data['PUZone'].map(color_mapping).apply(
    lambda hex_code: f'rgba{hex_to_rgb(hex_code, 0.5)}')
data.head()

# Plot: one node per zone group, one link per (pickup, drop-off) pair.
node_style = dict(
    pad=15,
    thickness=20,
    line={'width': 0},
    label=list(area_mapping),
    color=[f'#{hex_code}' for hex_code in color_mapping.values()],
)
link_style = dict(
    source=data['PUZone'],       # node index the link starts from
    target=data['DOZone'],       # node index the link ends at
    value=data['PULocationID'],  # link width
    color=data['Color'],
)
fig = go.Figure(data=[go.Sankey(node=node_style, link=link_style)])

fig.show()

2.8.2 Profitability

fig, ax = plt.subplots(figsize=(20, 20))

# Reserve a thin strip underneath the map for the horizontal colour bar.
cax = make_axes_locatable(ax).append_axes("bottom", size="1%", pad=0.1)

choropleth_style = dict(
    column='log_profitability',
    cmap=Matter_20.mpl_colormap,
    alpha=0.9,
    edgecolor=None,
    legend=True,
    legend_kwds={'orientation': "horizontal"},
)
ax = Zones.plot(ax=ax, cax=cax, **choropleth_style)
# Draw the CartoDB Positron tiles behind the zone polygons.
ctx.add_basemap(ax, source=ctx.providers.CartoDB.Positron)

ax.set_axis_off()
ax.set_title('Average log profitability ($) per taxi zone in 2019', fontsize=24)

2.8.3 Airport stripe

from palettable.cmocean.sequential import Matter_19
from sklearn.preprocessing import minmax_scale

# Give every row a weight of 1 so that the grouped sum of this column is the
# number of rows per (drop-off zone, pickup hour) pair.
lga_pu['num_of_trips'] = 1
agg = lga_pu.groupby(['DOLocationID', 'pickup_hour']).sum().reset_index()

########## CREATING AGGREGATED DF
# Collect every (zone, hour) pair in 0-23 that has no row yet, so a zero-trip
# row can be added for it.
missing_pairs = []
for zone_id in agg['DOLocationID'].unique():
    seen_hours = agg.loc[agg['DOLocationID'] == zone_id, 'pickup_hour'].unique()
    if len(seen_hours) == 24:
        continue  # this zone already has 24 distinct hours
    missing_pairs.extend(
        (zone_id, hour) for hour in range(24) if hour not in seen_hours)

to_append = {
    'DOLocationID': [zone_id for zone_id, _ in missing_pairs],
    'pickup_hour': [hour for _, hour in missing_pairs],
    'num_of_trips': [0] * len(missing_pairs),
}
agg = pd.concat([agg, pd.DataFrame(to_append)], ignore_index=True)
agg = agg.sort_values(by=['DOLocationID', 'pickup_hour']).reset_index(drop=True)

######## CREATING HEATMAP
# Trip-count matrix: one row per drop-off zone, one column per pickup hour.
# factorize numbers the zones in order of first appearance, which is the same
# order agg['DOLocationID'].unique() yields.
zone_codes, zone_ids = pd.factorize(agg['DOLocationID'])
hour_idx = agg['pickup_hour'].astype(int).to_numpy()

# Start from zeros rather than np.ndarray(shape): the latter returns
# uninitialised memory, so any cell not written below would hold garbage.
heatmap = np.zeros((len(zone_ids), len(agg['pickup_hour'].unique())))
# Address each row by its zone instead of by ``row position // 24``, which
# silently shifts rows whenever a zone does not have exactly 24 entries.
heatmap[zone_codes, hour_idx] = agg['num_of_trips'].to_numpy()
# minmax_scale defaults to axis=0, so each hour column is rescaled to [0, 1]
# independently across zones.
heatmap = minmax_scale(heatmap)

####### PLOT
# Tick labels: the distinct zone IDs and hours, shown as integers.
zone_labels = agg['DOLocationID'].astype(int).unique()
hour_labels = agg['pickup_hour'].astype(int).unique()

fig = plt.figure(figsize=(60, 20))
ax = fig.add_subplot(111)
# Transposed so that zones run along x and hours along y.
plt.imshow(heatmap.T, cmap=Matter_19.mpl_colormap)
ax.set_aspect('equal')

# One tick per zone / hour, each labelled with its own value.
ax.set_xticks(np.arange(len(agg['DOLocationID'].unique())))
ax.set_yticks(np.arange(len(agg['pickup_hour'].unique())))
ax.set_xticklabels(zone_labels)
ax.set_yticklabels(hour_labels)
ax.grid(False)

# Turn the zone labels vertical, anchored at their right end.
plt.setp(ax.get_xticklabels(), rotation=90, ha="right", rotation_mode="anchor")

plt.show()