# Visualize correlations with a heatmap

In [None]:
import numpy as np
import os
import pandas as pd
import matplotlib.pylab as plt

from osdyn.utils.plot.heatmap import heatmap

In [None]:
def corrplot(
    data,
    size_scale=500,
    marker="s",
    size_range=None,
    color_range=None,
):
    """
    Draw a matrix (typically a correlation matrix) through ``heatmap``.

    The matrix is reshaped to long form (one row per cell) and handed to
    ``heatmap``: the cell value drives the marker color and its absolute
    value drives the marker size. NaN cells are replaced by 0.

    Parameters
    ----------
    data : pandas.DataFrame
        Matrix to plot, e.g. the result of ``DataFrame.corr()``. Row labels
        become the x values and column labels the y values.
    size_scale : int, optional
        Forwarded to ``heatmap`` as ``size_scale``. The default is 500.
    marker : str, optional
        Marker forwarded to ``heatmap``. The default is "s".
    size_range : list of 2 int or float, optional
        Values to anchor the sizes. The default (None) means [0, 1].
    color_range : list of 2 int or float, optional
        Values to anchor the color. The default (None) means [-1, 1].

    Returns
    -------
    None
        The plot is produced by ``heatmap``; its return value is discarded.

    """
    # Build the default ranges per call instead of sharing one list object
    # across every invocation (mutable default argument pitfall).
    if size_range is None:
        size_range = [0, 1]
    if color_range is None:
        color_range = [-1, 1]

    # Long-form layout identical to ``pd.melt(data.reset_index(), ...)``:
    # column by column, each column listing every row. Building it directly
    # avoids relying on ``reset_index`` naming its new column "index", which
    # breaks when the index (or the columns axis) is named or when a column
    # is already called "index".
    n_rows, n_cols = data.shape
    corr = pd.DataFrame(
        {
            "x": np.tile(data.index.to_numpy(), n_cols),
            "y": np.repeat(data.columns.to_numpy(), n_rows),
            "value": data.to_numpy().ravel(order="F"),
        }
    ).replace(np.nan, 0)

    # NOTE(review): tick labels for both axes come from ``data.columns``
    # although x holds the row labels -- presumably a square matrix whose
    # index equals its columns is expected; confirm before other uses.
    heatmap(
        corr["x"],
        corr["y"],
        color=corr["value"],
        color_range=color_range,
        palette=plt.get_cmap("seismic_r", lut=256),
        size=corr["value"].abs(),
        size_range=size_range,
        marker=marker,
        x_ticklabel=data.columns,
        y_ticklabel=data.columns[::-1],
        size_scale=size_scale,
    )

In [None]:
# Load the Automobile dataset: a cleaned version of the UCI dataset
# found at http://archive.ics.uci.edu/ml/datasets/automobile
# The file is read from the directory given by the INPUT_DATA_PATH
# environment variable; the online copy can be read instead with:
# data = pd.read_csv("https://raw.githubusercontent.com/drazenz/heatmap/master/autos.clean.csv")
input_dir = os.environ["INPUT_DATA_PATH"]
autos_csv = os.path.join(input_dir, "autos.clean.csv")
data = pd.read_csv(autos_csv)

In [None]:
# Display the pairwise correlation matrix.
# Only numeric/boolean columns are kept: since pandas 2.0 ``corr()`` no
# longer drops non-numeric columns silently and raises on them instead.
# (Presumably the dataset contains categorical text columns -- verify.)
data.select_dtypes(include=["number", "bool"]).corr()

In [None]:
# Plot the correlation matrix on an 8x8 inch figure.
plt.figure(figsize=(8, 8))
# Restrict to numeric/boolean columns first: since pandas 2.0 ``corr()``
# raises on non-numeric columns instead of dropping them silently.
numeric_corr = data.select_dtypes(include=["number", "bool"]).corr()
corrplot(numeric_corr, size_scale=300)