{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Visualize correlation with a heatmap"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "import matplotlib.pylab as plt\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "\n",
    "from osdyn.utils.plot.heatmap import heatmap"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def corrplot(\n",
    "    data,\n",
    "    size_scale=500,\n",
    "    marker=\"s\",\n",
    "    size_range=[0, 1],\n",
    "    color_range=[-1, 1],\n",
    "):\n",
    "    \"\"\"\n",
    "    Plot a matrix of values as a heatmap of sized, colored markers.\n",
    "\n",
    "    Dedicated to correlation plots but can be used for other purposes.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    data : pandas.DataFrame\n",
    "        Matrix of values to plot, e.g. the result of ``DataFrame.corr()``.\n",
    "        Index labels become the x values and column labels the y values.\n",
    "        NaN values are replaced by 0 before plotting.\n",
    "    size_scale : int, optional\n",
    "        Scaling factor forwarded to ``heatmap`` as ``size_scale``.\n",
    "        The default is 500.\n",
    "    marker : str, optional\n",
    "        Marker style forwarded to ``heatmap``. The default is \"s\".\n",
    "    size_range : list of 2 int or float, optional\n",
    "        Values to anchor the sizes. The default is [0, 1].\n",
    "    color_range : list of 2 int or float, optional\n",
    "        Values to anchor the color. The default is [-1, 1].\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    None\n",
    "        The plot is produced by the call to ``heatmap``.\n",
    "\n",
    "    \"\"\"\n",
    "    # Name the index axis \"index\" so that reset_index() always yields the\n",
    "    # \"index\" column melt() relies on, even when data.index carries a name.\n",
    "    long_form = pd.melt(\n",
    "        data.rename_axis(index=\"index\").reset_index(), id_vars=\"index\"\n",
    "    ).replace(np.nan, 0)\n",
    "    long_form.columns = [\"x\", \"y\", \"value\"]\n",
    "    # NOTE(review): x values come from data.index while x_ticklabel uses\n",
    "    # data.columns; both coincide for a square correlation matrix.\n",
    "    heatmap(\n",
    "        long_form[\"x\"],\n",
    "        long_form[\"y\"],\n",
    "        color=long_form[\"value\"],\n",
    "        color_range=color_range,\n",
    "        palette=plt.get_cmap(\"seismic_r\", lut=256),\n",
    "        # Marker size encodes the magnitude, color encodes the signed value.\n",
    "        size=long_form[\"value\"].abs(),\n",
    "        size_range=size_range,\n",
    "        marker=marker,\n",
    "        x_ticklabel=data.columns,\n",
    "        y_ticklabel=data.columns[::-1],\n",
    "        size_scale=size_scale,\n",
    "    )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the Automobile dataset.\n",
    "# This is a cleaned version of the UCI dataset\n",
    "# found at http://archive.ics.uci.edu/ml/datasets/automobile\n",
    "# The cleaned file is also published at\n",
    "# https://raw.githubusercontent.com/drazenz/heatmap/master/autos.clean.csv\n",
    "data = pd.read_csv(os.path.join(os.environ[\"INPUT_DATA_PATH\"], \"autos.clean.csv\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Restrict to numeric columns explicitly: recent pandas versions raise in\n",
    "# corr() instead of silently dropping non-numeric columns.\n",
    "corr_matrix = data.select_dtypes(include=\"number\").corr()\n",
    "corr_matrix"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "tags": [
     "nbsphinx-thumbnail"
    ]
   },
   "outputs": [],
   "source": [
    "plt.figure(figsize=(8, 8))\n",
    "corrplot(corr_matrix, size_scale=300)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.7"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}