{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Dataset from single or multiple NetCDF files"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Note: a shorter way to read the files is available for each class of models."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "from osdyn.config import get_config_value\n",
    "from osdyn.utils.data.mxarray import get_dataset, tzyx2index, auto_merge\n",
    "from osdyn.utils.data.io import list_files"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## mars outputs"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Concat over time"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Get the list of files according to the period\n",
    "path = get_config_value(\"osdyn.grcm.mars\", \"path_v9_6\")\n",
    "pattern_file = get_config_value(\"osdyn.grcm.mars\", \"pattern_file_v9_6\")\n",
    "pattern_date = (\"2013-01-30\",\"2013-02-01 10:00:00\")  # plot super long\n",
    "#pattern_date = (\"2013-01-30\", \"2013-01-30 10:00:00\")\n",
    "infiles = list_files(os.path.join(path, pattern_file), pattern_date)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Get XE and TEMP and concatenate along time\n",
    "out = get_dataset(infiles, varnames=['XE','TEMP'], gather_unique_dim='time')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "fig = out.XE[:,100,100].plot()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**TODO**: this is very slow; see how it can be improved."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Concat over time and select a subdomain "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Get the selection\n",
    "tzyx = tzyx2index(infiles[0], 'TEMP', lons=(6.,6.), lats=(43.,43.))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "tzyx"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read the files\n",
    "out = get_dataset(infiles, varnames=['XE','TEMP'], subdomain=tzyx, decode_times=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "out"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## NEMO outputs"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Concat variables (from different files) over time, and add the grid file"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Get the list of files for the period stored in the configuration\n",
    "path = get_config_value(\"osdyn.grcm.nemo\", \"path_medrys1v1\")\n",
    "pattern_file = get_config_value(\"osdyn.grcm.nemo\", \"pattern_medrys1v1\")\n",
    "# NOTE(review): eval() evaluates the configured value as a Python expression,\n",
    "# so the configuration file must be trusted; ast.literal_eval may be a safer\n",
    "# choice if the value is a plain literal - to be confirmed.\n",
    "pattern_date = eval(get_config_value(\"osdyn.grcm.nemo\", \"period_medrys1v1\"))\n",
    "infiles = list_files(os.path.join(path, pattern_file), pattern_date)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read and gather the files into a unique one\n",
    "out = get_dataset(infiles[:], decode_times=False, gather_grid=['grid2D','gridS','gridT','gridU','gridV'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "out"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## AROME (previmer format)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Get the list of files for the period stored in the configuration\n",
    "path = get_config_value(\"osdyn.grcm.arome\", \"path_previ\")\n",
    "pattern_file = get_config_value(\"osdyn.grcm.arome\", \"pattern_previ\")\n",
    "# NOTE(review): eval() evaluates the configured value as a Python expression,\n",
    "# so the configuration file must be trusted; ast.literal_eval may be a safer\n",
    "# choice if the value is a plain literal - to be confirmed.\n",
    "pattern_date = eval(get_config_value(\"osdyn.grcm.arome\", \"period_previ\"))\n",
    "infiles = list_files(os.path.join(path, pattern_file), pattern_date)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Select a subdomain\n",
    "tzyx = tzyx2index(infiles[0], 'eau', lons=(2.5,3.5), lats=(42.,43.))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read and gather the files into a unique one\n",
    "out = get_dataset(infiles, varnames=['eau'], subdomain=tzyx, decode_times=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "out"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## MesoNH"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "More complicated, because time is not an axis in these files: a time record has to be added to each file before the concatenation."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Get the list of files\n",
    "path = get_config_value(\"osdyn.grcm.mesonh\", \"path_v5_3\")\n",
    "pattern_file = get_config_value(\"osdyn.grcm.mesonh\",\n",
    "                                \"pattern_obc_v5_3\")\n",
    "# Hard-coded period, shorter than the one stored under \"period_v5_3\" in the\n",
    "# configuration (which is therefore not read here).\n",
    "pattern_date = (\"2011-09-02 15:00:00\", \"2011-09-04 00:00:00\")\n",
    "infiles = list_files(os.path.join(path, pattern_file), pattern_date)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create the process to apply to each profile\n",
    "from osdyn.grcm.mesonh import get_datetime\n",
    "def userprocess(dsu):\n",
    "    \"\"\"\n",
    "    Add a time record to one file and keep only the variables of interest.\n",
    "\n",
    "    `xarray.open_mfdataset` calls this function through\n",
    "    `preprocess=userprocess`, so that it is applied to each dataset prior\n",
    "    to the concatenation.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    dsu : xarray.Dataset\n",
    "        One of the files; its `DTCUR__TDATE` and `DTCUR__TIME` entries are\n",
    "        passed to `get_datetime` to build the time record.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    xarray.Dataset\n",
    "        A dataset restricted to `UT` and to the newly assigned `time`.\n",
    "\n",
    "    \"\"\"\n",
    "    stamp = get_datetime(dsu.DTCUR__TDATE, dsu.DTCUR__TIME)\n",
    "    with_time = dsu.assign(time=stamp)\n",
    "    return with_time[['UT', 'time']]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Gather MesoNH variables along time\n",
    "mnh = get_dataset(infiles, userprocess=userprocess, decode_times=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "mnh"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# grid of MesoNH file\n",
    "import xarray as xr\n",
    "gridvars = ['LON0','LAT0', 'BETA', 'JPHEXT', 'XHAT', 'YHAT', 'ZHAT', \n",
    "            'LAT', 'LON', 'ZS','ZSMT']\n",
    "mnh_grid = xr.open_dataset(infiles[0])[gridvars]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Add the grid into MesoNH file\n",
    "mnh = mnh.merge(mnh_grid)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "mnh"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Basic files\n",
    "From database organisation point of view"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Automatically merge a split xarray Dataset. This is designed to behave like `xarray.open_mfdataset`, except it supports concatenation along multiple dimensions."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Arpege (Previmer format)\n",
    "path = get_config_value(\"osdyn.grcm.arpegehr\", \"path_previ\")\n",
    "pattern_file = get_config_value(\"osdyn.grcm.arpegehr\", \"pattern_previ\")\n",
    "# NOTE(review): eval() evaluates the configured value as a Python expression,\n",
    "# so the configuration file must be trusted; ast.literal_eval may be a safer\n",
    "# choice if the value is a plain literal - to be confirmed.\n",
    "pattern_date = eval(get_config_value(\"osdyn.grcm.arpegehr\", \"period_previ\"))\n",
    "infiles = list_files(os.path.join(path, pattern_file), pattern_date)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "auto_merge(infiles)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    ".. note:: simplest way\n",
    "Very useful when the files are split by period and when all the variables depend on time and are available in each file."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    ".. warning:: DataArrays are extended along the concatenation dimension.\n",
    "See the dx variables below, for instance."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Get the list of files according to the period\n",
    "path = get_config_value(\"osdyn.grcm.mars\", \"path_v9_6\")\n",
    "pattern_file = get_config_value(\"osdyn.grcm.mars\", \"pattern_file_v9_6\")\n",
    "pattern_date = (\"2013-01-30\",\"2013-01-30 03:00:00\")  # plot super long\n",
    "#pattern_date = (\"2013-01-30\", \"2013-01-30 10:00:00\")\n",
    "infiles = list_files(os.path.join(path, pattern_file), pattern_date)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "infiles"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "auto = auto_merge(infiles, decode_times=False)\n",
    "auto.attrs = {}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "auto"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    ".. note:: `auto_merge` is equivalent to `get_dataset` when `gather_unique_dim=None`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ds = get_dataset(infiles, gather_unique_dim=None)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "auto.time"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ds.time"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "xr.testing.assert_identical(ds, auto)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.8"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}