{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Dataset from single or multiple NetCDF files" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Note: a shorter way to read the files is available for each class of models." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import os\n", "from osdyn.config import get_config_value\n", "from osdyn.utils.data.mxarray import get_dataset, tzyx2index, auto_merge\n", "from osdyn.utils.data.io import list_files" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## MARS outputs" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Concat over time" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Get the list of files according to the period\n", "path = get_config_value(\"osdyn.grcm.mars\", \"path_v9_6\")\n", "pattern_file = get_config_value(\"osdyn.grcm.mars\", \"pattern_file_v9_6\")\n", "pattern_date = (\"2013-01-30\",\"2013-02-01 10:00:00\") # the plot below is very slow with this period\n", "#pattern_date = (\"2013-01-30\", \"2013-01-30 10:00:00\")\n", "infiles = list_files(os.path.join(path, pattern_file), pattern_date)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Get XE and TEMP and concatenate along time\n", "out = get_dataset(infiles, varnames=['XE','TEMP'], gather_unique_dim='time')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "fig = out.XE[:,100,100].plot()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Very slow; TODO: see how this can be improved." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Concat over time and select a subdomain" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Get the selection\n", "tzyx = tzyx2index(infiles[0], 'TEMP', lons=(6.,6.), lats=(43.,43.))" ] }, { "cell_type": "code", 
"execution_count": null, "metadata": {}, "outputs": [], "source": [ "tzyx" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Read the files\n", "out = get_dataset(infiles, varnames=['XE','TEMP'], subdomain=tzyx, decode_times=True)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "out" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## NEMO outputs" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Concat variables (from different files) over time, and add the grid file" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Get the list of files\n", "path = get_config_value(\"osdyn.grcm.nemo\", \"path_medrys1v1\")\n", "pattern_file = get_config_value(\"osdyn.grcm.nemo\",\n", " \"pattern_medrys1v1\")\n", "pattern_date = eval(\n", " get_config_value(\"osdyn.grcm.nemo\", \"period_medrys1v1\")\n", ")\n", "infiles = list_files(os.path.join(path,pattern_file), pattern_date)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Read and gather the files into a unique one\n", "out = get_dataset(infiles[:], decode_times=False, gather_grid=['grid2D','gridS','gridT','gridU','gridV'])" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "out" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## AROME (previmer format)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "path = get_config_value(\"osdyn.grcm.arome\", \"path_previ\")\n", "pattern_file = get_config_value(\"osdyn.grcm.arome\",\n", " \"pattern_previ\")\n", "pattern_date = eval(\n", " get_config_value(\"osdyn.grcm.arome\", \"period_previ\")\n", ")\n", "infiles = list_files(os.path.join(path, pattern_file), pattern_date)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], 
"source": [ "# Select a subdomain\n", "tzyx = tzyx2index(infiles[0], 'eau', lons=(2.5,3.5), lats=(42.,43.))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Read and gather the files into a unique one\n", "out = get_dataset(infiles, varnames=['eau'], subdomain=tzyx, decode_times=True)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "out" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## MesoNH" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "More complicated as the time is not an axis" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Get the list of files\n", "path = get_config_value(\"osdyn.grcm.mesonh\", \"path_v5_3\")\n", "pattern_file = get_config_value(\"osdyn.grcm.mesonh\",\n", " \"pattern_obc_v5_3\")\n", "pattern_date = eval(get_config_value(\"osdyn.grcm.mesonh\",\n", " \"period_v5_3\")) # plus court\n", "pattern_date = (\"2011-09-02 15:00:00\",\"2011-09-04 00:00:00\")\n", "infiles = list_files(os.path.join(path, pattern_file), pattern_date)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Create the process to apply to each profile\n", "from osdyn.grcm.mesonh import get_datetime\n", "def userprocess(dsu):\n", " \"\"\"\n", " `xarray.open_mfdataset calls this function through `preprocess=userprocess`\n", " to apply the directives on each dataset prior to the concatenation.\n", "\n", " Parameters\n", " ----------\n", " dsu : xarray.Dataset\n", " One of the files.\n", "\n", " Returns\n", " -------\n", " xarray.Dataset\n", " The modified dataset in which the time axis has been added and a few \n", " variables have been collected.\n", "\n", " \"\"\"\n", "\n", " timerecord = get_datetime(dsu.DTCUR__TDATE, dsu.DTCUR__TIME)\n", " mnhgvars = ['time']\n", " mnhvars = ['UT']\n", " return dsu.assign(time=timerecord)[mnhvars + mnhgvars]" ] }, { 
"cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Gather MesoNH variables along time\n", "mnh = get_dataset(infiles, userprocess=userprocess, decode_times=False)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "mnh" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# grid of MesoNH file\n", "import xarray as xr\n", "gridvars = ['LON0','LAT0', 'BETA', 'JPHEXT', 'XHAT', 'YHAT', 'ZHAT', \n", " 'LAT', 'LON', 'ZS','ZSMT']\n", "mnh_grid = xr.open_dataset(infiles[0])[gridvars]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Add the grid into MesoNH file\n", "mnh = mnh.merge(mnh_grid)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "mnh" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Basic files\n", "From database organisation point of view" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Automatically merge a split xarray Dataset. This is designed to behave like `xarray.open_mfdataset`, except it supports concatenation along multiple dimensions." ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Arpege (Previmer format)\n", "path = get_config_value(\"osdyn.grcm.arpegehr\", \"path_previ\")\n", "pattern_file = get_config_value(\"osdyn.grcm.arpegehr\",\n", " \"pattern_previ\")\n", "pattern_date = eval(\n", " get_config_value(\"osdyn.grcm.arpegehr\", \"period_previ\")\n", ")\n", "infiles = list_files(os.path.join(path, pattern_file), pattern_date)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "auto_merge(infiles)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ ".. 
note:: simplest way\n", "Very useful when the files are split by period and when all the variables depend on time and are available in each file" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ ".. warning:: DataArrays are extended over the concatenation dimension.\n", "See the dx variables below, for instance" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Get the list of files according to the period\n", "path = get_config_value(\"osdyn.grcm.mars\", \"path_v9_6\")\n", "pattern_file = get_config_value(\"osdyn.grcm.mars\", \"pattern_file_v9_6\")\n", "pattern_date = (\"2013-01-30\",\"2013-01-30 03:00:00\") # plot super long\n", "#pattern_date = (\"2013-01-30\", \"2013-01-30 10:00:00\")\n", "infiles = list_files(os.path.join(path, pattern_file), pattern_date)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "infiles" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "auto = auto_merge(infiles, decode_times=False)\n", "auto.attrs = {}" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "auto" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ ".. 
note:: `auto_merge` is equivalent to `get_dataset` with `gather_unique_dim=None`" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "ds = get_dataset(infiles, gather_unique_dim=None)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "auto.time" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "ds.time" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "xr.testing.assert_identical(ds, auto)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.8" }, "toc": { "base_numbering": 1, "nav_menu": {}, "number_sections": true, "sideBar": true, "skip_h1_title": false, "title_cell": "Table of Contents", "title_sidebar": "Contents", "toc_cell": false, "toc_position": {}, "toc_section_display": true, "toc_window_display": false } }, "nbformat": 4, "nbformat_minor": 2 }