# Dataset from single or multiple NetCDF files

Note: a shorter way to read the files is available for each class of model.

In [None]:
import os
from osdyn.config import get_config_value
from osdyn.utils.data.mxarray import get_dataset, tzyx2index, auto_merge
from osdyn.utils.data.io import list_files

## mars outputs

### Concat over time

In [None]:
# Build the list of MARS files covering the requested period.
# The directory and the file-name pattern are read from the osdyn configuration.
path = get_config_value("osdyn.grcm.mars", "path_v9_6")
pattern_file = get_config_value("osdyn.grcm.mars", "pattern_file_v9_6")
# Period passed to list_files (presumably (start, end)); with this range the
# plot further down is very slow.
pattern_date = ("2013-01-30","2013-02-01 10:00:00")
# Shorter alternative period:
#pattern_date = ("2013-01-30", "2013-01-30 10:00:00")
infiles = list_files(os.path.join(path, pattern_file), pattern_date)

In [None]:
# Read XE and TEMP from the listed files and concatenate them along time
out = get_dataset(
    infiles,
    varnames=['XE','TEMP'],
    gather_unique_dim='time',
)

In [None]:
# Plot XE over its whole first axis at index 100 on the two other axes
# (presumably a time series at one grid point — confirm the dimension order).
fig = out.XE[:,100,100].plot()

Very slow; to do: see how this can be improved.

### Concat over time and select a subdomain 

In [None]:
# Get the selection for variable TEMP, using the first file as reference.
# Both bounds are identical (lon 6., lat 43.), i.e. a single location is requested.
tzyx = tzyx2index(infiles[0], 'TEMP', lons=(6.,6.), lats=(43.,43.))

In [None]:
# Display the selection returned by tzyx2index
tzyx

In [None]:
# Read XE and TEMP from the files, restricted to the ``tzyx`` selection,
# with time decoding enabled
out = get_dataset(
    infiles,
    varnames=['XE','TEMP'],
    subdomain=tzyx,
    decode_times=True,
)

In [None]:
# Display the resulting dataset
out

## NEMO outputs

Concat variables (from different files) over time, and add the grid file

In [None]:
# Get the list of NEMO files: directory, file-name pattern and period are
# all read from the osdyn configuration.
path = get_config_value("osdyn.grcm.nemo", "path_medrys1v1")
pattern_file = get_config_value("osdyn.grcm.nemo",
 "pattern_medrys1v1")
# NOTE(review): eval() executes whatever expression the configuration holds.
# If the stored value is always a plain literal (e.g. a tuple of date
# strings), ast.literal_eval would be a safer choice — confirm the format.
pattern_date = eval(
 get_config_value("osdyn.grcm.nemo", "period_medrys1v1")
)
infiles = list_files(os.path.join(path,pattern_file), pattern_date)

In [None]:
# Read all the files and gather them, together with the listed grids,
# into a single dataset (times are left undecoded)
out = get_dataset(
    infiles[:],
    decode_times=False,
    gather_grid=['grid2D','gridS','gridT','gridU','gridV'],
)

In [None]:
# Display the gathered NEMO dataset
out

## AROME (previmer format)

In [None]:
# Get the list of AROME files: directory, file-name pattern and period are
# all read from the osdyn configuration.
path = get_config_value("osdyn.grcm.arome", "path_previ")
pattern_file = get_config_value("osdyn.grcm.arome",
 "pattern_previ")
# NOTE(review): eval() executes whatever expression the configuration holds.
# If the stored value is always a plain literal (e.g. a tuple of date
# strings), ast.literal_eval would be a safer choice — confirm the format.
pattern_date = eval(
 get_config_value("osdyn.grcm.arome", "period_previ")
)
infiles = list_files(os.path.join(path, pattern_file), pattern_date)

In [None]:
# Select a subdomain for variable 'eau', using the first file as reference:
# longitudes 2.5 to 3.5 and latitudes 42. to 43.
tzyx = tzyx2index(infiles[0], 'eau', lons=(2.5,3.5), lats=(42.,43.))

In [None]:
# Read variable 'eau' over the ``tzyx`` subdomain and gather the files
# into a single dataset, with time decoding enabled
out = get_dataset(
    infiles,
    varnames=['eau'],
    subdomain=tzyx,
    decode_times=True,
)

In [None]:
# Display the gathered AROME dataset
out

## MesoNH

More complicated, as time is not an axis in these files

In [None]:
# Get the list of MesoNH files
path = get_config_value("osdyn.grcm.mesonh", "path_v5_3")
pattern_file = get_config_value("osdyn.grcm.mesonh",
 "pattern_obc_v5_3")
# NOTE(review): eval() executes whatever expression the configuration holds.
# If the stored value is always a plain literal, ast.literal_eval would be a
# safer choice — confirm the format.
pattern_date = eval(get_config_value("osdyn.grcm.mesonh",
 "period_v5_3"))
# The configured period is immediately replaced by this explicit one (the
# original note here read "shorter"), so the value evaluated above is unused.
pattern_date = ("2011-09-02 15:00:00","2011-09-04 00:00:00")
infiles = list_files(os.path.join(path, pattern_file), pattern_date)

In [None]:
# Create the process to apply to each profile
from osdyn.grcm.mesonh import get_datetime
def userprocess(dsu):
    """
    Prepare a single MesoNH file before the files are concatenated.

    `xarray.open_mfdataset` calls this function through
    `preprocess=userprocess`, so it is applied to each dataset prior to
    the concatenation.

    Parameters
    ----------
    dsu : xarray.Dataset
        One of the files.

    Returns
    -------
    xarray.Dataset
        The dataset reduced to the ``UT`` and ``time`` variables, where
        ``time`` is built by ``get_datetime`` from ``DTCUR__TDATE`` and
        ``DTCUR__TIME``.

    """
    # Time record of this file, derived from its date and time fields
    stamp = get_datetime(dsu.DTCUR__TDATE, dsu.DTCUR__TIME)
    with_time = dsu.assign(time=stamp)

    # Keep the model variable first, then the added time variable
    kept = ['UT', 'time']
    return with_time[kept]

In [None]:
# Gather the MesoNH variables along time; ``userprocess`` is handed over so
# that it is applied to each file, and times are left undecoded
mnh = get_dataset(
    infiles,
    userprocess=userprocess,
    decode_times=False,
)

In [None]:
# Display the gathered MesoNH dataset
mnh

In [None]:
# Grid variables, read from the first MesoNH file
import xarray as xr

gridvars = [
    'LON0', 'LAT0', 'BETA', 'JPHEXT',
    'XHAT', 'YHAT', 'ZHAT',
    'LAT', 'LON', 'ZS', 'ZSMT',
]
mnh_grid = xr.open_dataset(infiles[0])[gridvars]

In [None]:
# Add the grid variables to the MesoNH dataset (merge returns a new dataset,
# which is bound back to ``mnh``)
mnh = mnh.merge(mnh_grid)

In [None]:
# Display the MesoNH dataset once the grid has been merged in
mnh

## Basic files
From a database-organisation point of view

Automatically merge a split xarray Dataset. This is designed to behave like `xarray.open_mfdataset`, except it supports concatenation along multiple dimensions.

In [None]:
# Arpege (Previmer format): directory, file-name pattern and period are all
# read from the osdyn configuration.
path = get_config_value("osdyn.grcm.arpegehr", "path_previ")
pattern_file = get_config_value("osdyn.grcm.arpegehr",
 "pattern_previ")
# NOTE(review): eval() executes whatever expression the configuration holds.
# If the stored value is always a plain literal (e.g. a tuple of date
# strings), ast.literal_eval would be a safer choice — confirm the format.
pattern_date = eval(
 get_config_value("osdyn.grcm.arpegehr", "period_previ")
)
infiles = list_files(os.path.join(path, pattern_file), pattern_date)

In [None]:
# Merge the listed files with default options and display the result
auto_merge(infiles)

.. note:: simplest way
Very useful when the files are split by period and when all the variables depend on time and are available in each file

.. warning:: DataArrays are extended over the concatenation dimension.
See the dx variables below, for instance

In [None]:
# Get the list of MARS files according to the period.
# The directory and the file-name pattern are read from the osdyn configuration.
path = get_config_value("osdyn.grcm.mars", "path_v9_6")
pattern_file = get_config_value("osdyn.grcm.mars", "pattern_file_v9_6")
# Period passed to list_files (original note: the plot is very slow)
pattern_date = ("2013-01-30","2013-01-30 03:00:00")
# Alternative, longer period:
#pattern_date = ("2013-01-30", "2013-01-30 10:00:00")
infiles = list_files(os.path.join(path, pattern_file), pattern_date)

In [None]:
# Display the list of selected files
infiles

In [None]:
# Merge the files without decoding times
auto = auto_merge(infiles, decode_times=False)
# Reset the global attributes to an empty dict (presumably so that they do
# not interfere with the comparison against get_dataset — confirm)
auto.attrs = {}

In [None]:
# Display the merged dataset
auto

.. note:: auto_merge is equivalent to get_dataset when gather_unique_dim=None

In [None]:
# Read the same files with get_dataset, with gather_unique_dim set to None
ds = get_dataset(infiles, gather_unique_dim=None)

In [None]:
# Display the time variable of the auto_merge result
auto.time

In [None]:
# Display the time variable of the get_dataset result
ds.time

In [None]:
# Check that get_dataset and auto_merge produced identical datasets
# (xarray raises an AssertionError otherwise)
xr.testing.assert_identical(ds, auto)