Source code for esmvalcore.io

"""A modular system for reading input data from various sources.

An input data source can be defined in the configuration by using
:obj:`esmvalcore.config.CFG`, for example:

.. code-block:: python

    >>> from esmvalcore.config import CFG
    >>> CFG["projects"]["CMIP6"]["data"]["local"] = {
    ...     "type": "esmvalcore.local.LocalDataSource",
    ...     "rootpath": "~/climate_data",
    ...     "dirname_template": "{project}/{activity}/{institute}/{dataset}/{exp}/{ensemble}/{mip}/{short_name}/{grid}/{version}",
    ...     "filename_template": "{short_name}_{mip}_{dataset}_{exp}_{ensemble}_{grid}*.nc",
    ... }

or as a :ref:`YAML configuration file <config_overview>`:

.. code-block:: yaml

    projects:
      CMIP6:
        data:
          local:
            type: "esmvalcore.local.LocalDataSource"
            rootpath: "~/climate_data"
            dirname_template: "{project}/{activity}/{institute}/{dataset}/{exp}/{ensemble}/{mip}/{short_name}/{grid}/{version}"
            filename_template: "{short_name}_{mip}_{dataset}_{exp}_{ensemble}_{grid}*.nc"

where ``CMIP6`` is a project, and ``local`` is a unique name describing the
data source. The data source type,
:class:`esmvalcore.local.LocalDataSource`, in the example above, needs to
implement the :class:`esmvalcore.io.protocol.DataSource` protocol. Any
remaining key-value pairs in the configuration, ``rootpath``,
``dirname_template``, and ``filename_template`` in this example, are passed
as keyword arguments to the data source when it is created.

If there are multiple data sources configured for a project, deduplication of
search results happens based on the
:attr:`esmvalcore.io.protocol.DataElement.name` attribute and the ``"version"``
facet in :attr:`esmvalcore.io.protocol.DataElement.facets` of the data elements
provided by the data sources. If no ``version`` facet is specified in the
search, the latest version will be used. If there is a tie, the data element
provided by the data source with the lowest value of
:attr:`esmvalcore.io.protocol.DataSource.priority` is chosen.
"""

import importlib
import logging

from esmvalcore.config import Session
from esmvalcore.io.protocol import DataSource

logger = logging.getLogger(__name__)


def _import_data_source_class(
    type_: object,
    project: str,
    name: str,
) -> type:
    """Import the class that the ``type`` setting of a data source refers to.

    Arguments
    ---------
    type_:
        The value of the ``type`` setting, expected to be a dotted path of
        the form ``package.module.ClassName``.
    project:
        The project the data source is configured for. Only used in the
        error message.
    name:
        The name of the data source. Only used in the error message.

    Returns
    -------
    :obj:`type`:
        The attribute named by the last part of ``type_``, looked up on the
        module named by everything before the last dot.

    Raises
    ------
    ValueError:
        If ``type_`` is not a string with a non-empty module part and a
        non-empty class part.
    """
    module_name, _, cls_name = (
        type_.rpartition(".") if isinstance(type_, str) else ("", "", "")
    )
    if not module_name or not cls_name:
        msg = (
            f"Missing or invalid 'type' for data source '{name}' of project "
            f"'{project}': expected a string of the form "
            f"'package.module.ClassName', got {type_!r}. Check your "
            f"configuration under 'projects: {project}: data: {name}: type'"
        )
        raise ValueError(msg)
    # Errors from importing the module or looking up the class are left to
    # propagate unchanged: they already name the missing module/attribute.
    module = importlib.import_module(module_name)
    return getattr(module, cls_name)


def load_data_sources(
    session: Session,
    project: str | None = None,
) -> list[DataSource]:
    """Get the list of available data sources.

    If no ``priority`` is configured for a data source, the default priority
    of 1 is used.

    Arguments
    ---------
    session:
        The configuration.
    project:
        If specified, only data sources for this project are returned.

    Returns
    -------
    :obj:`list` of :obj:`DataSource`:
        A list of available data sources.

    Raises
    ------
    ValueError:
        If the project or its settings are not found in the configuration,
        or if the ``type`` setting of a data source is missing or is not a
        dotted path to a class.
    TypeError:
        If the object created from a data source configuration is not an
        instance of :obj:`DataSource`.

    """
    data_sources: list[DataSource] = []
    if project is not None and project not in session["projects"]:
        msg = f"Unknown project '{project}', please configure it under 'projects'."
        raise ValueError(msg)
    settings = (
        session["projects"]
        if project is None
        else {project: session["projects"][project]}
    )
    for project_, project_settings in settings.items():
        for name, orig_kwargs in project_settings.get("data", {}).items():
            # Work on a copy so that popping the special keys below does not
            # modify the configuration itself.
            kwargs = orig_kwargs.copy()
            cls = _import_data_source_class(
                kwargs.pop("type", None),
                project_,
                name,
            )
            priority = kwargs.pop("priority", 1)
            # All remaining settings are passed on as keyword arguments.
            data_source = cls(
                name=name,
                project=project_,
                priority=priority,
                **kwargs,
            )
            if not isinstance(data_source, DataSource):
                msg = (
                    "Expected a data source of type `esmvalcore.io.protocol.DataSource`, "
                    f"but your configuration for project '{project_}' contains "
                    f"'{data_source}' of type '{type(data_source)}'."
                )
                raise TypeError(msg)
            data_sources.append(data_source)

    if not data_sources:
        if project is None:
            msg = "No data sources found. Check your configuration under 'projects'"
        else:
            msg = (
                f"No data sources found for project '{project}'. "
                f"Check your configuration under 'projects: {project}: data'"
            )
        raise ValueError(msg)
    return data_sources