Data source types#

Source code in wellies/data.py
class StaticData:
    """Base class for a static data item and the script that deploys it.

    The deployment script is assembled, in order, from an optional
    pre-script section, a main section that creates ``data_dir`` and runs
    the retrieval script, and an optional post-script section. The result
    is wrapped in ``pf.Script`` and stored on ``self.script``.
    """

    def __init__(self, data_dir: str, name: str, script: str, options: dict):
        """
        Parameters
        ----------
        data_dir (str):
            Directory created with ``mkdir -p`` before the main script runs.
        name (str):
            Name of the data item.
        script (str):
            Main retrieval script, inserted as-is after the ``mkdir`` line.
            NOTE(review): subclasses in this file also pass lists and
            template-script objects here; confirm ``pf.Script`` accepts them.
        options (dict):
            Options of the data item. ``pre_script`` and ``post_script`` are
            read here, each passed through ``process_file_or_string``.
        """
        self.name = name
        self.options = options
        self.dir = data_dir

        before = process_file_or_string(options.get("pre_script", None))
        after = process_file_or_string(options.get("post_script", None))

        # A section is only emitted when its script resolves to a truthy value.
        pre_section = ["# Pre-script", before, ""] if before else []
        main_section = [
            "# Main script for retrieving data",
            f"mkdir -p {data_dir}",
            script,
        ]
        post_section = ["# Post-script", after, ""] if after else []

        self.script = pf.Script(pre_section + main_section + post_section)

Bases: StaticData

Source code in wellies/data.py
class CopyData(StaticData):
    """Static data item whose main script is rendered from
    ``scripts.copy_script``.

    Options read here:

    ``source`` (required)
        Base location of the data.
    ``files`` (optional)
        A single entry or a list of entries; each one is joined onto
        ``source`` with ``os.path.join``. When absent, ``source`` itself is
        the only target.
    """

    def __init__(self, data_dir, name, options):
        source = options["source"]
        selection = options.get("files")

        if selection is None:
            targets = [source]
        else:
            # A lone entry is treated as a one-element list.
            if not isinstance(selection, list):
                selection = [selection]
            targets = [os.path.join(source, entry) for entry in selection]

        copy_cmd = pf.TemplateScript(
            scripts.copy_script,
            DIR=data_dir,
            NAME=name,
            TARGET=targets,
        )
        super().__init__(data_dir, name, copy_cmd, options)

Bases: StaticData

Source code in wellies/data.py
class RsyncData(StaticData):
    """Static data item whose main script is rendered from
    ``scripts.rsync_script``.

    Options read here:

    ``source`` (required)
        Base location of the data.
    ``files`` (optional)
        A single entry or a list of entries, each joined onto ``source``.
        When absent, ``source`` itself is the only target.
    ``rsync_options`` (optional)
        Passed to the template as ``RSYNC_OPTIONS``; defaults to
        ``"-avzpL"``.
    """

    def __init__(self, data_dir, name, options):
        base = options["source"]
        requested = options.get("files")

        if requested is None:
            targets = [base]
        else:
            # Accept either one entry or a list of entries.
            entries = requested if isinstance(requested, list) else [requested]
            targets = [os.path.join(base, entry) for entry in entries]

        rsync_flags = options.get("rsync_options", "-avzpL")
        rsync_cmd = pf.TemplateScript(
            scripts.rsync_script,
            DIR=data_dir,
            NAME=name,
            TARGET=targets,
            RSYNC_OPTIONS=rsync_flags,
        )
        super().__init__(data_dir, name, rsync_cmd, options)

Bases: StaticData

Source code in wellies/data.py
class LinkData(StaticData):
    """Static data item whose main script is rendered from
    ``scripts.link_script``.

    The required ``source`` option is passed to the template unchanged as
    ``TARGET``; no ``files`` option is read by this class.
    """

    def __init__(self, data_dir, name, options):
        link_target = options["source"]
        link_cmd = pf.TemplateScript(
            scripts.link_script,
            DIR=data_dir,
            NAME=name,
            TARGET=link_target,
        )
        super().__init__(data_dir, name, link_cmd, options)

Bases: StaticData

Source code in wellies/data.py
class ECFSData(StaticData):
    """Static data item whose main script is rendered from
    ``scripts.ecfs_script``.

    Options read here:

    ``source`` (required)
        Base location of the data.
    ``files`` (optional)
        A single entry or a list of entries, each joined onto ``source``
        with ``os.path.join``. When absent, ``source`` itself is the only
        target.
    """

    def __init__(self, data_dir, name, options):
        root = options["source"]
        wanted = options.get("files")

        # Default: retrieve the source location as a whole.
        targets = [root]
        if wanted is not None:
            if not isinstance(wanted, list):
                wanted = [wanted]
            targets = [os.path.join(root, entry) for entry in wanted]

        ecfs_cmd = pf.TemplateScript(
            scripts.ecfs_script,
            DIR=data_dir,
            NAME=name,
            TARGET=targets,
        )
        super().__init__(data_dir, name, ecfs_cmd, options)

Bases: StaticData

Source code in wellies/data.py
class GitData(StaticData):
    """Static data item whose main script is rendered from
    ``scripts.git_script``.

    Options read here:

    ``source`` (required)
        Passed to the git template as ``URL``.
    ``branch`` (optional)
        Passed to the git template as ``BRANCH`` (``None`` when absent).
    ``build_dir`` (optional)
        Directory handed to the git template as ``DIR``.
    ``files`` (optional)
        A single entry or a list of entries inside the checkout to keep.
    ``rsync_options`` (optional)
        Only used together with ``files``; defaults to ``"-avzpL"``.

    Without ``files`` the main script is the git template alone, using
    ``build_dir`` if given and ``data_dir`` otherwise. With ``files`` the
    git template uses ``build_dir`` (default ``<data_dir>/git``), the
    selected entries under ``<build_dir>/<name>`` are passed to
    ``scripts.rsync_script`` with ``data_dir`` as ``DIR``, and
    ``<build_dir>/<name>`` is removed afterwards.
    """

    def __init__(self, data_dir, name, options):
        selection = options.get("files")
        checkout_root = options.get("build_dir")

        if selection is None:
            # Whole-repository case: a single git step is the main script.
            if checkout_root is None:
                checkout_root = data_dir
            clone_only = pf.TemplateScript(
                scripts.git_script,
                DIR=checkout_root,
                NAME=name,
                URL=options["source"],
                BRANCH=options.get("branch"),
            )
            super().__init__(data_dir, name, clone_only, options)
            return

        # File-selection case: git step, rsync of the selection, clean-up.
        if checkout_root is None:
            checkout_root = os.path.join(data_dir, "git")
        if not isinstance(selection, list):
            selection = [selection]
        picked = [os.path.join(checkout_root, name, entry) for entry in selection]

        clone_step = pf.TemplateScript(
            scripts.git_script,
            DIR=checkout_root,
            NAME=name,
            URL=options["source"],
            BRANCH=options.get("branch"),
        )
        sync_step = pf.TemplateScript(
            scripts.rsync_script,
            DIR=data_dir,
            NAME=name,
            TARGET=picked,
            RSYNC_OPTIONS=options.get("rsync_options", "-avzpL"),
        )
        steps = [
            clone_step,
            sync_step,
            "echo 'cleaning build directory'",
            f"rm -rf {checkout_root}/{name}",
        ]
        super().__init__(data_dir, name, steps, options)

Bases: StaticData

Source code in wellies/data.py
class MarsData(StaticData):
    """Static data item retrieved with ``mars.Retrieve``.

    The main script defines ``dest_dir`` as ``<data_dir>/<name>``, creates
    that directory, changes into it, and then runs the retrieval built from
    the required ``request`` option.
    """

    def __init__(self, data_dir, name, options):
        destination = os.path.join(data_dir, name)
        retrieval = mars.Retrieve(options["request"])
        steps = [
            f"dest_dir={destination}",
            "mkdir -p $dest_dir",
            "cd $dest_dir",
            retrieval,
        ]
        super().__init__(data_dir, name, steps, options)

Bases: StaticData

Source code in wellies/data.py
class CustomData(StaticData):
    """Static data item with no retrieval logic of its own.

    The main script is a single shell comment. Any real commands
    presumably come from the ``pre_script`` / ``post_script`` options
    handled by ``StaticData``.
    """

    def __init__(self, data_dir, name, options):
        super().__init__(
            data_dir,
            name,
            "# Running custom data command",
            options,
        )

Static Data Store#

Source code in wellies/data.py
class StaticDataStore:
    """Mapping-like collection of static data items, keyed by item name."""

    def __init__(self, data_dir: str, static_data_dict: dict):
        """
        The StaticDataStore contains a set of static data items and their
        associated scripts to be used to deploy the items when running the
        suite.

        Parameters
        ----------
        data_dir (str):
            The directory where the data is stored on the running host.
        static_data_dict (dict):
            A dictionary containing the names of the static data items as
            keys and their deployment options as values.
        """
        self.static_data = {}
        for name, options in static_data_dict.items():
            data = parse_data_item(data_dir, name, options)
            self.static_data[name] = data

    def __getitem__(self, item):
        """Return the data item registered under ``item``.

        Raises
        ------
        KeyError
            If no item with that name is in the store.
        """
        return self.static_data[item]

    def __repr__(self) -> str:
        """Return a summary showing each item name with its options."""
        items = str({name: it.options for name, it in self.items()})
        return f"StaticDataStore: {items}"

    def items(self):
        """Return a view of ``(name, data item)`` pairs."""
        return self.static_data.items()

    @classmethod
    def from_yamls(cls, config_files: list, data_dir=None) -> "StaticDataStore":
        """
        Build a StaticDataStore from one or more YAML configuration files.

        Parameters
        ----------
        config_files (list):
            Configuration files handed to ``parse_yaml_files``.
        data_dir (str, optional):
            Either a key of the parsed configuration whose value is the data
            directory, or the data directory itself. When omitted, the
            literal ``"$DATA_DIR"`` is used.

        Returns
        -------
        StaticDataStore
            The store built from the ``static_data`` entry of the parsed
            configuration.

        Raises
        ------
        KeyError
            If the parsed configuration has no ``static_data`` entry.
        """
        options = parse_yaml_files(config_files)
        if data_dir is None:
            rootname = "$DATA_DIR"
        else:
            # ``data_dir`` may name a configuration key holding the real
            # path; otherwise it is taken as the path itself.
            rootname = options[data_dir] if data_dir in options else data_dir
        # The instance must be returned: previously it was built and
        # discarded, so this constructor always produced None.
        return cls(rootname, options["static_data"])

__init__(data_dir, static_data_dict) #

The StaticDataStore contains a set of static data items and their associated scripts to be used to deploy the items when running the suite.

Parameters#

- data_dir (str): The directory where the data is stored on the running host.
- static_data_dict (dict): A dictionary containing the names of the static data items as keys and their deployment options as values.

Source code in wellies/data.py
def __init__(self, data_dir: str, static_data_dict: dict):
    """
    Build the store from a mapping of item names to deployment options.

    Each entry is turned into a data item with ``parse_data_item`` and kept
    in ``self.static_data`` under its name, together with the scripts used
    to deploy the item when running the suite.

    Parameters
    ----------
    data_dir (str):
        The directory where the data is stored on the running host.
    static_data_dict (dict):
        A dictionary containing the names of the static data items as
        keys and their deployment options as values.
    """
    self.static_data = {
        item_name: parse_data_item(data_dir, item_name, item_options)
        for item_name, item_options in static_data_dict.items()
    }