# Source code for pylleo.lleoio

def read_meta(path_dir, tag_model, tag_id):
    """Read meta data from Little Leonardo data header rows

    Args
    ----
    path_dir: str
        Parent directory containing lleo data files
    tag_model: str
        Little Leonardo tag model name
    tag_id: str, int
        Little Leonardo tag ID number (unused here; kept for interface
        compatibility with callers)

    Returns
    -------
    meta: dict
        dictionary with meta data from header lines of lleo data files
    """
    from collections import OrderedDict
    import os

    import yamlord

    from . import utils

    def _parse_meta_line(line):
        """Return key, value pair parsed from a data header line"""
        # Strip colons/quotes, then split only on the FIRST comma so a
        # value that itself contains commas does not raise ValueError.
        key, val = line.replace(":", "").replace('"', "").split(",", 1)
        return key.strip(), val.strip()

    def _read_meta_all(f, meta, n_header):
        """Read all meta data from header rows of data file"""
        # Skip 'File name' line
        f.seek(0)
        _ = f.readline()

        # Create child dictionary for channel / file
        line = f.readline()
        _, val_ch = _parse_meta_line(line)
        val_ch = utils.posix_string(val_ch)
        meta["parameters"][val_ch] = OrderedDict()

        # Write header values to channel dict; two header lines were
        # already consumed above, hence `n_header - 2`
        for _ in range(n_header - 2):
            line = f.readline()
            key, val = _parse_meta_line(line)
            meta["parameters"][val_ch][key] = val.strip()

        return meta

    def _create_meta(path_dir, tag_model, tag_id):
        """Create meta data dictionary from the data files' header rows"""
        import datetime

        param_strs = utils.get_tag_params(tag_model)

        # Create dictionary of meta data
        meta = OrderedDict()

        # Create fields for the parameters in data directory name
        exp_name = os.path.split(path_dir)[1]
        params_tag = utils.parse_experiment_params(exp_name)
        for key, value in params_tag.items():
            meta[key] = value

        fmt = "%Y-%m-%d %H:%M:%S"
        meta["date_modified"] = datetime.datetime.now().strftime(fmt)
        meta["parameters"] = OrderedDict()

        for param_str in param_strs:
            print("Create meta entry for {}".format(param_str))
            path_file = utils.find_file(path_dir, param_str, ".TXT")

            # Guess the file encoding from its first lines, then parse
            # the header rows of this parameter's data file
            enc = utils.predict_encoding(path_file, n_lines=20)
            with open(path_file, "r", encoding=enc) as f:
                n_header = utils.get_n_header(f)
                f.seek(0)
                meta = _read_meta_all(f, meta, n_header=n_header)

        return meta

    # Load meta data from YAML file if it already exists
    meta_yaml_path = os.path.join(path_dir, "meta.yml")

    # Load file if exists else create meta dictionary and save to YAML
    if os.path.isfile(meta_yaml_path):
        meta = yamlord.read_yaml(meta_yaml_path)
    else:
        meta = _create_meta(path_dir, tag_model, tag_id)
        yamlord.write_yaml(meta, meta_yaml_path)

    return meta
def read_data(meta, path_dir, sample_f=1, decimate=False, overwrite=False):
    """Read accelerometry data from leonardo txt files

    Args
    ----
    meta: dict
        Dictionary of meta data from header lines of lleo data files
    path_dir: str
        Parent directory containing lleo data files
    sample_f: int
        Return every `sample_f` data points
    decimate: bool
        Currently unused; kept for backward compatibility with callers
    overwrite: bool
        If True, re-read the txt files even when a pickle cache exists

    Returns
    -------
    data_df: pandas.DataFrame
        DataFrame with a `datetimes` column plus one column per tag
        parameter (acceleration x/y/z [m/s^2], depth [m], propeller
        speed, temperature)
    """
    import os
    import pandas
    from pylleo import utils

    def _generate_datetimes(date, time, interval_s, n_timestamps):
        """Generate list of datetimes from date/time with given interval"""
        from datetime import timedelta

        import pandas

        # TODO problematic if both m/d d/m options
        fmts = [
            "%Y/%m/%d %H%M%S",
            "%d/%m/%Y %H%M%S",
            "%m/%d/%Y %I%M%S %p",
            "%d/%m/%Y %I%M%S %p",
        ]

        start = None
        for fmt in fmts:
            try:
                start = pandas.to_datetime("{} {}".format(date, time), format=fmt)
            except Exception:
                print("Date format {:18} incorrect, " "trying next...".format(fmt))
            else:
                print("Date format {:18} correct.".format(fmt))
                break

        # Previously an unparseable date fell through to an opaque
        # NameError on `start`; fail loudly with a clear message instead
        if start is None:
            raise ValueError(
                "Could not parse start datetime from '{} {}'".format(date, time)
            )

        # Create datetime array at the sampling interval
        return [
            start + timedelta(seconds=interval_s * i) for i in range(n_timestamps)
        ]

    def _read_data_file(meta, path_dir, param_str):
        """Read single Little Leonardo txt data file"""
        import numpy
        import pandas

        # Get path of data file and its POSIX-safe column name
        path_file = utils.find_file(path_dir, param_str, ".TXT")
        col_name = utils.posix_string(param_str)

        # Get number of header rows in file
        enc = utils.predict_encoding(path_file, n_lines=20)
        with open(path_file, "r", encoding=enc) as f:
            n_header = utils.get_n_header(f)

        print("\nReading: {}".format(col_name))

        data = numpy.genfromtxt(path_file, skip_header=n_header)

        interval_s = float(meta["parameters"][col_name]["Interval(Sec)"])
        date = meta["parameters"][col_name]["Start date"]
        time = meta["parameters"][col_name]["Start time"]

        # Sum propeller counts into 1 s bins when sampled faster than 1 Hz
        if (col_name == "propeller") and (interval_s < 1):
            print("Too high sampling interval, taking sums")

            # Sampling rate
            fs = int(1 / interval_s)
            print("data before", data.max())

            # Drop trailing elements so length is divisible by `fs`.
            # NOTE: the previous `data[: -int(len(data) % fs)]` emptied
            # the array whenever the remainder was zero (`data[:-0]`
            # is `data[:0]`), so only trim when there is a remainder.
            rem = len(data) % fs
            if rem:
                data = data[:-rem]

            # Sum each group of `fs` *consecutive* samples. The previous
            # `reshape(fs, n // fs)` + `sum(axis=0)` summed samples
            # spaced `n // fs` apart rather than adjacent ones.
            data = data.reshape(-1, fs).sum(axis=1)
            interval_s = 1
            print("data after", data.max())

        datetimes = _generate_datetimes(date, time, interval_s, len(data))
        values = numpy.vstack((datetimes, data)).T

        return pandas.DataFrame(values, columns=["datetimes", col_name])

    # Get list of string parameter names for tag model
    param_names = utils.get_tag_params(meta["tag_model"])

    # Pickle cache from a previous read of this experiment
    pickle_file = os.path.join(path_dir, "pydata_" + meta["experiment"] + ".p")

    # Load cached DataFrame if present, else build it from the txt files
    if os.path.exists(pickle_file) and (overwrite is not True):
        data_df = pandas.read_pickle(pickle_file)
    else:
        data_df = None
        for name in param_names:
            next_df = _read_data_file(meta, path_dir, name)
            if data_df is None:
                # First parameter initializes the DataFrame
                data_df = next_df
            else:
                # Subsequent parameters join on the shared timestamps
                data_df = pandas.merge(
                    data_df, next_df, on="datetimes", how="left"
                )
            print("")

        # Convert columns to `datetime64` or `float64` types
        data_df = data_df.apply(lambda x: pandas.to_numeric(x, errors="ignore"))

        # Save file to pickle
        data_df.to_pickle(pickle_file)

    # Return DataFrame with every `sample_f` values
    return data_df.iloc[::sample_f, :]