# Source code for pylleo.lleoio

def read_meta(path_dir, tag_model, tag_id):
    """Read meta data from Little Leonardo data header rows

    Args
    ----
    path_dir: str
        Parent directory containing lleo data files
    tag_model: str
        Little Leonardo tag model name
    tag_id: str, int
        Little Leonardo tag ID number (unused here; kept for interface
        compatibility with callers)

    Returns
    -------
    meta: dict
        dictionary with meta data from header lines of lleo data files
    """
    from collections import OrderedDict
    import os

    import yamlord

    from . import utils

    def _parse_meta_line(line):
        """Return key, value pair parsed from a data header line"""
        # Strip colons/quotes, then split only on the FIRST comma so a
        # value that itself contains commas does not raise ValueError.
        key, val = line.replace(":", "").replace('"', "").split(",", 1)
        return key.strip(), val.strip()

    def _read_meta_all(f, meta, n_header):
        """Read all meta data from header rows of data file"""
        # Skip 'File name' line
        f.seek(0)
        _ = f.readline()

        # Create child dictionary for channel / file
        line = f.readline()
        _, val_ch = _parse_meta_line(line)
        val_ch = utils.posix_string(val_ch)
        meta["parameters"][val_ch] = OrderedDict()

        # Write header values to channel dict; two header lines were
        # already consumed above, hence `n_header - 2`
        for _ in range(n_header - 2):
            line = f.readline()
            key, val = _parse_meta_line(line)
            meta["parameters"][val_ch][key] = val.strip()

        return meta

    def _create_meta(path_dir, tag_model, tag_id):
        """Create meta data dictionary from the data files' header rows"""
        import datetime

        param_strs = utils.get_tag_params(tag_model)

        # Create dictionary of meta data
        meta = OrderedDict()

        # Create fields for the parameters in data directory name
        exp_name = os.path.split(path_dir)[1]
        params_tag = utils.parse_experiment_params(exp_name)
        for key, value in params_tag.items():
            meta[key] = value

        fmt = "%Y-%m-%d %H:%M:%S"
        meta["date_modified"] = datetime.datetime.now().strftime(fmt)
        meta["parameters"] = OrderedDict()

        for param_str in param_strs:
            print("Create meta entry for {}".format(param_str))
            path_file = utils.find_file(path_dir, param_str, ".TXT")

            # Guess the file encoding from its first lines, then parse
            # the header rows of this parameter's data file
            enc = utils.predict_encoding(path_file, n_lines=20)
            with open(path_file, "r", encoding=enc) as f:
                n_header = utils.get_n_header(f)
                f.seek(0)
                meta = _read_meta_all(f, meta, n_header=n_header)

        return meta

    # Load meta data from YAML file if it already exists
    meta_yaml_path = os.path.join(path_dir, "meta.yml")

    # Load file if exists else create meta dictionary and save to YAML
    if os.path.isfile(meta_yaml_path):
        meta = yamlord.read_yaml(meta_yaml_path)
    else:
        meta = _create_meta(path_dir, tag_model, tag_id)
        yamlord.write_yaml(meta, meta_yaml_path)

    return meta
def read_data(meta, path_dir, sample_f=1, decimate=False, overwrite=False):
    """Read accelerometry data from leonardo txt files

    Args
    ----
    meta: dict
        Dictionary of meta data from header lines of lleo data files
    path_dir: str
        Parent directory containing lleo data files
    sample_f: int
        Return every `sample_f` data points
    decimate: bool
        Currently unused; kept for backward compatibility with callers
    overwrite: bool
        If True, re-read the txt files even when a pickle cache exists

    Returns
    -------
    data_df: pandas.DataFrame
        DataFrame with a `datetimes` column plus one column per tag
        parameter (acceleration x/y/z [m/s^2], depth [m], propeller
        speed, temperature)
    """
    import os
    import pandas
    from pylleo import utils

    def _generate_datetimes(date, time, interval_s, n_timestamps):
        """Generate list of datetimes from date/time with given interval"""
        from datetime import timedelta

        import pandas

        # TODO problematic if both m/d d/m options
        fmts = [
            "%Y/%m/%d %H%M%S",
            "%d/%m/%Y %H%M%S",
            "%m/%d/%Y %I%M%S %p",
            "%d/%m/%Y %I%M%S %p",
        ]

        start = None
        for fmt in fmts:
            try:
                start = pandas.to_datetime("{} {}".format(date, time), format=fmt)
            except Exception:
                print("Date format {:18} incorrect, " "trying next...".format(fmt))
            else:
                print("Date format {:18} correct.".format(fmt))
                break

        # Previously an unparseable date fell through to an opaque
        # NameError on `start`; fail loudly with a clear message instead
        if start is None:
            raise ValueError(
                "Could not parse start datetime from '{} {}'".format(date, time)
            )

        # Create datetime array at the sampling interval
        return [
            start + timedelta(seconds=interval_s * i) for i in range(n_timestamps)
        ]

    def _read_data_file(meta, path_dir, param_str):
        """Read single Little Leonardo txt data file"""
        import numpy
        import pandas

        # Get path of data file and its POSIX-safe column name
        path_file = utils.find_file(path_dir, param_str, ".TXT")
        col_name = utils.posix_string(param_str)

        # Get number of header rows in file
        enc = utils.predict_encoding(path_file, n_lines=20)
        with open(path_file, "r", encoding=enc) as f:
            n_header = utils.get_n_header(f)

        print("\nReading: {}".format(col_name))

        data = numpy.genfromtxt(path_file, skip_header=n_header)

        interval_s = float(meta["parameters"][col_name]["Interval(Sec)"])
        date = meta["parameters"][col_name]["Start date"]
        time = meta["parameters"][col_name]["Start time"]

        # Sum propeller counts into 1 s bins when sampled faster than 1 Hz
        if (col_name == "propeller") and (interval_s < 1):
            print("Too high sampling interval, taking sums")

            # Sampling rate
            fs = int(1 / interval_s)
            print("data before", data.max())

            # Drop trailing elements so length is divisible by `fs`.
            # NOTE: the previous `data[: -int(len(data) % fs)]` emptied
            # the array whenever the remainder was zero (`data[:-0]`
            # is `data[:0]`), so only trim when there is a remainder.
            rem = len(data) % fs
            if rem:
                data = data[:-rem]

            # Sum each group of `fs` *consecutive* samples. The previous
            # `reshape(fs, n // fs)` + `sum(axis=0)` summed samples
            # spaced `n // fs` apart rather than adjacent ones.
            data = data.reshape(-1, fs).sum(axis=1)
            interval_s = 1
            print("data after", data.max())

        datetimes = _generate_datetimes(date, time, interval_s, len(data))
        values = numpy.vstack((datetimes, data)).T

        return pandas.DataFrame(values, columns=["datetimes", col_name])

    # Get list of string parameter names for tag model
    param_names = utils.get_tag_params(meta["tag_model"])

    # Pickle cache from a previous read of this experiment
    pickle_file = os.path.join(path_dir, "pydata_" + meta["experiment"] + ".p")

    # Load cached DataFrame if present, else build it from the txt files
    if os.path.exists(pickle_file) and (overwrite is not True):
        data_df = pandas.read_pickle(pickle_file)
    else:
        data_df = None
        for name in param_names:
            next_df = _read_data_file(meta, path_dir, name)
            if data_df is None:
                # First parameter initializes the DataFrame
                data_df = next_df
            else:
                # Subsequent parameters join on the shared timestamps
                data_df = pandas.merge(
                    data_df, next_df, on="datetimes", how="left"
                )
            print("")

        # Convert columns to `datetime64` or `float64` types
        data_df = data_df.apply(lambda x: pandas.to_numeric(x, errors="ignore"))

        # Save file to pickle
        data_df.to_pickle(pickle_file)

    # Return DataFrame with every `sample_f` values
    return data_df.iloc[::sample_f, :]