Module redvox.common.data_window_configuration_old
This module provides type-safe data window configuration
Expand source code
"""
This module provides type-safe data window configuration
"""
from dataclasses import dataclass
from dataclasses_json import dataclass_json
from typing import Optional, List, MutableMapping
import pprint
import toml
import redvox.common.date_time_utils as dtu
# defaults for configuration
DEFAULT_DROP_TIME_S: float = 0.2 # seconds between packets to be considered a gap
DEFAULT_START_PADDING_S: float = 120.0 # time to add before start time when searching
DEFAULT_END_PADDING_S: float = 120.0 # time to add after end time when searching
@dataclass_json()
@dataclass
class DataWindowConfig:
"""
Properties:
input_directory: string, directory that contains the files to read data from. REQUIRED
structured_layout: bool, if True, the input_directory contains specially named and organized
directories of data. Default True
station_ids: optional list of strings, list of station ids to filter on.
If empty or None, get any ids found in the input directory. Default None
extensions: optional list of strings, representing file extensions to filter on.
If None, gets as much data as it can in the input directory. Default None
api_versions: optional list of ApiVersions, representing api versions to filter on.
If None, get as much data as it can in the input directory. Default None
start_year: optional int representing the year of the data window start time. Default None
start_month: optional int representing the month of the data window start time. Default None
start_day: optional int representing the day of the data window start time. Default None
start_hour: optional int representing the hour of the data window start time. Default None
start_minute: optional int representing the minute of the data window start time. Default None
start_second: optional int representing the second of the data window start time. Default None
end_year: optional int representing the year of the data window end time. Default None
end_month: optional int representing the month of the data window end time. Default None
end_day: optional int representing the day of the data window end time. Default None
end_hour: optional int representing the hour of the data window end time. Default None
end_minute: optional int representing the minute of the data window end time. Default None
end_second: optional int representing the second of the data window end time. Default None
start_padding_seconds: float representing the number of seconds to include before the start datetime
when filtering data. Default DEFAULT_START_PADDING_S
end_padding_seconds: float representing the number of seconds to include after the end datetime
when filtering data. Default DEFAULT_END_PADDING_S
drop_time_seconds: float representing the minimum number of seconds between data packets that would indicate
a gap. Default DEFAULT_DROP_TIME_S
apply_correction: bool, if True, update the timestamps in the data based on the best station offset. Default True
edge_points_mode: str, one of NAN, COPY, or INTERPOLATE. Determines behavior when creating points on the edge
of the data window. Default COPY
use_model_correction: bool, if True, use the offset model's correction functions, otherwise use the best
offset. Default True
debug: bool, if True, output additional information when processing data window. Default False
"""
input_directory: str
structured_layout: bool = True
station_ids: Optional[List[str]] = None
extensions: Optional[List[str]] = None
api_versions: Optional[List[str]] = None
start_year: Optional[int] = None
start_month: Optional[int] = None
start_day: Optional[int] = None
start_hour: Optional[int] = None
start_minute: Optional[int] = None
start_second: Optional[int] = None
end_year: Optional[int] = None
end_month: Optional[int] = None
end_day: Optional[int] = None
end_hour: Optional[int] = None
end_minute: Optional[int] = None
end_second: Optional[int] = None
start_padding_seconds: float = DEFAULT_START_PADDING_S
end_padding_seconds: float = DEFAULT_END_PADDING_S
drop_time_seconds: float = DEFAULT_DROP_TIME_S
apply_correction: bool = True
edge_points_mode: str = "COPY"
use_model_correction: bool = True
debug: bool = False
@staticmethod
def from_path(config_path: str) -> "DataWindowConfig":
try:
with open(config_path, "r") as config_in:
config_dict: MutableMapping = toml.load(config_in)
# noinspection Mypy
return DataWindowConfig.from_dict(config_dict)
except Exception as e:
print(f"Error loading configuration at: {config_path}")
raise e
def pretty(self) -> str:
# noinspection Mypy
return pprint.pformat(self.to_dict())
def start_dt(self) -> dtu.datetime:
return dtu.datetime(self.start_year, self.start_month, self.start_day,
self.start_hour, self.start_minute, self.start_second)
def set_start_dt(self, start_dt: dtu.datetime):
self.start_year = start_dt.year
self.start_month = start_dt.month
self.start_day = start_dt.day
self.start_hour = start_dt.hour
self.start_minute = start_dt.minute
self.start_second = start_dt.second
def end_dt(self) -> dtu.datetime:
return dtu.datetime(self.end_year, self.end_month, self.end_day,
self.end_hour, self.end_minute, self.end_second)
def set_end_dt(self, end_dt: dtu.datetime):
self.end_year = end_dt.year
self.end_month = end_dt.month
self.end_day = end_dt.day
self.end_hour = end_dt.hour
self.end_minute = end_dt.minute
self.end_second = end_dt.second
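The three module-level defaults above are consumed by the data window code rather than used directly in this module. As a rough, hedged illustration of how the docstrings describe them (this is not the library's implementation, only the documented semantics restated as code):

from datetime import datetime, timedelta

from redvox.common.data_window_configuration_old import (
    DEFAULT_DROP_TIME_S,
    DEFAULT_END_PADDING_S,
    DEFAULT_START_PADDING_S,
)

# Hypothetical illustration only: derive the padded search range described
# above for a one-hour request window.
requested_start = datetime(2021, 1, 1, 0, 0, 0)
requested_end = datetime(2021, 1, 1, 1, 0, 0)

search_start = requested_start - timedelta(seconds=DEFAULT_START_PADDING_S)  # 120 s earlier
search_end = requested_end + timedelta(seconds=DEFAULT_END_PADDING_S)        # 120 s later

# Consecutive packets spaced more than DEFAULT_DROP_TIME_S (0.2 s) apart
# would be treated as separated by a gap.
print(search_start, search_end)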
Classes
class DataWindowConfig (input_directory: str, structured_layout: bool = True, station_ids: Optional[List[str]] = None, extensions: Optional[List[str]] = None, api_versions: Optional[List[str]] = None, start_year: Optional[int] = None, start_month: Optional[int] = None, start_day: Optional[int] = None, start_hour: Optional[int] = None, start_minute: Optional[int] = None, start_second: Optional[int] = None, end_year: Optional[int] = None, end_month: Optional[int] = None, end_day: Optional[int] = None, end_hour: Optional[int] = None, end_minute: Optional[int] = None, end_second: Optional[int] = None, start_padding_seconds: float = 120.0, end_padding_seconds: float = 120.0, drop_time_seconds: float = 0.2, apply_correction: bool = True, edge_points_mode: str = 'COPY', use_model_correction: bool = True, debug: bool = False)
-
Properties
input_directory: string, directory that contains the files to read data from. REQUIRED
structured_layout: bool, if True, the input_directory contains specially named and organized directories of data. Default True
station_ids: optional list of strings, list of station ids to filter on. If empty or None, get any ids found in the input directory. Default None
extensions: optional list of strings, representing file extensions to filter on. If None, gets as much data as it can in the input directory. Default None
api_versions: optional list of ApiVersions, representing api versions to filter on. If None, get as much data as it can in the input directory. Default None
start_year: optional int representing the year of the data window start time. Default None
start_month: optional int representing the month of the data window start time. Default None
start_day: optional int representing the day of the data window start time. Default None
start_hour: optional int representing the hour of the data window start time. Default None
start_minute: optional int representing the minute of the data window start time. Default None
start_second: optional int representing the second of the data window start time. Default None
end_year: optional int representing the year of the data window end time. Default None
end_month: optional int representing the month of the data window end time. Default None
end_day: optional int representing the day of the data window end time. Default None
end_hour: optional int representing the hour of the data window end time. Default None
end_minute: optional int representing the minute of the data window end time. Default None
end_second: optional int representing the second of the data window end time. Default None
start_padding_seconds: float representing the number of seconds to include before the start datetime when filtering data. Default DEFAULT_START_PADDING_S
end_padding_seconds: float representing the number of seconds to include after the end datetime when filtering data. Default DEFAULT_END_PADDING_S
drop_time_seconds: float representing the minimum number of seconds between data packets that would indicate a gap. Default DEFAULT_DROP_TIME_S
apply_correction: bool, if True, update the timestamps in the data based on the best station offset. Default True
edge_points_mode: str, one of NAN, COPY, or INTERPOLATE. Determines behavior when creating points on the edge of the data window. Default COPY
use_model_correction: bool, if True, use the offset model's correction functions, otherwise use the best offset. Default True
debug: bool, if True, output additional information when processing data window. Default False
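A minimal construction sketch follows, assuming only the fields documented above. The input directory and station id are placeholder values, and the configuration is normally handed to the data window code rather than used on its own.

from redvox.common.data_window_configuration_old import DataWindowConfig

# Placeholder values: "/data/redvox" and the station id are examples, not defaults.
config = DataWindowConfig(
    input_directory="/data/redvox",   # the only required field
    structured_layout=True,
    station_ids=["1637610021"],
    start_year=2021, start_month=1, start_day=1,
    start_hour=0, start_minute=0, start_second=0,
    end_year=2021, end_month=1, end_day=1,
    end_hour=1, end_minute=0, end_second=0,
)

print(config.pretty())     # pprint-formatted dict of every field
print(config.start_dt())   # 2021-01-01 00:00:00
print(config.end_dt())     # 2021-01-01 01:00:00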
Class variables
var api_versions : Optional[List[str]]
var apply_correction : bool
var debug : bool
var drop_time_seconds : float
var edge_points_mode : str
var end_day : Optional[int]
var end_hour : Optional[int]
var end_minute : Optional[int]
var end_month : Optional[int]
var end_padding_seconds : float
var end_second : Optional[int]
var end_year : Optional[int]
var extensions : Optional[List[str]]
var input_directory : str
var start_day : Optional[int]
var start_hour : Optional[int]
var start_minute : Optional[int]
var start_month : Optional[int]
var start_padding_seconds : float
var start_second : Optional[int]
var start_year : Optional[int]
var station_ids : Optional[List[str]]
var structured_layout : bool
var use_model_correction : bool
Static methods
def from_dict(kvs: Union[dict, list, str, int, float, bool, ForwardRef(None)], *, infer_missing=False) ‑> ~A
-
Expand source code
@classmethod
def from_dict(cls: Type[A], kvs: Json, *, infer_missing=False) -> A:
    return _decode_dataclass(cls, kvs, infer_missing)
def from_json(s: Union[str, bytes, bytearray], *, parse_float=None, parse_int=None, parse_constant=None, infer_missing=False, **kw) ‑> ~A
-
Expand source code
@classmethod
def from_json(cls: Type[A],
              s: JsonData,
              *,
              parse_float=None,
              parse_int=None,
              parse_constant=None,
              infer_missing=False,
              **kw) -> A:
    kvs = json.loads(s, parse_float=parse_float, parse_int=parse_int,
                     parse_constant=parse_constant, **kw)
    return cls.from_dict(kvs, infer_missing=infer_missing)
def from_path(config_path: str) ‑> DataWindowConfig
-
Expand source code
@staticmethod
def from_path(config_path: str) -> "DataWindowConfig":
    try:
        with open(config_path, "r") as config_in:
            config_dict: MutableMapping = toml.load(config_in)
            # noinspection Mypy
            return DataWindowConfig.from_dict(config_dict)
    except Exception as e:
        print(f"Error loading configuration at: {config_path}")
        raise e
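Because from_path simply runs toml.load and then from_dict, a TOML file whose keys mirror the dataclass field names can be loaded directly. The file below is an illustrative sketch with placeholder values, not a canonical RedVox configuration file.

import os
import tempfile

from redvox.common.data_window_configuration_old import DataWindowConfig

# Illustrative TOML matching the dataclass field names; unspecified fields keep their defaults.
toml_text = """
input_directory = "/data/redvox"
structured_layout = true
station_ids = ["1637610021"]
start_year = 2021
start_month = 1
start_day = 1
end_year = 2021
end_month = 1
end_day = 2
"""

with tempfile.NamedTemporaryFile("w", suffix=".toml", delete=False) as f:
    f.write(toml_text)
    path = f.name

config = DataWindowConfig.from_path(path)
os.remove(path)
print(config.input_directory)  # "/data/redvox"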
def schema(*, infer_missing: bool = False, only=None, exclude=(), many: bool = False, context=None, load_only=(), dump_only=(), partial: bool = False, unknown=None) ‑> dataclasses_json.mm.SchemaF[~A]
-
Expand source code
@classmethod
def schema(cls: Type[A],
           *,
           infer_missing: bool = False,
           only=None,
           exclude=(),
           many: bool = False,
           context=None,
           load_only=(),
           dump_only=(),
           partial: bool = False,
           unknown=None) -> SchemaType:
    Schema = build_schema(cls, DataClassJsonMixin, infer_missing, partial)

    if unknown is None:
        undefined_parameter_action = _undefined_parameter_action_safe(cls)
        if undefined_parameter_action is not None:
            # We can just make use of the same-named mm keywords
            unknown = undefined_parameter_action.name.lower()

    return Schema(only=only,
                  exclude=exclude,
                  many=many,
                  context=context,
                  load_only=load_only,
                  dump_only=dump_only,
                  partial=partial,
                  unknown=unknown)
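schema() is inherited from the dataclasses_json mixin and returns a marshmallow schema built for the dataclass. A brief hedged sketch of typical usage; exact behavior depends on the installed dataclasses_json and marshmallow versions, and the path below is a placeholder.

from redvox.common.data_window_configuration_old import DataWindowConfig

schema = DataWindowConfig.schema()                          # marshmallow schema instance
config = DataWindowConfig(input_directory="/data/redvox")   # placeholder path

as_json = schema.dumps(config)     # serialize to a JSON string
restored = schema.loads(as_json)   # deserialize back to a DataWindowConfig
print(restored.input_directory)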
Methods
def end_dt(self) ‑> datetime.datetime
-
Expand source code
def end_dt(self) -> dtu.datetime:
    return dtu.datetime(self.end_year, self.end_month, self.end_day,
                        self.end_hour, self.end_minute, self.end_second)
def pretty(self) ‑> str
-
Expand source code
def pretty(self) -> str:
    # noinspection Mypy
    return pprint.pformat(self.to_dict())
def set_end_dt(self, end_dt: datetime.datetime)
-
Expand source code
def set_end_dt(self, end_dt: dtu.datetime):
    self.end_year = end_dt.year
    self.end_month = end_dt.month
    self.end_day = end_dt.day
    self.end_hour = end_dt.hour
    self.end_minute = end_dt.minute
    self.end_second = end_dt.second
def set_start_dt(self, start_dt: datetime.datetime)
-
Expand source code
def set_start_dt(self, start_dt: dtu.datetime):
    self.start_year = start_dt.year
    self.start_month = start_dt.month
    self.start_day = start_dt.day
    self.start_hour = start_dt.hour
    self.start_minute = start_dt.minute
    self.start_second = start_dt.second
def start_dt(self) ‑> datetime.datetime
-
Expand source code
def start_dt(self) -> dtu.datetime:
    return dtu.datetime(self.start_year, self.start_month, self.start_day,
                        self.start_hour, self.start_minute, self.start_second)
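A short sketch of the datetime helpers above, using a placeholder input directory. Note that start_dt() and end_dt() pass the component fields straight to datetime, so all six start or end components (which default to None) must be set before calling them.

from datetime import datetime, timedelta

from redvox.common.data_window_configuration_old import DataWindowConfig

config = DataWindowConfig(input_directory="/data/redvox")  # placeholder path

# Fill in the six start components and the six end components from datetimes...
config.set_start_dt(datetime(2021, 1, 1, 0, 0, 0))
config.set_end_dt(datetime(2021, 1, 1, 0, 0, 0) + timedelta(hours=1))

# ...and read them back as datetime objects.
print(config.start_dt())   # 2021-01-01 00:00:00
print(config.end_dt())     # 2021-01-01 01:00:00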
def to_dict(self, encode_json=False) ‑> Dict[str, Union[dict, list, str, int, float, bool, ForwardRef(None)]]
-
Expand source code
def to_dict(self, encode_json=False) -> Dict[str, Json]:
    return _asdict(self, encode_json=encode_json)
def to_json(self, *, skipkeys: bool = False, ensure_ascii: bool = True, check_circular: bool = True, allow_nan: bool = True, indent: Union[int, str, ForwardRef(None)] = None, separators: Tuple[str, str] = None, default: Callable = None, sort_keys: bool = False, **kw) ‑> str
-
Expand source code
def to_json(self,
            *,
            skipkeys: bool = False,
            ensure_ascii: bool = True,
            check_circular: bool = True,
            allow_nan: bool = True,
            indent: Optional[Union[int, str]] = None,
            separators: Tuple[str, str] = None,
            default: Callable = None,
            sort_keys: bool = False,
            **kw) -> str:
    return json.dumps(self.to_dict(encode_json=False),
                      cls=_ExtendedEncoder,
                      skipkeys=skipkeys,
                      ensure_ascii=ensure_ascii,
                      check_circular=check_circular,
                      allow_nan=allow_nan,
                      indent=indent,
                      separators=separators,
                      default=default,
                      sort_keys=sort_keys,
                      **kw)
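to_dict, to_json, from_dict, and from_json come from the dataclasses_json mixin and round-trip the configuration. A minimal sketch, again with a placeholder input directory:

from redvox.common.data_window_configuration_old import DataWindowConfig

config = DataWindowConfig(input_directory="/data/redvox")  # placeholder path

as_dict = config.to_dict()                      # plain dict of every field
as_json = config.to_json(indent=2)              # JSON string
restored = DataWindowConfig.from_json(as_json)  # back to a DataWindowConfig

assert restored == config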