Source code for mydatapreprocessing.consolidation.consolidation_config.consolidation_config_internal
"""Module with config for consolidation pipeline."""
from __future__ import annotations
import numpy as np
import pandas as pd
from mypythontools.config import Config, MyProperty
from ...types import PandasIndex
from . import subconfigurations
[docs]class ConsolidationConfig(Config):
"""Config class for `consolidate_data` pipeline.
There is `default_consolidation_config` object already created. You can import it, edit and use. Static
type check and intellisense should work.
"""
def __init__(self) -> None:
"""Create subconfigs."""
self.datetime: subconfigurations.Datetime = subconfigurations.Datetime()
"""Set datetime index and convert it to datetime type."""
self.resample: subconfigurations.Resample = subconfigurations.Resample()
"""Change sampling frequency on defined frequency if there is a datetime column. You can use sum or
average."""
self.remove_missing_values: subconfigurations.RemoveMissingValues = (
subconfigurations.RemoveMissingValues()
)
"""Define whether and how to remove NotANumber values."""
self.strings_to_numeric: subconfigurations.StringsToNumeric = subconfigurations.StringsToNumeric()
"""Remove or replace string values with numbers."""
@MyProperty
def inplace(self) -> bool:
"""Define whether work on inserted data itself, or on a copy.
Type:
bool
Default:
False
Copy is created just once, then internally all the consolidating functions are used inplace. Syntax is
a bit different than in for example Pandas. Use assigning to variable e.g. `df = consolidate_data(df)`
even with inplace. If True your inserted data will be changed.
"""
return False
@MyProperty
def check_shape_and_transform(self) -> bool:
"""Check whether correct shape is used and eventually transpose.
Type:
bool
Default:
True
Usually there is much more rows than columns in table. If not, it can mean that dimensions are swapped
from data load. This will check this, transform if necessary and log it.
"""
return True
@MyProperty
def first_column(self) -> None | PandasIndex:
"""Move defined column on index 0.
Type:
None | PandasIndex
Default:
None
"""
return None
@MyProperty
def data_length(self) -> int:
"""Limit the data length after resampling.
Type:
int
Default:
0
If 0, then all the data is used.
"""
return 0
@MyProperty
def dtype(self) -> str | np.dtype | pd.Series | list[str | np.dtype]:
"""Set output dtype.
Type:
str | np.dtype | pd.Series | list[str | np.dtype]
Default:
"float32"
For possible inputs check pandas function `astype`.
"""
return "float32"
default_consolidation_config = ConsolidationConfig()