Source code for annular.tariffs

import datetime
from collections.abc import Mapping
from functools import reduce
from numbers import Real
from operator import and_, attrgetter
from pathlib import Path
from typing import Callable, Iterable, Type, TypeVar

import pandas as pd
from dateutil.easter import easter

# Type variable bound to ``TariffManager`` (given as a forward reference), used to
# annotate the alternative constructor so that it is typed as returning the class
# it was called on.
T = TypeVar("T", bound="TariffManager")
def parse_weekday_weekend(date: datetime.date) -> str:
    """Classify a date as either a weekday or a weekend day.

    Dutch holidays are counted as weekend days.

    Args:
        date: Date to classify.

    Returns:
        ``"weekend"`` for Saturdays, Sundays and Dutch holidays, ``"weekday"``
        for every other date.
    """
    # ISO weekday numbering runs from 1 (Monday) through 7 (Sunday).
    falls_in_weekend = date.isoweekday() >= 6
    if falls_in_weekend or is_dutch_holiday(date):
        return "weekend"
    return "weekday"
# Maps the name of each supported temporal index level to the function that
# extracts the value for that level from a timestamp.
TEMPORAL_INDEXERS: dict[str, Callable] = {
    # Plain attribute look-ups on the timestamp.
    "year": attrgetter("year"),
    "month": attrgetter("month"),
    # Derived category: yields "weekday" or "weekend".
    "weekday/weekend": parse_weekday_weekend,
    "hour": attrgetter("hour"),
}
[docs] class TariffManager: def __init__(self, tariff_data: dict[str, pd.Series | Real] | None = None): """A manager object for energy network tariffs. Includes support for intelligently parsing timestamps to various time-based indexing options. Tariffs are retrievable by (case-insensitive) name through various `fetch_*` methods. Time related index names can be provided case-insensitive, since any case-sensitivity will be removed from index names using the ``casefold`` string method. Args: tariff_data: Dictionary where keys are tariff names and values are series of tariff values and indices. """ tariff_data = tariff_data if tariff_data else {} # replace `None` with empty dictionary
[docs] self.data = {key.casefold(): value for key, value in tariff_data.items()}
for key, value in tariff_data.items(): # If a tariff still has an index, case-fold the index names if isinstance(value, pd.Series): value.index.names = [name.casefold() for name in value.index.names] self.data[key.casefold()] = value # Save tariffs under case-folded name @classmethod
[docs] def from_folder(cls: Type[T], path: Path, preselect: Mapping[str, str | Real]) -> T: """Create a TariffManager from csv files in a folder. Args: path: Path to the folder with tariff data in csv format. preselect: Dictionary specifying the category within the tariff. Any tariff may be indexed both categorically and temporally. This `preselect` argument should at least specify a value for each categorical index, i.e., column in the tariff file. E.g.: `{"grid level": "distribution", "consumer type": "small"}`. If any of the categories given to `preselect` are not present in the tariff data, they are silently ignored. """ raw_tariffs = {file.stem: pd.read_csv(file) for file in path.iterdir()} tariffs = {name: filter_dataframe(tariff, preselect) for name, tariff in raw_tariffs.items()} return cls(tariffs)
[docs] def __contains__(self, item: str) -> bool: """Check if a tariff is present in the TariffManager.""" return item.casefold() in self.data
[docs] def fetch_value(self, name: str) -> Real: """Fetch a single-valued tariff. Args: name: Name of the tariff. Returns: Value for a specific tariff. """ tariff = self.data[name.casefold()] if not isinstance(tariff, Real): raise ValueError("Specified tariff is not a single value.") return tariff
[docs] def fetch_timeseries(self, name: str, timestamps: pd.Index) -> pd.Series: """Fetch tariff value for each given timestamp. Args: name: Name of the tariff. timestamps: Datetime to use for selecting temporal index levels. Returns: Series of values for the specified tariff, indexed by the given timestamps. """ tariff = self.data[name.casefold()] assert isinstance(tariff, pd.Series) # Force typing of `tariff` # Prepare parsed versions of the timestamps as columns that can be JOIN-ed to match the tariff values timestamp_series = timestamps.to_series() timestamps_to_join_to = pd.DataFrame( {key: timestamp_series.apply(parser) for key, parser in TEMPORAL_INDEXERS.items()}, index=timestamps ) return timestamps_to_join_to.join(tariff, on=tariff.index.names)["value"]
[docs] def fetch_indexed(self, name: str, timestamps: Iterable[pd.Timestamp]) -> pd.Series: """Fetch collection of tariff values, relevant to the given timestamps. Args: name: Name of the tariff. timestamps: Datetime to use for selecting temporal index levels. Returns: Series of tariff values, maintaining its original index, pre-selected with only the relevant values . """ tariff = self.data[name.casefold()] assert isinstance(tariff, pd.Series) # Force typing of `tariff` # Get the unique set of temporal index values from the timestamps unique_values = { level: {TEMPORAL_INDEXERS[level](timestamp) for timestamp in timestamps} for level in tariff.index.names } # Make a selection mask for each index level masks = [tariff.index.isin(unique_values[level], level) for level in tariff.index.names] # Combine all masks per level using logical AND mask = reduce(and_, masks) return tariff[mask]
[docs] def filter_dataframe(data: pd.DataFrame, select: Mapping[str, str | Real]) -> pd.Series | Real: """Filter a dataframe by multiple columns. Example: Preselect ``{"Foo": "high", "Bar": "old"}`` with data = ==== === === ===== Foo Bar Baz value ==== === === ===== high new yes 1 high new no 2 high old yes 3 high old no 4 low new yes 5 low new no 6 low old yes 7 low old no 8 ==== === === ===== Result: === ===== Baz value === ===== yes 3 no 4 === ===== Note: Column names can be given case-insensitively, but the values to match on are still case-sensitive. So the previous example would work the same if ``select={"foo": "high", "BAR": "old"}`` was used, but not ``select={"Foo": "High", "Bar": "OLD"}``. Args: data: pandas DataFrame to filter. Must have at least one column named 'value'. select: Dictionary where keys are strings or integers, used to select only the desired rows from the given data. If a key is present as a column name, then only those rows are kept where that column matches the matching value from this dictionary. Returns: A pd.Series of the ``value`` column, where rows are filtered based on matching values in `select`. Any columns that were filtered on are removed, and any remaining columns are used as a pd.MultiIndex. If the series only consists of a single row, then it only returns the value. """ data.rename(columns=str.casefold, inplace=True) index_cols = [x for x in data.columns if x != "value"] selector_values, selector_levels = [], [] for colname, value in select.items(): colname = colname.casefold() if colname not in index_cols: continue selector_values.append(value) selector_levels.append(colname) indexed_data = data.set_index(index_cols) if selector_levels: indexed_data = indexed_data.xs(tuple(selector_values), level=selector_levels) values = indexed_data["value"] if len(values) == 1: return values.iloc[0] return values
def is_dutch_holiday(date: datetime.date) -> bool:
    """Tell whether a date is a Dutch holiday that is not a weekend day by definition.

    As of 2013, Dutch holidays are:

    - New Year's Day
    - Good Friday
    - Easter (Sunday and Monday)
    - King's Day
    - Ascension Day
    - Pentecost (Sunday and Monday)
    - Christmas (25th and 26th)

    Easter Sunday and Pentecost Sunday always fall in the weekend and are
    therefore not checked here.

    Args:
        date: Date to check.

    Returns:
        True if the given date is a Dutch holiday, False otherwise.
    """
    # Holidays with a fixed (month, day): New Year's Day, King's Day and Christmas.
    # NB: King's Day moves to the 26th when the 27th is a Sunday, but the 26th is
    # then a Saturday and thus a weekend day anyway, so it needs no special case.
    fixed_holidays = {(1, 1), (4, 27), (12, 25), (12, 26)}

    # Holidays defined relative to Easter Sunday, as offsets in days: Good Friday,
    # second Easter day, Ascension Day and second Pentecost day respectively.
    day_offsets = (-2, 1, 39, 50)
    easter_sunday = easter(date.year)
    movable_holidays = {
        (holiday.month, holiday.day)
        for holiday in (easter_sunday + datetime.timedelta(days=offset) for offset in day_offsets)
    }

    all_holidays = fixed_holidays | movable_holidays
    return (date.month, date.day) in all_holidays