Source code for reperiods.find_RP.poncelet

from typing import Any

import numpy as np
import pandas as pd
import pulp as pl
from sklearn.preprocessing import MinMaxScaler

from ..representative_periods import RepresentativePeriods
from ..utils import duration_function



[docs]
def poncelet_method(
    data: pd.DataFrame,
    curve_set: pd.Index,
    N_RP: int,
    RP_length: int,
    N_bins: int = 15,
    solver: Any = None,
) -> list[RepresentativePeriods]:
    """Find representative periods (RPs) and their weights using the Poncelet et al. (2017) method.

    The data is cut into consecutive, non-overlapping candidate periods of
    ``RP_length`` rows. A mixed-integer linear program then selects exactly
    ``N_RP`` candidates and assigns them non-negative weights summing to 1,
    minimizing the summed absolute gap, over every curve and every bin level,
    between the duration-curve value of the full data and the weighted
    duration-curve values of the selected candidates.

    Args:
        data (DataFrame): A DataFrame containing the data where RP will be found.
            Trailing rows that do not fill a complete period are not part of any
            candidate, but still contribute to the full-data duration curves.
        curve_set (Index): The columns of ``data`` (curves) to approximate.
        N_RP (int): The number of representative periods to find. Must be
            between 1 and ``len(data) // RP_length``.
        RP_length (int): The length (in rows) of each representative period.
            Must be positive.
        N_bins (int, optional): The number of bins for duration curve
            discretization; levels are evenly spaced on [0, 1]. Must be at
            least 2. Defaults to 15.
        solver (pulp solver, optional): The solver to use for optimization. Defaults to None. see : https://coin-or.github.io/pulp/guides/how_to_configure_solvers.html and https://coin-or.github.io/pulp/technical/solvers.html#module-pulp.apis

    Returns:
        list: A list of RepresentativePeriods objects, one per selected
        candidate, each built from the candidate's (non-normalized) rows and
        its solved weight.

    Raises:
        ValueError: If ``RP_length`` is not positive, ``N_bins`` is below 2, or
            ``N_RP`` is not between 1 and the number of candidate periods.
    """
    # Validate before building anything: these inputs would otherwise yield a
    # ZeroDivisionError, NaN bin levels (0 / 0), or an infeasible model whose
    # variable values are meaningless.
    if RP_length <= 0:
        raise ValueError(f"RP_length must be a positive integer, got {RP_length}.")
    if N_bins < 2:
        raise ValueError(f"N_bins must be at least 2, got {N_bins}.")
    n_candidates = data.shape[0] // RP_length
    if not 1 <= N_RP <= n_candidates:
        raise ValueError(
            f"N_RP must be between 1 and the number of candidate periods "
            f"({n_candidates}), got {N_RP}."
        )

    # Candidate periods (not normalized): consecutive slices of RP_length rows.
    P_candidates = {
        P_id: data.iloc[P_id * RP_length : (P_id + 1) * RP_length]
        for P_id in range(n_candidates)
    }

    # Evenly spaced levels 0, 1/(N_bins-1), ..., 1 at which curves are compared.
    bins = np.arange(N_bins) / (N_bins - 1)

    # One min-max scaler per curve, fitted on the full data so that the full
    # series and every candidate are normalized with the same bounds.
    scalers = {}
    for curve in curve_set:
        scaler = MinMaxScaler()
        scaler.fit(data[curve].to_numpy().reshape(-1, 1))
        scalers[curve] = scaler

    ## Set MILP model
    # Constants
    # L[curve, level]: duration function of the normalized full series at level.
    # NOTE(review): duration_function is assumed to return a callable that
    # evaluates the duration curve at a normalized level - confirm in utils.
    L = {}
    for curve in curve_set:
        full_DC = duration_function(
            scalers[curve].transform(data[curve].to_numpy().reshape(-1, 1))
        )
        for level in bins:
            L[curve, level] = full_DC(level)

    # A[curve, level, P_id]: same quantity for each candidate period.
    A = {}
    for P_id, period in P_candidates.items():
        for curve in curve_set:
            period_DC = duration_function(
                scalers[curve].transform(period[curve].to_numpy().reshape(-1, 1))
            )
            for level in bins:
                A[curve, level, P_id] = period_DC(level)

    # Variables
    # U: 1 if the candidate is selected; W: its weight; errors: absolute gap.
    U = {P_id: pl.LpVariable(f"U_{P_id}", cat="Binary") for P_id in P_candidates}
    W = {
        P_id: pl.LpVariable(f"W_{P_id}", cat="Continuous", lowBound=0)
        for P_id in P_candidates
    }
    errors = {
        (curve, level): pl.LpVariable(f"error_{curve}-{level}", cat="Continuous")
        for curve in curve_set
        for level in bins
    }

    # Constraints
    problem = pl.LpProblem("Poncelet_Method", pl.LpMinimize)
    problem += (
        pl.lpSum(U[P_id] for P_id in P_candidates) == N_RP,
        "Number_of_RP",
    )
    problem += (
        pl.lpSum(W[P_id] for P_id in P_candidates) == 1,
        "RP_weights",
    )

    for P_id in P_candidates:
        # A candidate can only carry weight when it is selected.
        problem += (
            W[P_id] <= U[P_id],
            f"Weight_{P_id}_is_not_null_if_the_Period_{P_id}_is_selected",
        )

    for curve in curve_set:
        for level in bins:
            # Build the weighted approximation once and reuse it on both sides
            # of the absolute-value linearization: error >= +/-(L - approx).
            approximation = pl.lpSum(
                W[P_id] * A[curve, level, P_id] for P_id in P_candidates
            )
            problem += errors[curve, level] >= L[curve, level] - approximation
            problem += errors[curve, level] >= approximation - L[curve, level]

    # Objective
    problem += (
        pl.lpSum(errors[curve, level] for curve in curve_set for level in bins),
        "Minimize global error",
    )

    problem.solve(solver)

    representative_periods = []
    for P_id, period in P_candidates.items():
        selected = U[P_id].varValue
        # Solvers return binaries only up to an integrality tolerance (e.g.
        # 0.9999999), so threshold instead of comparing to 1 exactly; varValue
        # is None when the solver produced no value for the variable.
        if selected is not None and selected > 0.5:
            representative_periods.append(
                RepresentativePeriods(data=period, weight=W[P_id].varValue)
            )
    return representative_periods