Skip to content

utils

pytorch_lattice.utils.data

Utility functions and classes for handling data.

Dataset

Bases: Dataset

A class for loading a dataset for a calibrated model.

Source code in pytorch_lattice/utils/data.py
class Dataset(torch.utils.data.Dataset):
    """A class for loading a dataset for a calibrated model.

    The selected feature columns are stored as a double-precision tensor whose
    column order follows the order of `features`, and the labels are stored as a
    double-precision tensor with a trailing singleton dimension.
    """

    def __init__(
        self,
        X: pd.DataFrame,
        y: np.ndarray,
        features: list[Union[NumericalFeature, CategoricalFeature]],
    ):
        """Initializes an instance of `Dataset`.

        Args:
            X: The data frame holding the feature columns. It is not modified; a
                copy of the selected columns is stored on the instance.
            y: The labels. A copy is stored on the instance.
                NOTE(review): the `[:, None]` indexing below assumes `y` is 1-D.
            features: The feature configs. Only the columns named by their
                `feature_name` are kept, in the order the configs are given.

        Raises:
            ValueError: If any feature name is not a column of `X`.
        """
        selected_features = [feature.feature_name for feature in features]
        unavailable_features = set(selected_features) - set(X.columns)
        if len(unavailable_features) > 0:
            raise ValueError(f"Features {unavailable_features} not found in dataset.")

        # Select the columns in `features` order rather than the data frame's own
        # column order: column `i` of `self.data` has to line up with the `i`-th
        # feature config. `dict.fromkeys` drops repeated names while preserving
        # first-seen order, so a feature listed twice still yields one column.
        ordered_features = list(dict.fromkeys(selected_features))
        self.X = X[ordered_features].copy()
        self.y = y.copy()
        prepare_features(self.X, features)

        self.data = torch.from_numpy(self.X.values).double()
        self.labels = torch.from_numpy(self.y).double()[:, None]

    def __len__(self):
        """Returns the number of rows in the stored feature data frame."""
        return len(self.X)

    def __getitem__(self, idx):
        """Returns `[features, label]` for the given index (or tensor of indices)."""
        if isinstance(idx, torch.Tensor):
            idx = idx.tolist()

        return [self.data[idx], self.labels[idx]]

__init__(X, y, features)

Initializes an instance of Dataset.

Source code in pytorch_lattice/utils/data.py
def __init__(
    self,
    X: pd.DataFrame,
    y: np.ndarray,
    features: list[Union[NumericalFeature, CategoricalFeature]],
):
    """Initializes an instance of `Dataset`.

    Args:
        X: The data frame holding the feature columns. It is not modified; a
            copy of the selected columns is stored on the instance.
        y: The labels. A copy is stored on the instance.
            NOTE(review): the `[:, None]` indexing below assumes `y` is 1-D.
        features: The feature configs. Only the columns named by their
            `feature_name` are kept, in the order the configs are given.

    Raises:
        ValueError: If any feature name is not a column of `X`.
    """
    selected_features = [feature.feature_name for feature in features]
    unavailable_features = set(selected_features) - set(X.columns)
    if len(unavailable_features) > 0:
        raise ValueError(f"Features {unavailable_features} not found in dataset.")

    # Select the columns in `features` order rather than the data frame's own
    # column order: column `i` of `self.data` has to line up with the `i`-th
    # feature config. `dict.fromkeys` drops repeated names while preserving
    # first-seen order, so a feature listed twice still yields one column.
    ordered_features = list(dict.fromkeys(selected_features))
    self.X = X[ordered_features].copy()
    self.y = y.copy()
    prepare_features(self.X, features)

    self.data = torch.from_numpy(self.X.values).double()
    self.labels = torch.from_numpy(self.y).double()[:, None]

prepare_features(X, features)

Maps categorical features to their integer indices in place.

Source code in pytorch_lattice/utils/data.py
def prepare_features(
    X: pd.DataFrame, features: list[Union[NumericalFeature, CategoricalFeature]]
):
    """Rewrites the feature columns of `X` in place for use by a calibrated model.

    For every feature config, the column named `feature_name` is replaced by a
    processed version of itself: categorical columns are mapped through the
    config's `category_indices`, and missing values are filled with the config's
    `missing_input_value` when one is set.

    Args:
        X: The data frame to update. It is modified in place.
        features: The feature configs describing which columns to process.
    """
    for config in features:
        name = config.feature_name
        column = X[name]

        # Categories become their integer indices.
        # NOTE(review): presumably `category_indices` is a mapping, in which case
        # values without an entry come back from `map` as NaN - confirm.
        if isinstance(config, CategoricalFeature):
            column = column.map(config.category_indices)

        # Filling happens after the mapping step above, so it also applies to
        # any NaN values that step produced.
        fill_value = config.missing_input_value
        if fill_value is not None:
            column = column.fillna(fill_value)

        X[name] = column

pytorch_lattice.utils.models

Utility functions for use in model classes.

calibrate_and_stack(x, calibrators)

Helper function to run calibrators along columns of given data.

Parameters:

Name Type Description Default
x Tensor

The input tensor of feature values of shape (batch_size, num_features).

required
calibrators ModuleDict

A dictionary of calibrator functions.

required

Returns:

Type Description
Tensor

A torch.Tensor resulting from applying the calibrators and stacking the results.

Source code in pytorch_lattice/utils/models.py
def calibrate_and_stack(
    x: torch.Tensor,
    calibrators: torch.nn.ModuleDict,
) -> torch.Tensor:
    """Applies each calibrator to its own input column and stacks the outputs.

    Calibrators are matched to columns by position: the `i`-th calibrator in the
    dictionary's iteration order receives column `i` of `x`.

    Args:
        x: The input tensor of feature values of shape `(batch_size, num_features)`.
        calibrators: A dictionary of calibrator functions.

    Returns:
        A torch.Tensor resulting from applying the calibrators and stacking the results.
    """
    calibrated_columns = []
    for column_index, calibrator in enumerate(calibrators.values()):
        # Indexing with `None` keeps the column two-dimensional: (batch_size, 1).
        feature_column = x[:, column_index, None]
        calibrated_columns.append(calibrator(feature_column))
    return torch.column_stack(tuple(calibrated_columns))

initialize_feature_calibrators(features, output_min=None, output_max=None)

Helper function to initialize calibrators for calibrated model.

Parameters:

Name Type Description Default
features list[Union[NumericalFeature, CategoricalFeature]]

A list of numerical and/or categorical feature configs.

required
output_min Optional[float]

The minimum output value for the model. If None, the minimum output value will be unbounded.

None
output_max Union[Optional[float], list[Optional[float]]]

A list of maximum output value for each feature of the model. If None, the maximum output value will be unbounded. If a singular value, it will be taken as the maximum of all features.

None

Returns:

Type Description
ModuleDict

A torch.nn.ModuleDict of calibrators accessible by each feature's name.

Raises:

Type Description
ValueError

If any feature configs are not NUMERICAL or CATEGORICAL.

Source code in pytorch_lattice/utils/models.py
def initialize_feature_calibrators(
    features: list[Union[NumericalFeature, CategoricalFeature]],
    output_min: Optional[float] = None,
    output_max: Union[Optional[float], list[Optional[float]]] = None,
) -> torch.nn.ModuleDict:
    """Helper function to initialize calibrators for calibrated model.

    Args:
        features: A list of numerical and/or categorical feature configs.
        output_min: The minimum output value passed to every calibrator. If `None`,
            the minimum output value will be unbounded.
        output_max: A list of maximum output value for each feature of the model. If
            `None`, the maximum output value will be unbounded. If a singular value, it
            will be taken as the maximum of all features. If a list, it must contain
            exactly one entry per feature.

    Returns:
        A `torch.nn.ModuleDict` of calibrators accessible by each feature's name.

    Raises:
        ValueError: If `output_max` is a list whose length differs from the number
            of features, or if any feature configs are not `NUMERICAL` or
            `CATEGORICAL`.
    """
    if not isinstance(output_max, list):
        # Broadcast a single bound (or `None`) to every feature.
        output_max = [output_max] * len(features)
    elif len(output_max) != len(features):
        # `zip` below would otherwise stop at the shorter sequence, silently
        # leaving features without a calibrator or ignoring extra bounds.
        raise ValueError(
            f"Expected {len(features)} output_max values (one per feature), "
            f"got {len(output_max)}."
        )

    calibrators = torch.nn.ModuleDict()
    for feature, feature_max in zip(features, output_max):
        if isinstance(feature, NumericalFeature):
            calibrators[feature.feature_name] = NumericalCalibrator(
                input_keypoints=feature.input_keypoints,
                missing_input_value=feature.missing_input_value,
                output_min=output_min,
                output_max=feature_max,
                monotonicity=feature.monotonicity,
                kernel_init=NumericalCalibratorInit.EQUAL_SLOPES,
                projection_iterations=feature.projection_iterations,
            )
        elif isinstance(feature, CategoricalFeature):
            calibrators[feature.feature_name] = CategoricalCalibrator(
                num_categories=len(feature.categories),
                missing_input_value=feature.missing_input_value,
                output_min=output_min,
                output_max=feature_max,
                monotonicity_pairs=feature.monotonicity_index_pairs,
                kernel_init=CategoricalCalibratorInit.UNIFORM,
            )
        else:
            raise ValueError(f"Unknown type {type(feature)} for feature {feature}")
    return calibrators

initialize_monotonicities(features)

Helper function to initialize monotonicities for calibrated model.

Parameters:

Name Type Description Default
features list[Union[NumericalFeature, CategoricalFeature]]

A list of numerical and/or categorical feature configs.

required

Returns:

Type Description
list[Optional[Monotonicity]]

A list of None or Monotonicity.INCREASING based on whether each feature has a monotonicity or not.

Source code in pytorch_lattice/utils/models.py
def initialize_monotonicities(
    features: list[Union[NumericalFeature, CategoricalFeature]]
) -> list[Optional[Monotonicity]]:
    """Derives one monotonicity setting per feature for a calibrated model.

    A categorical feature with no monotonicity pairs, or a numerical feature whose
    monotonicity is `None`, yields `None`. Every other feature yields
    `Monotonicity.INCREASING`.

    Args:
        features: A list of numerical and/or categorical feature configs.

    Returns:
        A list of `None` or `Monotonicity.INCREASING`, in the same order as
        `features`.
    """
    monotonicities: list[Optional[Monotonicity]] = []
    for feature in features:
        if isinstance(feature, CategoricalFeature) and not feature.monotonicity_pairs:
            monotonicities.append(None)
        elif isinstance(feature, NumericalFeature) and feature.monotonicity is None:
            monotonicities.append(None)
        else:
            monotonicities.append(Monotonicity.INCREASING)
    return monotonicities

initialize_output_calibrator(monotonic, output_calibration_num_keypoints, output_min=None, output_max=None)

Helper function to initialize output calibrator for calibrated model.

Parameters:

Name Type Description Default
monotonic bool

Whether output calibrator should have monotonicity constraint.

required
output_calibration_num_keypoints Optional[int]

The number of keypoints in output calibrator. If 0 or None, no output calibrator will be returned.

required
output_min Optional[float]

The minimum output value for the model. If None, the minimum output value will be unbounded.

None
output_max Optional[float]

The maximum output value for the model. If None, the maximum output value will be unbounded.

None

Returns:

Type Description
Optional[NumericalCalibrator]

A NumericalCalibrator to apply to the model output, or None if output_calibration_num_keypoints is 0 or None.

Raises:

Type Description
ValueError

If any feature configs are not NUMERICAL or CATEGORICAL.

Source code in pytorch_lattice/utils/models.py
def initialize_output_calibrator(
    monotonic: bool,
    output_calibration_num_keypoints: Optional[int],
    output_min: Optional[float] = None,
    output_max: Optional[float] = None,
) -> Optional[NumericalCalibrator]:
    """Helper function to initialize output calibrator for calibrated model.

    Args:
        monotonic: Whether output calibrator should have monotonicity constraint.
        output_calibration_num_keypoints: The number of keypoints in output
            calibrator. If `0` or `None`, no output calibrator will be returned.
        output_min: The minimum output value for the model. If `None`, the minimum
            output value will be unbounded.
        output_max: The maximum output value for the model. If `None`, the maximum
            output value will be unbounded.

    Returns:
        A `NumericalCalibrator` whose input keypoints are evenly spaced over
        `[0.0, 1.0]`, or `None` when `output_calibration_num_keypoints` is `0` or
        `None`.
    """
    # Guard clause: a falsy keypoint count (`0` or `None`) means "no calibrator".
    if not output_calibration_num_keypoints:
        return None

    keypoints = np.linspace(0.0, 1.0, num=output_calibration_num_keypoints)
    constraint = Monotonicity.INCREASING if monotonic else None
    return NumericalCalibrator(
        input_keypoints=keypoints,
        missing_input_value=None,
        output_min=output_min,
        output_max=output_max,
        monotonicity=constraint,
        kernel_init=NumericalCalibratorInit.EQUAL_HEIGHTS,
    )