classifier

`pytorch_lattice.classifier.Classifier`

A classifier for tabular data using calibrated models.

Note: currently only handles binary classification targets.

Example:

X, y = pyl.datasets.heart()
clf = pyl.Classifier(X.columns)
clf.configure("age").num_keypoints(10).monotonicity("increasing")
clf.fit(X, y)

Attributes:

Name	Type	Description
`features`		A dict mapping feature names to their corresponding `FeatureConfig` instances.
`model_config`		The model configuration to use for fitting the classifier.
`self.model`		The fitted model. This will be `None` until `fit` is called.

Source code in pytorch_lattice/classifier.py

class Classifier:
    """A classifier for tabular data using calibrated models.

    Note: currently only handles binary classification targets.

    Example:
    ```python
    X, y = pyl.datasets.heart()
    clf = pyl.Classifier(X.columns)
    clf.configure("age").num_keypoints(10).monotonicity("increasing")
    clf.fit(X, y)
    ```

    Attributes:
        features: A dict mapping feature names to their corresponding `FeatureConfig`
            instances.
        model_config: The model configuration to use for fitting the classifier.
        model: The fitted model. This will be `None` until `fit` is called.
    """

    def __init__(
        self,
        feature_names: list[str],
        model_config: Optional[Union[LinearConfig, LatticeConfig]] = None,
    ):
        """Initializes an instance of `Classifier`.

        Args:
            feature_names: The names of the features to use. A default
                `FeatureConfig` is created for each name.
            model_config: The model configuration to use. When `None`, a default
                `LinearConfig` is created.
        """
        self.features = {
            feature_name: FeatureConfig(name=feature_name)
            for feature_name in feature_names
        }
        self.model_config = model_config if model_config is not None else LinearConfig()
        self.model: Optional[Union[CalibratedLinear, CalibratedLattice]] = None

    def configure(self, feature_name: str) -> FeatureConfig:
        """Returns a `FeatureConfig` object for the given feature name.

        Args:
            feature_name: The name of the feature whose configuration is wanted.

        Raises:
            KeyError: If `feature_name` is not a key of `self.features`.
        """
        return self.features[feature_name]

    def fit(
        self,
        X: pd.DataFrame,
        y: np.ndarray,
        epochs: int = 50,
        batch_size: int = 64,
        learning_rate: float = 1e-3,
        shuffle: bool = False,
    ) -> Classifier:
        """Returns this classifier after fitting a model to the given data.

        Note that calling this function will overwrite any existing model and train a
        new model from scratch.

        Args:
            X: A `pd.DataFrame` containing the features for the training data.
            y: A `np.ndarray` containing the labels for the training data.
            epochs: The number of epochs for which to fit the classifier.
            batch_size: The batch size to use for fitting.
            learning_rate: The learning rate to use for fitting the model.
            shuffle: Whether to shuffle the data before fitting.

        Returns:
            This `Classifier`, with `self.model` set to the newly trained model.
        """
        model = self._create_model(X)
        optimizer = torch.optim.Adam(model.parameters(recurse=True), lr=learning_rate)
        loss_fn = torch.nn.BCEWithLogitsLoss()

        dataset = Dataset(X, y, model.features)
        dataloader = torch.utils.data.DataLoader(
            dataset, batch_size=batch_size, shuffle=shuffle
        )
        for _ in trange(epochs, desc="Training Progress"):
            for inputs, labels in dataloader:
                optimizer.zero_grad()
                outputs = model(inputs)
                loss = loss_fn(outputs, labels)
                loss.backward()
                optimizer.step()
                # Constraints are re-applied after every parameter update.
                # NOTE(review): presumably a projection step for monotonicity /
                # bounds -- confirm against the model classes.
                model.apply_constraints()

        # Only publish the model once training finished without raising.
        self.model = model
        return self

    def predict(self, X: pd.DataFrame, logits: bool = False) -> np.ndarray:
        """Returns predictions for the given data.

        Args:
            X: A `pd.DataFrame` containing the data for which to generate
                predictions. It must contain a column for every feature of the
                fitted model; other columns are ignored.
            logits: If `True`, returns the logits of the predictions. Otherwise, returns
                probabilities.

        Returns:
            A `np.ndarray` with the raw model outputs when `logits` is `True`,
            otherwise those outputs passed through the logistic sigmoid.

        Raises:
            RuntimeError: If called before `fit` (i.e. `self.model` is `None`).
        """
        if self.model is None:
            raise RuntimeError("Cannot predict before fitting the model.")

        self.model.eval()
        # Select the model's features in the model's own order and work on a copy
        # so that the caller's DataFrame is never touched by `prepare_features`.
        X_copy = X[[feature.feature_name for feature in self.model.features]].copy()
        prepare_features(X_copy, self.model.features)
        X_tensor = torch.tensor(X_copy.values).double()
        with torch.no_grad():
            preds = self.model(X_tensor).numpy()

        if logits:
            return preds
        else:
            return 1.0 / (1.0 + np.exp(-preds))

    def save(self, filepath: str):
        """Saves the classifier to the specified path.

        Two files are written: `clf_attrs.pkl` (the pickled `features` and
        `model_config`) and, only if a model has been fitted, `model.pt`.

        Args:
            filepath: The directory where the classifier will be saved. If the directory
                does not exist, this function will attempt to create it. If the
                directory already exists, this function will overwrite any existing
                content with conflicting filenames.
        """
        # `exist_ok=True` avoids the check-then-create race of testing
        # `os.path.exists` first.
        os.makedirs(filepath, exist_ok=True)
        with open(os.path.join(filepath, "clf_attrs.pkl"), "wb") as f:
            attrs = {key: self.__dict__[key] for key in ["features", "model_config"]}
            pickle.dump(attrs, f)
        if self.model is not None:
            model_path = os.path.join(filepath, "model.pt")
            torch.save(self.model, model_path)

    @classmethod
    def load(cls, filepath: str) -> Classifier:
        """Loads a `Classifier` from the specified path.

        WARNING: this unpickles both `clf_attrs.pkl` and `model.pt`, which can
        execute arbitrary code. Only load directories that come from a trusted
        source.

        Args:
            filepath: The directory from which to load the classifier. This should
                be the same path that was passed to `save`.

        Returns:
            A `Classifier` instance. Its `model` is `None` when the directory
            contains no `model.pt`.
        """
        with open(os.path.join(filepath, "clf_attrs.pkl"), "rb") as f:
            attrs = pickle.load(f)

        clf = cls([])
        clf.__dict__.update(attrs)

        model_path = os.path.join(filepath, "model.pt")
        if os.path.exists(model_path):
            # `save` stores the whole module object, not a state dict. Since
            # PyTorch 2.6 `torch.load` defaults to `weights_only=True`, which
            # refuses to unpickle arbitrary classes, so opt out explicitly.
            clf.model = torch.load(model_path, weights_only=False)

        return clf

    ################################################################################
    ############################## PRIVATE METHODS #################################
    ################################################################################

    def _create_model(
        self, X: pd.DataFrame
    ) -> Union[CalibratedLinear, CalibratedLattice]:
        """Returns a model based on `self.features` and `self.model_config`.

        Columns whose dtype kind is string, object or bool become
        `CategoricalFeature`s; every other column becomes a `NumericalFeature`.

        Side effect: a categorical feature with no configured categories gets its
        categories inferred from the unique values in `X`, and the result is
        stored back on its `FeatureConfig`.

        Args:
            X: The training data; must contain a column for every configured feature.
        """
        features: list[Union[CategoricalFeature, NumericalFeature]] = []

        for feature_name, feature in self.features.items():
            if X[feature_name].dtype.kind in ["S", "O", "b"]:  # string, object, bool
                if feature._categories is None:
                    categories = X[feature_name].unique().tolist()
                    feature.categories(categories)
                else:
                    categories = feature._categories
                # For categorical features only a list is a usable monotonicity
                # setting; anything else (including `None`) is dropped.
                monotonicity_pairs = (
                    feature._monotonicity
                    if isinstance(feature._monotonicity, list)
                    else None
                )
                features.append(
                    CategoricalFeature(
                        feature_name=feature_name,
                        categories=categories,
                        missing_input_value=MISSING_INPUT_VALUE,
                        monotonicity_pairs=monotonicity_pairs,
                        lattice_size=feature._lattice_size,
                    )
                )
            else:  # numerical feature
                # For numerical features only a string is a usable monotonicity
                # setting; anything else (including `None`) is dropped.
                monotonicity = (
                    feature._monotonicity
                    if isinstance(feature._monotonicity, str)
                    else None
                )
                features.append(
                    NumericalFeature(
                        feature_name=feature_name,
                        data=np.array(X[feature_name].values),
                        num_keypoints=feature._num_keypoints,
                        input_keypoints_init=feature._input_keypoints_init,
                        missing_input_value=MISSING_INPUT_VALUE,
                        monotonicity=monotonicity,
                        projection_iterations=feature._projection_iterations,
                        lattice_size=feature._lattice_size,
                    )
                )

        if isinstance(self.model_config, LinearConfig):
            return CalibratedLinear(
                features,
                self.model_config.output_min,
                self.model_config.output_max,
                self.model_config.use_bias,
                self.model_config.output_calibration_num_keypoints,
            )
        else:
            return CalibratedLattice(
                features,
                # NOTE(review): hard-coded second positional argument; presumably
                # `clip_inputs` -- confirm against the `CalibratedLattice` signature.
                True,
                self.model_config.output_min,
                self.model_config.output_max,
                self.model_config.kernel_init,
                self.model_config.interpolation,
                self.model_config.output_calibration_num_keypoints,
            )

`__init__(feature_names, model_config=None)`

Initializes an instance of Classifier.

Source code in pytorch_lattice/classifier.py

def __init__(
    self,
    feature_names: list[str],
    model_config: Optional[Union[LinearConfig, LatticeConfig]] = None,
):
    """Sets up a new, not-yet-fitted `Classifier`.

    A default `FeatureConfig` is registered for every name in `feature_names`.
    When no `model_config` is supplied, a default `LinearConfig` is created.
    """
    configs = {}
    for name in feature_names:
        configs[name] = FeatureConfig(name=name)
    self.features = configs

    if model_config is None:
        model_config = LinearConfig()
    self.model_config = model_config

    # No model exists until `fit` has been called.
    self.model: Optional[Union[CalibratedLinear, CalibratedLattice]] = None

`configure(feature_name)`

Returns a FeatureConfig object for the given feature name.

Source code in pytorch_lattice/classifier.py

def configure(self, feature_name: str):
    """Looks up and returns the `FeatureConfig` stored under `feature_name`.

    The lookup is a plain subscript on `self.features`, so an unknown name
    propagates whatever error that container raises (`KeyError` for a dict).
    """
    feature_config = self.features[feature_name]
    return feature_config

`fit(X, y, epochs=50, batch_size=64, learning_rate=0.001, shuffle=False)`

Returns this classifier after fitting a model to the given data.

Note that calling this function will overwrite any existing model and train a new model from scratch.

Parameters:

Name	Type	Description	Default
`X`	`DataFrame`	A `pd.DataFrame` containing the features for the training data.	required
`y`	`ndarray`	A `np.ndarray` containing the labels for the training data.	required
`epochs`	`int`	The number of epochs for which to fit the classifier.	`50`
`batch_size`	`int`	The batch size to use for fitting.	`64`
`learning_rate`	`float`	The learning rate to use for fitting the model.	`0.001`
`shuffle`	`bool`	Whether to shuffle the data before fitting.	`False`

Source code in pytorch_lattice/classifier.py

def fit(
    self,
    X: pd.DataFrame,
    y: np.ndarray,
    epochs: int = 50,
    batch_size: int = 64,
    learning_rate: float = 1e-3,
    shuffle: bool = False,
) -> Classifier:
    """Trains a brand-new model on the given data and returns this classifier.

    Any previously fitted model is discarded: a fresh model is built from the
    current configuration and trained from scratch.

    Args:
        X: A `pd.DataFrame` holding the training features.
        y: A `np.ndarray` holding the training labels.
        epochs: How many passes over the training data to make.
        batch_size: How many examples go into each training batch.
        learning_rate: The learning rate handed to the Adam optimizer.
        shuffle: Whether the data loader should shuffle the examples.

    Returns:
        This classifier, with `self.model` set to the trained model.
    """
    net = self._create_model(X)
    criterion = torch.nn.BCEWithLogitsLoss()
    adam = torch.optim.Adam(net.parameters(recurse=True), lr=learning_rate)
    batches = torch.utils.data.DataLoader(
        Dataset(X, y, net.features), batch_size=batch_size, shuffle=shuffle
    )

    for _epoch in trange(epochs, desc="Training Progress"):
        for batch_inputs, batch_labels in batches:
            adam.zero_grad()
            batch_loss = criterion(net(batch_inputs), batch_labels)
            batch_loss.backward()
            adam.step()
            # Constraints are re-applied right after each parameter update.
            net.apply_constraints()

    # The new model replaces the old one only after training has completed.
    self.model = net
    return self

`load(filepath)` `classmethod`

Loads a Classifier from the specified path.

Parameters:

Name	Type	Description	Default
`filepath`	`str`	The directory from which to load the classifier. This should be the same path that was passed to the `save` method when the classifier was saved.	required

Returns:

Type	Description
`Classifier`	A `Classifier` instance.

Source code in pytorch_lattice/classifier.py

@classmethod
def load(cls, filepath: str) -> Classifier:
    """Loads a `Classifier` from the specified path.

    WARNING: this unpickles both `clf_attrs.pkl` and `model.pt`, which can
    execute arbitrary code. Only load directories that come from a trusted
    source.

    Args:
        filepath: The directory from which to load the classifier. This should
            be the same path that was passed to the `save` method when saving
            the classifier.

    Returns:
        A `Classifier` instance. Its `model` is left as created by the
        constructor when the directory contains no `model.pt`.
    """
    with open(os.path.join(filepath, "clf_attrs.pkl"), "rb") as f:
        attrs = pickle.load(f)

    # Start from an empty classifier, then overwrite its attributes with the
    # ones that were saved.
    clf = cls([])
    clf.__dict__.update(attrs)

    model_path = os.path.join(filepath, "model.pt")
    if os.path.exists(model_path):
        # The file holds a whole pickled module object, not a state dict. Since
        # PyTorch 2.6 `torch.load` defaults to `weights_only=True`, which
        # refuses to unpickle arbitrary classes, so opt out explicitly.
        clf.model = torch.load(model_path, weights_only=False)

    return clf

`predict(X, logits=False)`

Returns predictions for the given data.

Parameters:

Name	Type	Description	Default
`X`	`DataFrame`	A `pd.DataFrame` containing the data for which to generate predictions.	required
`logits`	`bool`	If `True`, returns the logits of the predictions. Otherwise, returns probabilities.	`False`

Source code in pytorch_lattice/classifier.py

def predict(self, X: pd.DataFrame, logits: bool = False) -> np.ndarray:
    """Generates predictions for the given data with the fitted model.

    Args:
        X: A `pd.DataFrame` containing the data for which to generate
            predictions. Only the columns named by the model's features are
            used, in the model's feature order.
        logits: When `True`, the raw model outputs are returned. Otherwise they
            are passed through the logistic sigmoid first.

    Raises:
        RuntimeError: If no model has been fitted yet.
    """
    fitted = self.model
    if fitted is None:
        raise RuntimeError("Cannot predict before fitting the model.")

    fitted.eval()
    wanted_columns = [feature.feature_name for feature in fitted.features]
    # Work on a copy so the caller's DataFrame is never passed to
    # `prepare_features`.
    frame = X[wanted_columns].copy()
    prepare_features(frame, fitted.features)
    model_input = torch.tensor(frame.values).double()
    with torch.no_grad():
        raw_outputs = fitted(model_input).numpy()

    if logits:
        return raw_outputs
    return 1.0 / (1.0 + np.exp(-raw_outputs))

`save(filepath)`

Saves the classifier to the specified path.

Parameters:

Name	Type	Description	Default
`filepath`	`str`	The directory where the classifier will be saved. If the directory does not exist, this function will attempt to create it. If the directory already exists, this function will overwrite any existing content with conflicting filenames.	required

Source code in pytorch_lattice/classifier.py

def save(self, filepath: str):
    """Saves the classifier to the specified path.

    Two files are written: `clf_attrs.pkl` (the pickled `features` and
    `model_config`) and, only if a model has been fitted, `model.pt`.

    Args:
        filepath: The directory where the classifier will be saved. If the directory
            does not exist, this function will attempt to create it. If the
            directory already exists, this function will overwrite any existing
            content with conflicting filenames.
    """
    # `exist_ok=True` avoids the check-then-create race of testing
    # `os.path.exists` first.
    os.makedirs(filepath, exist_ok=True)
    with open(os.path.join(filepath, "clf_attrs.pkl"), "wb") as f:
        attrs = {key: self.__dict__[key] for key in ["features", "model_config"]}
        pickle.dump(attrs, f)
    if self.model is not None:
        model_path = os.path.join(filepath, "model.pt")
        torch.save(self.model, model_path)

classifier

pytorch_lattice.classifier.Classifier

__init__(feature_names, model_config=None)

configure(feature_name)

fit(X, y, epochs=50, batch_size=64, learning_rate=0.001, shuffle=False)

load(filepath) classmethod

predict(X, logits=False)

save(filepath)

`pytorch_lattice.classifier.Classifier`

`__init__(feature_names, model_config=None)`

`configure(feature_name)`

`fit(X, y, epochs=50, batch_size=64, learning_rate=0.001, shuffle=False)`

`load(filepath)` `classmethod`

`predict(X, logits=False)`

`save(filepath)`