from pandas import Series, DataFrame
from presc.utils import PrescError
class ClassificationModel:
    """Represents a classification problem.

    Instances wrap a ML model together with its associated training dataset.

    Parameters
    ----------
    classifier : sklearn Classifier
        The classifier to wrap. It is used through its ``fit``, ``predict``
        and (optionally) ``predict_proba`` methods.
    train_dataset : presc.dataset.Dataset
        Optionally include the associated training dataset.
    retrain_now : bool
        Should the classifier first be (re-)trained on the given dataset?

    Raises
    ------
    PrescError
        If ``retrain_now`` is set but no training dataset was given.
    """

    def __init__(self, classifier, train_dataset=None, retrain_now=False):
        self._classifier = classifier
        self._train_dataset = train_dataset
        if retrain_now:
            # Train the classifier on the given dataset.
            self.train()

    def train(self, train_dataset=None):
        """Train the underlying classification model.

        Parameters
        ----------
        train_dataset : presc.dataset.Dataset
            A Dataset to train on. Defaults to the pre-specified training
            dataset, if any.

        Raises
        ------
        PrescError
            If no dataset is passed and none was specified at construction.
        """
        if train_dataset is None:
            train_dataset = self._train_dataset
        if train_dataset is None:
            # Fail with an explicit message rather than an AttributeError on
            # ``None.features`` further down.
            raise PrescError(
                "no training dataset was provided to train the classifier on"
            )
        self._classifier.fit(train_dataset.features, train_dataset.labels)

    def predict_labels(self, test_dataset):
        """Predict labels for the given test dataset.

        Parameters
        ----------
        test_dataset : presc.dataset.Dataset

        Returns
        -------
        Series
            A like-indexed Series.
        """
        features = test_dataset.features
        pred = self._classifier.predict(features)
        return Series(pred, index=features.index)

    def predict_probs(self, test_dataset):
        """Compute predicted probabilities for the given test dataset.

        This must be supported by the underlying classifier, otherwise an
        error will be raised.

        Parameters
        ----------
        test_dataset : presc.dataset.Dataset

        Returns
        -------
        DataFrame
            A like-indexed DataFrame of probabilities for each class.

        Raises
        ------
        PrescError
            If the classifier raises ``AttributeError`` when asked for
            predicted probabilities.
        """
        # Resolve the features outside the ``try`` so that a malformed dataset
        # is not misreported as a classifier limitation.
        features = test_dataset.features
        try:
            pred = self._classifier.predict_proba(features)
        except AttributeError as e:
            raise PrescError(
                "classifier does not support predicted probabilities"
            ) from e
        return DataFrame(pred, index=features.index)

    @property
    def classifier(self):
        """Returns the underlying classifier."""
        return self._classifier