mirror of https://github.com/microsoft/autogen.git
support ROC and AUC for multi-class classification (#170)
* support ROC and AUC for multi-class classification
* add a test case to cover ROC and AUC for multi-class classification
This commit is contained in:
parent 10082b9262
commit 6270353458
@@ -1018,8 +1018,8 @@ class AutoML:
                 dataframe and label are ignored;
                 If not, dataframe and label must be provided.
             metric: A string of the metric name or a function,
-                e.g., 'accuracy', 'roc_auc', 'f1', 'micro_f1', 'macro_f1',
-                'log_loss', 'mae', 'mse', 'r2'
+                e.g., 'accuracy', 'roc_auc', 'roc_auc_ovr', 'roc_auc_ovo',
+                'f1', 'micro_f1', 'macro_f1', 'log_loss', 'mae', 'mse', 'r2'
                 if passing a customized metric function, the function needs to
                 have the following signature:
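For orientation, here is a minimal usage sketch of the newly documented metric options, modeled on the test cases added later in this commit; the dataset and time budget are just illustrative choices.

    # Minimal sketch: selecting the new multi-class AUC metrics in AutoML.fit,
    # modeled on the test cases added in this commit (dataset and budget are illustrative).
    from flaml import AutoML
    from sklearn.datasets import load_iris

    X_train, y_train = load_iris(return_X_y=True)
    automl = AutoML()
    automl.fit(
        X_train=X_train, y_train=y_train,
        task='classification',
        metric='roc_auc_ovr',   # or 'roc_auc_ovo' for one-vs-one averaging
        time_budget=10,         # seconds; illustrative value
    )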
@@ -1133,7 +1133,8 @@ class AutoML:
             else:
                 metric = 'r2'
         self._state.metric = metric
-        if metric in ['r2', 'accuracy', 'roc_auc', 'f1', 'ap', 'micro_f1', 'macro_f1']:
+        if metric in ['r2', 'accuracy', 'roc_auc', 'roc_auc_ovr', 'roc_auc_ovo',
+                      'f1', 'ap', 'micro_f1', 'macro_f1']:
             error_metric = f"1-{metric}"
         elif isinstance(metric, str):
             error_metric = metric
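As context for the hunk above, this branch only determines the label under which the minimized objective is reported. A small sketch of the resulting names, assuming error_metric is used purely as a human-readable string:

    # Sketch mirroring the branch above (assumes error_metric is only a display/log name).
    for metric in ['roc_auc', 'roc_auc_ovr', 'roc_auc_ovo', 'log_loss', 'mae']:
        if metric in ['r2', 'accuracy', 'roc_auc', 'roc_auc_ovr', 'roc_auc_ovo',
                      'f1', 'ap', 'micro_f1', 'macro_f1']:
            error_metric = f"1-{metric}"
        else:
            error_metric = metric
        print(f"{metric} -> {error_metric}")
    # roc_auc -> 1-roc_auc, roc_auc_ovr -> 1-roc_auc_ovr, roc_auc_ovo -> 1-roc_auc_ovo,
    # log_loss -> log_loss, mae -> mae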
flaml/ml.py (17 changed lines)
@@ -56,8 +56,8 @@ def sklearn_metric_loss_score(

     Args:
         metric_name: A string of the metric name, one of
-            'r2', 'rmse', 'mae', 'mse', 'accuracy', 'roc_auc', 'log_loss',
-            'f1', 'ap', 'micro_f1', 'macro_f1'
+            'r2', 'rmse', 'mae', 'mse', 'accuracy', 'roc_auc', 'roc_auc_ovr',
+            'roc_auc_ovo', 'log_loss', 'f1', 'ap', 'micro_f1', 'macro_f1'
         y_predict: A 1d or 2d numpy array of the predictions which can be
             used to calculate the metric. E.g., 2d for log_loss and 1d
             for others.
@@ -83,9 +83,15 @@ def sklearn_metric_loss_score(
     elif metric_name == 'accuracy':
         score = 1.0 - accuracy_score(
             y_true, y_predict, sample_weight=sample_weight)
-    elif 'roc_auc' in metric_name:
+    elif metric_name == 'roc_auc':
         score = 1.0 - roc_auc_score(
             y_true, y_predict, sample_weight=sample_weight)
+    elif metric_name == 'roc_auc_ovr':
+        score = 1.0 - roc_auc_score(
+            y_true, y_predict, sample_weight=sample_weight, multi_class='ovr')
+    elif metric_name == 'roc_auc_ovo':
+        score = 1.0 - roc_auc_score(
+            y_true, y_predict, sample_weight=sample_weight, multi_class='ovo')
     elif 'log_loss' in metric_name:
         score = log_loss(
             y_true, y_predict, labels=labels, sample_weight=sample_weight)
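The new branches defer to scikit-learn's roc_auc_score and simply pass multi_class='ovr' or 'ovo'. A standalone sketch of those calls on toy data (the arrays below are made up for illustration):

    # Standalone sketch of the scikit-learn calls used above (toy data for illustration).
    import numpy as np
    from sklearn.metrics import roc_auc_score

    y_true = np.array([0, 1, 2, 2, 1, 0])   # multi-class labels
    y_proba = np.array([                     # predicted probabilities, shape (n_samples, n_classes)
        [0.8, 0.1, 0.1],
        [0.2, 0.6, 0.2],
        [0.1, 0.2, 0.7],
        [0.2, 0.2, 0.6],
        [0.3, 0.5, 0.2],
        [0.7, 0.2, 0.1],
    ])

    # One-vs-rest and one-vs-one macro-averaged AUC; FLAML turns these into losses as 1 - score.
    auc_ovr = roc_auc_score(y_true, y_proba, multi_class='ovr')
    auc_ovo = roc_auc_score(y_true, y_proba, multi_class='ovo')
    loss_ovr = 1.0 - auc_ovr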
@@ -104,7 +110,8 @@ def sklearn_metric_loss_score(
         raise ValueError(
             metric_name + ' is not a built-in metric, '
             'currently built-in metrics are: '
-            'r2, rmse, mae, mse, accuracy, roc_auc, log_loss, f1, micro_f1, macro_f1, ap. '
+            'r2, rmse, mae, mse, accuracy, roc_auc, roc_auc_ovr, roc_auc_ovo, '
+            'log_loss, f1, micro_f1, macro_f1, ap. '
             'please pass a customized metric function to AutoML.fit(metric=func)')
     return score
@@ -114,7 +121,7 @@ def get_y_pred(estimator, X, eval_metric, obj):
         y_pred_classes = estimator.predict_proba(X)
         y_pred = y_pred_classes[
             :, 1] if y_pred_classes.ndim > 1 else y_pred_classes
-    elif eval_metric in ['log_loss', 'roc_auc']:
+    elif eval_metric in ['log_loss', 'roc_auc', 'roc_auc_ovr', 'roc_auc_ovo']:
         y_pred = estimator.predict_proba(X)
     else:
         y_pred = estimator.predict(X)
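A side note on the branch above: the first branch (shown as context) keeps only the positive-class probability column, which is what a binary ROC AUC needs, while 'roc_auc_ovr' and 'roc_auc_ovo' require the full (n_samples, n_classes) probability matrix, so they now share the predict_proba branch with 'log_loss'. A small shape sketch with a placeholder estimator and dataset:

    # Sketch of the shape difference handled above (estimator and data are placeholders).
    from sklearn.datasets import load_iris
    from sklearn.linear_model import LogisticRegression

    X, y = load_iris(return_X_y=True)
    clf = LogisticRegression(max_iter=1000).fit(X, y)

    proba = clf.predict_proba(X)   # shape (n_samples, n_classes): input for 'roc_auc_ovr'/'roc_auc_ovo'
    labels = clf.predict(X)        # shape (n_samples,): input for label-based metrics such as 'accuracy'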
@@ -200,7 +200,7 @@
 "source": [
 "settings = {\n",
 " \"time_budget\": 60, # total running time in seconds\n",
-" \"metric\": 'accuracy', # primary metrics can be chosen from: ['accuracy','roc_auc','f1','log_loss','mae','mse','r2']\n",
+" \"metric\": 'accuracy', # primary metrics can be chosen from: ['accuracy','roc_auc', 'roc_auc_ovr', 'roc_auc_ovo', 'f1','log_loss','mae','mse','r2']\n",
 " \"task\": 'classification', # task type \n",
 " \"estimator_list\":['xgboost','catboost','lgbm'],\n",
 " \"log_file_name\": 'airlines_experiment.log', # flaml log file\n",
@@ -962,4 +962,4 @@
 },
 "nbformat": 4,
 "nbformat_minor": 4
-}
+}
@@ -121,7 +121,7 @@
 "source": [
 "settings = {\n",
 " \"time_budget\": 300, # total running time in seconds\n",
-" \"metric\": 'accuracy', # primary metrics can be chosen from: ['accuracy','roc_auc','f1','log_loss','mae','mse','r2']\n",
+" \"metric\": 'accuracy', # primary metrics can be chosen from: ['accuracy','roc_auc','roc_auc_ovr','roc_auc_ovo','f1','log_loss','mae','mse','r2']\n",
 " \"task\": 'classification', # task type \n",
 " \"log_file_name\": 'airlines_experiment.log', # flaml log file\n",
 "}"
@@ -141,7 +141,7 @@
 "source": [
 "settings = {\n",
 " \"time_budget\": 60, # total running time in seconds\n",
-" \"metric\": 'accuracy', # primary metrics can be chosen from: ['accuracy','roc_auc','f1','log_loss','mae','mse','r2']\n",
+" \"metric\": 'accuracy', # primary metrics can be chosen from: ['accuracy','roc_auc','roc_auc_ovr','roc_auc_ovo','f1','log_loss','mae','mse','r2']\n",
 " \"estimator_list\": ['lgbm', 'rf', 'xgboost'], # list of ML learners\n",
 " \"task\": 'classification', # task type \n",
 " \"sample\": False, # whether to subsample training data\n",
@@ -344,6 +344,36 @@ class TestAutoML(unittest.TestCase):
         print(multi_class_curves(y_train, y_pred_proba, roc_curve))
         print(multi_class_curves(y_train, y_pred_proba, precision_recall_curve))

+    def test_roc_auc_ovr(self):
+        automl_experiment = AutoML()
+        automl_settings = {
+            "time_budget": 2,
+            "metric": "roc_auc_ovr",
+            "task": "classification",
+            "log_file_name": "test/roc_auc_ovr.log",
+            "log_training_metric": True,
+            "n_jobs": 1,
+            "model_history": True
+        }
+        X_train, y_train = load_iris(return_X_y=True)
+        automl_experiment.fit(
+            X_train=X_train, y_train=y_train, **automl_settings)
+
+    def test_roc_auc_ovo(self):
+        automl_experiment = AutoML()
+        automl_settings = {
+            "time_budget": 2,
+            "metric": "roc_auc_ovo",
+            "task": "classification",
+            "log_file_name": "test/roc_auc_ovo.log",
+            "log_training_metric": True,
+            "n_jobs": 1,
+            "model_history": True
+        }
+        X_train, y_train = load_iris(return_X_y=True)
+        automl_experiment.fit(
+            X_train=X_train, y_train=y_train, **automl_settings)
+
     def test_regression(self):
         automl_experiment = AutoML()
         automl_settings = {
@@ -14,7 +14,7 @@ def test_automl(budget=5, dataset_format='dataframe'):
     automl = AutoML()
     settings = {
         "time_budget": budget, # total running time in seconds
-        "metric": 'accuracy', # primary metrics can be chosen from: ['accuracy','roc_auc','f1','log_loss','mae','mse','r2']
+        "metric": 'accuracy', # primary metrics can be chosen from: ['accuracy','roc_auc','roc_auc_ovr','roc_auc_ovo','f1','log_loss','mae','mse','r2']
         "task": 'classification', # task type
         "log_file_name": 'airlines_experiment.log', # flaml log file
     }
@@ -71,7 +71,7 @@ def test_mlflow():
     automl = AutoML()
     settings = {
         "time_budget": 5, # total running time in seconds
-        "metric": 'accuracy', # primary metrics can be chosen from: ['accuracy','roc_auc','f1','log_loss','mae','mse','r2']
+        "metric": 'accuracy', # primary metrics can be chosen from: ['accuracy','roc_auc','roc_auc_ovr','roc_auc_ovo','f1','log_loss','mae','mse','r2']
         "estimator_list": ['lgbm', 'rf', 'xgboost'], # list of ML learners
         "task": 'classification', # task type
         "sample": False, # whether to subsample training data