From 068fb9f5c2cceae79c1ebbb73ecb2839bcb83686 Mon Sep 17 00:00:00 2001 From: Gian Pio Domiziani <50718324+gianpDomiziani@users.noreply.github.com> Date: Sat, 24 Apr 2021 02:14:29 +0200 Subject: [PATCH] X.copy() in the process method (#78) * X.copy() in the transformer method. * update version 0.3.4 --- flaml/data.py | 13 ++++++++----- flaml/version.py | 2 +- test/test_automl.py | 2 ++ 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/flaml/data.py b/flaml/data.py index f99d995655..c73c33c99b 100644 --- a/flaml/data.py +++ b/flaml/data.py @@ -214,6 +214,7 @@ class DataTransformer: X[column] = X[column].fillna('__NAN__') cat_columns.append(column) else: + # print(X[column].dtype.name) if X[column].nunique(dropna=True) < 2: X.drop(columns=column, inplace=True) drop = True @@ -236,8 +237,8 @@ class DataTransformer: SimpleImputer(missing_values=np.nan, strategy='median'), X_num.columns)]) X[num_columns] = self.transformer.fit_transform(X_num) - self._cat_columns, self._num_columns, self._datetime_columns = \ - cat_columns, num_columns, datetime_columns + self._cat_columns, self._num_columns, self._datetime_columns = cat_columns, \ + num_columns, datetime_columns self._drop = drop if task == 'regression': @@ -249,13 +250,14 @@ class DataTransformer: return X, y def transform(self, X): + X = X.copy() if isinstance(X, pd.DataFrame): - cat_columns, num_columns, datetime_columns = \ - self._cat_columns, self._num_columns, self._datetime_columns + cat_columns, num_columns, datetime_columns = self._cat_columns, \ + self._num_columns, self._datetime_columns + X = X[cat_columns + num_columns].copy() if datetime_columns: for dt_column in datetime_columns: X[dt_column] = X[dt_column].map(datetime.toordinal) - X = X[cat_columns + num_columns].copy() for column in cat_columns: # print(column, X[column].dtype.name) if X[column].dtype.name == 'object': @@ -273,3 +275,4 @@ class DataTransformer: X_num.columns = range(X_num.shape[1]) X[num_columns] = self.transformer.transform(X_num) return X + diff --git a/flaml/version.py b/flaml/version.py index e19434e2e3..334b899568 100644 --- a/flaml/version.py +++ b/flaml/version.py @@ -1 +1 @@ -__version__ = "0.3.3" +__version__ = "0.3.4" diff --git a/test/test_automl.py b/test/test_automl.py index 4fbf8296ce..94a4452c6e 100644 --- a/test/test_automl.py +++ b/test/test_automl.py @@ -239,6 +239,8 @@ class TestAutoML(unittest.TestCase): y = np.array([0, 1]) automl_experiment.fit(X_train=fake_df, X_val=fake_df, y_train=y, y_val=y, **automl_settings) + y_pred = automl_experiment.predict(fake_df) + def test_regression(self): automl_experiment = AutoML()