Call X.copy() in the DataTransformer.transform method (#78)

* Call X.copy() at the start of the DataTransformer.transform method.

* Update version from 0.3.3 to 0.3.4.
This commit is contained in:
Gian Pio Domiziani 2021-04-24 02:14:29 +02:00 committed by GitHub
parent b6f57894ef
commit 068fb9f5c2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 11 additions and 6 deletions

View File

@ -214,6 +214,7 @@ class DataTransformer:
X[column] = X[column].fillna('__NAN__')
cat_columns.append(column)
else:
# print(X[column].dtype.name)
if X[column].nunique(dropna=True) < 2:
X.drop(columns=column, inplace=True)
drop = True
@ -236,8 +237,8 @@ class DataTransformer:
SimpleImputer(missing_values=np.nan, strategy='median'),
X_num.columns)])
X[num_columns] = self.transformer.fit_transform(X_num)
self._cat_columns, self._num_columns, self._datetime_columns = \
cat_columns, num_columns, datetime_columns
self._cat_columns, self._num_columns, self._datetime_columns = cat_columns, \
num_columns, datetime_columns
self._drop = drop
if task == 'regression':
@ -249,13 +250,14 @@ class DataTransformer:
return X, y
def transform(self, X):
X = X.copy()
if isinstance(X, pd.DataFrame):
cat_columns, num_columns, datetime_columns = \
self._cat_columns, self._num_columns, self._datetime_columns
cat_columns, num_columns, datetime_columns = self._cat_columns, \
self._num_columns, self._datetime_columns
X = X[cat_columns + num_columns].copy()
if datetime_columns:
for dt_column in datetime_columns:
X[dt_column] = X[dt_column].map(datetime.toordinal)
X = X[cat_columns + num_columns].copy()
for column in cat_columns:
# print(column, X[column].dtype.name)
if X[column].dtype.name == 'object':
@ -273,3 +275,4 @@ class DataTransformer:
X_num.columns = range(X_num.shape[1])
X[num_columns] = self.transformer.transform(X_num)
return X

View File

@ -1 +1 @@
__version__ = "0.3.3"
__version__ = "0.3.4"

View File

@ -239,6 +239,8 @@ class TestAutoML(unittest.TestCase):
y = np.array([0, 1])
automl_experiment.fit(X_train=fake_df, X_val=fake_df, y_train=y, y_val=y, **automl_settings)
y_pred = automl_experiment.predict(fake_df)
def test_regression(self):
automl_experiment = AutoML()