mirror of https://github.com/microsoft/autogen.git
Fix preprocessing of datetime columns for validation data. (#73)
* Fix preprocessing of datetime columns for validation data. * Format code lines.
This commit is contained in:
parent
f4f3f4f17b
commit
ad42889a3b
|
@ -192,12 +192,13 @@ class DataTransformer:
|
|||
if isinstance(X, pd.DataFrame):
|
||||
X = X.copy()
|
||||
n = X.shape[0]
|
||||
cat_columns, num_columns = [], []
|
||||
cat_columns, num_columns, datetime_columns = [], [], []
|
||||
drop = False
|
||||
for column in X.columns:
|
||||
# sklearn\utils\validation.py needs int/float values
|
||||
if X[column].dtype.name == 'datetime64[ns]':
|
||||
X[column] = X[column].map(datetime.toordinal)
|
||||
datetime_columns.append(column)
|
||||
if X[column].dtype.name in ('object', 'category'):
|
||||
if X[column].nunique() == 1 or X[column].nunique(
|
||||
dropna=True) == n - X[column].isnull().sum():
|
||||
|
@ -236,7 +237,8 @@ class DataTransformer:
|
|||
SimpleImputer(missing_values=np.nan, strategy='median'),
|
||||
X_num.columns)])
|
||||
X[num_columns] = self.transformer.fit_transform(X_num)
|
||||
self._cat_columns, self._num_columns = cat_columns, num_columns
|
||||
self._cat_columns, self._num_columns, self._datetime_columns = cat_columns, \
|
||||
num_columns, datetime_columns
|
||||
self._drop = drop
|
||||
|
||||
if task == 'regression':
|
||||
|
@ -249,7 +251,11 @@ class DataTransformer:
|
|||
|
||||
def transform(self, X):
|
||||
if isinstance(X, pd.DataFrame):
|
||||
cat_columns, num_columns = self._cat_columns, self._num_columns
|
||||
cat_columns, num_columns, datetime_columns = self._cat_columns, \
|
||||
self._num_columns, self._datetime_columns
|
||||
if datetime_columns:
|
||||
for dt_column in datetime_columns:
|
||||
X[dt_column] = X[dt_column].map(datetime.toordinal)
|
||||
X = X[cat_columns + num_columns].copy()
|
||||
for column in cat_columns:
|
||||
# print(column, X[column].dtype.name)
|
||||
|
|
|
@ -4,6 +4,9 @@ import numpy as np
|
|||
import scipy.sparse
|
||||
from sklearn.datasets import load_boston, load_iris, load_wine
|
||||
|
||||
import pandas as pd
|
||||
from datetime import datetime
|
||||
|
||||
from flaml import AutoML
|
||||
from flaml.data import get_output_from_log
|
||||
|
||||
|
@ -219,6 +222,23 @@ class TestAutoML(unittest.TestCase):
|
|||
print(automl_experiment.model)
|
||||
print(automl_experiment.predict_proba(X_train)[:5])
|
||||
|
||||
def test_datetime_columns(self):
    """Smoke-test a regression fit on a frame whose only column is datetimes.

    The same two-row frame is supplied as both the training and the
    validation features. The test contains no assertions: it passes as
    long as ``fit`` returns without raising.
    """
    learner = AutoML()
    fit_kwargs = dict(
        time_budget=2,
        metric='mse',
        task='regression',
        log_file_name="test/datetime_columns.log",
        log_training_metric=True,
        n_jobs=1,
        model_history=True,
    )

    # Single feature column 'A' holding two datetime values.
    timestamps = [datetime(1900, 2, 3), datetime(1900, 3, 4)]
    frame = pd.DataFrame({'A': timestamps})
    target = np.array([0, 1])

    learner.fit(
        X_train=frame,
        X_val=frame,
        y_train=target,
        y_val=target,
        **fit_kwargs,
    )
|
||||
|
||||
def test_regression(self):
|
||||
|
||||
automl_experiment = AutoML()
|
||||
|
|
Loading…
Reference in New Issue