Modifying Hyperparameter Settings

Offering: openGaussDev

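More detail: Modifying Hyperparameter Settings — batch_size is now capped at MAX_BATCH_SIZE (0x0fffff = 1048575) for PCA, k-means, XGBoost and supervised gradient-descent models; the upper bound of XGBoost gamma is raised from 1 to DBL_MAX and nthread is limited to 100; n_iter is added to the XGBoost hyperparameter documentation; expected test outputs are updated to the new ranges.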

Match-id-c7650485108bc6fe7b1f63cd7ff9bf23bbc3325f
This commit is contained in:
openGaussDev 2022-03-09 11:33:15 +08:00 committed by yanghao
parent 84e65fdf73
commit 927c5e918f
10 changed files with 18 additions and 16 deletions

View File

@ -80,6 +80,7 @@ For example:
* verbose: 0 (no output), 1 (less output), or 2 (full output) * verbose: 0 (no output), 1 (less output), or 2 (full output)
# Hyperparameter list for 'xgboost_regression_logistic', 'xgboost_binary_logistic', 'xgboost_regression_gamma' and 'xgboost_regression_squarederror': # Hyperparameter list for 'xgboost_regression_logistic', 'xgboost_binary_logistic', 'xgboost_regression_gamma' and 'xgboost_regression_squarederror':
* n_iter: Maximum number of iterations until convergence; must be in the range [1, 10000]
* batch_size: Number of tuples in each processing batch * batch_size: Number of tuples in each processing batch
* booster: Which booster to use, e.g., gbtree, gblinear or dart (default: gbtree) * booster: Which booster to use, e.g., gbtree, gblinear or dart (default: gbtree)
* tree_method: The tree construction algorithm used in XGBoost. Choices: auto, exact, approx, hist, gpu_hist (gpu_hist only supported with GPU) * tree_method: The tree construction algorithm used in XGBoost. Choices: auto, exact, approx, hist, gpu_hist (gpu_hist only supported with GPU)

View File

@ -257,7 +257,7 @@ static Datum pca_predict(const Matrix *features, const Matrix *weights,
static HyperparameterDefinition pca_hyperparameter_definitions[] = { static HyperparameterDefinition pca_hyperparameter_definitions[] = {
HYPERPARAMETER_INT4("number_components", 1, 1, true, INT32_MAX, true, HyperparametersGD, number_dimensions, HYPERPARAMETER_INT4("number_components", 1, 1, true, INT32_MAX, true, HyperparametersGD, number_dimensions,
HP_NO_AUTOML()), HP_NO_AUTOML()),
HYPERPARAMETER_INT4("batch_size", 1000, 1, true, INT32_MAX, true, HyperparametersGD, batch_size, HP_NO_AUTOML()), HYPERPARAMETER_INT4("batch_size", 1000, 1, true, MAX_BATCH_SIZE, true, HyperparametersGD, batch_size, HP_NO_AUTOML()),
HYPERPARAMETER_INT4("max_iterations", 100, 1, true, ITER_MAX, true, HyperparametersGD, max_iterations, HYPERPARAMETER_INT4("max_iterations", 100, 1, true, ITER_MAX, true, HyperparametersGD, max_iterations,
HP_NO_AUTOML()), HP_NO_AUTOML()),
HYPERPARAMETER_INT4("max_seconds", 0, 0, true, INT32_MAX, true, HyperparametersGD, max_seconds, HP_NO_AUTOML()), HYPERPARAMETER_INT4("max_seconds", 0, 0, true, INT32_MAX, true, HyperparametersGD, max_seconds, HP_NO_AUTOML()),

View File

@ -1044,7 +1044,7 @@ HyperparameterDefinition kmeans_hyperparameter_definitions[] = {
HYPERPARAMETER_INT4("max_iterations", 10, 1, true, ITER_MAX, true, HyperparametersKMeans, num_iterations, HYPERPARAMETER_INT4("max_iterations", 10, 1, true, ITER_MAX, true, HyperparametersKMeans, num_iterations,
HP_NO_AUTOML()), HP_NO_AUTOML()),
HYPERPARAMETER_INT4("num_features", 0, 1, true, INT32_MAX, true, HyperparametersKMeans, n_features, HP_NO_AUTOML()), HYPERPARAMETER_INT4("num_features", 0, 1, true, INT32_MAX, true, HyperparametersKMeans, n_features, HP_NO_AUTOML()),
HYPERPARAMETER_INT4("batch_size", 1000, 1, true, 1000000, true, HyperparametersKMeans, batch_size, HP_NO_AUTOML()), HYPERPARAMETER_INT4("batch_size", 1000, 1, true, MAX_BATCH_SIZE, true, HyperparametersKMeans, batch_size, HP_NO_AUTOML()),
HYPERPARAMETER_INT4("seed", 0, 0, true, INT32_MAX, true, HyperparametersKMeans, external_seed, HYPERPARAMETER_INT4("seed", 0, 0, true, INT32_MAX, true, HyperparametersKMeans, external_seed,
HP_AUTOML_INT(1, INT32_MAX, 1, ProbabilityDistribution::UNIFORM_RANGE)), HP_AUTOML_INT(1, INT32_MAX, 1, ProbabilityDistribution::UNIFORM_RANGE)),
HYPERPARAMETER_FLOAT8("tolerance", 0.00001, 0.0, false, 1.0, true, HyperparametersKMeans, tolerance, HYPERPARAMETER_FLOAT8("tolerance", 0.00001, 0.0, false, 1.0, true, HyperparametersKMeans, tolerance,

View File

@ -226,13 +226,13 @@ const char *xgboost_tree_method_str[] = {"auto", "exact", "approx", "hist", "gpu
const char *xgboost_eval_metric_str[] = {"rmse", "rmsle", "map", "mae", "auc", "aucpr" }; const char *xgboost_eval_metric_str[] = {"rmse", "rmsle", "map", "mae", "auc", "aucpr" };
static HyperparameterDefinition xgboost_hyperparameter_definitions[] = { static HyperparameterDefinition xgboost_hyperparameter_definitions[] = {
HYPERPARAMETER_INT4("n_iter", 10, 1, true, ITER_MAX, true, HyperparamsXGBoost, n_iterations, HP_NO_AUTOML()), HYPERPARAMETER_INT4("n_iter", 10, 1, true, ITER_MAX, true, HyperparamsXGBoost, n_iterations, HP_NO_AUTOML()),
HYPERPARAMETER_INT4("batch_size", 10000, 1, true, INT32_MAX, true, HyperparamsXGBoost, batch_size, HP_NO_AUTOML()), HYPERPARAMETER_INT4("batch_size", 10000, 1, true, MAX_BATCH_SIZE, true, HyperparamsXGBoost, batch_size, HP_NO_AUTOML()),
HYPERPARAMETER_INT4("max_depth", 5, 0, true, INT32_MAX, true, HyperparamsXGBoost, max_depth, HP_NO_AUTOML()), HYPERPARAMETER_INT4("max_depth", 5, 0, true, INT32_MAX, true, HyperparamsXGBoost, max_depth, HP_NO_AUTOML()),
HYPERPARAMETER_INT4("min_child_weight", 1, 0, true, INT32_MAX, true, HyperparamsXGBoost, min_child_weight, HYPERPARAMETER_INT4("min_child_weight", 1, 0, true, INT32_MAX, true, HyperparamsXGBoost, min_child_weight,
HP_NO_AUTOML()), HP_NO_AUTOML()),
HYPERPARAMETER_FLOAT8("gamma", 0.0, 0.0, true, 1, true, HyperparamsXGBoost, gamma, HP_NO_AUTOML()), HYPERPARAMETER_FLOAT8("gamma", 0.0, 0.0, true, DBL_MAX, true, HyperparamsXGBoost, gamma, HP_NO_AUTOML()),
HYPERPARAMETER_FLOAT8("eta", 0.3, 0.0, true, 1, true, HyperparamsXGBoost, eta, HP_NO_AUTOML()), HYPERPARAMETER_FLOAT8("eta", 0.3, 0.0, true, 1, true, HyperparamsXGBoost, eta, HP_NO_AUTOML()),
HYPERPARAMETER_INT4("nthread", 1, 0, true, INT32_MAX, true, HyperparamsXGBoost, nthread, HP_NO_AUTOML()), HYPERPARAMETER_INT4("nthread", 1, 0, true, 100, true, HyperparamsXGBoost, nthread, HP_NO_AUTOML()),
HYPERPARAMETER_INT4("verbosity", 1, 0, true, 3, true, HyperparamsXGBoost, verbosity, HP_NO_AUTOML()), HYPERPARAMETER_INT4("verbosity", 1, 0, true, 3, true, HyperparamsXGBoost, verbosity, HP_NO_AUTOML()),
HYPERPARAMETER_INT4("seed", 0, 0, true, INT32_MAX, true, HyperparamsXGBoost, seed, HYPERPARAMETER_INT4("seed", 0, 0, true, INT32_MAX, true, HyperparamsXGBoost, seed,
HP_AUTOML_INT(1, INT32_MAX, 1, ProbabilityDistribution::UNIFORM_RANGE)), HP_AUTOML_INT(1, INT32_MAX, 1, ProbabilityDistribution::UNIFORM_RANGE)),

View File

@ -26,6 +26,7 @@
#include "utils/timestamp.h" #include "utils/timestamp.h"
#define ITER_MAX 10000 #define ITER_MAX 10000
#define MAX_BATCH_SIZE 0x0fffff
uint64_t time_diff(struct timespec *time_p1, struct timespec *time_p2); uint64_t time_diff(struct timespec *time_p1, struct timespec *time_p2);
double interval_to_sec(double time_interval); double interval_to_sec(double time_interval);

View File

@ -261,7 +261,7 @@ typedef struct HyperparametersGD {
} HyperparametersGD; } HyperparametersGD;
#define GD_HYPERPARAMETERS_SUPERVISED \ #define GD_HYPERPARAMETERS_SUPERVISED \
HYPERPARAMETER_INT4("batch_size", 1000, 1, true, INT32_MAX, true, \ HYPERPARAMETER_INT4("batch_size", 1000, 1, true, MAX_BATCH_SIZE, true, \
HyperparametersGD, batch_size, \ HyperparametersGD, batch_size, \
HP_AUTOML_INT(1, 10000, 4, ProbabilityDistribution::LOG_RANGE)), \ HP_AUTOML_INT(1, 10000, 4, ProbabilityDistribution::LOG_RANGE)), \
HYPERPARAMETER_FLOAT8("decay", 0.95, 0.0, false, DBL_MAX, true, \ HYPERPARAMETER_FLOAT8("decay", 0.95, 0.0, false, DBL_MAX, true, \

View File

@ -1201,7 +1201,7 @@ CREATE MODEL my_kmeans_pp_empty USING kmeans FROM (SELECT position FROM multivar
-- Batch size -- Batch size
CREATE MODEL my_kmeans_pp_empty USING kmeans FROM (SELECT position FROM multivariate_7_1000_10) WITH max_iterations = 50, num_centroids = 10, tolerance = 0.00001, batch_size = 0, num_features = 7, distance_function = 'L2_Squared', seeding_function = 'Random++', verbose = 1, seed = 1255025990; CREATE MODEL my_kmeans_pp_empty USING kmeans FROM (SELECT position FROM multivariate_7_1000_10) WITH max_iterations = 50, num_centroids = 10, tolerance = 0.00001, batch_size = 0, num_features = 7, distance_function = 'L2_Squared', seeding_function = 'Random++', verbose = 1, seed = 1255025990;
CREATE MODEL my_kmeans_pp_empty USING kmeans FROM (SELECT position FROM multivariate_7_1000_10) WITH max_iterations = 50, num_centroids = 10, tolerance = 0.00001, batch_size = 1000001, num_features = 7, distance_function = 'L2_Squared', seeding_function = 'Random++', verbose = 1, seed = 1255025990; CREATE MODEL my_kmeans_pp_empty USING kmeans FROM (SELECT position FROM multivariate_7_1000_10) WITH max_iterations = 50, num_centroids = 10, tolerance = 0.00001, batch_size = 1048576, num_features = 7, distance_function = 'L2_Squared', seeding_function = 'Random++', verbose = 1, seed = 1255025990;
-- Num of features (not matching the data) -- Num of features (not matching the data)
CREATE MODEL my_kmeans_pp_empty USING kmeans FROM (SELECT position FROM multivariate_7_1000_10) WITH max_iterations = 50, num_centroids = 10, tolerance = 0.00001, batch_size = 1000, num_features = 9, distance_function = 'L2_Squared', seeding_function = 'Random++', verbose = 1, seed = 1255025990; CREATE MODEL my_kmeans_pp_empty USING kmeans FROM (SELECT position FROM multivariate_7_1000_10) WITH max_iterations = 50, num_centroids = 10, tolerance = 0.00001, batch_size = 1000, num_features = 9, distance_function = 'L2_Squared', seeding_function = 'Random++', verbose = 1, seed = 1255025990;

View File

@ -14,13 +14,13 @@ CREATE MODEL m using logistic_regression FEATURES size,lot FROM db4ai_houses;
ERROR: Supervised ML algorithms require TARGET clause ERROR: Supervised ML algorithms require TARGET clause
-- Errors with semantic validation of hyperparameters -- Errors with semantic validation of hyperparameters
CREATE MODEL m USING logistic_regression FEATURES size, lot TARGET price <100000 FROM db4ai_houses with batch_size = 0, seed=1; CREATE MODEL m USING logistic_regression FEATURES size, lot TARGET price <100000 FROM db4ai_houses with batch_size = 0, seed=1;
ERROR: Hyperparameter batch_size must be in the range [1,2147483647] ERROR: Hyperparameter batch_size must be in the range [1,1048575]
CREATE MODEL m USING logistic_regression FEATURES size, lot TARGET price <100000 FROM db4ai_houses with decay = 0.0, seed=1; CREATE MODEL m USING logistic_regression FEATURES size, lot TARGET price <100000 FROM db4ai_houses with decay = 0.0, seed=1;
ERROR: Hyperparameter decay must be in the range (0,1.7976931e+308] ERROR: Hyperparameter decay must be in the range (0,1.7976931e+308]
CREATE MODEL m USING logistic_regression FEATURES size, lot TARGET price <100000 FROM db4ai_houses with learning_rate = 0.0, seed=1; CREATE MODEL m USING logistic_regression FEATURES size, lot TARGET price <100000 FROM db4ai_houses with learning_rate = 0.0, seed=1;
ERROR: Hyperparameter learning_rate must be in the range (0,1.7976931e+308] ERROR: Hyperparameter learning_rate must be in the range (0,1.7976931e+308]
CREATE MODEL m USING logistic_regression FEATURES size, lot TARGET price <100000 FROM db4ai_houses with max_iterations = 0, seed=1; CREATE MODEL m USING logistic_regression FEATURES size, lot TARGET price <100000 FROM db4ai_houses with max_iterations = 0, seed=1;
ERROR: Hyperparameter max_iterations must be in the range [1,2147483647] ERROR: Hyperparameter max_iterations must be in the range [1,10000]
CREATE MODEL m USING logistic_regression FEATURES size, lot TARGET price <100000 FROM db4ai_houses with max_seconds = -1, seed=1; CREATE MODEL m USING logistic_regression FEATURES size, lot TARGET price <100000 FROM db4ai_houses with max_seconds = -1, seed=1;
ERROR: Hyperparameter max_seconds must be in the range [0,2147483647] ERROR: Hyperparameter max_seconds must be in the range [0,2147483647]
CREATE MODEL m USING logistic_regression FEATURES size, lot TARGET price <100000 FROM db4ai_houses with optimizer = nogd, seed=1; CREATE MODEL m USING logistic_regression FEATURES size, lot TARGET price <100000 FROM db4ai_houses with optimizer = nogd, seed=1;

View File

@ -742,7 +742,7 @@ CONTEXT: referenced column: centroid_id
-- Wrong parameters -- Wrong parameters
-- Number of iterations -- Number of iterations
CREATE MODEL my_kmeans_pp_empty USING kmeans FROM (SELECT position FROM multivariate_7_1000_10) WITH max_iterations = 0, num_centroids = 10, tolerance = 0.00001, batch_size = 1000, num_features = 7, distance_function = 'L2_Squared', seeding_function = 'Random++', verbose = 1, seed = 1255025990; CREATE MODEL my_kmeans_pp_empty USING kmeans FROM (SELECT position FROM multivariate_7_1000_10) WITH max_iterations = 0, num_centroids = 10, tolerance = 0.00001, batch_size = 1000, num_features = 7, distance_function = 'L2_Squared', seeding_function = 'Random++', verbose = 1, seed = 1255025990;
ERROR: Hyperparameter max_iterations must be in the range [1,2147483647] ERROR: Hyperparameter max_iterations must be in the range [1,10000]
-- Number of centroids -- Number of centroids
CREATE MODEL my_kmeans_pp_empty USING kmeans FROM (SELECT position FROM multivariate_7_1000_10) WITH max_iterations = 50, num_centroids = 0, tolerance = 0.00001, batch_size = 1000, num_features = 7, distance_function = 'L2_Squared', seeding_function = 'Random++', verbose = 1, seed = 1255025990; CREATE MODEL my_kmeans_pp_empty USING kmeans FROM (SELECT position FROM multivariate_7_1000_10) WITH max_iterations = 50, num_centroids = 0, tolerance = 0.00001, batch_size = 1000, num_features = 7, distance_function = 'L2_Squared', seeding_function = 'Random++', verbose = 1, seed = 1255025990;
ERROR: Hyperparameter num_centroids must be in the range [1,1000000] ERROR: Hyperparameter num_centroids must be in the range [1,1000000]
@ -755,9 +755,9 @@ CREATE MODEL my_kmeans_pp_empty USING kmeans FROM (SELECT position FROM multivar
ERROR: Hyperparameter tolerance must be in the range (0,1] ERROR: Hyperparameter tolerance must be in the range (0,1]
-- Batch size -- Batch size
CREATE MODEL my_kmeans_pp_empty USING kmeans FROM (SELECT position FROM multivariate_7_1000_10) WITH max_iterations = 50, num_centroids = 10, tolerance = 0.00001, batch_size = 0, num_features = 7, distance_function = 'L2_Squared', seeding_function = 'Random++', verbose = 1, seed = 1255025990; CREATE MODEL my_kmeans_pp_empty USING kmeans FROM (SELECT position FROM multivariate_7_1000_10) WITH max_iterations = 50, num_centroids = 10, tolerance = 0.00001, batch_size = 0, num_features = 7, distance_function = 'L2_Squared', seeding_function = 'Random++', verbose = 1, seed = 1255025990;
ERROR: Hyperparameter batch_size must be in the range [1,1000000] ERROR: Hyperparameter batch_size must be in the range [1,1048575]
CREATE MODEL my_kmeans_pp_empty USING kmeans FROM (SELECT position FROM multivariate_7_1000_10) WITH max_iterations = 50, num_centroids = 10, tolerance = 0.00001, batch_size = 1000001, num_features = 7, distance_function = 'L2_Squared', seeding_function = 'Random++', verbose = 1, seed = 1255025990; CREATE MODEL my_kmeans_pp_empty USING kmeans FROM (SELECT position FROM multivariate_7_1000_10) WITH max_iterations = 50, num_centroids = 10, tolerance = 0.00001, batch_size = 1048576, num_features = 7, distance_function = 'L2_Squared', seeding_function = 'Random++', verbose = 1, seed = 1255025990;
ERROR: Hyperparameter batch_size must be in the range [1,1000000] ERROR: Hyperparameter batch_size must be in the range [1,1048575]
-- Num of features (not matching the data) -- Num of features (not matching the data)
CREATE MODEL my_kmeans_pp_empty USING kmeans FROM (SELECT position FROM multivariate_7_1000_10) WITH max_iterations = 50, num_centroids = 10, tolerance = 0.00001, batch_size = 1000, num_features = 9, distance_function = 'L2_Squared', seeding_function = 'Random++', verbose = 1, seed = 1255025990; CREATE MODEL my_kmeans_pp_empty USING kmeans FROM (SELECT position FROM multivariate_7_1000_10) WITH max_iterations = 50, num_centroids = 10, tolerance = 0.00001, batch_size = 1000, num_features = 9, distance_function = 'L2_Squared', seeding_function = 'Random++', verbose = 1, seed = 1255025990;
NOTICE: *** Initial statistics gathered: NOTICE: *** Initial statistics gathered:

View File

@ -12,9 +12,9 @@ CREATE MODEL m using xgboost_binary_logistic FROM db4ai_rain;
ERROR: Supervised ML algorithms require FEATURES clause ERROR: Supervised ML algorithms require FEATURES clause
-- Errors with semantic validation of hyperparameters -- Errors with semantic validation of hyperparameters
CREATE MODEL m USING xgboost_binary_logistic FEATURES rainfall, temp9am TARGET raintoday FROM db4ai_rain WITH n_iter=-1; CREATE MODEL m USING xgboost_binary_logistic FEATURES rainfall, temp9am TARGET raintoday FROM db4ai_rain WITH n_iter=-1;
ERROR: Hyperparameter n_iter must be in the range [1,2147483647] ERROR: Hyperparameter n_iter must be in the range [1,10000]
CREATE MODEL m USING xgboost_binary_logistic FEATURES rainfall, temp9am TARGET raintoday FROM db4ai_rain WITH batch_size=0; CREATE MODEL m USING xgboost_binary_logistic FEATURES rainfall, temp9am TARGET raintoday FROM db4ai_rain WITH batch_size=0;
ERROR: Hyperparameter batch_size must be in the range [1,2147483647] ERROR: Hyperparameter batch_size must be in the range [1,1048575]
CREATE MODEL m USING xgboost_binary_logistic FEATURES rainfall, temp9am TARGET raintoday FROM db4ai_rain WITH max_depth=-1; CREATE MODEL m USING xgboost_binary_logistic FEATURES rainfall, temp9am TARGET raintoday FROM db4ai_rain WITH max_depth=-1;
ERROR: Hyperparameter max_depth must be in the range [0,2147483647] ERROR: Hyperparameter max_depth must be in the range [0,2147483647]
CREATE MODEL m USING xgboost_binary_logistic FEATURES rainfall, temp9am TARGET raintoday FROM db4ai_rain WITH min_child_weight=-1; CREATE MODEL m USING xgboost_binary_logistic FEATURES rainfall, temp9am TARGET raintoday FROM db4ai_rain WITH min_child_weight=-1;
@ -24,7 +24,7 @@ ERROR: Hyperparameter eta must be in the range [0,1]
CREATE MODEL m USING xgboost_binary_logistic FEATURES rainfall, temp9am TARGET raintoday FROM db4ai_rain WITH seed=-1; CREATE MODEL m USING xgboost_binary_logistic FEATURES rainfall, temp9am TARGET raintoday FROM db4ai_rain WITH seed=-1;
ERROR: Hyperparameter seed must be in the range [0,2147483647] ERROR: Hyperparameter seed must be in the range [0,2147483647]
CREATE MODEL m USING xgboost_binary_logistic FEATURES rainfall, temp9am TARGET raintoday FROM db4ai_rain WITH nthread=-1; CREATE MODEL m USING xgboost_binary_logistic FEATURES rainfall, temp9am TARGET raintoday FROM db4ai_rain WITH nthread=-1;
ERROR: Hyperparameter nthread must be in the range [0,2147483647] ERROR: Hyperparameter nthread must be in the range [0,100]
CREATE MODEL m USING xgboost_binary_logistic FEATURES rainfall, temp9am TARGET raintoday FROM db4ai_rain WITH booster=10; CREATE MODEL m USING xgboost_binary_logistic FEATURES rainfall, temp9am TARGET raintoday FROM db4ai_rain WITH booster=10;
ERROR: Hyperparameter booster must be a string ERROR: Hyperparameter booster must be a string
CREATE MODEL m USING xgboost_binary_logistic FEATURES rainfall, temp9am TARGET raintoday FROM db4ai_rain WITH tree_method=10; CREATE MODEL m USING xgboost_binary_logistic FEATURES rainfall, temp9am TARGET raintoday FROM db4ai_rain WITH tree_method=10;