forked from openGauss-Ecosystem/openGauss-server
Modifying Hyperparameter Settings
Offering: openGaussDev More detail: Modifying Hyperparameter Settings Match-id-c7650485108bc6fe7b1f63cd7ff9bf23bbc3325f
This commit is contained in:
parent
84e65fdf73
commit
927c5e918f
|
@ -80,6 +80,7 @@ For example:
|
|||
* verbose: 0 (no output), 1 (less output), or 2 (full output)
|
||||
|
||||
# Hyperparameter list for 'xgboost_regression_logistic', 'xgboost_binary_logistic', 'xgboost_regression_gamma' and 'xgboost_regression_squarederror':
|
||||
* n_iter: Maximum number of iterations until convergence (range: 1 to 10000, default: 10)
|
||||
* batch_size: Number of tuples in each processing batch (range: 1 to 1048575, default: 10000)
|
||||
* booster: Which booster to use, e.g., gbtree, gblinear or dart (default: gbtree)
|
||||
* tree_method: The tree construction algorithm used in XGBoost. Choices: auto, exact, approx, hist, gpu_hist (gpu_hist is only supported when a GPU is available)
|
||||
|
|
|
@ -257,7 +257,7 @@ static Datum pca_predict(const Matrix *features, const Matrix *weights,
|
|||
static HyperparameterDefinition pca_hyperparameter_definitions[] = {
|
||||
HYPERPARAMETER_INT4("number_components", 1, 1, true, INT32_MAX, true, HyperparametersGD, number_dimensions,
|
||||
HP_NO_AUTOML()),
|
||||
HYPERPARAMETER_INT4("batch_size", 1000, 1, true, INT32_MAX, true, HyperparametersGD, batch_size, HP_NO_AUTOML()),
|
||||
HYPERPARAMETER_INT4("batch_size", 1000, 1, true, MAX_BATCH_SIZE, true, HyperparametersGD, batch_size, HP_NO_AUTOML()),
|
||||
HYPERPARAMETER_INT4("max_iterations", 100, 1, true, ITER_MAX, true, HyperparametersGD, max_iterations,
|
||||
HP_NO_AUTOML()),
|
||||
HYPERPARAMETER_INT4("max_seconds", 0, 0, true, INT32_MAX, true, HyperparametersGD, max_seconds, HP_NO_AUTOML()),
|
||||
|
|
|
@ -1044,7 +1044,7 @@ HyperparameterDefinition kmeans_hyperparameter_definitions[] = {
|
|||
HYPERPARAMETER_INT4("max_iterations", 10, 1, true, ITER_MAX, true, HyperparametersKMeans, num_iterations,
|
||||
HP_NO_AUTOML()),
|
||||
HYPERPARAMETER_INT4("num_features", 0, 1, true, INT32_MAX, true, HyperparametersKMeans, n_features, HP_NO_AUTOML()),
|
||||
HYPERPARAMETER_INT4("batch_size", 1000, 1, true, 1000000, true, HyperparametersKMeans, batch_size, HP_NO_AUTOML()),
|
||||
HYPERPARAMETER_INT4("batch_size", 1000, 1, true, MAX_BATCH_SIZE, true, HyperparametersKMeans, batch_size, HP_NO_AUTOML()),
|
||||
HYPERPARAMETER_INT4("seed", 0, 0, true, INT32_MAX, true, HyperparametersKMeans, external_seed,
|
||||
HP_AUTOML_INT(1, INT32_MAX, 1, ProbabilityDistribution::UNIFORM_RANGE)),
|
||||
HYPERPARAMETER_FLOAT8("tolerance", 0.00001, 0.0, false, 1.0, true, HyperparametersKMeans, tolerance,
|
||||
|
|
|
@ -226,13 +226,13 @@ const char *xgboost_tree_method_str[] = {"auto", "exact", "approx", "hist", "gpu
|
|||
const char *xgboost_eval_metric_str[] = {"rmse", "rmsle", "map", "mae", "auc", "aucpr" };
|
||||
static HyperparameterDefinition xgboost_hyperparameter_definitions[] = {
|
||||
HYPERPARAMETER_INT4("n_iter", 10, 1, true, ITER_MAX, true, HyperparamsXGBoost, n_iterations, HP_NO_AUTOML()),
|
||||
HYPERPARAMETER_INT4("batch_size", 10000, 1, true, INT32_MAX, true, HyperparamsXGBoost, batch_size, HP_NO_AUTOML()),
|
||||
HYPERPARAMETER_INT4("batch_size", 10000, 1, true, MAX_BATCH_SIZE, true, HyperparamsXGBoost, batch_size, HP_NO_AUTOML()),
|
||||
HYPERPARAMETER_INT4("max_depth", 5, 0, true, INT32_MAX, true, HyperparamsXGBoost, max_depth, HP_NO_AUTOML()),
|
||||
HYPERPARAMETER_INT4("min_child_weight", 1, 0, true, INT32_MAX, true, HyperparamsXGBoost, min_child_weight,
|
||||
HP_NO_AUTOML()),
|
||||
HYPERPARAMETER_FLOAT8("gamma", 0.0, 0.0, true, 1, true, HyperparamsXGBoost, gamma, HP_NO_AUTOML()),
|
||||
HYPERPARAMETER_FLOAT8("gamma", 0.0, 0.0, true, DBL_MAX, true, HyperparamsXGBoost, gamma, HP_NO_AUTOML()),
|
||||
HYPERPARAMETER_FLOAT8("eta", 0.3, 0.0, true, 1, true, HyperparamsXGBoost, eta, HP_NO_AUTOML()),
|
||||
HYPERPARAMETER_INT4("nthread", 1, 0, true, INT32_MAX, true, HyperparamsXGBoost, nthread, HP_NO_AUTOML()),
|
||||
HYPERPARAMETER_INT4("nthread", 1, 0, true, 100, true, HyperparamsXGBoost, nthread, HP_NO_AUTOML()),
|
||||
HYPERPARAMETER_INT4("verbosity", 1, 0, true, 3, true, HyperparamsXGBoost, verbosity, HP_NO_AUTOML()),
|
||||
HYPERPARAMETER_INT4("seed", 0, 0, true, INT32_MAX, true, HyperparamsXGBoost, seed,
|
||||
HP_AUTOML_INT(1, INT32_MAX, 1, ProbabilityDistribution::UNIFORM_RANGE)),
|
||||
|
|
|
@ -26,6 +26,7 @@
|
|||
#include "utils/timestamp.h"
|
||||
|
||||
#define ITER_MAX 10000
|
||||
#define MAX_BATCH_SIZE 0x0fffff
|
||||
|
||||
uint64_t time_diff(struct timespec *time_p1, struct timespec *time_p2);
|
||||
double interval_to_sec(double time_interval);
|
||||
|
|
|
@ -261,7 +261,7 @@ typedef struct HyperparametersGD {
|
|||
} HyperparametersGD;
|
||||
|
||||
#define GD_HYPERPARAMETERS_SUPERVISED \
|
||||
HYPERPARAMETER_INT4("batch_size", 1000, 1, true, INT32_MAX, true, \
|
||||
HYPERPARAMETER_INT4("batch_size", 1000, 1, true, MAX_BATCH_SIZE, true, \
|
||||
HyperparametersGD, batch_size, \
|
||||
HP_AUTOML_INT(1, 10000, 4, ProbabilityDistribution::LOG_RANGE)), \
|
||||
HYPERPARAMETER_FLOAT8("decay", 0.95, 0.0, false, DBL_MAX, true, \
|
||||
|
|
|
@ -1201,7 +1201,7 @@ CREATE MODEL my_kmeans_pp_empty USING kmeans FROM (SELECT position FROM multivar
|
|||
|
||||
-- Batch size
|
||||
CREATE MODEL my_kmeans_pp_empty USING kmeans FROM (SELECT position FROM multivariate_7_1000_10) WITH max_iterations = 50, num_centroids = 10, tolerance = 0.00001, batch_size = 0, num_features = 7, distance_function = 'L2_Squared', seeding_function = 'Random++', verbose = 1, seed = 1255025990;
|
||||
CREATE MODEL my_kmeans_pp_empty USING kmeans FROM (SELECT position FROM multivariate_7_1000_10) WITH max_iterations = 50, num_centroids = 10, tolerance = 0.00001, batch_size = 1000001, num_features = 7, distance_function = 'L2_Squared', seeding_function = 'Random++', verbose = 1, seed = 1255025990;
|
||||
CREATE MODEL my_kmeans_pp_empty USING kmeans FROM (SELECT position FROM multivariate_7_1000_10) WITH max_iterations = 50, num_centroids = 10, tolerance = 0.00001, batch_size = 1048576, num_features = 7, distance_function = 'L2_Squared', seeding_function = 'Random++', verbose = 1, seed = 1255025990;
|
||||
|
||||
-- Num of features (not matching the data)
|
||||
CREATE MODEL my_kmeans_pp_empty USING kmeans FROM (SELECT position FROM multivariate_7_1000_10) WITH max_iterations = 50, num_centroids = 10, tolerance = 0.00001, batch_size = 1000, num_features = 9, distance_function = 'L2_Squared', seeding_function = 'Random++', verbose = 1, seed = 1255025990;
|
||||
|
|
|
@ -14,13 +14,13 @@ CREATE MODEL m using logistic_regression FEATURES size,lot FROM db4ai_houses;
|
|||
ERROR: Supervised ML algorithms require TARGET clause
|
||||
-- Errors with semantic validation of hyperparameters
|
||||
CREATE MODEL m USING logistic_regression FEATURES size, lot TARGET price <100000 FROM db4ai_houses with batch_size = 0, seed=1;
|
||||
ERROR: Hyperparameter batch_size must be in the range [1,2147483647]
|
||||
ERROR: Hyperparameter batch_size must be in the range [1,1048575]
|
||||
CREATE MODEL m USING logistic_regression FEATURES size, lot TARGET price <100000 FROM db4ai_houses with decay = 0.0, seed=1;
|
||||
ERROR: Hyperparameter decay must be in the range (0,1.7976931e+308]
|
||||
CREATE MODEL m USING logistic_regression FEATURES size, lot TARGET price <100000 FROM db4ai_houses with learning_rate = 0.0, seed=1;
|
||||
ERROR: Hyperparameter learning_rate must be in the range (0,1.7976931e+308]
|
||||
CREATE MODEL m USING logistic_regression FEATURES size, lot TARGET price <100000 FROM db4ai_houses with max_iterations = 0, seed=1;
|
||||
ERROR: Hyperparameter max_iterations must be in the range [1,2147483647]
|
||||
ERROR: Hyperparameter max_iterations must be in the range [1,10000]
|
||||
CREATE MODEL m USING logistic_regression FEATURES size, lot TARGET price <100000 FROM db4ai_houses with max_seconds = -1, seed=1;
|
||||
ERROR: Hyperparameter max_seconds must be in the range [0,2147483647]
|
||||
CREATE MODEL m USING logistic_regression FEATURES size, lot TARGET price <100000 FROM db4ai_houses with optimizer = nogd, seed=1;
|
||||
|
|
|
@ -742,7 +742,7 @@ CONTEXT: referenced column: centroid_id
|
|||
-- Wrong parameters
|
||||
-- Number of iterations
|
||||
CREATE MODEL my_kmeans_pp_empty USING kmeans FROM (SELECT position FROM multivariate_7_1000_10) WITH max_iterations = 0, num_centroids = 10, tolerance = 0.00001, batch_size = 1000, num_features = 7, distance_function = 'L2_Squared', seeding_function = 'Random++', verbose = 1, seed = 1255025990;
|
||||
ERROR: Hyperparameter max_iterations must be in the range [1,2147483647]
|
||||
ERROR: Hyperparameter max_iterations must be in the range [1,10000]
|
||||
-- Number of centroids
|
||||
CREATE MODEL my_kmeans_pp_empty USING kmeans FROM (SELECT position FROM multivariate_7_1000_10) WITH max_iterations = 50, num_centroids = 0, tolerance = 0.00001, batch_size = 1000, num_features = 7, distance_function = 'L2_Squared', seeding_function = 'Random++', verbose = 1, seed = 1255025990;
|
||||
ERROR: Hyperparameter num_centroids must be in the range [1,1000000]
|
||||
|
@ -755,9 +755,9 @@ CREATE MODEL my_kmeans_pp_empty USING kmeans FROM (SELECT position FROM multivar
|
|||
ERROR: Hyperparameter tolerance must be in the range (0,1]
|
||||
-- Batch size
|
||||
CREATE MODEL my_kmeans_pp_empty USING kmeans FROM (SELECT position FROM multivariate_7_1000_10) WITH max_iterations = 50, num_centroids = 10, tolerance = 0.00001, batch_size = 0, num_features = 7, distance_function = 'L2_Squared', seeding_function = 'Random++', verbose = 1, seed = 1255025990;
|
||||
ERROR: Hyperparameter batch_size must be in the range [1,1000000]
|
||||
CREATE MODEL my_kmeans_pp_empty USING kmeans FROM (SELECT position FROM multivariate_7_1000_10) WITH max_iterations = 50, num_centroids = 10, tolerance = 0.00001, batch_size = 1000001, num_features = 7, distance_function = 'L2_Squared', seeding_function = 'Random++', verbose = 1, seed = 1255025990;
|
||||
ERROR: Hyperparameter batch_size must be in the range [1,1000000]
|
||||
ERROR: Hyperparameter batch_size must be in the range [1,1048575]
|
||||
CREATE MODEL my_kmeans_pp_empty USING kmeans FROM (SELECT position FROM multivariate_7_1000_10) WITH max_iterations = 50, num_centroids = 10, tolerance = 0.00001, batch_size = 1048576, num_features = 7, distance_function = 'L2_Squared', seeding_function = 'Random++', verbose = 1, seed = 1255025990;
|
||||
ERROR: Hyperparameter batch_size must be in the range [1,1048575]
|
||||
-- Num of features (not matching the data)
|
||||
CREATE MODEL my_kmeans_pp_empty USING kmeans FROM (SELECT position FROM multivariate_7_1000_10) WITH max_iterations = 50, num_centroids = 10, tolerance = 0.00001, batch_size = 1000, num_features = 9, distance_function = 'L2_Squared', seeding_function = 'Random++', verbose = 1, seed = 1255025990;
|
||||
NOTICE: *** Initial statistics gathered:
|
||||
|
|
|
@ -12,9 +12,9 @@ CREATE MODEL m using xgboost_binary_logistic FROM db4ai_rain;
|
|||
ERROR: Supervised ML algorithms require FEATURES clause
|
||||
-- Errors with semantic validation of hyperparameters
|
||||
CREATE MODEL m USING xgboost_binary_logistic FEATURES rainfall, temp9am TARGET raintoday FROM db4ai_rain WITH n_iter=-1;
|
||||
ERROR: Hyperparameter n_iter must be in the range [1,2147483647]
|
||||
ERROR: Hyperparameter n_iter must be in the range [1,10000]
|
||||
CREATE MODEL m USING xgboost_binary_logistic FEATURES rainfall, temp9am TARGET raintoday FROM db4ai_rain WITH batch_size=0;
|
||||
ERROR: Hyperparameter batch_size must be in the range [1,2147483647]
|
||||
ERROR: Hyperparameter batch_size must be in the range [1,1048575]
|
||||
CREATE MODEL m USING xgboost_binary_logistic FEATURES rainfall, temp9am TARGET raintoday FROM db4ai_rain WITH max_depth=-1;
|
||||
ERROR: Hyperparameter max_depth must be in the range [0,2147483647]
|
||||
CREATE MODEL m USING xgboost_binary_logistic FEATURES rainfall, temp9am TARGET raintoday FROM db4ai_rain WITH min_child_weight=-1;
|
||||
|
@ -24,7 +24,7 @@ ERROR: Hyperparameter eta must be in the range [0,1]
|
|||
CREATE MODEL m USING xgboost_binary_logistic FEATURES rainfall, temp9am TARGET raintoday FROM db4ai_rain WITH seed=-1;
|
||||
ERROR: Hyperparameter seed must be in the range [0,2147483647]
|
||||
CREATE MODEL m USING xgboost_binary_logistic FEATURES rainfall, temp9am TARGET raintoday FROM db4ai_rain WITH nthread=-1;
|
||||
ERROR: Hyperparameter nthread must be in the range [0,2147483647]
|
||||
ERROR: Hyperparameter nthread must be in the range [0,100]
|
||||
CREATE MODEL m USING xgboost_binary_logistic FEATURES rainfall, temp9am TARGET raintoday FROM db4ai_rain WITH booster=10;
|
||||
ERROR: Hyperparameter booster must be a string
|
||||
CREATE MODEL m USING xgboost_binary_logistic FEATURES rainfall, temp9am TARGET raintoday FROM db4ai_rain WITH tree_method=10;
|
||||
|
|
Loading…
Reference in New Issue