"""Class for cross-validation over distributions of hyperparameters
-- Anthony Yu and Michael Chau
"""
import logging
import random
import numpy as np
import warnings
from sklearn.base import clone
from ray import tune
from ray.tune.search.sample import Domain
from ray.tune.search import (ConcurrencyLimiter, BasicVariantGenerator,
Searcher)
from ray.tune.search.bohb import TuneBOHB
from ray.tune.schedulers import HyperBandForBOHB
from ray.tune.stopper import CombinedStopper
from ray.tune.search.skopt import SkOptSearch
from ray.tune.search.hyperopt import HyperOptSearch
from ray.tune.search.optuna import OptunaSearch
from tune_sklearn.utils import check_is_pipeline, MaximumIterationStopper
from tune_sklearn.tune_basesearch import TuneBaseSearchCV
from tune_sklearn._trainable import _Trainable, _PipelineTrainable
from tune_sklearn.list_searcher import RandomListSearcher
from tune_sklearn.utils import check_error_warm_start, resolve_logger_callbacks
logger = logging.getLogger(__name__)
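# Maps supported searcher classes to the canonical string values of the
# ``search_optimization`` argument (both random searchers map to "random").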
available_optimizations = {
BasicVariantGenerator: "random",
RandomListSearcher: "random",
SkOptSearch: "bayesian", # scikit-optimize/SkOpt
TuneBOHB: "bohb",
HyperOptSearch: "hyperopt",
OptunaSearch: "optuna",
}
def _check_distribution(dist, search_optimization):
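    """Validate a single search space entry against the chosen backend.

    Raises:
        ValueError: If ``dist`` is not of a type the given
            ``search_optimization`` backend can consume.
    """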
# Tune Domain is always good
if isinstance(dist, Domain):
return
search_optimization = available_optimizations.get(
type(search_optimization), search_optimization)
if search_optimization == "random":
if not (isinstance(dist, list) or hasattr(dist, "rvs")):
raise ValueError("distribution must be a list or scipy "
"distribution when using randomized search")
elif not isinstance(dist, tuple) and not isinstance(dist, list):
if search_optimization == "bayesian":
import skopt
if not isinstance(dist, skopt.space.Dimension):
raise ValueError("distribution must be a tuple, list, or "
"`skopt.space.Dimension` instance when using "
"bayesian search")
elif search_optimization == "hyperopt":
import hyperopt.pyll
if not isinstance(dist, hyperopt.pyll.base.Apply):
raise ValueError(
"distribution must be a tuple, list, or "
"`hyperopt.pyll.base.Apply` instance when using "
"hyperopt search")
elif search_optimization == "optuna":
import optuna.distributions
if not isinstance(dist, optuna.distributions.BaseDistribution):
raise ValueError("distribution must be a tuple, list, or "
"`optuna.distributions.BaseDistribution`"
"instance when using optuna search")
elif search_optimization == "bohb":
import ConfigSpace.hyperparameters
if not isinstance(dist,
ConfigSpace.hyperparameters.Hyperparameter):
raise ValueError(
"distribution must be a tuple, list, or "
"`ConfigSpace.hyperparameters.Hyperparameter` "
"instance when using bohb search")
class TuneSearchCV(TuneBaseSearchCV):
    """Generic, non-grid search on hyperparameters.
Randomized search is invoked with ``search_optimization`` set to
``"random"`` and behaves like scikit-learn's ``RandomizedSearchCV``.
Bayesian search can be invoked with several values of
``search_optimization``.
- ``"bayesian"``, using https://scikit-optimize.github.io/stable/
- ``"bohb"``, using HpBandSter - https://github.com/automl/HpBandSter
Tree-Parzen Estimators search is invoked with ``search_optimization``
set to ``"hyperopt"``, using HyperOpt - http://hyperopt.github.io/hyperopt
All types of search aside from Randomized search require parent
libraries to be installed.
TuneSearchCV implements a "fit" and a "score" method.
It also implements "predict", "predict_proba", "decision_function",
"transform" and "inverse_transform" if they are implemented in the
estimator used.
The parameters of the estimator used to apply these methods are optimized
by cross-validated search over parameter settings.
In contrast to GridSearchCV, not all parameter values are tried out, but
rather a fixed number of parameter settings is sampled from the specified
distributions. The number of parameter settings that are tried is
given by n_trials.
Args:
estimator (`estimator`): This is assumed to implement the
scikit-learn estimator interface. Either estimator needs to
provide a ``score`` function, or ``scoring`` must be passed.
param_distributions (`dict` or `list` or `ConfigurationSpace`): Serves
as the ``param_distributions`` parameter in scikit-learn's
``RandomizedSearchCV`` or as the ``search_space`` parameter in
``BayesSearchCV``.
        For randomized search: dictionary with parameter names (string)
as keys and distributions or lists of parameter settings to try
for randomized search.
        Distributions must provide an ``rvs`` method for sampling (such
        as those from scipy.stats.distributions). Ray Tune search spaces
        are also supported.
If a list is given, it is sampled uniformly. If a list of dicts is
given, first a dict is sampled uniformly, and then a parameter is
sampled using that dict as above.
For other types of search: dictionary with parameter names (string)
as keys. Values can be
            - a (lower_bound, upper_bound) tuple (for Real or Integer params)
            - a (lower_bound, upper_bound, "prior") tuple (for Real params)
            - a list of categories (for Categorical dimensions)
            - a Ray Tune search space (e.g. ``tune.uniform``)
``"bayesian"`` (scikit-optimize) also accepts
- skopt.space.Dimension instance (Real, Integer or Categorical).
``"hyperopt"`` (HyperOpt) also accepts
- an instance of a hyperopt.pyll.base.Apply object.
``"bohb"`` (HpBandSter) also accepts
- ConfigSpace.hyperparameters.Hyperparameter instance.
``"optuna"`` (Optuna) also accepts
            - an instance of an optuna.distributions.BaseDistribution object.
For ``"bohb"`` (HpBandSter) it is also possible to pass a
`ConfigSpace.ConfigurationSpace` object instead of dict or a list.
            https://scikit-optimize.github.io/stable/modules/classes.html#module-skopt.space.space
early_stopping (bool, str or :class:`TrialScheduler`, optional): Option
to stop fitting to a hyperparameter configuration if it performs
poorly. Possible inputs are:
- If True, defaults to ASHAScheduler.
- A string corresponding to the name of a Tune Trial Scheduler
(i.e., "ASHAScheduler"). To specify parameters of the scheduler,
pass in a scheduler object instead of a string.
- Scheduler for executing fit with early stopping. Only a subset
of schedulers are currently supported. The scheduler will only be
            used if the estimator supports partial fitting.
- If None or False, early stopping will not be used.
Unless a ``HyperBandForBOHB`` object is passed,
this parameter is ignored for ``"bohb"``, as it requires
``HyperBandForBOHB``.
n_trials (int): Number of parameter settings that are sampled.
n_trials trades off runtime vs quality of the solution.
Defaults to 10.
scoring (str, callable, list/tuple, dict, or None): A single
string or a callable to evaluate the predictions on the test set.
See https://scikit-learn.org/stable/modules/model_evaluation.html
#scoring-parameter for all options.
For evaluating multiple metrics, either give a list/tuple of
(unique) strings or a dict with names as keys and callables as
values.
If None, the estimator's score method is used. Defaults to None.
n_jobs (int): Number of jobs to run in parallel. None or -1 means
using all processors. Defaults to None. If set to 1, jobs
will be run using Ray's 'local mode'. This can
lead to significant speedups if the model takes < 10 seconds
to fit due to removing inter-process communication overheads.
refit (bool, str, or `callable`): Refit an estimator using the
best found parameters on the whole dataset.
For multiple metric evaluation, this needs to be a string denoting
the scorer that would be used to find the best parameters for
refitting the estimator at the end.
The refitted estimator is made available at the ``best_estimator_``
attribute and permits using ``predict`` directly on this
            ``TuneSearchCV`` instance.
Also for multiple metric evaluation, the attributes
``best_index_``, ``best_score_`` and ``best_params_`` will only be
available if ``refit`` is set and all of them will be determined
            w.r.t. this specific scorer. If refit is not needed, set to False.
See ``scoring`` parameter to know more about multiple metric
evaluation. Defaults to True.
cv (int, `cross-validation generator` or `iterable`): Determines
the cross-validation splitting strategy.
Possible inputs for cv are:
- None, to use the default 5-fold cross validation,
- integer, to specify the number of folds in a `(Stratified)KFold`,
- An iterable yielding (train, test) splits as arrays of indices.
For integer/None inputs, if the estimator is a classifier and ``y``
is either binary or multiclass, :class:`StratifiedKFold` is used.
In all other cases, :class:`KFold` is used. Defaults to None.
verbose (int): Controls the verbosity: 0 = silent, 1 = only status
updates, 2 = status and trial results. Defaults to 0.
random_state (int or `RandomState`): Pseudo random number generator
state used for random uniform
sampling from lists of possible values instead of scipy.stats
distributions.
If int, random_state is the seed used by the random number
generator;
If RandomState instance, a seed is sampled from random_state;
If None, the random number generator is the RandomState instance
used by np.random and no seed is provided. Defaults to None.
Ignored when using BOHB.
error_score ('raise' or int or float): Value to assign to the score if
an error occurs in estimator
fitting. If set to 'raise', the error is raised. If a numeric value
is given, FitFailedWarning is raised. This parameter does not
affect the refit step, which will always raise the error.
Defaults to np.nan.
return_train_score (bool): If ``False``, the ``cv_results_``
attribute will not include training scores. Defaults to False.
Computing training scores is used to get insights on how different
parameter settings impact the overfitting/underfitting trade-off.
However computing the scores on the training set can be
computationally expensive and is not strictly required to select
the parameters that yield the best generalization performance.
local_dir (str): A string that defines where checkpoints and logs will
            be stored. Defaults to "~/ray_results".
        name (str): Name of experiment (for Ray Tune).
max_iters (int): Indicates the maximum number of epochs to run for each
hyperparameter configuration sampled (specified by ``n_trials``).
This parameter is used for early stopping. Defaults to 1.
Depending on the classifier type provided, a resource parameter
(`resource_param = max_iter or n_estimators`) will be detected.
The value of `resource_param` will be treated as a
"max resource value", and all classifiers will be
initialized with `max resource value // max_iters`, where
`max_iters` is this defined parameter. On each epoch,
resource_param (max_iter or n_estimators) is
incremented by `max resource value // max_iters`.
search_optimization ("random" or "bayesian" or "bohb" or "hyperopt"
or "optuna" or `ray.tune.search.Searcher` instance):
Randomized search is invoked with ``search_optimization`` set to
``"random"`` and behaves like scikit-learn's
``RandomizedSearchCV``.
Bayesian search can be invoked with several values of
``search_optimization``.
- ``"bayesian"`` via https://scikit-optimize.github.io/stable/
- ``"bohb"`` via http://github.com/automl/HpBandSter
Tree-Parzen Estimators search is invoked with
``search_optimization`` set to ``"hyperopt"`` via HyperOpt:
http://hyperopt.github.io/hyperopt
All types of search aside from Randomized search require parent
libraries to be installed.
Alternatively, instead of a string, a Ray Tune Searcher instance
can be used, which will be passed to ``tune.run()``.
use_gpu (bool): Indicates whether to use gpu for fitting.
Defaults to False. If True, training will start processes
            with the proper ``CUDA_VISIBLE_DEVICES`` setting. If a Ray
cluster has been initialized, all available GPUs will
be used.
loggers (list): A list of the names of the Tune loggers as strings
to be used to log results. Possible values are "tensorboard",
"csv", "mlflow", and "json"
pipeline_auto_early_stop (bool): Only relevant if estimator is Pipeline
object and early_stopping is enabled/True. If True, early stopping
will be performed on the last stage of the pipeline (which must
support early stopping). If False, early stopping will be
determined by 'Pipeline.warm_start' or 'Pipeline.partial_fit'
capabilities, which are by default not supported by standard
            scikit-learn. Defaults to True.
stopper (ray.tune.stopper.Stopper): Stopper objects passed to
``tune.run()``.
time_budget_s (int|float|datetime.timedelta): Global time budget in
seconds after which all trials are stopped. Can also be a
``datetime.timedelta`` object. The stopping condition is checked
after receiving a result, i.e. after each training iteration.
mode (str): One of {min, max}. Determines whether objective is
minimizing or maximizing the metric attribute. Defaults to "max".
        search_kwargs (dict):
            Additional arguments to pass to the ``Searcher`` objects
            (``ray.tune.search``).
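
    Examples:
        A minimal sketch of randomized search over an ``SGDClassifier``
        (the parameter names and bounds below are illustrative, not
        recommendations)::

            from scipy.stats import loguniform
            from sklearn.linear_model import SGDClassifier
            from tune_sklearn import TuneSearchCV

            param_dists = {
                # scipy distributions provide an ``rvs`` method and work
                # with search_optimization="random"
                "alpha": loguniform(1e-4, 1e-1),
                # plain lists are sampled uniformly
                "epsilon": [0.01, 0.1],
            }
            tune_search = TuneSearchCV(
                SGDClassifier(),
                param_distributions=param_dists,
                n_trials=3,
                search_optimization="random")
            # tune_search.fit(X_train, y_train)
            # print(tune_search.best_params_)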
"""
def __init__(self,
estimator,
param_distributions,
early_stopping=None,
n_trials=10,
scoring=None,
n_jobs=None,
refit=True,
cv=None,
verbose=0,
random_state=None,
error_score=np.nan,
return_train_score=False,
local_dir="~/ray_results",
name=None,
max_iters=1,
search_optimization="random",
use_gpu=False,
loggers=None,
pipeline_auto_early_stop=True,
stopper=None,
time_budget_s=None,
sk_n_jobs=None,
mode=None,
search_kwargs=None,
**kwargs):
if kwargs:
raise ValueError(
"Passing kwargs is depreciated, as it causes issues with "
"sklearn cloning. Please use the 'search_kwargs' argument "
"instead.")
if search_kwargs is not None and not isinstance(search_kwargs, dict):
raise TypeError(
f"'search_kwargs' must be a dict, got {type(search_kwargs)}.")
if sk_n_jobs not in (None, 1):
raise ValueError(
"Tune-sklearn no longer supports nested parallelism "
"with new versions of joblib/sklearn. Don't set 'sk_n_jobs'.")
self.search_optimization = search_optimization
if (self._search_optimization_lower not in set(
available_optimizations.values())) and not isinstance(
self._search_optimization_lower, Searcher):
raise ValueError(
"Search optimization must be one of "
f"{', '.join(list(available_optimizations.values()))} "
"or a ray.tune.search.Searcher instance.")
if isinstance(self._search_optimization_lower, Searcher):
if not hasattr(self._search_optimization_lower,
"_mode") or not hasattr(
self._search_optimization_lower, "_metric"):
raise ValueError(
"Searcher instance used as search optimization must have"
" '_mode' and '_metric' attributes.")
self._try_import_required_libraries(self._search_optimization_lower)
if isinstance(param_distributions, list):
if self._search_optimization_lower != "random":
raise ValueError("list of dictionaries for parameters "
"is not supported for non-random search")
if isinstance(param_distributions, dict):
check_param_distributions = [param_distributions]
else:
check_param_distributions = param_distributions
can_use_param_distributions = False
if self._search_optimization_lower == "bohb" or isinstance(
self._search_optimization_lower, TuneBOHB):
import ConfigSpace as CS
can_use_param_distributions = isinstance(check_param_distributions,
CS.ConfigurationSpace)
if isinstance(early_stopping, bool):
if early_stopping is False:
raise ValueError(
"early_stopping must not be False when using BOHB")
early_stopping = "HyperBandForBOHB"
elif not isinstance(early_stopping, HyperBandForBOHB):
if early_stopping != "HyperBandForBOHB":
warnings.warn("Ignoring early_stopping value, "
"as BOHB requires HyperBandForBOHB "
"as the EarlyStopping scheduler")
early_stopping = "HyperBandForBOHB"
elif early_stopping == "HyperBandForBOHB" or isinstance(
early_stopping, HyperBandForBOHB):
raise ValueError("search_optimization must be set to 'BOHB' "
"if early_stopping is set to HyperBandForBOHB")
if not can_use_param_distributions:
for p in check_param_distributions:
for dist in p.values():
_check_distribution(dist, self._search_optimization_lower)
super(TuneSearchCV, self).__init__(
estimator=estimator,
early_stopping=early_stopping,
scoring=scoring,
n_jobs=n_jobs or -1,
cv=cv,
verbose=verbose,
refit=refit,
error_score=error_score,
return_train_score=return_train_score,
local_dir=local_dir,
name=name,
max_iters=max_iters,
use_gpu=use_gpu,
loggers=loggers,
pipeline_auto_early_stop=pipeline_auto_early_stop,
stopper=stopper,
time_budget_s=time_budget_s,
mode=mode)
check_error_warm_start(self.early_stop_type, param_distributions,
estimator)
self.param_distributions = param_distributions
self.n_trials = n_trials
self.random_state = random_state
if isinstance(random_state, np.random.RandomState):
# For compatibility with all search algorithms we just
# sample a seed from the random state
self.seed = random_state.randint(2**32 - 1)
else:
self.seed = random_state
if self._search_optimization_lower == "random" or isinstance(
self._search_optimization_lower, type):
if search_kwargs:
raise ValueError(
f"{self._search_optimization_lower} does not support "
f"extra args: {search_kwargs}")
self.search_kwargs = search_kwargs
@property
def _searcher_name(self):
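        """Canonical string name of the search optimization.

        Searcher instances whose type appears in
        ``available_optimizations`` resolve to the mapped name;
        lowercased strings pass through unchanged.
        """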
return available_optimizations.get(
type(self._search_optimization_lower),
self._search_optimization_lower)
@property
def _search_optimization_lower(self):
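        """``search_optimization`` lowercased if it is a string,
        otherwise (e.g. a Searcher instance) returned unchanged."""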
return self.search_optimization.lower() if isinstance(
self.search_optimization, str) else self.search_optimization
def _fill_config_hyperparam(self, config):
"""Fill in the ``config`` dictionary with the hyperparameters.
Each distribution in ``self.param_distributions`` must implement
the ``rvs`` method to generate a random variable. The [0] is
present to extract the single value out of a list, which is returned
by ``rvs``.
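
        For example, ``{"alpha": scipy.stats.uniform(0, 1)}`` is wrapped
        as ``tune.sample_from(lambda spec: dist.rvs(1)[0])``, while
        ``{"alpha": [0.1, 1.0]}`` becomes ``tune.choice([0.1, 1.0])``.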
Args:
config (`dict`): dictionary to be filled in as the
configuration for `tune.run`.
"""
if self._searcher_name != "random":
return
if isinstance(self.param_distributions, list):
return
samples = 1
all_lists = True
for key, distribution in self.param_distributions.items():
if isinstance(distribution, Domain):
config[key] = distribution
all_lists = False
elif isinstance(distribution, list):
config[key] = tune.choice(distribution)
samples *= len(distribution)
else:
all_lists = False
def get_sample(dist):
return lambda spec: dist.rvs(1)[0]
config[key] = tune.sample_from(get_sample(distribution))
if all_lists:
self.n_trials = min(self.n_trials, samples)
def _is_param_distributions_all_tune_domains(self):
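        """Whether every value in ``param_distributions`` is a Ray Tune
        ``Domain``, in which case the distributions can be passed to
        ``tune.run`` directly without conversion."""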
return isinstance(self.param_distributions, dict) and all(
isinstance(v, Domain) for k, v in self.param_distributions.items())
def _get_bohb_config_space(self):
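        """Convert ``param_distributions`` into a ConfigSpace object for
        TuneBOHB. Tuples become uniform or log-uniform float
        hyperparameters and lists become categoricals; Ray Tune domains
        and existing ``ConfigurationSpace`` objects pass through
        unchanged."""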
if self._is_param_distributions_all_tune_domains():
return self.param_distributions
import ConfigSpace as CS
config_space = CS.ConfigurationSpace()
if isinstance(self.param_distributions, CS.ConfigurationSpace):
return self.param_distributions
for param_name, space in self.param_distributions.items():
prior = "uniform"
param_name = str(param_name)
            if isinstance(space, tuple) and 2 <= len(space) <= 3:
try:
low = float(space[0])
high = float(space[1])
                except Exception:
                    raise ValueError(
                        "low and high must be castable to float, "
                        f"but are of type {type(space[0])} and "
                        f"{type(space[1])}") from None
if len(space) == 3:
prior = space[2]
if prior not in ["uniform", "log-uniform"]:
raise ValueError(
"prior needs to be either "
f"'uniform' or 'log-uniform', was {prior}")
config_space.add_hyperparameter(
CS.UniformFloatHyperparameter(
name=param_name,
lower=low,
upper=high,
log=prior == "log-uniform"))
elif isinstance(space, list):
config_space.add_hyperparameter(
CS.CategoricalHyperparameter(
name=param_name, choices=space))
else:
config_space.add_hyperparameter(space)
return config_space
def _get_optuna_params(self):
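        """Convert ``param_distributions`` into an Optuna search space.

        Tuples become ``tune.uniform``/``tune.loguniform`` and lists
        become ``tune.choice``; the result is then translated via
        ``OptunaSearch.convert_search_space``."""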
config_space = {}
for param_name, space in self.param_distributions.items():
prior = "uniform"
param_name = str(param_name)
            if isinstance(space, tuple) and 2 <= len(space) <= 3:
try:
low = float(space[0])
high = float(space[1])
                except Exception:
                    raise ValueError(
                        "low and high must be castable to float, "
                        f"but are of type {type(space[0])} and "
                        f"{type(space[1])}") from None
if len(space) == 3:
prior = space[2]
if prior not in ["uniform", "log-uniform"]:
raise ValueError(
"prior needs to be either "
f"'uniform' or 'log-uniform', was {prior}")
if prior == "log-uniform":
config_space[param_name] = tune.loguniform(low, high)
else:
config_space[param_name] = tune.uniform(low, high)
elif isinstance(space, list):
config_space[param_name] = tune.choice(space)
else:
raise RuntimeError(f"Unknown Optuna search space: {space}")
return OptunaSearch.convert_search_space(config_space)
def _get_hyperopt_params(self):
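        """Convert ``param_distributions`` into a hyperopt search space.

        Tuples become ``hp.uniform``/``hp.loguniform``, lists become
        ``hp.choice``, and anything else (e.g. ``hyperopt.pyll``
        expressions) passes through unchanged."""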
from hyperopt import hp
config_space = {}
for param_name, space in self.param_distributions.items():
prior = "uniform"
param_name = str(param_name)
            if isinstance(space, tuple) and 2 <= len(space) <= 3:
try:
low = float(space[0])
high = float(space[1])
                except Exception:
                    raise ValueError(
                        "low and high must be castable to float, "
                        f"but are of type {type(space[0])} and "
                        f"{type(space[1])}") from None
if len(space) == 3:
prior = space[2]
if prior not in ["uniform", "log-uniform"]:
raise ValueError("prior needs to be either 'uniform' "
f"or 'log-uniform', was {prior}")
if prior == "log-uniform":
config_space[param_name] = hp.loguniform(
param_name, np.log(low), np.log(high))
else:
config_space[param_name] = hp.uniform(
param_name, low, high)
elif isinstance(space, list):
config_space[param_name] = hp.choice(param_name, space)
else:
config_space[param_name] = space
return config_space
def _try_import_required_libraries(self, search_optimization):
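        """Import the third-party library required by
        ``search_optimization``, raising ``ImportError`` with install
        instructions if it is missing."""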
if search_optimization == "bayesian":
try:
import skopt # noqa: F401
except ImportError:
raise ImportError(
"It appears that scikit-optimize is not installed. "
"Do: pip install scikit-optimize") from None
elif search_optimization == "bohb":
try:
import ConfigSpace as CS # noqa: F401
except ImportError:
raise ImportError(
"It appears that either HpBandSter or ConfigSpace "
"is not installed. "
"Do: pip install hpbandster ConfigSpace") from None
elif search_optimization == "hyperopt":
try:
from hyperopt import hp # noqa: F401
except ImportError:
raise ImportError("It appears that hyperopt is not installed. "
"Do: pip install hyperopt") from None
elif search_optimization == "optuna":
try:
import optuna # noqa: F401
except ImportError:
raise ImportError("It appears that optuna is not installed. "
"Do: pip install optuna") from None
def _tune_run(self,
X,
y,
config,
resources_per_trial,
tune_params=None,
fit_params=None):
"""Wrapper to call ``tune.run``. Multiple estimators are generated when
early stopping is possible, whereas a single estimator is
generated when early stopping is not possible.
Args:
X (:obj:`array-like` (shape = [n_samples, n_features])):
Training vector, where n_samples is the number of samples and
n_features is the number of features.
            y (:obj:`array-like`, shape = [n_samples] or
                [n_samples, n_output]): Target relative to X for
                classification or regression; None for unsupervised
                learning.
config (dict): Configurations such as hyperparameters to run
``tune.run`` on.
resources_per_trial (dict): Resources to use per trial within Ray.
Accepted keys are `cpu`, `gpu` and custom resources, and values
are integers specifying the number of each resource to use.
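                For example, ``{"cpu": 1, "gpu": 1}`` allocates one CPU
                and one GPU to each trial.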
tune_params (dict): User defined parameters passed to
``tune.run``. Parameters inside `tune_params` override
preset parameters.
fit_params (dict): Parameters passed to the ``fit`` method
of the estimator.
Returns:
analysis (`ExperimentAnalysis`): Object returned by
`tune.run`.
"""
if self.seed is not None:
random.seed(self.seed)
np.random.seed(self.seed)
trainable = _Trainable
if self.pipeline_auto_early_stop and check_is_pipeline(
self.estimator) and self.early_stopping_:
trainable = _PipelineTrainable
max_iter = self.max_iters
if self.early_stopping_ is not None:
estimator_list = [
clone(self.estimator) for _ in range(self.n_splits)
]
if hasattr(self.early_stopping_, "_max_t_attr"):
                # We want to delegate stopping to schedulers that
                # support it, but we also want trials to stop
                # eventually, just in case. The solution is to make
                # the stop condition very large.
max_iter = self.max_iters * 10
else:
estimator_list = [clone(self.estimator)]
stopper = MaximumIterationStopper(max_iter=max_iter)
if self.stopper:
stopper = CombinedStopper(stopper, self.stopper)
run_args = dict(
scheduler=self.early_stopping_,
reuse_actors=True,
verbose=self.verbose,
stop=stopper,
num_samples=self.n_trials,
config=config,
fail_fast="raise",
resources_per_trial=resources_per_trial,
local_dir=self.local_dir,
name=self.name,
callbacks=resolve_logger_callbacks(self.loggers,
self.defined_loggers),
time_budget_s=self.time_budget_s,
metric=self._metric_name,
mode=self.mode)
if self._search_optimization_lower == "random":
if isinstance(self.param_distributions, list):
search_algo = RandomListSearcher(self.param_distributions)
else:
search_algo = BasicVariantGenerator()
run_args["search_alg"] = search_algo
else:
search_space = None
override_search_space = True
if isinstance(
self._search_optimization_lower, Searcher) and hasattr(
self._search_optimization_lower, "_space"
) and self._search_optimization_lower._space is not None:
if (self._search_optimization_lower._metric !=
self._metric_name):
raise ValueError(
"If a Searcher instance has been initialized with a "
"space, its metric "
f"('{self._search_optimization_lower._metric}') "
"must match the metric set in TuneSearchCV"
f" ('{self._metric_name}')")
if self._search_optimization_lower._mode != self.mode:
raise ValueError(
"If a Searcher instance has been initialized with a "
"space, its mode "
f"('{self._search_optimization_lower._mode}') "
"must match the mode set in TuneSearchCV"
f" ('{self.mode}')")
elif self._is_param_distributions_all_tune_domains():
run_args["config"].update(self.param_distributions)
override_search_space = False
search_kwargs = self.search_kwargs or {}
search_kwargs = search_kwargs.copy()
if override_search_space:
search_kwargs["metric"] = run_args.pop("metric")
search_kwargs["mode"] = run_args.pop("mode")
if run_args["scheduler"]:
if hasattr(run_args["scheduler"], "_metric") and hasattr(
run_args["scheduler"], "_mode"):
run_args["scheduler"]._metric = search_kwargs["metric"]
run_args["scheduler"]._mode = search_kwargs["mode"]
else:
warnings.warn(
"Could not set `_metric` and `_mode` attributes "
f"on Scheduler {run_args['scheduler']}. "
"This may cause an exception later! "
"Ensure your Scheduler initializes with those "
"attributes.", UserWarning)
if self._search_optimization_lower == "bayesian":
if override_search_space:
search_space = self.param_distributions
search_algo = SkOptSearch(space=search_space, **search_kwargs)
run_args["search_alg"] = search_algo
elif self._search_optimization_lower == "bohb":
if override_search_space:
search_space = self._get_bohb_config_space()
search_algo = TuneBOHB(
space=search_space, seed=self.seed, **search_kwargs)
run_args["search_alg"] = search_algo
elif self._search_optimization_lower == "optuna":
from optuna.samplers import TPESampler
if "sampler" not in search_kwargs:
search_kwargs["sampler"] = TPESampler(seed=self.seed)
                elif self.seed:
                    warnings.warn(
                        "Ignoring 'seed', as a custom 'sampler' was "
                        "passed in 'search_kwargs'.")
if override_search_space:
search_space = self._get_optuna_params()
search_algo = OptunaSearch(space=search_space, **search_kwargs)
run_args["search_alg"] = search_algo
elif self._search_optimization_lower == "hyperopt":
if override_search_space:
search_space = self._get_hyperopt_params()
search_algo = HyperOptSearch(
space=search_space,
random_state_seed=self.seed,
**search_kwargs)
run_args["search_alg"] = search_algo
elif isinstance(self._search_optimization_lower, Searcher):
search_algo = self._search_optimization_lower
run_args["search_alg"] = search_algo
else:
# This should not happen as we validate the input before
# this method. Still, just to be sure, raise an error here.
raise ValueError("Invalid search optimizer: "
f"{self._search_optimization_lower}")
if isinstance(self.n_jobs, int) and self.n_jobs > 0 \
and not self._searcher_name == "random":
search_algo = ConcurrencyLimiter(
search_algo, max_concurrent=self.n_jobs)
run_args["search_alg"] = search_algo
run_args = self._override_run_args_with_tune_params(
run_args, tune_params)
trainable = tune.with_parameters(
trainable,
X=X,
y=y,
estimator_list=estimator_list,
fit_params=fit_params)
with warnings.catch_warnings():
warnings.filterwarnings(
"ignore", message="fail_fast='raise' "
"detected.")
analysis = tune.run(trainable, **run_args)
return analysis