From 303f60b628d708b505af4895574345a7564b6c95 Mon Sep 17 00:00:00 2001 From: Xbc-gressor <2765024335@qq.com> Date: Tue, 27 Feb 2024 09:51:09 +0800 Subject: [PATCH] openbox experiments --- test/reproduction/__init__.py | 0 .../mo/benchmark_mo_openbox_math.py | 148 ++++++ test/reproduction/mo/mo_benchmark_function.py | 273 ++++++++++ .../moc/benchmark_moc_openbox_math.py | 157 ++++++ .../moc/moc_benchmark_function.py | 246 +++++++++ .../so/benchmark_so_openbox_math.py | 119 +++++ test/reproduction/so/so_benchmark_function.py | 473 ++++++++++++++++++ .../soc/benchmark_soc_openbox_math.py | 126 +++++ .../soc/soc_benchmark_function.py | 228 +++++++++ test/reproduction/test_utils.py | 133 +++++ 10 files changed, 1903 insertions(+) create mode 100644 test/reproduction/__init__.py create mode 100644 test/reproduction/mo/benchmark_mo_openbox_math.py create mode 100644 test/reproduction/mo/mo_benchmark_function.py create mode 100644 test/reproduction/moc/benchmark_moc_openbox_math.py create mode 100644 test/reproduction/moc/moc_benchmark_function.py create mode 100644 test/reproduction/so/benchmark_so_openbox_math.py create mode 100644 test/reproduction/so/so_benchmark_function.py create mode 100644 test/reproduction/soc/benchmark_soc_openbox_math.py create mode 100644 test/reproduction/soc/soc_benchmark_function.py create mode 100644 test/reproduction/test_utils.py diff --git a/test/reproduction/__init__.py b/test/reproduction/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/test/reproduction/mo/benchmark_mo_openbox_math.py b/test/reproduction/mo/benchmark_mo_openbox_math.py new file mode 100644 index 00000000..f67346a9 --- /dev/null +++ b/test/reproduction/mo/benchmark_mo_openbox_math.py @@ -0,0 +1,148 @@ +""" +example cmdline: + +python test/reproduction/mo/benchmark_mo_openbox_math.py --problem zdt2-3 --n 200 --init_strategy sobol --rep 1 --start_id 0 + +""" +import os +NUM_THREADS = "2" +os.environ["OMP_NUM_THREADS"] = NUM_THREADS # export OMP_NUM_THREADS=1 +os.environ["OPENBLAS_NUM_THREADS"] = NUM_THREADS # export OPENBLAS_NUM_THREADS=1 +os.environ["MKL_NUM_THREADS"] = NUM_THREADS # export MKL_NUM_THREADS=1 +os.environ["VECLIB_MAXIMUM_THREADS"] = NUM_THREADS # export VECLIB_MAXIMUM_THREADS=1 +os.environ["NUMEXPR_NUM_THREADS"] = NUM_THREADS # export NUMEXPR_NUM_THREADS=1 + +import sys +import time +import numpy as np +import argparse +import pickle as pkl + +sys.path.insert(0, os.getcwd()) +from mo_benchmark_function import get_problem, plot_pf +from openbox import Optimizer +from openbox.utils.multi_objective import Hypervolume +from test.reproduction.test_utils import timeit, seeds + +parser = argparse.ArgumentParser() +parser.add_argument('--problem', type=str) +parser.add_argument('--n', type=int, default=100) +parser.add_argument('--init', type=int, default=0) +parser.add_argument('--init_strategy', type=str, default='sobol', choices=['sobol', 'latin_hypercube']) +parser.add_argument('--surrogate', type=str, default='auto', choices=['auto', 'gp', 'prf']) +parser.add_argument('--acq_type', type=str, default='auto', choices=['auto', 'ehvi', 'usemo', 'mesmo']) +parser.add_argument('--optimizer', type=str, default='auto', choices=['auto', 'scipy', 'local']) +parser.add_argument('--rep', type=int, default=1) +parser.add_argument('--start_id', type=int, default=0) +parser.add_argument('--plot_mode', type=int, default=0) + +args = parser.parse_args() +problem_str = args.problem +max_runs = args.n +initial_runs = args.init +init_strategy = args.init_strategy +surrogate_type = 
args.surrogate +acq_type = args.acq_type +if args.optimizer == 'scipy': + acq_optimizer_type = 'random_scipy' +elif args.optimizer == 'local': + acq_optimizer_type = 'local_random' +elif args.optimizer == 'auto': + acq_optimizer_type = 'auto' +else: + raise ValueError('Unknown optimizer %s' % args.optimizer) +if acq_type in ['usemo', 'mesmo']: + acq_optimizer_type = None +rep = args.rep +start_id = args.start_id +plot_mode = args.plot_mode +if acq_type == 'ehvi': + mth = 'openbox' +else: + mth = 'openbox-%s' % acq_type + +problem = get_problem(problem_str) +if initial_runs == 0: + initial_runs = 2 * (problem.dim + 1) +cs = problem.get_configspace(optimizer='smac') +# max_runtime_per_trial = 600 +task_id = '%s_%s_%s' % (mth, acq_type, problem_str) + + +def evaluate(mth, run_i, seed): + print(mth, run_i, seed, '===== start =====', flush=True) + + def objective_function(config): + res = problem.evaluate_config(config) + # res['config'] = config + return res + + bo = Optimizer( + objective_function, + cs, + num_objectives=problem.num_objectives, + num_constraints=0, + surrogate_type=surrogate_type, # default: auto: gp + acq_type=acq_type, # default: auto: ehvi + acq_optimizer_type=acq_optimizer_type, # default: auto: random_scipy + initial_runs=initial_runs, # default: 2 * (problem.dim + 1) + init_strategy=init_strategy, # default: sobol + max_runs=max_runs, + ref_point=problem.ref_point, task_id=task_id, random_state=seed, + ) + + # bo.run() + hv_diffs = [] + time_list = [] + global_start_time = time.time() + for i in range(max_runs): + observation = bo.iterate(bo.time_left) + config, trial_state, objectives = observation.config, observation.trial_state, observation.objectives + global_time = time.time() - global_start_time + bo.time_left -= global_time + print(seed, i, objectives, config, trial_state, 'time=', global_time) + hv = Hypervolume(problem.ref_point).compute(bo.get_history().get_pareto_front()) + hv_diff = problem.max_hv - hv + print(seed, i, 'hypervolume =', hv) + print(seed, i, 'hv diff =', hv_diff) + hv_diffs.append(hv_diff) + time_list.append(global_time) + config_list = bo.get_history().configurations + perf_list = bo.get_history().get_objectives(transform='none') + pf = np.asarray(bo.get_history().get_pareto_front()) + + # plot for debugging + if plot_mode == 1: + Y_init = None + plot_pf(problem, problem_str, mth, pf, Y_init) + + history = bo.get_history() + + return hv_diffs, pf, config_list, perf_list, time_list, history + +if __name__ == '__main__': + with timeit('%s all' % (mth,)): + for run_i in range(start_id, start_id + rep): + seed = seeds[run_i] + with timeit('%s %d %d' % (mth, run_i, seed)): + # Evaluate + hv_diffs, pf, config_list, perf_list, time_list, history = evaluate(mth, run_i, seed) + + # Save result + print('=' * 20) + print(seed, mth, config_list, perf_list, time_list, hv_diffs) + print(seed, mth, 'best hv_diff:', hv_diffs[-1]) + print(seed, mth, 'max_hv:', problem.max_hv) + if pf is not None: + print(seed, mth, 'pareto num:', pf.shape[0]) + + timestamp = time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime(time.time())) + dir_path = 'logs/mo_benchmark_%s_%d/%s/' % (problem_str, max_runs, mth) + file = 'benchmark_%s_%04d_%s.pkl' % (mth, seed, timestamp) + os.makedirs(dir_path, exist_ok=True) + with open(os.path.join(dir_path, file), 'wb') as f: + save_item = (hv_diffs, pf, config_list, perf_list, time_list) + pkl.dump(save_item, f) + print(dir_path, file, 'saved!', flush=True) + + history.save_json(os.path.join(dir_path, 'benchmark_%s_%04d_%s.json' % (mth, 
seed, timestamp)))
diff --git a/test/reproduction/mo/mo_benchmark_function.py b/test/reproduction/mo/mo_benchmark_function.py
new file mode 100644
index 00000000..71c99515
--- /dev/null
+++ b/test/reproduction/mo/mo_benchmark_function.py
@@ -0,0 +1,273 @@
+import numpy as np
+
+from ConfigSpace import ConfigurationSpace, UniformFloatHyperparameter, UniformIntegerHyperparameter, \
+    Constant, CategoricalHyperparameter, InCondition, EqualsCondition, UnParametrizedHyperparameter, \
+    ForbiddenEqualsClause, ForbiddenInClause, ForbiddenAndConjunction
+
+from openbox.benchmark.objective_functions.synthetic import DTLZ1, DTLZ2, BraninCurrin, VehicleSafety, ZDT1, ZDT2, ZDT3
+
+
+def get_problem(problem_str, **kwargs):
+    problem = None
+    if problem_str.startswith('dtlz1'):
+        params = problem_str.split('-')
+        assert params[0] == 'dtlz1'
+        if len(params) == 1:
+            return dtlz1(dim=5, num_objectives=4)
+        elif len(params) == 3:
+            return dtlz1(dim=int(params[1]), num_objectives=int(params[2]))
+    elif problem_str.startswith('dtlz2'):
+        params = problem_str.split('-')
+        assert params[0] == 'dtlz2'
+        if len(params) == 1:
+            return dtlz2(dim=12, num_objectives=2)
+        elif len(params) == 3:
+            return dtlz2(dim=int(params[1]), num_objectives=int(params[2]))
+    elif problem_str == 'branincurrin':
+        problem = branincurrin
+    elif problem_str == 'vehiclesafety':
+        problem = vehiclesafety
+    elif problem_str.startswith('zdt'):
+        params = problem_str.split('-')
+        assert params[0] in ('zdt1', 'zdt2', 'zdt3')
+        if len(params) == 1:
+            return zdt(problem_str=params[0], dim=3)
+        else:
+            return zdt(problem_str=params[0], dim=int(params[1]))
+    if problem is None:
+        raise ValueError('Unknown problem_str %s.' % problem_str)
+    return problem(**kwargs)
+
+
+def plot_pf(problem, problem_str, mth, pf, Y_init=None):
+    import matplotlib.pyplot as plt
+    assert problem.num_objectives in (2, 3)
+    if problem.num_objectives == 2:
+        plt.scatter(pf[:, 0], pf[:, 1], label=mth)
+        if Y_init is not None:
+            plt.scatter(Y_init[:, 0], Y_init[:, 1], label='init', marker='x')
+        plt.xlabel('Objective 1')
+        plt.ylabel('Objective 2')
+    elif problem.num_objectives == 3:
+        ax = plt.axes(projection='3d')
+        ax.scatter3D(pf[:, 0], pf[:, 1], pf[:, 2], label=mth)
+        if Y_init is not None:
+            ax.scatter3D(Y_init[:, 0], Y_init[:, 1], Y_init[:, 2], label='init', marker='x')
+        ax.set_xlabel('Objective 1')
+        ax.set_ylabel('Objective 2')
+        ax.set_zlabel('Objective 3')
+    else:
+        raise ValueError('Cannot plot_pf with problem.num_objectives == %d.'
% (problem.num_objectives,)) + plt.title('Pareto Front of %s' % (problem_str,)) + plt.legend() + plt.show() + + +class BaseMultiObjectiveProblem: + def __init__(self, dim, num_objectives, problem=None, **kwargs): + self.dim = dim + self.num_objectives = num_objectives + if problem is not None: + self.problem = problem + self.ref_point = problem.ref_point + try: + self.max_hv = problem.max_hv + except NotImplementedError: + self.max_hv = 0.0 + + def evaluate_config(self, config, optimizer='smac'): + raise NotImplementedError + + def evaluate(self, X: np.ndarray): + raise NotImplementedError + + @staticmethod + def get_config_dict(config, optimizer='smac'): + if optimizer == 'smac': + config_dict = config.get_dictionary().copy() + elif optimizer == 'tpe': + config_dict = config + else: + raise ValueError('Unknown optimizer %s' % optimizer) + return config_dict + + @staticmethod + def checkX(X: np.ndarray): + X = np.atleast_2d(X) + assert len(X.shape) == 2 and X.shape[0] == 1 + X = X.flatten() + return X + + def get_configspace(self, optimizer='smac'): + raise NotImplementedError + + +class dtlz1(BaseMultiObjectiveProblem): + + def __init__(self, dim, num_objectives, **kwargs): + problem = DTLZ1(dim=dim, num_objectives=num_objectives) + super().__init__(dim=dim, num_objectives=num_objectives, problem=problem, **kwargs) + self.lb = 0 + self.ub = 1 + self.bounds = [(self.lb, self.ub)] * self.dim + + def evaluate_config(self, config, optimizer='smac'): + config_dict = self.get_config_dict(config, optimizer) + x_list = [config_dict['x%d' % i] for i in range(1, self.dim+1)] + X = np.array(x_list) + return self.evaluate(X) + + def evaluate(self, X: np.ndarray): + X = self.checkX(X) + return self.problem._evaluate(X) # dict + + def get_configspace(self, optimizer='smac'): + if optimizer == 'smac': + return self.problem.config_space + elif optimizer == 'gpflowopt': + import gpflowopt + domain = np.sum([ + gpflowopt.domain.ContinuousParameter('x%d' % i, self.lb, self.ub) for i in range(1, self.dim+1) + ]) + return domain + else: + raise ValueError('Unknown optimizer %s when getting configspace' % optimizer) + + +class dtlz2(BaseMultiObjectiveProblem): + + def __init__(self, dim, num_objectives, **kwargs): + problem = DTLZ2(dim=dim, num_objectives=num_objectives) + super().__init__(dim=dim, num_objectives=num_objectives, problem=problem, **kwargs) + self.lb = 0 + self.ub = 1 + self.bounds = [(self.lb, self.ub)] * self.dim + + def evaluate_config(self, config, optimizer='smac'): + config_dict = self.get_config_dict(config, optimizer) + x_list = [config_dict['x%d' % i] for i in range(1, self.dim+1)] + X = np.array(x_list) + return self.evaluate(X) + + def evaluate(self, X: np.ndarray): + X = self.checkX(X) + return self.problem._evaluate(X) # dict + + def get_configspace(self, optimizer='smac'): + if optimizer == 'smac': + return self.problem.config_space + elif optimizer == 'gpflowopt': + import gpflowopt + domain = np.sum([ + gpflowopt.domain.ContinuousParameter('x%d' % i, self.lb, self.ub) for i in range(1, self.dim+1) + ]) + return domain + else: + raise ValueError('Unknown optimizer %s when getting configspace' % optimizer) + + +class branincurrin(BaseMultiObjectiveProblem): + + def __init__(self, **kwargs): + problem = BraninCurrin() + super().__init__(dim=2, num_objectives=2, problem=problem, **kwargs) + self.lb = 1e-10 # fix numeric problem + self.ub = 1 + self.bounds = [(self.lb, self.ub)] * self.dim + + def evaluate_config(self, config, optimizer='smac'): + config_dict = 
self.get_config_dict(config, optimizer) + x_list = [config_dict['x%d' % i] for i in range(1, self.dim+1)] + X = np.array(x_list) + return self.evaluate(X) + + def evaluate(self, X: np.ndarray): + X = self.checkX(X) + return self.problem._evaluate(X) # dict + + def get_configspace(self, optimizer='smac'): + if optimizer == 'smac': + cs = ConfigurationSpace() + cs.add_hyperparameters( + [UniformFloatHyperparameter("x%s" % i, self.lb, self.ub) for i in range(1, self.dim+1)]) + return cs + elif optimizer == 'gpflowopt': + import gpflowopt + domain = np.sum([ + gpflowopt.domain.ContinuousParameter('x%d' % i, self.lb, self.ub) for i in range(1, self.dim+1) + ]) + return domain + else: + raise ValueError('Unknown optimizer %s when getting configspace' % optimizer) + + +class vehiclesafety(BaseMultiObjectiveProblem): + + def __init__(self, **kwargs): + problem = VehicleSafety() + super().__init__(dim=5, num_objectives=3, problem=problem, **kwargs) + self.lb = 1 + self.ub = 3 + self.bounds = [(self.lb, self.ub)] * self.dim + + def evaluate_config(self, config, optimizer='smac'): + config_dict = self.get_config_dict(config, optimizer) + x_list = [config_dict['x%d' % i] for i in range(1, self.dim+1)] + X = np.array(x_list) + return self.evaluate(X) + + def evaluate(self, X: np.ndarray): + X = self.checkX(X) + return self.problem._evaluate(X) # dict + + def get_configspace(self, optimizer='smac'): + if optimizer == 'smac': + return self.problem.config_space + elif optimizer == 'gpflowopt': + import gpflowopt + domain = np.sum([ + gpflowopt.domain.ContinuousParameter('x%d' % i, self.lb, self.ub) for i in range(1, self.dim+1) + ]) + return domain + else: + raise ValueError('Unknown optimizer %s when getting configspace' % optimizer) + + +class zdt(BaseMultiObjectiveProblem): + + def __init__(self, problem_str, dim, **kwargs): + if problem_str == 'zdt1': + problem = ZDT1 + elif problem_str == 'zdt2': + problem = ZDT2 + elif problem_str == 'zdt3': + problem = ZDT3 + else: + raise ValueError + problem = problem(dim=dim) + super().__init__(dim=dim, num_objectives=2, problem=problem, **kwargs) + self.lb = 0 + self.ub = 1 + self.bounds = [(self.lb, self.ub)] * self.dim + + def evaluate_config(self, config, optimizer='smac'): + config_dict = self.get_config_dict(config, optimizer) + x_list = [config_dict['x%d' % i] for i in range(1, self.dim+1)] + X = np.array(x_list) + return self.evaluate(X) + + def evaluate(self, X: np.ndarray): + X = self.checkX(X) + return self.problem._evaluate(X) # dict + + def get_configspace(self, optimizer='smac'): + if optimizer == 'smac': + return self.problem.config_space + elif optimizer == 'gpflowopt': + import gpflowopt + domain = np.sum([ + gpflowopt.domain.ContinuousParameter('x%d' % i, self.lb, self.ub) for i in range(1, self.dim+1) + ]) + return domain + else: + raise ValueError('Unknown optimizer %s when getting configspace' % optimizer) diff --git a/test/reproduction/moc/benchmark_moc_openbox_math.py b/test/reproduction/moc/benchmark_moc_openbox_math.py new file mode 100644 index 00000000..c2b96397 --- /dev/null +++ b/test/reproduction/moc/benchmark_moc_openbox_math.py @@ -0,0 +1,157 @@ +""" +example cmdline: + +python test/reproduction/moc/benchmark_moc_openbox_math.py --problem constr --n 200 --init_strategy sobol --rep 1 --start_id 0 + +""" +import os +NUM_THREADS = "2" +os.environ["OMP_NUM_THREADS"] = NUM_THREADS # export OMP_NUM_THREADS=1 +os.environ["OPENBLAS_NUM_THREADS"] = NUM_THREADS # export OPENBLAS_NUM_THREADS=1 +os.environ["MKL_NUM_THREADS"] = NUM_THREADS # 
export MKL_NUM_THREADS=1 +os.environ["VECLIB_MAXIMUM_THREADS"] = NUM_THREADS # export VECLIB_MAXIMUM_THREADS=1 +os.environ["NUMEXPR_NUM_THREADS"] = NUM_THREADS # export NUMEXPR_NUM_THREADS=1 + +import sys +import time +import numpy as np +import argparse +import pickle as pkl + +sys.path.insert(0, os.getcwd()) +from moc_benchmark_function import get_problem, plot_pf +from openbox import Optimizer +from openbox.utils.multi_objective import Hypervolume +from test.reproduction.test_utils import timeit, seeds + +parser = argparse.ArgumentParser() +parser.add_argument('--problem', type=str) +parser.add_argument('--n', type=int, default=200) +parser.add_argument('--init', type=int, default=0) +parser.add_argument('--init_strategy', type=str, default='sobol', choices=['sobol', 'latin_hypercube']) +parser.add_argument('--surrogate', type=str, default='auto', choices=['auto', 'gp', 'prf']) +parser.add_argument('--acq_type', type=str, default='auto', choices=['auto', 'ehvic', 'mesmoc', 'mesmoc2']) +parser.add_argument('--optimizer', type=str, default='auto', choices=['auto', 'scipy', 'local']) +parser.add_argument('--rep', type=int, default=1) +parser.add_argument('--start_id', type=int, default=0) +parser.add_argument('--plot_mode', type=int, default=0) + +args = parser.parse_args() +problem_str = args.problem +max_runs = args.n +initial_runs = args.init +init_strategy = args.init_strategy +surrogate_type = args.surrogate +acq_type = args.acq_type +if args.optimizer == 'scipy': + acq_optimizer_type = 'random_scipy' +elif args.optimizer == 'local': + acq_optimizer_type = 'local_random' +elif args.optimizer == 'auto': + acq_optimizer_type = 'auto' +else: + raise ValueError('Unknown optimizer %s' % args.optimizer) +if acq_type in ['mesmoc', 'mesmoc2']: + surrogate_type = None + acq_optimizer_type = None +rep = args.rep +start_id = args.start_id +plot_mode = args.plot_mode +if acq_type == 'ehvic': + mth = 'openbox' +else: + mth = 'openbox-%s' % acq_type + +problem = get_problem(problem_str) +if initial_runs == 0: + initial_runs = 2 * (problem.dim + 1) +cs = problem.get_configspace(optimizer='smac') +task_id = '%s_%s_%s' % (mth, acq_type, problem_str) + + +def evaluate(mth, run_i, seed): + print(mth, run_i, seed, '===== start =====', flush=True) + + def objective_function(config): + res = problem.evaluate_config(config) + # res['config'] = config + res['objectives'] = np.asarray(res['objectives']).tolist() + res['constraints'] = np.asarray(res['constraints']).tolist() + return res + + bo = Optimizer( + objective_function, + cs, + num_objectives=problem.num_objectives, + num_constraints=problem.num_constraints, + surrogate_type=surrogate_type, # default: auto: gp + acq_type=acq_type, # default: auto: ehvic + acq_optimizer_type=acq_optimizer_type, # default: auto: random_scipy + initial_runs=initial_runs, # default: 2 * (problem.dim + 1) + init_strategy=init_strategy, # default: sobol + max_runs=max_runs, + ref_point=problem.ref_point, task_id=task_id, random_state=seed, + ) + + # bo.run() + hv_diffs = [] + config_list = [] + perf_list = [] + time_list = [] + global_start_time = time.time() + for i in range(max_runs): + observation = bo.iterate(bo.time_left) + config, trial_state, constraints, origin_objectives = observation.config, observation.trial_state, observation.constraints, observation.objectives + global_time = time.time() - global_start_time + bo.time_left -= global_time + if any(c > 0 for c in constraints): + objectives = [9999999.0] * problem.num_objectives + else: + objectives = 
origin_objectives + print(seed, i, origin_objectives, objectives, constraints, config, trial_state, 'time=', global_time) + config_list.append(config) + perf_list.append(objectives) + time_list.append(global_time) + hv = Hypervolume(problem.ref_point).compute(perf_list) + hv_diff = problem.max_hv - hv + hv_diffs.append(hv_diff) + print(seed, i, 'hypervolume =', hv) + print(seed, i, 'hv diff =', hv_diff) + pf = np.asarray(bo.get_history().get_pareto_front()) + + # plot for debugging + if plot_mode == 1: + Y_init = None + plot_pf(problem, problem_str, mth, pf, Y_init) + + history = bo.get_history() + + return hv_diffs, pf, config_list, perf_list, time_list, history + + +if __name__ == '__main__': + with timeit('%s all' % (mth,)): + for run_i in range(start_id, start_id + rep): + seed = seeds[run_i] + with timeit('%s %d %d' % (mth, run_i, seed)): + # Evaluate + hv_diffs, pf, config_list, perf_list, time_list, history = evaluate(mth, run_i, seed) + + # Save result + print('=' * 20) + print(seed, mth, config_list, perf_list, time_list, hv_diffs) + print(seed, mth, 'best hv_diff:', hv_diffs[-1]) + print(seed, mth, 'max_hv:', problem.max_hv) + if pf is not None: + print(seed, mth, 'pareto num:', pf.shape[0]) + + timestamp = time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime(time.time())) + dir_path = 'logs/moc_benchmark_%s_%d/%s/' % (problem_str, max_runs, mth) + file = 'benchmark_%s_%04d_%s.pkl' % (mth, seed, timestamp) + os.makedirs(dir_path, exist_ok=True) + with open(os.path.join(dir_path, file), 'wb') as f: + save_item = (hv_diffs, pf, config_list, perf_list, time_list) + pkl.dump(save_item, f) + print(dir_path, file, 'saved!', flush=True) + + history.save_json(os.path.join(dir_path, 'benchmark_%s_%04d_%s.json' % (mth, seed, timestamp))) diff --git a/test/reproduction/moc/moc_benchmark_function.py b/test/reproduction/moc/moc_benchmark_function.py new file mode 100644 index 00000000..bc1aae0e --- /dev/null +++ b/test/reproduction/moc/moc_benchmark_function.py @@ -0,0 +1,246 @@ +import numpy as np + +# from openbox.utils.config_space import ConfigurationSpace, UniformFloatHyperparameter, UniformIntegerHyperparameter, Constant +from ConfigSpace import ConfigurationSpace, UniformFloatHyperparameter, UniformIntegerHyperparameter, \ + Constant, CategoricalHyperparameter, InCondition, EqualsCondition, UnParametrizedHyperparameter, \ + ForbiddenEqualsClause, ForbiddenInClause, ForbiddenAndConjunction + +from openbox.benchmark.objective_functions.synthetic import DTLZ2, BraninCurrin, BNH, SRN, CONSTR + + +def get_problem(problem_str, **kwargs): + problem = None + if problem_str.startswith('c2dtlz2'): + params = problem_str.split('-') + assert params[0] == 'c2dtlz2' + if len(params) == 1: + return c2dtlz2(dim=3, num_objectives=2) + elif len(params) == 3: + return c2dtlz2(dim=int(params[1]), num_objectives=int(params[2])) + elif problem_str == 'cbranincurrin': + problem = cbranincurrin + elif problem_str == 'bnh': + problem = bnh + elif problem_str == 'srn': + problem = srn + elif problem_str == 'constr': + problem = constr + if problem is None: + raise ValueError('Unknown problem_str %s.' 
% problem_str)
+    return problem(**kwargs)
+
+
+def plot_pf(problem, problem_str, mth, pf, Y_init=None):
+    import matplotlib.pyplot as plt
+    assert problem.num_objectives in (2, 3)
+    if problem.num_objectives == 2:
+        plt.scatter(pf[:, 0], pf[:, 1], label=mth)
+        if Y_init is not None:
+            plt.scatter(Y_init[:, 0], Y_init[:, 1], label='init', marker='x')
+        plt.xlabel('Objective 1')
+        plt.ylabel('Objective 2')
+    elif problem.num_objectives == 3:
+        ax = plt.axes(projection='3d')
+        ax.scatter3D(pf[:, 0], pf[:, 1], pf[:, 2], label=mth)
+        if Y_init is not None:
+            ax.scatter3D(Y_init[:, 0], Y_init[:, 1], Y_init[:, 2], label='init', marker='x')
+        ax.set_xlabel('Objective 1')
+        ax.set_ylabel('Objective 2')
+        ax.set_zlabel('Objective 3')
+    else:
+        raise ValueError('Cannot plot_pf with problem.num_objectives == %d.' % (problem.num_objectives,))
+    plt.title('Pareto Front of %s' % (problem_str,))
+    plt.legend()
+    plt.show()
+
+
+class BaseConstrainedMultiObjectiveProblem:
+    def __init__(self, dim, num_objectives, num_constraints, problem=None, **kwargs):
+        self.dim = dim
+        self.num_objectives = num_objectives
+        self.num_constraints = num_constraints
+        if problem is not None:
+            self.problem = problem
+            self.ref_point = problem.ref_point
+            try:
+                self.max_hv = problem.max_hv
+            except NotImplementedError:
+                self.max_hv = 0.0
+
+    def evaluate_config(self, config, optimizer='smac'):
+        raise NotImplementedError
+
+    def evaluate(self, X: np.ndarray):
+        raise NotImplementedError
+
+    @staticmethod
+    def get_config_dict(config, optimizer='smac'):
+        if optimizer == 'smac':
+            config_dict = config.get_dictionary().copy()
+        elif optimizer in ['tpe', 'hypermapper', 'optuna']:
+            config_dict = config
+        else:
+            raise ValueError('Unknown optimizer %s' % optimizer)
+        return config_dict
+
+    @staticmethod
+    def checkX(X: np.ndarray):
+        X = np.atleast_2d(X)
+        assert len(X.shape) == 2 and X.shape[0] == 1
+        X = X.flatten()
+        return X
+
+    def get_configspace(self, optimizer='smac'):
+        raise NotImplementedError
+
+
+class c2dtlz2(BaseConstrainedMultiObjectiveProblem):
+
+    def __init__(self, dim, num_objectives, **kwargs):
+        problem = DTLZ2(dim=dim, num_objectives=num_objectives, constrained=True)
+        super().__init__(dim=dim, num_objectives=num_objectives, num_constraints=1, problem=problem, **kwargs)
+        self.lb = 0
+        self.ub = 1
+        self.bounds = [(self.lb, self.ub)] * self.dim
+
+    def evaluate_config(self, config, optimizer='smac'):
+        config_dict = self.get_config_dict(config, optimizer)
+        x_list = [config_dict['x%d' % i] for i in range(1, self.dim+1)]
+        X = np.array(x_list)
+        return self.evaluate(X)
+
+    def evaluate(self, X: np.ndarray):
+        X = self.checkX(X)
+        return self.problem._evaluate(X)  # dict
+
+    def get_configspace(self, optimizer='smac'):
+        if optimizer == 'smac':
+            return self.problem.config_space
+        else:
+            raise ValueError('Unknown optimizer %s when getting configspace' % optimizer)
+
+
+class cbranincurrin(BaseConstrainedMultiObjectiveProblem):
+
+    def __init__(self, **kwargs):
+        problem = BraninCurrin(constrained=True)
+        super().__init__(dim=2, num_objectives=2, num_constraints=1, problem=problem, **kwargs)
+        self.lb = 1e-10  # fix numeric problem
+        self.ub = 1
+        self.bounds = [(self.lb, self.ub)] * self.dim
+
+    def evaluate_config(self, config, optimizer='smac'):
+        config_dict = self.get_config_dict(config, optimizer)
+        x_list = [config_dict['x%d' % i] for i in range(1, self.dim+1)]
+        X = np.array(x_list)
+        return self.evaluate(X)
+
+    def evaluate(self, X: np.ndarray):
+        X = self.checkX(X)
+        return
self.problem._evaluate(X) # dict + + def get_configspace(self, optimizer='smac'): + if optimizer == 'smac': + cs = ConfigurationSpace() + cs.add_hyperparameters( + [UniformFloatHyperparameter("x%s" % i, self.lb, self.ub) for i in range(1, self.dim+1)]) + return cs + else: + raise ValueError('Unknown optimizer %s when getting configspace' % optimizer) + + +class bnh(BaseConstrainedMultiObjectiveProblem): + + def __init__(self, **kwargs): + problem = BNH() + super().__init__(dim=2, num_objectives=2, num_constraints=2, problem=problem, **kwargs) + self.bounds = [(0.0, 5.0), (0.0, 3.0)] + self.new_max_hv = 7242.068539049498 # this is approximated using NSGA-II + + def evaluate_config(self, config, optimizer='smac'): + config_dict = self.get_config_dict(config, optimizer) + x_list = [config_dict['x%d' % i] for i in range(1, self.dim+1)] + X = np.array(x_list) + return self.evaluate(X) + + def evaluate(self, X: np.ndarray): + X = self.checkX(X) + return self.problem._evaluate(X) # dict + + def get_configspace(self, optimizer='smac'): + if optimizer == 'smac': + return self.problem.config_space + else: + raise ValueError('Unknown optimizer %s when getting configspace' % optimizer) + + +class srn(BaseConstrainedMultiObjectiveProblem): + + def __init__(self, **kwargs): + problem = SRN() + super().__init__(dim=2, num_objectives=2, num_constraints=2, problem=problem, **kwargs) + self.lb = -20.0 + self.ub = 20.0 + self.bounds = [(self.lb, self.ub)] * self.dim + self.new_max_hv = 34229.434882104855 # this is approximated using NSGA-II + + def evaluate_config(self, config, optimizer='smac'): + config_dict = self.get_config_dict(config, optimizer) + x_list = [config_dict['x%d' % i] for i in range(1, self.dim+1)] + X = np.array(x_list) + return self.evaluate(X) + + def evaluate(self, X: np.ndarray): + X = self.checkX(X) + return self.problem._evaluate(X) # dict + + def get_configspace(self, optimizer='smac'): + if optimizer == 'smac': + return self.problem.config_space + elif optimizer == 'hypermapper': + input_parameters = { + 'x%d' % (i+1): { + "parameter_type": "real", + "values": [self.lb, self.ub] + } for i in range(self.dim) + } + return input_parameters + else: + raise ValueError('Unknown optimizer %s when getting configspace' % optimizer) + + +class constr(BaseConstrainedMultiObjectiveProblem): + + def __init__(self, **kwargs): + problem = CONSTR() + super().__init__(dim=2, num_objectives=2, num_constraints=2, problem=problem, **kwargs) + self.bounds = [(0.1, 10.0), (0.0, 5.0)] + self.new_max_hv = 92.02004226679216 # this is approximated using NSGA-II + + def evaluate_config(self, config, optimizer='smac'): + config_dict = self.get_config_dict(config, optimizer) + x_list = [config_dict['x%d' % i] for i in range(1, self.dim+1)] + X = np.array(x_list) + return self.evaluate(X) + + def evaluate(self, X: np.ndarray): + X = self.checkX(X) + return self.problem._evaluate(X) # dict + + def get_configspace(self, optimizer='smac'): + if optimizer == 'smac': + return self.problem.config_space + elif optimizer == 'hypermapper': + input_parameters = { + 'x1': { + "parameter_type": "real", + "values": [0.1, 10.0] + }, + 'x2': { + "parameter_type": "real", + "values": [0.0, 5.0] + } + } + return input_parameters + else: + raise ValueError('Unknown optimizer %s when getting configspace' % optimizer) diff --git a/test/reproduction/so/benchmark_so_openbox_math.py b/test/reproduction/so/benchmark_so_openbox_math.py new file mode 100644 index 00000000..5d08e65c --- /dev/null +++ 
b/test/reproduction/so/benchmark_so_openbox_math.py @@ -0,0 +1,119 @@ +""" +example cmdline: + +python test/reproduction/so/benchmark_so_openbox_math.py --problem branin --n 200 --init 3 --rep 1 --start_id 0 + +""" +import os +NUM_THREADS = "2" +os.environ["OMP_NUM_THREADS"] = NUM_THREADS # export OMP_NUM_THREADS=1 +os.environ["OPENBLAS_NUM_THREADS"] = NUM_THREADS # export OPENBLAS_NUM_THREADS=1 +os.environ["MKL_NUM_THREADS"] = NUM_THREADS # export MKL_NUM_THREADS=1 +os.environ["VECLIB_MAXIMUM_THREADS"] = NUM_THREADS # export VECLIB_MAXIMUM_THREADS=1 +os.environ["NUMEXPR_NUM_THREADS"] = NUM_THREADS # export NUMEXPR_NUM_THREADS=1 + +import sys +import time +import numpy as np +import argparse +import pickle as pkl + +sys.path.insert(0, os.getcwd()) +from test.reproduction.so.so_benchmark_function import get_problem +from openbox import Optimizer +from test.reproduction.test_utils import timeit, seeds + +parser = argparse.ArgumentParser() +parser.add_argument('--problem', type=str) +parser.add_argument('--n', type=int, default=100) +parser.add_argument('--init', type=int, default=3) +parser.add_argument('--init_strategy', type=str, default='random_explore_first') +parser.add_argument('--surrogate', type=str, default='auto', choices=['auto', 'gp', 'prf']) +parser.add_argument('--optimizer', type=str, default='auto', choices=['auto', 'scipy', 'local']) +parser.add_argument('--rep', type=int, default=1) +parser.add_argument('--start_id', type=int, default=0) + +args = parser.parse_args() +problem_str = args.problem +max_runs = args.n +initial_runs = args.init +init_strategy = args.init_strategy +surrogate_type = args.surrogate +if args.optimizer == 'scipy': + acq_optimizer_type = 'random_scipy' +elif args.optimizer == 'local': + acq_optimizer_type = 'local_random' +elif args.optimizer == 'auto': + acq_optimizer_type = 'auto' +else: + raise ValueError('Unknown optimizer %s' % args.optimizer) +rep = args.rep +start_id = args.start_id +mth = 'openbox' + +problem = get_problem(problem_str) +cs = problem.get_configspace(optimizer='smac') +max_runtime_per_trial = 600 +task_id = '%s_%s' % (mth, problem_str) + + +def evaluate(mth, run_i, seed): + print(mth, run_i, seed, '===== start =====', flush=True) + + def objective_function(config): + y = problem.evaluate_config(config) + res = dict() + # res['config'] = config + res['objectives'] = (y,) + res['constraints'] = None + return res + + bo = Optimizer( + objective_function, + cs, + surrogate_type=surrogate_type, # default: auto: gp + acq_optimizer_type=acq_optimizer_type, # default: auto: random_scipy + initial_runs=initial_runs, # default: 3 + init_strategy=init_strategy, # default: random_explore_first + max_runs=max_runs, task_id=task_id, random_state=seed, + ) + # bo.run() + time_list = [] + global_start_time = time.time() + for i in range(max_runs): + observation = bo.iterate(bo.time_left) + config, trial_state, objectives = observation.config, observation.trial_state, observation.objectives + global_time = time.time() - global_start_time + bo.time_left -= global_time + print(seed, i, objectives, config, trial_state, 'time=', global_time) + time_list.append(global_time) + config_list = bo.get_history().configurations + perf_list = bo.get_history().get_objectives(transform='none') + + history = bo.get_history() + + return config_list, perf_list, time_list, history + +if __name__ == '__main__': + with timeit('%s all' % (mth,)): + for run_i in range(start_id, start_id + rep): + seed = seeds[run_i] + with timeit('%s %d %d' % (mth, run_i, seed)): + # 
Evaluate + config_list, perf_list, time_list, history = evaluate(mth, run_i, seed) + + # Save result + print('=' * 20) + print(seed, mth, config_list, perf_list, time_list) + print(seed, mth, 'best perf', np.min(perf_list)) + + timestamp = time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime(time.time())) + dir_path = 'logs/so_benchmark_%s_%d/%s/' % (problem_str, max_runs, mth) + file = 'benchmark_%s_%04d_%s.pkl' % (mth, seed, timestamp) + os.makedirs(dir_path, exist_ok=True) + with open(os.path.join(dir_path, file), 'wb') as f: + save_item = (config_list, perf_list, time_list) + pkl.dump(save_item, f) + print(dir_path, file, 'saved!', flush=True) + + history.save_json(os.path.join(dir_path, 'benchmark_%s_%04d_%s.json' % (mth, seed, timestamp))) diff --git a/test/reproduction/so/so_benchmark_function.py b/test/reproduction/so/so_benchmark_function.py new file mode 100644 index 00000000..f5c3caf2 --- /dev/null +++ b/test/reproduction/so/so_benchmark_function.py @@ -0,0 +1,473 @@ +import numpy as np + +# from openbox.utils.config_space import ConfigurationSpace, UniformFloatHyperparameter, UniformIntegerHyperparameter, Constant +from ConfigSpace import ConfigurationSpace, UniformFloatHyperparameter, UniformIntegerHyperparameter, \ + Constant, CategoricalHyperparameter, InCondition, EqualsCondition, UnParametrizedHyperparameter, \ + ForbiddenEqualsClause, ForbiddenInClause, ForbiddenAndConjunction + + +def get_problem(problem_str, **kwargs): + # problem_str = problem_str.lower() # dataset name may be uppercase + if problem_str == 'branin': + problem = Branin + elif problem_str.startswith('ackley'): + problem = Ackley + params = problem_str.split('-') + if len(params) == 1: + dim = 2 + elif len(params) == 2: + dim = int(params[1]) + else: + raise ValueError + kwargs['dim'] = dim + elif problem_str == 'beale': + problem = Beale + elif problem_str.startswith('hartmann'): + problem = Hartmann6d + elif 'lgb' in problem_str: + problem = lgb + kwargs['dataset'] = '_'.join(problem_str.split('_')[1:]) + elif 'svc' in problem_str: + problem = svc + kwargs['dataset'] = '_'.join(problem_str.split('_')[1:]) + else: + raise ValueError('Unknown problem_str %s.' 
% problem_str) + return problem(**kwargs) + + +class BaseSingleObjectiveProblem: + def __init__(self, dim, **kwargs): + self.dim = dim + + def evaluate_config(self, config, optimizer='smac'): + raise NotImplementedError + + def evaluate(self, X: np.ndarray): + raise NotImplementedError + + @staticmethod + def get_config_dict(config, optimizer='smac'): + if optimizer == 'smac': + config_dict = config.get_dictionary().copy() + elif optimizer == 'tpe': + config_dict = config + else: + raise ValueError('Unknown optimizer %s' % optimizer) + return config_dict + + @staticmethod + def checkX(X: np.ndarray): + X = np.atleast_2d(X) + assert len(X.shape) == 2 and X.shape[0] == 1 + X = X.flatten() + return X + + def get_configspace(self, optimizer='smac'): + raise NotImplementedError + + def load_data(self, **kwargs): + from test.reproduction.test_utils import load_data + from sklearn.model_selection import train_test_split + dataset = kwargs['dataset'] + try: + data_dir = kwargs.get('data_dir', '../soln-ml/data/cls_datasets/') + x, y = load_data(dataset, data_dir) + except Exception as e: + data_dir = '../../soln-ml/data/cls_datasets/' + x, y = load_data(dataset, data_dir) + self.train_x, self.val_x, self.train_y, self.val_y = train_test_split(x, y, stratify=y, random_state=1, + test_size=0.3) + + +class Ackley(BaseSingleObjectiveProblem): + + optimal_value = 0.0 + + def __init__(self, dim=2, lb=-15, ub=30, **kwargs): + super().__init__(dim=dim, **kwargs) + self.lb = lb + self.ub = ub + self.bounds = [(self.lb, self.ub)] * self.dim + + def evaluate_config(self, config, optimizer='smac'): + config_dict = self.get_config_dict(config, optimizer) + x_list = [config_dict['x%d' % i] for i in range(self.dim)] + X = np.array(x_list) + return self.evaluate(X) + + def evaluate(self, X: np.ndarray): + X = self.checkX(X) + a = 20 + b = 0.2 + c = 2 * np.pi + t1 = -a * np.exp(-b * np.sqrt(np.mean(X ** 2))) + t2 = -np.exp(np.mean(np.cos(c * X))) + t3 = a + np.exp(1) + y = t1 + t2 + t3 + return y + + def get_configspace(self, optimizer='smac'): + if optimizer == 'smac': + cs = ConfigurationSpace() + for i in range(self.dim): + xi = UniformFloatHyperparameter("x%d" % i, self.lb, self.ub) + cs.add_hyperparameter(xi) + return cs + elif optimizer == 'tpe': + from hyperopt import hp + space = {'x%d' % i: hp.uniform('hp_x%d' % i, self.lb, self.ub) for i in range(self.dim)} + return space + elif optimizer == 'gpflowopt': + import gpflowopt + domain = np.sum([ + gpflowopt.domain.ContinuousParameter('x%d' % i, self.lb, self.ub) for i in range(self.dim) + ]) + return domain + else: + raise ValueError('Unknown optimizer %s when getting configspace' % optimizer) + + +class Beale(BaseSingleObjectiveProblem): + + optimal_value = 0.0 + + def __init__(self, lb=-4.5, ub=4.5, **kwargs): + super().__init__(dim=2, **kwargs) + self.lb = lb + self.ub = ub + self.bounds = [(self.lb, self.ub)] * self.dim + + def evaluate_config(self, config, optimizer='smac'): + config_dict = self.get_config_dict(config, optimizer) + x_list = [config_dict['x%d' % i] for i in range(self.dim)] + X = np.array(x_list) + return self.evaluate(X) + + def evaluate(self, X: np.ndarray): + X = self.checkX(X) + x1 = X[0] + x2 = X[1] + part1 = (1.5 - x1 + x1 * x2) ** 2 + part2 = (2.25 - x1 + x1 * x2 ** 2) ** 2 + part3 = (2.625 - x1 + x1 * x2 ** 3) ** 2 + y = part1 + part2 + part3 + return y + + def get_configspace(self, optimizer='smac'): + if optimizer == 'smac': + cs = ConfigurationSpace() + for i in range(self.dim): + xi = UniformFloatHyperparameter("x%d" % i, 
self.lb, self.ub) + cs.add_hyperparameter(xi) + return cs + elif optimizer == 'tpe': + from hyperopt import hp + space = {'x%d' % i: hp.uniform('hp_x%d' % i, self.lb, self.ub) for i in range(self.dim)} + return space + elif optimizer == 'gpflowopt': + import gpflowopt + domain = ( + gpflowopt.domain.ContinuousParameter('x0', self.lb, self.ub) + + gpflowopt.domain.ContinuousParameter('x1', self.lb, self.ub) + ) + return domain + else: + raise ValueError('Unknown optimizer %s when getting configspace' % optimizer) + + +class Branin(BaseSingleObjectiveProblem): + """ + y = (x(2)-(5.1/(4*pi^2))*x(1)^2+5*x(1)/pi-6)^2+10*(1-1/(8*pi))*cos(x(1))+10 + """ + optimal_value = 0.397887 + optimal_point = [(-np.pi, 12.275), (np.pi, 2.275), (9.42478, 2.475)] + + def __init__(self, **kwargs): + super().__init__(dim=2, **kwargs) + self.bounds = [(-5.0, 10.0), (0.0, 15.0)] + + def evaluate_config(self, config, optimizer='smac'): + config_dict = self.get_config_dict(config, optimizer) + x1 = config_dict['x1'] + x2 = config_dict['x2'] + X = np.array([x1, x2]) + return self.evaluate(X) + + def evaluate(self, X: np.ndarray): + X = self.checkX(X) + x1 = X[0] + x2 = X[1] + y = (x2 - (5.1 / (4 * np.pi ** 2)) * x1 ** 2 + 5 * x1 / np.pi - 6) ** 2 + 10 * (1 - 1 / (8 * np.pi)) * np.cos( + x1) + 10 + return y + + def get_configspace(self, optimizer='smac'): + if optimizer == 'smac': + cs = ConfigurationSpace() + x1 = UniformFloatHyperparameter("x1", -5, 10) + x2 = UniformFloatHyperparameter("x2", 0, 15) + cs.add_hyperparameters([x1, x2]) + return cs + elif optimizer == 'tpe': + from hyperopt import hp + space = {'x1': hp.uniform('hp_x1', -5, 10), + 'x2': hp.uniform('hp_x2', 0, 15), + } + return space + elif optimizer == 'gpflowopt': + import gpflowopt + domain = ( + gpflowopt.domain.ContinuousParameter('x1', -5, 10) + + gpflowopt.domain.ContinuousParameter('x2', 0, 15) + ) + return domain + else: + raise ValueError('Unknown optimizer %s when getting configspace' % optimizer) + + +class Hartmann6d(BaseSingleObjectiveProblem): + + optimal_value = -3.32237 + + def __init__(self, **kwargs): + super().__init__(dim=6, **kwargs) + self.bounds = [(0.0, 1.0)] * self.dim + self.a = np.array([ + [10, 3, 17, 3.5, 1.7, 8], + [0.05, 10, 17, 0.1, 8, 14], + [3, 3.5, 1.7, 10, 17, 8], + [17, 8, 0.05, 10, 0.1, 14], + ]) + self.c = np.array([1.0, 1.2, 3.0, 3.2]) + self.p = np.array([ + [0.1312, 0.1696, 0.5569, 0.0124, 0.8283, 0.5886], + [0.2329, 0.4135, 0.8307, 0.3736, 0.1004, 0.9991], + [0.2348, 0.1451, 0.3522, 0.2883, 0.3047, 0.6650], + [0.4047, 0.8828, 0.8732, 0.5743, 0.1091, 0.0381], + ]) + + def evaluate_config(self, config, optimizer='smac'): + config_dict = self.get_config_dict(config, optimizer) + x_list = [config_dict['x%d' % i] for i in range(self.dim)] + X = np.array(x_list) + return self.evaluate(X) + + def evaluate(self, X: np.ndarray): + X = self.checkX(X) + inner_sum = np.sum(self.a * (X - self.p) ** 2, axis=1) + y = -np.sum(self.c * np.exp(-inner_sum)) + return y + + def get_configspace(self, optimizer='smac'): + if optimizer == 'smac': + cs = ConfigurationSpace() + for i in range(self.dim): + xi = UniformFloatHyperparameter("x%d" % i, 0, 1) + cs.add_hyperparameter(xi) + return cs + elif optimizer == 'tpe': + from hyperopt import hp + space = {'x%d' % i: hp.uniform('hp_x%d' % i, 0, 1) for i in range(self.dim)} + return space + elif optimizer == 'gpflowopt': + import gpflowopt + domain = ( + gpflowopt.domain.ContinuousParameter('x0', 0, 1) + + gpflowopt.domain.ContinuousParameter('x1', 0, 1) + + 
gpflowopt.domain.ContinuousParameter('x2', 0, 1) + + gpflowopt.domain.ContinuousParameter('x3', 0, 1) + + gpflowopt.domain.ContinuousParameter('x4', 0, 1) + + gpflowopt.domain.ContinuousParameter('x5', 0, 1) + ) + return domain + else: + raise ValueError('Unknown optimizer %s when getting configspace' % optimizer) + + +class lgb(BaseSingleObjectiveProblem): + def __init__(self, n_jobs=3, **kwargs): + super().__init__(dim=7, **kwargs) + self.n_jobs = n_jobs + self.load_data(**kwargs) + self.bounds = [ + (100, 1000), + (31, 2047), + (15, 16), + (1e-3, 0.3), + (5, 30), + (0.7, 1), + (0.7, 1), + ] + + def evaluate_config(self, config, optimizer='smac'): + config_dict = self.get_config_dict(config, optimizer) + n_estimators = int(config_dict['n_estimators']) + num_leaves = int(config_dict['num_leaves']) + max_depth = int(config_dict['max_depth']) + learning_rate = config_dict['learning_rate'] + min_child_samples = config_dict['min_child_samples'] + subsample = config_dict['subsample'] + colsample_bytree = config_dict['colsample_bytree'] + from lightgbm import LGBMClassifier + from sklearn.metrics.scorer import balanced_accuracy_scorer + lgbc = LGBMClassifier(n_estimators=n_estimators, + num_leaves=num_leaves, + max_depth=max_depth, + learning_rate=learning_rate, + min_child_samples=min_child_samples, + subsample=subsample, + colsample_bytree=colsample_bytree, + n_jobs=self.n_jobs) + lgbc.fit(self.train_x, self.train_y) + return -balanced_accuracy_scorer(lgbc, self.val_x, self.val_y) + + def evaluate(self, x): + x = self.checkX(x) + from lightgbm import LGBMClassifier + from sklearn.metrics.scorer import balanced_accuracy_scorer + lgbc = LGBMClassifier(n_estimators=int(x[0]), + num_leaves=int(x[1]), + max_depth=int(x[2]), + learning_rate=x[3], + min_child_samples=int(x[4]), + subsample=x[5], + colsample_bytree=x[6], + n_jobs=self.n_jobs) + lgbc.fit(self.train_x, self.train_y) + return -balanced_accuracy_scorer(lgbc, self.val_x, self.val_y) + + def get_configspace(self, optimizer='smac'): + if optimizer == 'smac': + cs = ConfigurationSpace() + n_estimators = UniformIntegerHyperparameter("n_estimators", 100, 1000, default_value=500, q=50) + num_leaves = UniformIntegerHyperparameter("num_leaves", 31, 2047, default_value=128) + max_depth = Constant('max_depth', 15) + learning_rate = UniformFloatHyperparameter("learning_rate", 1e-3, 0.3, default_value=0.1, log=True) + min_child_samples = UniformIntegerHyperparameter("min_child_samples", 5, 30, default_value=20) + subsample = UniformFloatHyperparameter("subsample", 0.7, 1, default_value=1, q=0.1) + colsample_bytree = UniformFloatHyperparameter("colsample_bytree", 0.7, 1, default_value=1, q=0.1) + cs.add_hyperparameters([n_estimators, num_leaves, max_depth, learning_rate, min_child_samples, subsample, + colsample_bytree]) + return cs + elif optimizer == 'tpe': + from hyperopt import hp + space = {'n_estimators': (hp.randint('lgb_n_estimators', 19) + 2) * 50, + 'num_leaves': hp.randint('lgb_num_leaves', 2017) + 31, + 'max_depth': 15, + 'learning_rate': hp.loguniform('lgb_learning_rate', np.log(1e-3), np.log(0.3)), + 'min_child_samples': hp.randint('lgb_min_child_samples', 26) + 5, + 'subsample': (hp.randint('lgb_subsample', 4) + 7) * 0.1, + 'colsample_bytree': (hp.randint('lgb_colsample_bytree', 4) + 7) * 0.1, + } + return space + elif optimizer == 'gpflowopt': + from gpflowopt.domain import ContinuousParameter + domain = ( + ContinuousParameter('n_estimators', 100, 1000) + + ContinuousParameter('num_leaves', 31, 2047) + + 
ContinuousParameter('max_depth', 15, 16) + + ContinuousParameter("learning_rate", 1e-3, 0.3) + + ContinuousParameter("min_child_samples", 5, 30) + + ContinuousParameter("subsample", 0.7, 1) + + ContinuousParameter("colsample_bytree", 0.7, 1) + ) + return domain + else: + raise ValueError('Unknown optimizer %s when getting configspace' % optimizer) + + +class svc(BaseSingleObjectiveProblem): + def __init__(self, **kwargs): + super().__init__(dim=8, **kwargs) + self.load_data(**kwargs) + self.bounds = None + + def evaluate_config(self, config, optimizer='smac'): + config_dict = self.get_config_dict(config, optimizer) + penalty = config_dict['penalty'] + loss = config_dict.get('loss', None) + dual = config_dict.get('dual', None) + C = config_dict['C'] + tol = config_dict['tol'] + fit_intercept = config_dict['fit_intercept'] + intercept_scaling = config_dict['intercept_scaling'] + if isinstance(penalty, dict): + combination = penalty + penalty = combination['penalty'] + loss = combination['loss'] + dual = combination['dual'] + + from sklearn.svm import LinearSVC + from sklearn.metrics.scorer import balanced_accuracy_scorer + if dual == 'True': + dual = True + elif dual == 'False': + dual = False + + svcc = LinearSVC(penalty=penalty, + loss=loss, + dual=dual, + tol=tol, + C=C, + fit_intercept=fit_intercept, + intercept_scaling=intercept_scaling, + multi_class='ovr', + random_state=1) + svcc.fit(self.train_x, self.train_y) + return -balanced_accuracy_scorer(svcc, self.val_x, self.val_y) + + def get_configspace(self, optimizer='smac'): + if optimizer == 'smac': + cs = ConfigurationSpace() + + penalty = CategoricalHyperparameter( + "penalty", ["l1", "l2"], default_value="l2") + loss = CategoricalHyperparameter( + "loss", ["hinge", "squared_hinge"], default_value="squared_hinge") + dual = CategoricalHyperparameter("dual", ['True', 'False'], default_value='True') + # This is set ad-hoc + tol = UniformFloatHyperparameter( + "tol", 1e-5, 1e-1, default_value=1e-4, log=True) + C = UniformFloatHyperparameter( + "C", 0.03125, 32768, log=True, default_value=1.0) + multi_class = Constant("multi_class", "ovr") + # These are set ad-hoc + fit_intercept = Constant("fit_intercept", "True") + intercept_scaling = Constant("intercept_scaling", 1) + cs.add_hyperparameters([penalty, loss, dual, tol, C, multi_class, + fit_intercept, intercept_scaling]) + + penalty_and_loss = ForbiddenAndConjunction( + ForbiddenEqualsClause(penalty, "l1"), + ForbiddenEqualsClause(loss, "hinge") + ) + constant_penalty_and_loss = ForbiddenAndConjunction( + ForbiddenEqualsClause(dual, "False"), + ForbiddenEqualsClause(penalty, "l2"), + ForbiddenEqualsClause(loss, "hinge") + ) + penalty_and_dual = ForbiddenAndConjunction( + ForbiddenEqualsClause(dual, "True"), + ForbiddenEqualsClause(penalty, "l1") + ) + cs.add_forbidden_clause(penalty_and_loss) + cs.add_forbidden_clause(constant_penalty_and_loss) + cs.add_forbidden_clause(penalty_and_dual) + return cs + elif optimizer == 'tpe': + from hyperopt import hp + space = {'penalty': hp.choice('liblinear_combination', + [{'penalty': "l1", 'loss': "squared_hinge", 'dual': "False"}, + {'penalty': "l2", 'loss': "hinge", 'dual': "True"}, + {'penalty': "l2", 'loss': "squared_hinge", 'dual': "True"}, + {'penalty': "l2", 'loss': "squared_hinge", 'dual': "False"}]), + 'loss': None, + 'dual': None, + 'tol': hp.loguniform('liblinear_tol', np.log(1e-5), np.log(1e-1)), + 'C': hp.loguniform('liblinear_C', np.log(0.03125), np.log(32768)), + 'multi_class': hp.choice('liblinear_multi_class', ["ovr"]), + 
'fit_intercept': hp.choice('liblinear_fit_intercept', ["True"]), + 'intercept_scaling': hp.choice('liblinear_intercept_scaling', [1])} + return space + else: + raise ValueError('Unknown optimizer %s when getting configspace' % optimizer) diff --git a/test/reproduction/soc/benchmark_soc_openbox_math.py b/test/reproduction/soc/benchmark_soc_openbox_math.py new file mode 100644 index 00000000..f4fbea7f --- /dev/null +++ b/test/reproduction/soc/benchmark_soc_openbox_math.py @@ -0,0 +1,126 @@ +""" +example cmdline: + +python test/reproduction/soc/benchmark_soc_openbox_math.py --problem mishra --n 100 --init 3 --rep 1 --start_id 0 + +""" +import os +NUM_THREADS = "2" +os.environ["OMP_NUM_THREADS"] = NUM_THREADS # export OMP_NUM_THREADS=1 +os.environ["OPENBLAS_NUM_THREADS"] = NUM_THREADS # export OPENBLAS_NUM_THREADS=1 +os.environ["MKL_NUM_THREADS"] = NUM_THREADS # export MKL_NUM_THREADS=1 +os.environ["VECLIB_MAXIMUM_THREADS"] = NUM_THREADS # export VECLIB_MAXIMUM_THREADS=1 +os.environ["NUMEXPR_NUM_THREADS"] = NUM_THREADS # export NUMEXPR_NUM_THREADS=1 + +import sys +import time +import numpy as np +import argparse +import pickle as pkl + +sys.path.insert(0, os.getcwd()) +from test.reproduction.soc.soc_benchmark_function import get_problem +from openbox import Optimizer +from test.reproduction.test_utils import timeit, seeds + +parser = argparse.ArgumentParser() +parser.add_argument('--problem', type=str, default='townsend') +parser.add_argument('--n', type=int, default=200) +parser.add_argument('--nc', type=int, default=1) +parser.add_argument('--init', type=int, default=3) +parser.add_argument('--init_strategy', type=str, default='random_explore_first') +parser.add_argument('--surrogate', type=str, default='auto', choices=['auto', 'gp']) +parser.add_argument('--optimizer', type=str, default='auto', choices=['auto', 'scipy', 'local']) +parser.add_argument('--rep', type=int, default=1) +parser.add_argument('--start_id', type=int, default=0) + +args = parser.parse_args() +problem_str = args.problem +max_runs = args.n +num_constraints = args.nc +initial_runs = args.init +init_strategy = args.init_strategy +surrogate_type = args.surrogate +if args.optimizer == 'scipy': + acq_optimizer_type = 'random_scipy' +elif args.optimizer == 'local': + acq_optimizer_type = 'local_random' +elif args.optimizer == 'auto': + acq_optimizer_type = 'auto' +else: + raise ValueError('Unknown optimizer %s' % args.optimizer) +rep = args.rep +start_id = args.start_id +mth = 'openbox' + +problem = get_problem(problem_str) +cs = problem.get_configspace(optimizer='smac') +max_runtime_per_trial = 600 +task_id = '%s_%s' % (mth, problem_str) + + +def evaluate(mth, run_i, seed): + print(mth, run_i, seed, '===== start =====', flush=True) + + def objective_function(config): + y = problem.evaluate_config(config) + return y + + bo = Optimizer( + objective_function, + cs, + num_constraints=num_constraints, + surrogate_type=surrogate_type, # default: auto: gp + acq_optimizer_type=acq_optimizer_type, # default: auto: random_scipy + initial_runs=initial_runs, # default: 3 + init_strategy=init_strategy, # default: random_explore_first + max_runs=max_runs + initial_runs, task_id=task_id, random_state=seed, + ) + + # bo.run() + config_list = [] + perf_list = [] + time_list = [] + global_start_time = time.time() + for i in range(max_runs): + observation = bo.iterate(bo.time_left) + config, trial_state, constraints, objectives = observation.config, observation.trial_state, observation.constraints, observation.objectives + global_time = 
time.time() - global_start_time + bo.time_left -= global_time + origin_perf = objectives[0] + if any(c > 0 for c in constraints): + perf = 9999999.0 + else: + perf = origin_perf + print(seed, i, perf, config, constraints, trial_state, 'time=', global_time) + config_list.append(config) + perf_list.append(perf) + time_list.append(global_time) + + history = bo.get_history() + + return config_list, perf_list, time_list, history + +if __name__ == '__main__': + with timeit('%s all' % (mth,)): + for run_i in range(start_id, start_id + rep): + seed = seeds[run_i] + with timeit('%s %d %d' % (mth, run_i, seed)): + # Evaluate + config_list, perf_list, time_list, history = evaluate(mth, run_i, seed) + + # Save result + print('=' * 20) + print(seed, mth, config_list, perf_list, time_list) + print(seed, mth, 'best perf', np.min(perf_list)) + + timestamp = time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime(time.time())) + dir_path = 'logs/soc_benchmark_%s_%d/%s/' % (problem_str, max_runs, mth) + file = 'benchmark_%s_%04d_%s.pkl' % (mth, seed, timestamp) + os.makedirs(dir_path, exist_ok=True) + with open(os.path.join(dir_path, file), 'wb') as f: + save_item = (config_list, perf_list, time_list) + pkl.dump(save_item, f) + print(dir_path, file, 'saved!', flush=True) + + history.save_json(os.path.join(dir_path, 'benchmark_%s_%04d_%s.json' % (mth, seed, timestamp))) diff --git a/test/reproduction/soc/soc_benchmark_function.py b/test/reproduction/soc/soc_benchmark_function.py new file mode 100644 index 00000000..6bcf3202 --- /dev/null +++ b/test/reproduction/soc/soc_benchmark_function.py @@ -0,0 +1,228 @@ +import numpy as np + +# from openbox.utils.config_space import ConfigurationSpace, UniformFloatHyperparameter, UniformIntegerHyperparameter, Constant +from ConfigSpace import ConfigurationSpace, UniformFloatHyperparameter, UniformIntegerHyperparameter, \ + Constant, CategoricalHyperparameter, InCondition, EqualsCondition, UnParametrizedHyperparameter, \ + ForbiddenEqualsClause, ForbiddenInClause, ForbiddenAndConjunction + + +def get_problem(problem_str, **kwargs): + # problem_str = problem_str.lower() # dataset name may be uppercase + if problem_str == 'townsend': + problem = townsend + elif problem_str == 'keane': + problem = keane + elif problem_str == 'ackley': + problem = ackley + elif problem_str == 'mishra': + problem = mishra + else: + raise ValueError('Unknown problem_str %s.' 
% problem_str) + return problem(**kwargs) + + +class BaseConstrainedSingleObjectiveProblem: + def __init__(self, dim, **kwargs): + self.dim = dim + + def evaluate_config(self, config, optimizer='smac'): + raise NotImplementedError + + def evaluate(self, X: np.ndarray): + raise NotImplementedError + + @staticmethod + def get_config_dict(config, optimizer='smac'): + if optimizer == 'smac': + config_dict = config.get_dictionary().copy() + elif optimizer in ['tpe', 'hypermapper']: + config_dict = config + else: + raise ValueError('Unknown optimizer %s' % optimizer) + return config_dict + + @staticmethod + def checkX(X: np.ndarray): + X = np.atleast_2d(X) + assert len(X.shape) == 2 and X.shape[0] == 1 + X = X.flatten() + return X + + def get_configspace(self, optimizer='smac'): + raise NotImplementedError + + +class keane(BaseConstrainedSingleObjectiveProblem): + def __init__(self, **kwargs): + super().__init__(dim=10, **kwargs) + self.lb = 0 + self.ub = 10 + self.bounds = [(self.lb, self.ub)] * self.dim + self.num_constraints = 2 + + def evaluate_config(self, config, optimizer='smac'): + config_dict = self.get_config_dict(config, optimizer) + X = np.array([config_dict['x%s' % i] for i in range(1, 10 + 1)]) + return self.evaluate(X) + + def evaluate(self, X: np.ndarray): + X = self.checkX(X) + result = dict() + cosX2 = np.cos(X) ** 2 + up = np.abs(np.sum(cosX2 ** 2) - 2 * np.prod(cosX2)) + down = np.sqrt(np.sum(np.arange(1, 10 + 1) * X ** 2)) + result['objectives'] = [-up / down, ] + result['constraints'] = [0.75 - np.prod(X), np.sum(X) - 7.5 * 10, ] + return result + + def get_configspace(self, optimizer='smac'): + if optimizer == 'smac': + cs = ConfigurationSpace() + cs.add_hyperparameters( + [UniformFloatHyperparameter("x%s" % i, self.lb, self.ub) for i in range(1, 1 + 10)]) + return cs + elif optimizer == 'gpflowopt': + import gpflowopt + domain = gpflowopt.domain.ContinuousParameter('x1', self.lb, self.ub) + \ + gpflowopt.domain.ContinuousParameter('x2', self.lb, self.ub) + \ + gpflowopt.domain.ContinuousParameter('x3', self.lb, self.ub) + \ + gpflowopt.domain.ContinuousParameter('x4', self.lb, self.ub) + \ + gpflowopt.domain.ContinuousParameter('x5', self.lb, self.ub) + \ + gpflowopt.domain.ContinuousParameter('x6', self.lb, self.ub) + \ + gpflowopt.domain.ContinuousParameter('x7', self.lb, self.ub) + \ + gpflowopt.domain.ContinuousParameter('x8', self.lb, self.ub) + \ + gpflowopt.domain.ContinuousParameter('x9', self.lb, self.ub) + \ + gpflowopt.domain.ContinuousParameter('x10', self.lb, self.ub) + return domain + elif optimizer == 'hypermapper': + input_parameters = {} + for i in range(1, 1 + 10): + input_parameters['x%d' % i] = { + "parameter_type": "real", + "values": [self.lb, self.ub] + } + return input_parameters + else: + raise ValueError('Unknown optimizer %s when getting configspace' % optimizer) + + +class ackley(BaseConstrainedSingleObjectiveProblem): + def __init__(self, lb=-5, ub=10, **kwargs): # -15, 30? 
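+        # note: the unconstrained Ackley in so_benchmark_function.py defaults to lb=-15, ub=30;
+        # this constrained variant uses the narrower default range lb=-5, ub=10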
+        super().__init__(dim=2, **kwargs)
+        self.lb = lb
+        self.ub = ub
+        self.bounds = [(self.lb, self.ub)] * self.dim
+        self.num_constraints = 1
+
+    def evaluate_config(self, config, optimizer='smac'):
+        config_dict = self.get_config_dict(config, optimizer)
+        x1 = config_dict['x1']
+        x2 = config_dict['x2']
+        X = np.array([x1, x2])
+        return self.evaluate(X)
+
+    def evaluate(self, X: np.ndarray):
+        X = self.checkX(X)
+        result = dict()
+        a = 20
+        b = 0.2
+        c = 2 * np.pi
+        t1 = -a * np.exp(-b * np.sqrt(np.mean(X ** 2)))
+        t2 = -np.exp(np.mean(np.cos(c * X)))
+        t3 = a + np.exp(1)
+        result['objectives'] = [t1 + t2 + t3, ]
+        result['constraints'] = [np.sign(np.sum(X)) + np.sign(np.sum(X ** 2) - 25) + 1.5, ]
+        return result
+
+    def get_configspace(self, optimizer='smac'):
+        if optimizer == 'smac':
+            cs = ConfigurationSpace()
+            cs.add_hyperparameters(
+                [UniformFloatHyperparameter("x%s" % i, self.lb, self.ub) for i in range(1, 1 + 2)])
+            return cs
+        elif optimizer == 'gpflowopt':
+            import gpflowopt
+            domain = gpflowopt.domain.ContinuousParameter('x1', self.lb, self.ub) + \
+                     gpflowopt.domain.ContinuousParameter('x2', self.lb, self.ub)
+            return domain
+        else:
+            raise ValueError('Unknown optimizer %s when getting configspace' % optimizer)
+
+
+class mishra(BaseConstrainedSingleObjectiveProblem):
+    def __init__(self, **kwargs):
+        super().__init__(dim=2, **kwargs)
+        self.lb = -2 * 3.14
+        self.ub = 2 * 3.14
+        self.bounds = [(self.lb, self.ub)] * self.dim
+        self.num_constraints = 1
+
+    def evaluate_config(self, config, optimizer='smac'):
+        config_dict = self.get_config_dict(config, optimizer)
+        x1 = config_dict['x1']
+        x2 = config_dict['x2']
+        X = np.array([x1, x2])
+        return self.evaluate(X)
+
+    def evaluate(self, X: np.ndarray):
+        X = self.checkX(X)
+        result = dict()
+        x, y = X[0], X[1]
+        t1 = np.sin(y) * np.exp((1 - np.cos(x)) ** 2)
+        t2 = np.cos(x) * np.exp((1 - np.sin(y)) ** 2)
+        t3 = (x - y) ** 2
+        result['objectives'] = (t1 + t2 + t3,)
+        result['constraints'] = ((X[0] + 5) ** 2 + (X[1] + 5) ** 2 - 25,)
+        return result
+
+    def get_configspace(self, optimizer='smac'):
+        if optimizer == 'smac':
+            cs = ConfigurationSpace()
+            cs.add_hyperparameters(
+                [UniformFloatHyperparameter("x%s" % i, self.lb, self.ub) for i in range(1, 1 + 2)])
+            return cs
+        elif optimizer == 'gpflowopt':
+            import gpflowopt
+            domain = gpflowopt.domain.ContinuousParameter('x1', self.lb, self.ub) + \
+                     gpflowopt.domain.ContinuousParameter('x2', self.lb, self.ub)
+            return domain
+        else:
+            raise ValueError('Unknown optimizer %s when getting configspace' % optimizer)
+
+
+class townsend(BaseConstrainedSingleObjectiveProblem):
+
+    def __init__(self, **kwargs):
+        super().__init__(dim=2, **kwargs)
+        self.bounds = [(-2.25, 2.5), (-2.5, 1.75)]
+        self.num_constraints = 1
+
+    def evaluate_config(self, config, optimizer='smac'):
+        config_dict = self.get_config_dict(config, optimizer)
+        x1 = config_dict['x1']
+        x2 = config_dict['x2']
+        X = np.array([x1, x2])
+        return self.evaluate(X)
+
+    def evaluate(self, X: np.ndarray):
+        X = self.checkX(X)
+        res = dict()
+        res['objectives'] = (-(np.cos((X[0] - 0.1) * X[1]) ** 2 + X[0] * np.sin(3 * X[0] + X[1])),)
+        res['constraints'] = (
+            -(-np.cos(1.5 * X[0] + np.pi) * np.cos(1.5 * X[1]) + np.sin(1.5 * X[0] + np.pi) * np.sin(1.5 * X[1])),)
+        return res
+
+    def get_configspace(self, optimizer='smac'):
+        if optimizer == 'smac':
+            cs = ConfigurationSpace()
+            x1 = UniformFloatHyperparameter("x1", -2.25, 2.5)
+            x2 = UniformFloatHyperparameter("x2", -2.5, 1.75)
+            cs.add_hyperparameters([x1, x2])
+            return cs
+        elif optimizer == 'gpflowopt':
+            import gpflowopt
+            domain = gpflowopt.domain.ContinuousParameter('x1', -2.25, 2.5) + \
+                     gpflowopt.domain.ContinuousParameter('x2', -2.5, 1.75)
+            return domain
+        else:
+            raise ValueError('Unknown optimizer %s when getting configspace' % optimizer)
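The classes above all follow the same contract: get_configspace() builds the search space for the chosen optimizer interface, and evaluate_config() returns a dict with 'objectives' (a single value to minimize) and 'constraints' (feasible iff every entry is <= 0, matching the penalty check in the benchmark script). A minimal usage sketch, assuming it is run from test/reproduction/soc/ so that soc_benchmark_function resolves by plain module name, and using the 'smac'-style ConfigSpace branch that OpenBox consumes:

from soc_benchmark_function import get_problem

# Any registered problem name works here: 'townsend', 'keane', 'ackley' or 'mishra'.
problem = get_problem('townsend')
cs = problem.get_configspace(optimizer='smac')
cs.seed(1)
config = cs.sample_configuration()

result = problem.evaluate_config(config, optimizer='smac')
print(result['objectives'])    # objective value to be minimized
print(result['constraints'])   # feasible iff every constraint value is <= 0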
diff --git a/test/reproduction/test_utils.py b/test/reproduction/test_utils.py
new file mode 100644
index 00000000..856296cb
--- /dev/null
+++ b/test/reproduction/test_utils.py
@@ -0,0 +1,133 @@
+import os
+import pandas as pd
+import numpy as np
+import contextlib
+import time
+
+
+seeds = [4774, 3711, 7238, 3203, 4254, 2137, 1188, 4356, 517, 5887,
+         9082, 4702, 4801, 8242, 7391, 1893, 4400, 1192, 5553, 9039]
+
+
+# timer tool
+@contextlib.contextmanager
+def timeit(name=''):
+    print("[%s]Start." % name, flush=True)
+    start = time.time()
+    yield
+    end = time.time()
+    m, s = divmod(end - start, 60)
+    h, m = divmod(m, 60)
+    print("[%s]Total time = %d hours, %d minutes, %d seconds." % (name, h, m, s), flush=True)
+
+
+def check_datasets(datasets, data_dir):
+    for _dataset in datasets:
+        try:
+            _ = load_data(_dataset, data_dir)
+        except Exception as e:
+            raise ValueError('Dataset - %s does not exist!' % _dataset) from e
+
+
+def load_data(dataset, data_dir):
+    """
+    todo: not finished: label encoding...
+    """
+    data_path = os.path.join(data_dir, "%s.csv" % dataset)
+
+    # Load train data.
+    if dataset in ['higgs', 'amazon_employee', 'spectf', 'usps', 'vehicle_sensIT', 'codrna']:
+        label_col = 0
+    elif dataset in ['rmftsa_sleepdata(1)']:
+        label_col = 1
+    else:
+        label_col = -1
+
+    if dataset in ['spambase', 'messidor_features']:
+        header = None
+    else:
+        header = 'infer'
+
+    if dataset in ['winequality_white', 'winequality_red']:
+        sep = ';'
+    else:
+        sep = ','
+
+    na_values = ["n/a", "na", "--", "-", "?"]
+    keep_default_na = True
+    df = pd.read_csv(data_path, keep_default_na=keep_default_na,
+                     na_values=na_values, header=header, sep=sep)
+
+    # Drop rows where all values are NaN.
+    df.dropna(how='all', inplace=True)
+
+    # Clean the data where the label columns have nans.
+    columns_missed = df.columns[df.isnull().any()].tolist()
+
+    label_colname = df.columns[label_col]
+
+    if label_colname in columns_missed:
+        labels = df[label_colname].values
+        row_idx = [idx for idx, val in enumerate(labels) if np.isnan(val)]
+        # Delete rows with NaN labels.
+        df.drop(df.index[row_idx], inplace=True)
+
+    train_y = df[label_colname].values
+
+    # Delete the label column.
+    df.drop(label_colname, axis=1, inplace=True)
+
+    train_X = df
+    return train_X, train_y
+
+
+# for plot
+def descending(x):
+    y = [x[0]]
+    for i in range(1, len(x)):
+        y.append(min(y[-1], x[i]))
+    return y
+
+
+def create_point(x, stats, default=0.0):
+    """
+    get the closest perf of time point x where timestamp < x
+    :param x:
+        the time point
+    :param stats:
+        list of func. func is tuple of timestamp list and perf list
+    :param default:
+        init value of perf
+    :return:
+        list of perf of funcs at time point x
+    """
+    perf_list = []
+    for func in stats:
+        timestamp, perf = func
+        last_p = default
+        for t, p in zip(timestamp, perf):
+            if t > x:
+                break
+            last_p = p
+        perf_list.append(last_p)
+    return perf_list
+
+
+def create_plot_points(stats, start_time, end_time, point_num=500):
+    """
+
+    :param stats:
+        list of func. func is tuple of timestamp list and perf list
+    :param start_time:
+    :param end_time:
+    :param point_num:
+    :return:
+    """
+    x = np.linspace(start_time, end_time, num=point_num)
+    _mean, _std = list(), list()
+    for i, stage in enumerate(x):
+        perf_list = create_point(stage, stats)
+        _mean.append(np.mean(perf_list))
+        _std.append(np.std(perf_list))
+    # Used to plot errorbar.
+    return x, np.array(_mean), np.array(_std)
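The helpers above are meant to be driven by a separate plotting script. A minimal sketch, assuming the .pkl files were produced by the soc benchmark script in this patch (each storing (config_list, perf_list, time_list), with time_list holding cumulative wall-clock time per trial), that it is run from the repository root, and that the 'logs/soc_benchmark_townsend_200/openbox/' path is adjusted to the problem, budget and method actually run:

import glob
import pickle as pkl

import matplotlib.pyplot as plt
import numpy as np

from test.reproduction.test_utils import descending, create_plot_points

# One (timestamps, best-so-far perf) pair per repetition/seed.
stats = []
for path in glob.glob('logs/soc_benchmark_townsend_200/openbox/*.pkl'):
    with open(path, 'rb') as f:
        config_list, perf_list, time_list = pkl.load(f)
    stats.append((time_list, descending(perf_list)))

end_time = max(timestamps[-1] for timestamps, _ in stats)
x, mean, std = create_plot_points(stats, 0, end_time, point_num=500)

plt.plot(x, mean, label='openbox')
plt.fill_between(x, mean - std, mean + std, alpha=0.2)
plt.xlabel('wall-clock time (s)')
plt.ylabel('best objective value')
plt.legend()
plt.show()

Note that create_point() falls back to its default value (0.0) for any repetition that has not finished a trial by a given time point, so the left end of the averaged curve is only meaningful once every run has produced at least one result.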