From 303f60b628d708b505af4895574345a7564b6c95 Mon Sep 17 00:00:00 2001 From: Xbc-gressor <2765024335@qq.com> Date: Tue, 27 Feb 2024 09:51:09 +0800 Subject: [PATCH] openbox experiments --- test/reproduction/__init__.py | 0 .../mo/benchmark_mo_openbox_math.py | 148 ++++++ test/reproduction/mo/mo_benchmark_function.py | 273 ++++++++++ .../moc/benchmark_moc_openbox_math.py | 157 ++++++ .../moc/moc_benchmark_function.py | 246 +++++++++ .../so/benchmark_so_openbox_math.py | 119 +++++ test/reproduction/so/so_benchmark_function.py | 473 ++++++++++++++++++ .../soc/benchmark_soc_openbox_math.py | 126 +++++ .../soc/soc_benchmark_function.py | 228 +++++++++ test/reproduction/test_utils.py | 133 +++++ 10 files changed, 1903 insertions(+) create mode 100644 test/reproduction/__init__.py create mode 100644 test/reproduction/mo/benchmark_mo_openbox_math.py create mode 100644 test/reproduction/mo/mo_benchmark_function.py create mode 100644 test/reproduction/moc/benchmark_moc_openbox_math.py create mode 100644 test/reproduction/moc/moc_benchmark_function.py create mode 100644 test/reproduction/so/benchmark_so_openbox_math.py create mode 100644 test/reproduction/so/so_benchmark_function.py create mode 100644 test/reproduction/soc/benchmark_soc_openbox_math.py create mode 100644 test/reproduction/soc/soc_benchmark_function.py create mode 100644 test/reproduction/test_utils.py diff --git a/test/reproduction/__init__.py b/test/reproduction/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/test/reproduction/mo/benchmark_mo_openbox_math.py b/test/reproduction/mo/benchmark_mo_openbox_math.py new file mode 100644 index 00000000..f67346a9 --- /dev/null +++ b/test/reproduction/mo/benchmark_mo_openbox_math.py @@ -0,0 +1,148 @@ +""" +example cmdline: + +python test/reproduction/mo/benchmark_mo_openbox_math.py --problem zdt2-3 --n 200 --init_strategy sobol --rep 1 --start_id 0 + +""" +import os +NUM_THREADS = "2" +os.environ["OMP_NUM_THREADS"] = NUM_THREADS # export OMP_NUM_THREADS=1 +os.environ["OPENBLAS_NUM_THREADS"] = NUM_THREADS # export OPENBLAS_NUM_THREADS=1 +os.environ["MKL_NUM_THREADS"] = NUM_THREADS # export MKL_NUM_THREADS=1 +os.environ["VECLIB_MAXIMUM_THREADS"] = NUM_THREADS # export VECLIB_MAXIMUM_THREADS=1 +os.environ["NUMEXPR_NUM_THREADS"] = NUM_THREADS # export NUMEXPR_NUM_THREADS=1 + +import sys +import time +import numpy as np +import argparse +import pickle as pkl + +sys.path.insert(0, os.getcwd()) +from mo_benchmark_function import get_problem, plot_pf +from openbox import Optimizer +from openbox.utils.multi_objective import Hypervolume +from test.reproduction.test_utils import timeit, seeds + +parser = argparse.ArgumentParser() +parser.add_argument('--problem', type=str) +parser.add_argument('--n', type=int, default=100) +parser.add_argument('--init', type=int, default=0) +parser.add_argument('--init_strategy', type=str, default='sobol', choices=['sobol', 'latin_hypercube']) +parser.add_argument('--surrogate', type=str, default='auto', choices=['auto', 'gp', 'prf']) +parser.add_argument('--acq_type', type=str, default='auto', choices=['auto', 'ehvi', 'usemo', 'mesmo']) +parser.add_argument('--optimizer', type=str, default='auto', choices=['auto', 'scipy', 'local']) +parser.add_argument('--rep', type=int, default=1) +parser.add_argument('--start_id', type=int, default=0) +parser.add_argument('--plot_mode', type=int, default=0) + +args = parser.parse_args() +problem_str = args.problem +max_runs = args.n +initial_runs = args.init +init_strategy = args.init_strategy +surrogate_type = 
args.surrogate +acq_type = args.acq_type +if args.optimizer == 'scipy': + acq_optimizer_type = 'random_scipy' +elif args.optimizer == 'local': + acq_optimizer_type = 'local_random' +elif args.optimizer == 'auto': + acq_optimizer_type = 'auto' +else: + raise ValueError('Unknown optimizer %s' % args.optimizer) +if acq_type in ['usemo', 'mesmo']: + acq_optimizer_type = None +rep = args.rep +start_id = args.start_id +plot_mode = args.plot_mode +if acq_type == 'ehvi': + mth = 'openbox' +else: + mth = 'openbox-%s' % acq_type + +problem = get_problem(problem_str) +if initial_runs == 0: + initial_runs = 2 * (problem.dim + 1) +cs = problem.get_configspace(optimizer='smac') +# max_runtime_per_trial = 600 +task_id = '%s_%s_%s' % (mth, acq_type, problem_str) + + +def evaluate(mth, run_i, seed): + print(mth, run_i, seed, '===== start =====', flush=True) + + def objective_function(config): + res = problem.evaluate_config(config) + # res['config'] = config + return res + + bo = Optimizer( + objective_function, + cs, + num_objectives=problem.num_objectives, + num_constraints=0, + surrogate_type=surrogate_type, # default: auto: gp + acq_type=acq_type, # default: auto: ehvi + acq_optimizer_type=acq_optimizer_type, # default: auto: random_scipy + initial_runs=initial_runs, # default: 2 * (problem.dim + 1) + init_strategy=init_strategy, # default: sobol + max_runs=max_runs, + ref_point=problem.ref_point, task_id=task_id, random_state=seed, + ) + + # bo.run() + hv_diffs = [] + time_list = [] + global_start_time = time.time() + for i in range(max_runs): + observation = bo.iterate(bo.time_left) + config, trial_state, objectives = observation.config, observation.trial_state, observation.objectives + global_time = time.time() - global_start_time + bo.time_left -= global_time + print(seed, i, objectives, config, trial_state, 'time=', global_time) + hv = Hypervolume(problem.ref_point).compute(bo.get_history().get_pareto_front()) + hv_diff = problem.max_hv - hv + print(seed, i, 'hypervolume =', hv) + print(seed, i, 'hv diff =', hv_diff) + hv_diffs.append(hv_diff) + time_list.append(global_time) + config_list = bo.get_history().configurations + perf_list = bo.get_history().get_objectives(transform='none') + pf = np.asarray(bo.get_history().get_pareto_front()) + + # plot for debugging + if plot_mode == 1: + Y_init = None + plot_pf(problem, problem_str, mth, pf, Y_init) + + history = bo.get_history() + + return hv_diffs, pf, config_list, perf_list, time_list, history + +if __name__ == '__main__': + with timeit('%s all' % (mth,)): + for run_i in range(start_id, start_id + rep): + seed = seeds[run_i] + with timeit('%s %d %d' % (mth, run_i, seed)): + # Evaluate + hv_diffs, pf, config_list, perf_list, time_list, history = evaluate(mth, run_i, seed) + + # Save result + print('=' * 20) + print(seed, mth, config_list, perf_list, time_list, hv_diffs) + print(seed, mth, 'best hv_diff:', hv_diffs[-1]) + print(seed, mth, 'max_hv:', problem.max_hv) + if pf is not None: + print(seed, mth, 'pareto num:', pf.shape[0]) + + timestamp = time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime(time.time())) + dir_path = 'logs/mo_benchmark_%s_%d/%s/' % (problem_str, max_runs, mth) + file = 'benchmark_%s_%04d_%s.pkl' % (mth, seed, timestamp) + os.makedirs(dir_path, exist_ok=True) + with open(os.path.join(dir_path, file), 'wb') as f: + save_item = (hv_diffs, pf, config_list, perf_list, time_list) + pkl.dump(save_item, f) + print(dir_path, file, 'saved!', flush=True) + + history.save_json(os.path.join(dir_path, 'benchmark_%s_%04d_%s.json' % (mth, 
seed, timestamp)))
diff --git a/test/reproduction/mo/mo_benchmark_function.py b/test/reproduction/mo/mo_benchmark_function.py
new file mode 100644
index 00000000..71c99515
--- /dev/null
+++ b/test/reproduction/mo/mo_benchmark_function.py
@@ -0,0 +1,273 @@
+import numpy as np
+
+from ConfigSpace import ConfigurationSpace, UniformFloatHyperparameter, UniformIntegerHyperparameter, \
+    Constant, CategoricalHyperparameter, InCondition, EqualsCondition, UnParametrizedHyperparameter, \
+    ForbiddenEqualsClause, ForbiddenInClause, ForbiddenAndConjunction
+
+from openbox.benchmark.objective_functions.synthetic import DTLZ1, DTLZ2, BraninCurrin, VehicleSafety, ZDT1, ZDT2, ZDT3
+
+
+def get_problem(problem_str, **kwargs):
+    problem = None
+    if problem_str.startswith('dtlz1'):
+        params = problem_str.split('-')
+        assert params[0] == 'dtlz1'
+        if len(params) == 1:
+            return dtlz1(dim=5, num_objectives=4)
+        elif len(params) == 3:
+            return dtlz1(dim=int(params[1]), num_objectives=int(params[2]))
+    elif problem_str.startswith('dtlz2'):
+        params = problem_str.split('-')
+        assert params[0] == 'dtlz2'
+        if len(params) == 1:
+            return dtlz2(dim=12, num_objectives=2)
+        elif len(params) == 3:
+            return dtlz2(dim=int(params[1]), num_objectives=int(params[2]))
+    elif problem_str == 'branincurrin':
+        problem = branincurrin
+    elif problem_str == 'vehiclesafety':
+        problem = vehiclesafety
+    elif problem_str.startswith('zdt'):
+        params = problem_str.split('-')
+        assert params[0] in ('zdt1', 'zdt2', 'zdt3')
+        if len(params) == 1:
+            return zdt(problem_str=params[0], dim=3)
+        else:
+            return zdt(problem_str=params[0], dim=int(params[1]))
+    if problem is None:
+        raise ValueError('Unknown problem_str %s.' % problem_str)
+    return problem(**kwargs)
+
+
+def plot_pf(problem, problem_str, mth, pf, Y_init=None):
+    import matplotlib.pyplot as plt
+    assert problem.num_objectives in (2, 3)
+    if problem.num_objectives == 2:
+        plt.scatter(pf[:, 0], pf[:, 1], label=mth)
+        if Y_init is not None:
+            plt.scatter(Y_init[:, 0], Y_init[:, 1], label='init', marker='x')
+        plt.xlabel('Objective 1')
+        plt.ylabel('Objective 2')
+    elif problem.num_objectives == 3:
+        ax = plt.axes(projection='3d')
+        ax.scatter3D(pf[:, 0], pf[:, 1], pf[:, 2], label=mth)
+        if Y_init is not None:
+            ax.scatter3D(Y_init[:, 0], Y_init[:, 1], Y_init[:, 2], label='init', marker='x')
+        ax.set_xlabel('Objective 1')
+        ax.set_ylabel('Objective 2')
+        ax.set_zlabel('Objective 3')
+    else:
+        raise ValueError('Cannot plot_pf with problem.num_objectives == %d.'
% (problem.num_objectives,)) + plt.title('Pareto Front of %s' % (problem_str,)) + plt.legend() + plt.show() + + +class BaseMultiObjectiveProblem: + def __init__(self, dim, num_objectives, problem=None, **kwargs): + self.dim = dim + self.num_objectives = num_objectives + if problem is not None: + self.problem = problem + self.ref_point = problem.ref_point + try: + self.max_hv = problem.max_hv + except NotImplementedError: + self.max_hv = 0.0 + + def evaluate_config(self, config, optimizer='smac'): + raise NotImplementedError + + def evaluate(self, X: np.ndarray): + raise NotImplementedError + + @staticmethod + def get_config_dict(config, optimizer='smac'): + if optimizer == 'smac': + config_dict = config.get_dictionary().copy() + elif optimizer == 'tpe': + config_dict = config + else: + raise ValueError('Unknown optimizer %s' % optimizer) + return config_dict + + @staticmethod + def checkX(X: np.ndarray): + X = np.atleast_2d(X) + assert len(X.shape) == 2 and X.shape[0] == 1 + X = X.flatten() + return X + + def get_configspace(self, optimizer='smac'): + raise NotImplementedError + + +class dtlz1(BaseMultiObjectiveProblem): + + def __init__(self, dim, num_objectives, **kwargs): + problem = DTLZ1(dim=dim, num_objectives=num_objectives) + super().__init__(dim=dim, num_objectives=num_objectives, problem=problem, **kwargs) + self.lb = 0 + self.ub = 1 + self.bounds = [(self.lb, self.ub)] * self.dim + + def evaluate_config(self, config, optimizer='smac'): + config_dict = self.get_config_dict(config, optimizer) + x_list = [config_dict['x%d' % i] for i in range(1, self.dim+1)] + X = np.array(x_list) + return self.evaluate(X) + + def evaluate(self, X: np.ndarray): + X = self.checkX(X) + return self.problem._evaluate(X) # dict + + def get_configspace(self, optimizer='smac'): + if optimizer == 'smac': + return self.problem.config_space + elif optimizer == 'gpflowopt': + import gpflowopt + domain = np.sum([ + gpflowopt.domain.ContinuousParameter('x%d' % i, self.lb, self.ub) for i in range(1, self.dim+1) + ]) + return domain + else: + raise ValueError('Unknown optimizer %s when getting configspace' % optimizer) + + +class dtlz2(BaseMultiObjectiveProblem): + + def __init__(self, dim, num_objectives, **kwargs): + problem = DTLZ2(dim=dim, num_objectives=num_objectives) + super().__init__(dim=dim, num_objectives=num_objectives, problem=problem, **kwargs) + self.lb = 0 + self.ub = 1 + self.bounds = [(self.lb, self.ub)] * self.dim + + def evaluate_config(self, config, optimizer='smac'): + config_dict = self.get_config_dict(config, optimizer) + x_list = [config_dict['x%d' % i] for i in range(1, self.dim+1)] + X = np.array(x_list) + return self.evaluate(X) + + def evaluate(self, X: np.ndarray): + X = self.checkX(X) + return self.problem._evaluate(X) # dict + + def get_configspace(self, optimizer='smac'): + if optimizer == 'smac': + return self.problem.config_space + elif optimizer == 'gpflowopt': + import gpflowopt + domain = np.sum([ + gpflowopt.domain.ContinuousParameter('x%d' % i, self.lb, self.ub) for i in range(1, self.dim+1) + ]) + return domain + else: + raise ValueError('Unknown optimizer %s when getting configspace' % optimizer) + + +class branincurrin(BaseMultiObjectiveProblem): + + def __init__(self, **kwargs): + problem = BraninCurrin() + super().__init__(dim=2, num_objectives=2, problem=problem, **kwargs) + self.lb = 1e-10 # fix numeric problem + self.ub = 1 + self.bounds = [(self.lb, self.ub)] * self.dim + + def evaluate_config(self, config, optimizer='smac'): + config_dict = 
self.get_config_dict(config, optimizer) + x_list = [config_dict['x%d' % i] for i in range(1, self.dim+1)] + X = np.array(x_list) + return self.evaluate(X) + + def evaluate(self, X: np.ndarray): + X = self.checkX(X) + return self.problem._evaluate(X) # dict + + def get_configspace(self, optimizer='smac'): + if optimizer == 'smac': + cs = ConfigurationSpace() + cs.add_hyperparameters( + [UniformFloatHyperparameter("x%s" % i, self.lb, self.ub) for i in range(1, self.dim+1)]) + return cs + elif optimizer == 'gpflowopt': + import gpflowopt + domain = np.sum([ + gpflowopt.domain.ContinuousParameter('x%d' % i, self.lb, self.ub) for i in range(1, self.dim+1) + ]) + return domain + else: + raise ValueError('Unknown optimizer %s when getting configspace' % optimizer) + + +class vehiclesafety(BaseMultiObjectiveProblem): + + def __init__(self, **kwargs): + problem = VehicleSafety() + super().__init__(dim=5, num_objectives=3, problem=problem, **kwargs) + self.lb = 1 + self.ub = 3 + self.bounds = [(self.lb, self.ub)] * self.dim + + def evaluate_config(self, config, optimizer='smac'): + config_dict = self.get_config_dict(config, optimizer) + x_list = [config_dict['x%d' % i] for i in range(1, self.dim+1)] + X = np.array(x_list) + return self.evaluate(X) + + def evaluate(self, X: np.ndarray): + X = self.checkX(X) + return self.problem._evaluate(X) # dict + + def get_configspace(self, optimizer='smac'): + if optimizer == 'smac': + return self.problem.config_space + elif optimizer == 'gpflowopt': + import gpflowopt + domain = np.sum([ + gpflowopt.domain.ContinuousParameter('x%d' % i, self.lb, self.ub) for i in range(1, self.dim+1) + ]) + return domain + else: + raise ValueError('Unknown optimizer %s when getting configspace' % optimizer) + + +class zdt(BaseMultiObjectiveProblem): + + def __init__(self, problem_str, dim, **kwargs): + if problem_str == 'zdt1': + problem = ZDT1 + elif problem_str == 'zdt2': + problem = ZDT2 + elif problem_str == 'zdt3': + problem = ZDT3 + else: + raise ValueError + problem = problem(dim=dim) + super().__init__(dim=dim, num_objectives=2, problem=problem, **kwargs) + self.lb = 0 + self.ub = 1 + self.bounds = [(self.lb, self.ub)] * self.dim + + def evaluate_config(self, config, optimizer='smac'): + config_dict = self.get_config_dict(config, optimizer) + x_list = [config_dict['x%d' % i] for i in range(1, self.dim+1)] + X = np.array(x_list) + return self.evaluate(X) + + def evaluate(self, X: np.ndarray): + X = self.checkX(X) + return self.problem._evaluate(X) # dict + + def get_configspace(self, optimizer='smac'): + if optimizer == 'smac': + return self.problem.config_space + elif optimizer == 'gpflowopt': + import gpflowopt + domain = np.sum([ + gpflowopt.domain.ContinuousParameter('x%d' % i, self.lb, self.ub) for i in range(1, self.dim+1) + ]) + return domain + else: + raise ValueError('Unknown optimizer %s when getting configspace' % optimizer) diff --git a/test/reproduction/moc/benchmark_moc_openbox_math.py b/test/reproduction/moc/benchmark_moc_openbox_math.py new file mode 100644 index 00000000..c2b96397 --- /dev/null +++ b/test/reproduction/moc/benchmark_moc_openbox_math.py @@ -0,0 +1,157 @@ +""" +example cmdline: + +python test/reproduction/moc/benchmark_moc_openbox_math.py --problem constr --n 200 --init_strategy sobol --rep 1 --start_id 0 + +""" +import os +NUM_THREADS = "2" +os.environ["OMP_NUM_THREADS"] = NUM_THREADS # export OMP_NUM_THREADS=1 +os.environ["OPENBLAS_NUM_THREADS"] = NUM_THREADS # export OPENBLAS_NUM_THREADS=1 +os.environ["MKL_NUM_THREADS"] = NUM_THREADS # 
export MKL_NUM_THREADS=1 +os.environ["VECLIB_MAXIMUM_THREADS"] = NUM_THREADS # export VECLIB_MAXIMUM_THREADS=1 +os.environ["NUMEXPR_NUM_THREADS"] = NUM_THREADS # export NUMEXPR_NUM_THREADS=1 + +import sys +import time +import numpy as np +import argparse +import pickle as pkl + +sys.path.insert(0, os.getcwd()) +from moc_benchmark_function import get_problem, plot_pf +from openbox import Optimizer +from openbox.utils.multi_objective import Hypervolume +from test.reproduction.test_utils import timeit, seeds + +parser = argparse.ArgumentParser() +parser.add_argument('--problem', type=str) +parser.add_argument('--n', type=int, default=200) +parser.add_argument('--init', type=int, default=0) +parser.add_argument('--init_strategy', type=str, default='sobol', choices=['sobol', 'latin_hypercube']) +parser.add_argument('--surrogate', type=str, default='auto', choices=['auto', 'gp', 'prf']) +parser.add_argument('--acq_type', type=str, default='auto', choices=['auto', 'ehvic', 'mesmoc', 'mesmoc2']) +parser.add_argument('--optimizer', type=str, default='auto', choices=['auto', 'scipy', 'local']) +parser.add_argument('--rep', type=int, default=1) +parser.add_argument('--start_id', type=int, default=0) +parser.add_argument('--plot_mode', type=int, default=0) + +args = parser.parse_args() +problem_str = args.problem +max_runs = args.n +initial_runs = args.init +init_strategy = args.init_strategy +surrogate_type = args.surrogate +acq_type = args.acq_type +if args.optimizer == 'scipy': + acq_optimizer_type = 'random_scipy' +elif args.optimizer == 'local': + acq_optimizer_type = 'local_random' +elif args.optimizer == 'auto': + acq_optimizer_type = 'auto' +else: + raise ValueError('Unknown optimizer %s' % args.optimizer) +if acq_type in ['mesmoc', 'mesmoc2']: + surrogate_type = None + acq_optimizer_type = None +rep = args.rep +start_id = args.start_id +plot_mode = args.plot_mode +if acq_type == 'ehvic': + mth = 'openbox' +else: + mth = 'openbox-%s' % acq_type + +problem = get_problem(problem_str) +if initial_runs == 0: + initial_runs = 2 * (problem.dim + 1) +cs = problem.get_configspace(optimizer='smac') +task_id = '%s_%s_%s' % (mth, acq_type, problem_str) + + +def evaluate(mth, run_i, seed): + print(mth, run_i, seed, '===== start =====', flush=True) + + def objective_function(config): + res = problem.evaluate_config(config) + # res['config'] = config + res['objectives'] = np.asarray(res['objectives']).tolist() + res['constraints'] = np.asarray(res['constraints']).tolist() + return res + + bo = Optimizer( + objective_function, + cs, + num_objectives=problem.num_objectives, + num_constraints=problem.num_constraints, + surrogate_type=surrogate_type, # default: auto: gp + acq_type=acq_type, # default: auto: ehvic + acq_optimizer_type=acq_optimizer_type, # default: auto: random_scipy + initial_runs=initial_runs, # default: 2 * (problem.dim + 1) + init_strategy=init_strategy, # default: sobol + max_runs=max_runs, + ref_point=problem.ref_point, task_id=task_id, random_state=seed, + ) + + # bo.run() + hv_diffs = [] + config_list = [] + perf_list = [] + time_list = [] + global_start_time = time.time() + for i in range(max_runs): + observation = bo.iterate(bo.time_left) + config, trial_state, constraints, origin_objectives = observation.config, observation.trial_state, observation.constraints, observation.objectives + global_time = time.time() - global_start_time + bo.time_left -= global_time + if any(c > 0 for c in constraints): + objectives = [9999999.0] * problem.num_objectives + else: + objectives = 
origin_objectives + print(seed, i, origin_objectives, objectives, constraints, config, trial_state, 'time=', global_time) + config_list.append(config) + perf_list.append(objectives) + time_list.append(global_time) + hv = Hypervolume(problem.ref_point).compute(perf_list) + hv_diff = problem.max_hv - hv + hv_diffs.append(hv_diff) + print(seed, i, 'hypervolume =', hv) + print(seed, i, 'hv diff =', hv_diff) + pf = np.asarray(bo.get_history().get_pareto_front()) + + # plot for debugging + if plot_mode == 1: + Y_init = None + plot_pf(problem, problem_str, mth, pf, Y_init) + + history = bo.get_history() + + return hv_diffs, pf, config_list, perf_list, time_list, history + + +if __name__ == '__main__': + with timeit('%s all' % (mth,)): + for run_i in range(start_id, start_id + rep): + seed = seeds[run_i] + with timeit('%s %d %d' % (mth, run_i, seed)): + # Evaluate + hv_diffs, pf, config_list, perf_list, time_list, history = evaluate(mth, run_i, seed) + + # Save result + print('=' * 20) + print(seed, mth, config_list, perf_list, time_list, hv_diffs) + print(seed, mth, 'best hv_diff:', hv_diffs[-1]) + print(seed, mth, 'max_hv:', problem.max_hv) + if pf is not None: + print(seed, mth, 'pareto num:', pf.shape[0]) + + timestamp = time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime(time.time())) + dir_path = 'logs/moc_benchmark_%s_%d/%s/' % (problem_str, max_runs, mth) + file = 'benchmark_%s_%04d_%s.pkl' % (mth, seed, timestamp) + os.makedirs(dir_path, exist_ok=True) + with open(os.path.join(dir_path, file), 'wb') as f: + save_item = (hv_diffs, pf, config_list, perf_list, time_list) + pkl.dump(save_item, f) + print(dir_path, file, 'saved!', flush=True) + + history.save_json(os.path.join(dir_path, 'benchmark_%s_%04d_%s.json' % (mth, seed, timestamp))) diff --git a/test/reproduction/moc/moc_benchmark_function.py b/test/reproduction/moc/moc_benchmark_function.py new file mode 100644 index 00000000..bc1aae0e --- /dev/null +++ b/test/reproduction/moc/moc_benchmark_function.py @@ -0,0 +1,246 @@ +import numpy as np + +# from openbox.utils.config_space import ConfigurationSpace, UniformFloatHyperparameter, UniformIntegerHyperparameter, Constant +from ConfigSpace import ConfigurationSpace, UniformFloatHyperparameter, UniformIntegerHyperparameter, \ + Constant, CategoricalHyperparameter, InCondition, EqualsCondition, UnParametrizedHyperparameter, \ + ForbiddenEqualsClause, ForbiddenInClause, ForbiddenAndConjunction + +from openbox.benchmark.objective_functions.synthetic import DTLZ2, BraninCurrin, BNH, SRN, CONSTR + + +def get_problem(problem_str, **kwargs): + problem = None + if problem_str.startswith('c2dtlz2'): + params = problem_str.split('-') + assert params[0] == 'c2dtlz2' + if len(params) == 1: + return c2dtlz2(dim=3, num_objectives=2) + elif len(params) == 3: + return c2dtlz2(dim=int(params[1]), num_objectives=int(params[2])) + elif problem_str == 'cbranincurrin': + problem = cbranincurrin + elif problem_str == 'bnh': + problem = bnh + elif problem_str == 'srn': + problem = srn + elif problem_str == 'constr': + problem = constr + if problem is None: + raise ValueError('Unknown problem_str %s.' 
% problem_str)
+    return problem(**kwargs)
+
+
+def plot_pf(problem, problem_str, mth, pf, Y_init=None):
+    import matplotlib.pyplot as plt
+    assert problem.num_objectives in (2, 3)
+    if problem.num_objectives == 2:
+        plt.scatter(pf[:, 0], pf[:, 1], label=mth)
+        if Y_init is not None:
+            plt.scatter(Y_init[:, 0], Y_init[:, 1], label='init', marker='x')
+        plt.xlabel('Objective 1')
+        plt.ylabel('Objective 2')
+    elif problem.num_objectives == 3:
+        ax = plt.axes(projection='3d')
+        ax.scatter3D(pf[:, 0], pf[:, 1], pf[:, 2], label=mth)
+        if Y_init is not None:
+            ax.scatter3D(Y_init[:, 0], Y_init[:, 1], Y_init[:, 2], label='init', marker='x')
+        ax.set_xlabel('Objective 1')
+        ax.set_ylabel('Objective 2')
+        ax.set_zlabel('Objective 3')
+    else:
+        raise ValueError('Cannot plot_pf with problem.num_objectives == %d.' % (problem.num_objectives,))
+    plt.title('Pareto Front of %s' % (problem_str,))
+    plt.legend()
+    plt.show()
+
+
+class BaseConstrainedMultiObjectiveProblem:
+    def __init__(self, dim, num_objectives, num_constraints, problem=None, **kwargs):
+        self.dim = dim
+        self.num_objectives = num_objectives
+        self.num_constraints = num_constraints
+        if problem is not None:
+            self.problem = problem
+            self.ref_point = problem.ref_point
+            try:
+                self.max_hv = problem.max_hv
+            except NotImplementedError:
+                self.max_hv = 0.0
+
+    def evaluate_config(self, config, optimizer='smac'):
+        raise NotImplementedError
+
+    def evaluate(self, X: np.ndarray):
+        raise NotImplementedError
+
+    @staticmethod
+    def get_config_dict(config, optimizer='smac'):
+        if optimizer == 'smac':
+            config_dict = config.get_dictionary().copy()
+        elif optimizer in ['tpe', 'hypermapper', 'optuna']:
+            config_dict = config
+        else:
+            raise ValueError('Unknown optimizer %s' % optimizer)
+        return config_dict
+
+    @staticmethod
+    def checkX(X: np.ndarray):
+        X = np.atleast_2d(X)
+        assert len(X.shape) == 2 and X.shape[0] == 1
+        X = X.flatten()
+        return X
+
+    def get_configspace(self, optimizer='smac'):
+        raise NotImplementedError
+
+
+class c2dtlz2(BaseConstrainedMultiObjectiveProblem):
+
+    def __init__(self, dim, num_objectives, **kwargs):
+        problem = DTLZ2(dim=dim, num_objectives=num_objectives, constrained=True)
+        super().__init__(dim=dim, num_objectives=num_objectives, num_constraints=1, problem=problem, **kwargs)
+        self.lb = 0
+        self.ub = 1
+        self.bounds = [(self.lb, self.ub)] * self.dim
+
+    def evaluate_config(self, config, optimizer='smac'):
+        config_dict = self.get_config_dict(config, optimizer)
+        x_list = [config_dict['x%d' % i] for i in range(1, self.dim+1)]
+        X = np.array(x_list)
+        return self.evaluate(X)
+
+    def evaluate(self, X: np.ndarray):
+        X = self.checkX(X)
+        return self.problem._evaluate(X)  # dict
+
+    def get_configspace(self, optimizer='smac'):
+        if optimizer == 'smac':
+            return self.problem.config_space
+        else:
+            raise ValueError('Unknown optimizer %s when getting configspace' % optimizer)
+
+
+class cbranincurrin(BaseConstrainedMultiObjectiveProblem):
+
+    def __init__(self, **kwargs):
+        problem = BraninCurrin(constrained=True)
+        super().__init__(dim=2, num_objectives=2, num_constraints=1, problem=problem, **kwargs)
+        self.lb = 1e-10  # fix numeric problem
+        self.ub = 1
+        self.bounds = [(self.lb, self.ub)] * self.dim
+
+    def evaluate_config(self, config, optimizer='smac'):
+        config_dict = self.get_config_dict(config, optimizer)
+        x_list = [config_dict['x%d' % i] for i in range(1, self.dim+1)]
+        X = np.array(x_list)
+        return self.evaluate(X)
+
+    def evaluate(self, X: np.ndarray):
+        X = self.checkX(X)
+        return
self.problem._evaluate(X) # dict + + def get_configspace(self, optimizer='smac'): + if optimizer == 'smac': + cs = ConfigurationSpace() + cs.add_hyperparameters( + [UniformFloatHyperparameter("x%s" % i, self.lb, self.ub) for i in range(1, self.dim+1)]) + return cs + else: + raise ValueError('Unknown optimizer %s when getting configspace' % optimizer) + + +class bnh(BaseConstrainedMultiObjectiveProblem): + + def __init__(self, **kwargs): + problem = BNH() + super().__init__(dim=2, num_objectives=2, num_constraints=2, problem=problem, **kwargs) + self.bounds = [(0.0, 5.0), (0.0, 3.0)] + self.new_max_hv = 7242.068539049498 # this is approximated using NSGA-II + + def evaluate_config(self, config, optimizer='smac'): + config_dict = self.get_config_dict(config, optimizer) + x_list = [config_dict['x%d' % i] for i in range(1, self.dim+1)] + X = np.array(x_list) + return self.evaluate(X) + + def evaluate(self, X: np.ndarray): + X = self.checkX(X) + return self.problem._evaluate(X) # dict + + def get_configspace(self, optimizer='smac'): + if optimizer == 'smac': + return self.problem.config_space + else: + raise ValueError('Unknown optimizer %s when getting configspace' % optimizer) + + +class srn(BaseConstrainedMultiObjectiveProblem): + + def __init__(self, **kwargs): + problem = SRN() + super().__init__(dim=2, num_objectives=2, num_constraints=2, problem=problem, **kwargs) + self.lb = -20.0 + self.ub = 20.0 + self.bounds = [(self.lb, self.ub)] * self.dim + self.new_max_hv = 34229.434882104855 # this is approximated using NSGA-II + + def evaluate_config(self, config, optimizer='smac'): + config_dict = self.get_config_dict(config, optimizer) + x_list = [config_dict['x%d' % i] for i in range(1, self.dim+1)] + X = np.array(x_list) + return self.evaluate(X) + + def evaluate(self, X: np.ndarray): + X = self.checkX(X) + return self.problem._evaluate(X) # dict + + def get_configspace(self, optimizer='smac'): + if optimizer == 'smac': + return self.problem.config_space + elif optimizer == 'hypermapper': + input_parameters = { + 'x%d' % (i+1): { + "parameter_type": "real", + "values": [self.lb, self.ub] + } for i in range(self.dim) + } + return input_parameters + else: + raise ValueError('Unknown optimizer %s when getting configspace' % optimizer) + + +class constr(BaseConstrainedMultiObjectiveProblem): + + def __init__(self, **kwargs): + problem = CONSTR() + super().__init__(dim=2, num_objectives=2, num_constraints=2, problem=problem, **kwargs) + self.bounds = [(0.1, 10.0), (0.0, 5.0)] + self.new_max_hv = 92.02004226679216 # this is approximated using NSGA-II + + def evaluate_config(self, config, optimizer='smac'): + config_dict = self.get_config_dict(config, optimizer) + x_list = [config_dict['x%d' % i] for i in range(1, self.dim+1)] + X = np.array(x_list) + return self.evaluate(X) + + def evaluate(self, X: np.ndarray): + X = self.checkX(X) + return self.problem._evaluate(X) # dict + + def get_configspace(self, optimizer='smac'): + if optimizer == 'smac': + return self.problem.config_space + elif optimizer == 'hypermapper': + input_parameters = { + 'x1': { + "parameter_type": "real", + "values": [0.1, 10.0] + }, + 'x2': { + "parameter_type": "real", + "values": [0.0, 5.0] + } + } + return input_parameters + else: + raise ValueError('Unknown optimizer %s when getting configspace' % optimizer) diff --git a/test/reproduction/so/benchmark_so_openbox_math.py b/test/reproduction/so/benchmark_so_openbox_math.py new file mode 100644 index 00000000..5d08e65c --- /dev/null +++ 
b/test/reproduction/so/benchmark_so_openbox_math.py @@ -0,0 +1,119 @@ +""" +example cmdline: + +python test/reproduction/so/benchmark_so_openbox_math.py --problem branin --n 200 --init 3 --rep 1 --start_id 0 + +""" +import os +NUM_THREADS = "2" +os.environ["OMP_NUM_THREADS"] = NUM_THREADS # export OMP_NUM_THREADS=1 +os.environ["OPENBLAS_NUM_THREADS"] = NUM_THREADS # export OPENBLAS_NUM_THREADS=1 +os.environ["MKL_NUM_THREADS"] = NUM_THREADS # export MKL_NUM_THREADS=1 +os.environ["VECLIB_MAXIMUM_THREADS"] = NUM_THREADS # export VECLIB_MAXIMUM_THREADS=1 +os.environ["NUMEXPR_NUM_THREADS"] = NUM_THREADS # export NUMEXPR_NUM_THREADS=1 + +import sys +import time +import numpy as np +import argparse +import pickle as pkl + +sys.path.insert(0, os.getcwd()) +from test.reproduction.so.so_benchmark_function import get_problem +from openbox import Optimizer +from test.reproduction.test_utils import timeit, seeds + +parser = argparse.ArgumentParser() +parser.add_argument('--problem', type=str) +parser.add_argument('--n', type=int, default=100) +parser.add_argument('--init', type=int, default=3) +parser.add_argument('--init_strategy', type=str, default='random_explore_first') +parser.add_argument('--surrogate', type=str, default='auto', choices=['auto', 'gp', 'prf']) +parser.add_argument('--optimizer', type=str, default='auto', choices=['auto', 'scipy', 'local']) +parser.add_argument('--rep', type=int, default=1) +parser.add_argument('--start_id', type=int, default=0) + +args = parser.parse_args() +problem_str = args.problem +max_runs = args.n +initial_runs = args.init +init_strategy = args.init_strategy +surrogate_type = args.surrogate +if args.optimizer == 'scipy': + acq_optimizer_type = 'random_scipy' +elif args.optimizer == 'local': + acq_optimizer_type = 'local_random' +elif args.optimizer == 'auto': + acq_optimizer_type = 'auto' +else: + raise ValueError('Unknown optimizer %s' % args.optimizer) +rep = args.rep +start_id = args.start_id +mth = 'openbox' + +problem = get_problem(problem_str) +cs = problem.get_configspace(optimizer='smac') +max_runtime_per_trial = 600 +task_id = '%s_%s' % (mth, problem_str) + + +def evaluate(mth, run_i, seed): + print(mth, run_i, seed, '===== start =====', flush=True) + + def objective_function(config): + y = problem.evaluate_config(config) + res = dict() + # res['config'] = config + res['objectives'] = (y,) + res['constraints'] = None + return res + + bo = Optimizer( + objective_function, + cs, + surrogate_type=surrogate_type, # default: auto: gp + acq_optimizer_type=acq_optimizer_type, # default: auto: random_scipy + initial_runs=initial_runs, # default: 3 + init_strategy=init_strategy, # default: random_explore_first + max_runs=max_runs, task_id=task_id, random_state=seed, + ) + # bo.run() + time_list = [] + global_start_time = time.time() + for i in range(max_runs): + observation = bo.iterate(bo.time_left) + config, trial_state, objectives = observation.config, observation.trial_state, observation.objectives + global_time = time.time() - global_start_time + bo.time_left -= global_time + print(seed, i, objectives, config, trial_state, 'time=', global_time) + time_list.append(global_time) + config_list = bo.get_history().configurations + perf_list = bo.get_history().get_objectives(transform='none') + + history = bo.get_history() + + return config_list, perf_list, time_list, history + +if __name__ == '__main__': + with timeit('%s all' % (mth,)): + for run_i in range(start_id, start_id + rep): + seed = seeds[run_i] + with timeit('%s %d %d' % (mth, run_i, seed)): + # 
Evaluate + config_list, perf_list, time_list, history = evaluate(mth, run_i, seed) + + # Save result + print('=' * 20) + print(seed, mth, config_list, perf_list, time_list) + print(seed, mth, 'best perf', np.min(perf_list)) + + timestamp = time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime(time.time())) + dir_path = 'logs/so_benchmark_%s_%d/%s/' % (problem_str, max_runs, mth) + file = 'benchmark_%s_%04d_%s.pkl' % (mth, seed, timestamp) + os.makedirs(dir_path, exist_ok=True) + with open(os.path.join(dir_path, file), 'wb') as f: + save_item = (config_list, perf_list, time_list) + pkl.dump(save_item, f) + print(dir_path, file, 'saved!', flush=True) + + history.save_json(os.path.join(dir_path, 'benchmark_%s_%04d_%s.json' % (mth, seed, timestamp))) diff --git a/test/reproduction/so/so_benchmark_function.py b/test/reproduction/so/so_benchmark_function.py new file mode 100644 index 00000000..f5c3caf2 --- /dev/null +++ b/test/reproduction/so/so_benchmark_function.py @@ -0,0 +1,473 @@ +import numpy as np + +# from openbox.utils.config_space import ConfigurationSpace, UniformFloatHyperparameter, UniformIntegerHyperparameter, Constant +from ConfigSpace import ConfigurationSpace, UniformFloatHyperparameter, UniformIntegerHyperparameter, \ + Constant, CategoricalHyperparameter, InCondition, EqualsCondition, UnParametrizedHyperparameter, \ + ForbiddenEqualsClause, ForbiddenInClause, ForbiddenAndConjunction + + +def get_problem(problem_str, **kwargs): + # problem_str = problem_str.lower() # dataset name may be uppercase + if problem_str == 'branin': + problem = Branin + elif problem_str.startswith('ackley'): + problem = Ackley + params = problem_str.split('-') + if len(params) == 1: + dim = 2 + elif len(params) == 2: + dim = int(params[1]) + else: + raise ValueError + kwargs['dim'] = dim + elif problem_str == 'beale': + problem = Beale + elif problem_str.startswith('hartmann'): + problem = Hartmann6d + elif 'lgb' in problem_str: + problem = lgb + kwargs['dataset'] = '_'.join(problem_str.split('_')[1:]) + elif 'svc' in problem_str: + problem = svc + kwargs['dataset'] = '_'.join(problem_str.split('_')[1:]) + else: + raise ValueError('Unknown problem_str %s.' 
% problem_str) + return problem(**kwargs) + + +class BaseSingleObjectiveProblem: + def __init__(self, dim, **kwargs): + self.dim = dim + + def evaluate_config(self, config, optimizer='smac'): + raise NotImplementedError + + def evaluate(self, X: np.ndarray): + raise NotImplementedError + + @staticmethod + def get_config_dict(config, optimizer='smac'): + if optimizer == 'smac': + config_dict = config.get_dictionary().copy() + elif optimizer == 'tpe': + config_dict = config + else: + raise ValueError('Unknown optimizer %s' % optimizer) + return config_dict + + @staticmethod + def checkX(X: np.ndarray): + X = np.atleast_2d(X) + assert len(X.shape) == 2 and X.shape[0] == 1 + X = X.flatten() + return X + + def get_configspace(self, optimizer='smac'): + raise NotImplementedError + + def load_data(self, **kwargs): + from test.reproduction.test_utils import load_data + from sklearn.model_selection import train_test_split + dataset = kwargs['dataset'] + try: + data_dir = kwargs.get('data_dir', '../soln-ml/data/cls_datasets/') + x, y = load_data(dataset, data_dir) + except Exception as e: + data_dir = '../../soln-ml/data/cls_datasets/' + x, y = load_data(dataset, data_dir) + self.train_x, self.val_x, self.train_y, self.val_y = train_test_split(x, y, stratify=y, random_state=1, + test_size=0.3) + + +class Ackley(BaseSingleObjectiveProblem): + + optimal_value = 0.0 + + def __init__(self, dim=2, lb=-15, ub=30, **kwargs): + super().__init__(dim=dim, **kwargs) + self.lb = lb + self.ub = ub + self.bounds = [(self.lb, self.ub)] * self.dim + + def evaluate_config(self, config, optimizer='smac'): + config_dict = self.get_config_dict(config, optimizer) + x_list = [config_dict['x%d' % i] for i in range(self.dim)] + X = np.array(x_list) + return self.evaluate(X) + + def evaluate(self, X: np.ndarray): + X = self.checkX(X) + a = 20 + b = 0.2 + c = 2 * np.pi + t1 = -a * np.exp(-b * np.sqrt(np.mean(X ** 2))) + t2 = -np.exp(np.mean(np.cos(c * X))) + t3 = a + np.exp(1) + y = t1 + t2 + t3 + return y + + def get_configspace(self, optimizer='smac'): + if optimizer == 'smac': + cs = ConfigurationSpace() + for i in range(self.dim): + xi = UniformFloatHyperparameter("x%d" % i, self.lb, self.ub) + cs.add_hyperparameter(xi) + return cs + elif optimizer == 'tpe': + from hyperopt import hp + space = {'x%d' % i: hp.uniform('hp_x%d' % i, self.lb, self.ub) for i in range(self.dim)} + return space + elif optimizer == 'gpflowopt': + import gpflowopt + domain = np.sum([ + gpflowopt.domain.ContinuousParameter('x%d' % i, self.lb, self.ub) for i in range(self.dim) + ]) + return domain + else: + raise ValueError('Unknown optimizer %s when getting configspace' % optimizer) + + +class Beale(BaseSingleObjectiveProblem): + + optimal_value = 0.0 + + def __init__(self, lb=-4.5, ub=4.5, **kwargs): + super().__init__(dim=2, **kwargs) + self.lb = lb + self.ub = ub + self.bounds = [(self.lb, self.ub)] * self.dim + + def evaluate_config(self, config, optimizer='smac'): + config_dict = self.get_config_dict(config, optimizer) + x_list = [config_dict['x%d' % i] for i in range(self.dim)] + X = np.array(x_list) + return self.evaluate(X) + + def evaluate(self, X: np.ndarray): + X = self.checkX(X) + x1 = X[0] + x2 = X[1] + part1 = (1.5 - x1 + x1 * x2) ** 2 + part2 = (2.25 - x1 + x1 * x2 ** 2) ** 2 + part3 = (2.625 - x1 + x1 * x2 ** 3) ** 2 + y = part1 + part2 + part3 + return y + + def get_configspace(self, optimizer='smac'): + if optimizer == 'smac': + cs = ConfigurationSpace() + for i in range(self.dim): + xi = UniformFloatHyperparameter("x%d" % i, 
self.lb, self.ub) + cs.add_hyperparameter(xi) + return cs + elif optimizer == 'tpe': + from hyperopt import hp + space = {'x%d' % i: hp.uniform('hp_x%d' % i, self.lb, self.ub) for i in range(self.dim)} + return space + elif optimizer == 'gpflowopt': + import gpflowopt + domain = ( + gpflowopt.domain.ContinuousParameter('x0', self.lb, self.ub) + + gpflowopt.domain.ContinuousParameter('x1', self.lb, self.ub) + ) + return domain + else: + raise ValueError('Unknown optimizer %s when getting configspace' % optimizer) + + +class Branin(BaseSingleObjectiveProblem): + """ + y = (x(2)-(5.1/(4*pi^2))*x(1)^2+5*x(1)/pi-6)^2+10*(1-1/(8*pi))*cos(x(1))+10 + """ + optimal_value = 0.397887 + optimal_point = [(-np.pi, 12.275), (np.pi, 2.275), (9.42478, 2.475)] + + def __init__(self, **kwargs): + super().__init__(dim=2, **kwargs) + self.bounds = [(-5.0, 10.0), (0.0, 15.0)] + + def evaluate_config(self, config, optimizer='smac'): + config_dict = self.get_config_dict(config, optimizer) + x1 = config_dict['x1'] + x2 = config_dict['x2'] + X = np.array([x1, x2]) + return self.evaluate(X) + + def evaluate(self, X: np.ndarray): + X = self.checkX(X) + x1 = X[0] + x2 = X[1] + y = (x2 - (5.1 / (4 * np.pi ** 2)) * x1 ** 2 + 5 * x1 / np.pi - 6) ** 2 + 10 * (1 - 1 / (8 * np.pi)) * np.cos( + x1) + 10 + return y + + def get_configspace(self, optimizer='smac'): + if optimizer == 'smac': + cs = ConfigurationSpace() + x1 = UniformFloatHyperparameter("x1", -5, 10) + x2 = UniformFloatHyperparameter("x2", 0, 15) + cs.add_hyperparameters([x1, x2]) + return cs + elif optimizer == 'tpe': + from hyperopt import hp + space = {'x1': hp.uniform('hp_x1', -5, 10), + 'x2': hp.uniform('hp_x2', 0, 15), + } + return space + elif optimizer == 'gpflowopt': + import gpflowopt + domain = ( + gpflowopt.domain.ContinuousParameter('x1', -5, 10) + + gpflowopt.domain.ContinuousParameter('x2', 0, 15) + ) + return domain + else: + raise ValueError('Unknown optimizer %s when getting configspace' % optimizer) + + +class Hartmann6d(BaseSingleObjectiveProblem): + + optimal_value = -3.32237 + + def __init__(self, **kwargs): + super().__init__(dim=6, **kwargs) + self.bounds = [(0.0, 1.0)] * self.dim + self.a = np.array([ + [10, 3, 17, 3.5, 1.7, 8], + [0.05, 10, 17, 0.1, 8, 14], + [3, 3.5, 1.7, 10, 17, 8], + [17, 8, 0.05, 10, 0.1, 14], + ]) + self.c = np.array([1.0, 1.2, 3.0, 3.2]) + self.p = np.array([ + [0.1312, 0.1696, 0.5569, 0.0124, 0.8283, 0.5886], + [0.2329, 0.4135, 0.8307, 0.3736, 0.1004, 0.9991], + [0.2348, 0.1451, 0.3522, 0.2883, 0.3047, 0.6650], + [0.4047, 0.8828, 0.8732, 0.5743, 0.1091, 0.0381], + ]) + + def evaluate_config(self, config, optimizer='smac'): + config_dict = self.get_config_dict(config, optimizer) + x_list = [config_dict['x%d' % i] for i in range(self.dim)] + X = np.array(x_list) + return self.evaluate(X) + + def evaluate(self, X: np.ndarray): + X = self.checkX(X) + inner_sum = np.sum(self.a * (X - self.p) ** 2, axis=1) + y = -np.sum(self.c * np.exp(-inner_sum)) + return y + + def get_configspace(self, optimizer='smac'): + if optimizer == 'smac': + cs = ConfigurationSpace() + for i in range(self.dim): + xi = UniformFloatHyperparameter("x%d" % i, 0, 1) + cs.add_hyperparameter(xi) + return cs + elif optimizer == 'tpe': + from hyperopt import hp + space = {'x%d' % i: hp.uniform('hp_x%d' % i, 0, 1) for i in range(self.dim)} + return space + elif optimizer == 'gpflowopt': + import gpflowopt + domain = ( + gpflowopt.domain.ContinuousParameter('x0', 0, 1) + + gpflowopt.domain.ContinuousParameter('x1', 0, 1) + + 
gpflowopt.domain.ContinuousParameter('x2', 0, 1) + + gpflowopt.domain.ContinuousParameter('x3', 0, 1) + + gpflowopt.domain.ContinuousParameter('x4', 0, 1) + + gpflowopt.domain.ContinuousParameter('x5', 0, 1) + ) + return domain + else: + raise ValueError('Unknown optimizer %s when getting configspace' % optimizer) + + +class lgb(BaseSingleObjectiveProblem): + def __init__(self, n_jobs=3, **kwargs): + super().__init__(dim=7, **kwargs) + self.n_jobs = n_jobs + self.load_data(**kwargs) + self.bounds = [ + (100, 1000), + (31, 2047), + (15, 16), + (1e-3, 0.3), + (5, 30), + (0.7, 1), + (0.7, 1), + ] + + def evaluate_config(self, config, optimizer='smac'): + config_dict = self.get_config_dict(config, optimizer) + n_estimators = int(config_dict['n_estimators']) + num_leaves = int(config_dict['num_leaves']) + max_depth = int(config_dict['max_depth']) + learning_rate = config_dict['learning_rate'] + min_child_samples = config_dict['min_child_samples'] + subsample = config_dict['subsample'] + colsample_bytree = config_dict['colsample_bytree'] + from lightgbm import LGBMClassifier + from sklearn.metrics.scorer import balanced_accuracy_scorer + lgbc = LGBMClassifier(n_estimators=n_estimators, + num_leaves=num_leaves, + max_depth=max_depth, + learning_rate=learning_rate, + min_child_samples=min_child_samples, + subsample=subsample, + colsample_bytree=colsample_bytree, + n_jobs=self.n_jobs) + lgbc.fit(self.train_x, self.train_y) + return -balanced_accuracy_scorer(lgbc, self.val_x, self.val_y) + + def evaluate(self, x): + x = self.checkX(x) + from lightgbm import LGBMClassifier + from sklearn.metrics.scorer import balanced_accuracy_scorer + lgbc = LGBMClassifier(n_estimators=int(x[0]), + num_leaves=int(x[1]), + max_depth=int(x[2]), + learning_rate=x[3], + min_child_samples=int(x[4]), + subsample=x[5], + colsample_bytree=x[6], + n_jobs=self.n_jobs) + lgbc.fit(self.train_x, self.train_y) + return -balanced_accuracy_scorer(lgbc, self.val_x, self.val_y) + + def get_configspace(self, optimizer='smac'): + if optimizer == 'smac': + cs = ConfigurationSpace() + n_estimators = UniformIntegerHyperparameter("n_estimators", 100, 1000, default_value=500, q=50) + num_leaves = UniformIntegerHyperparameter("num_leaves", 31, 2047, default_value=128) + max_depth = Constant('max_depth', 15) + learning_rate = UniformFloatHyperparameter("learning_rate", 1e-3, 0.3, default_value=0.1, log=True) + min_child_samples = UniformIntegerHyperparameter("min_child_samples", 5, 30, default_value=20) + subsample = UniformFloatHyperparameter("subsample", 0.7, 1, default_value=1, q=0.1) + colsample_bytree = UniformFloatHyperparameter("colsample_bytree", 0.7, 1, default_value=1, q=0.1) + cs.add_hyperparameters([n_estimators, num_leaves, max_depth, learning_rate, min_child_samples, subsample, + colsample_bytree]) + return cs + elif optimizer == 'tpe': + from hyperopt import hp + space = {'n_estimators': (hp.randint('lgb_n_estimators', 19) + 2) * 50, + 'num_leaves': hp.randint('lgb_num_leaves', 2017) + 31, + 'max_depth': 15, + 'learning_rate': hp.loguniform('lgb_learning_rate', np.log(1e-3), np.log(0.3)), + 'min_child_samples': hp.randint('lgb_min_child_samples', 26) + 5, + 'subsample': (hp.randint('lgb_subsample', 4) + 7) * 0.1, + 'colsample_bytree': (hp.randint('lgb_colsample_bytree', 4) + 7) * 0.1, + } + return space + elif optimizer == 'gpflowopt': + from gpflowopt.domain import ContinuousParameter + domain = ( + ContinuousParameter('n_estimators', 100, 1000) + + ContinuousParameter('num_leaves', 31, 2047) + + 
ContinuousParameter('max_depth', 15, 16) + + ContinuousParameter("learning_rate", 1e-3, 0.3) + + ContinuousParameter("min_child_samples", 5, 30) + + ContinuousParameter("subsample", 0.7, 1) + + ContinuousParameter("colsample_bytree", 0.7, 1) + ) + return domain + else: + raise ValueError('Unknown optimizer %s when getting configspace' % optimizer) + + +class svc(BaseSingleObjectiveProblem): + def __init__(self, **kwargs): + super().__init__(dim=8, **kwargs) + self.load_data(**kwargs) + self.bounds = None + + def evaluate_config(self, config, optimizer='smac'): + config_dict = self.get_config_dict(config, optimizer) + penalty = config_dict['penalty'] + loss = config_dict.get('loss', None) + dual = config_dict.get('dual', None) + C = config_dict['C'] + tol = config_dict['tol'] + fit_intercept = config_dict['fit_intercept'] + intercept_scaling = config_dict['intercept_scaling'] + if isinstance(penalty, dict): + combination = penalty + penalty = combination['penalty'] + loss = combination['loss'] + dual = combination['dual'] + + from sklearn.svm import LinearSVC + from sklearn.metrics.scorer import balanced_accuracy_scorer + if dual == 'True': + dual = True + elif dual == 'False': + dual = False + + svcc = LinearSVC(penalty=penalty, + loss=loss, + dual=dual, + tol=tol, + C=C, + fit_intercept=fit_intercept, + intercept_scaling=intercept_scaling, + multi_class='ovr', + random_state=1) + svcc.fit(self.train_x, self.train_y) + return -balanced_accuracy_scorer(svcc, self.val_x, self.val_y) + + def get_configspace(self, optimizer='smac'): + if optimizer == 'smac': + cs = ConfigurationSpace() + + penalty = CategoricalHyperparameter( + "penalty", ["l1", "l2"], default_value="l2") + loss = CategoricalHyperparameter( + "loss", ["hinge", "squared_hinge"], default_value="squared_hinge") + dual = CategoricalHyperparameter("dual", ['True', 'False'], default_value='True') + # This is set ad-hoc + tol = UniformFloatHyperparameter( + "tol", 1e-5, 1e-1, default_value=1e-4, log=True) + C = UniformFloatHyperparameter( + "C", 0.03125, 32768, log=True, default_value=1.0) + multi_class = Constant("multi_class", "ovr") + # These are set ad-hoc + fit_intercept = Constant("fit_intercept", "True") + intercept_scaling = Constant("intercept_scaling", 1) + cs.add_hyperparameters([penalty, loss, dual, tol, C, multi_class, + fit_intercept, intercept_scaling]) + + penalty_and_loss = ForbiddenAndConjunction( + ForbiddenEqualsClause(penalty, "l1"), + ForbiddenEqualsClause(loss, "hinge") + ) + constant_penalty_and_loss = ForbiddenAndConjunction( + ForbiddenEqualsClause(dual, "False"), + ForbiddenEqualsClause(penalty, "l2"), + ForbiddenEqualsClause(loss, "hinge") + ) + penalty_and_dual = ForbiddenAndConjunction( + ForbiddenEqualsClause(dual, "True"), + ForbiddenEqualsClause(penalty, "l1") + ) + cs.add_forbidden_clause(penalty_and_loss) + cs.add_forbidden_clause(constant_penalty_and_loss) + cs.add_forbidden_clause(penalty_and_dual) + return cs + elif optimizer == 'tpe': + from hyperopt import hp + space = {'penalty': hp.choice('liblinear_combination', + [{'penalty': "l1", 'loss': "squared_hinge", 'dual': "False"}, + {'penalty': "l2", 'loss': "hinge", 'dual': "True"}, + {'penalty': "l2", 'loss': "squared_hinge", 'dual': "True"}, + {'penalty': "l2", 'loss': "squared_hinge", 'dual': "False"}]), + 'loss': None, + 'dual': None, + 'tol': hp.loguniform('liblinear_tol', np.log(1e-5), np.log(1e-1)), + 'C': hp.loguniform('liblinear_C', np.log(0.03125), np.log(32768)), + 'multi_class': hp.choice('liblinear_multi_class', ["ovr"]), + 
'fit_intercept': hp.choice('liblinear_fit_intercept', ["True"]), + 'intercept_scaling': hp.choice('liblinear_intercept_scaling', [1])} + return space + else: + raise ValueError('Unknown optimizer %s when getting configspace' % optimizer) diff --git a/test/reproduction/soc/benchmark_soc_openbox_math.py b/test/reproduction/soc/benchmark_soc_openbox_math.py new file mode 100644 index 00000000..f4fbea7f --- /dev/null +++ b/test/reproduction/soc/benchmark_soc_openbox_math.py @@ -0,0 +1,126 @@ +""" +example cmdline: + +python test/reproduction/soc/benchmark_soc_openbox_math.py --problem mishra --n 100 --init 3 --rep 1 --start_id 0 + +""" +import os +NUM_THREADS = "2" +os.environ["OMP_NUM_THREADS"] = NUM_THREADS # export OMP_NUM_THREADS=1 +os.environ["OPENBLAS_NUM_THREADS"] = NUM_THREADS # export OPENBLAS_NUM_THREADS=1 +os.environ["MKL_NUM_THREADS"] = NUM_THREADS # export MKL_NUM_THREADS=1 +os.environ["VECLIB_MAXIMUM_THREADS"] = NUM_THREADS # export VECLIB_MAXIMUM_THREADS=1 +os.environ["NUMEXPR_NUM_THREADS"] = NUM_THREADS # export NUMEXPR_NUM_THREADS=1 + +import sys +import time +import numpy as np +import argparse +import pickle as pkl + +sys.path.insert(0, os.getcwd()) +from test.reproduction.soc.soc_benchmark_function import get_problem +from openbox import Optimizer +from test.reproduction.test_utils import timeit, seeds + +parser = argparse.ArgumentParser() +parser.add_argument('--problem', type=str, default='townsend') +parser.add_argument('--n', type=int, default=200) +parser.add_argument('--nc', type=int, default=1) +parser.add_argument('--init', type=int, default=3) +parser.add_argument('--init_strategy', type=str, default='random_explore_first') +parser.add_argument('--surrogate', type=str, default='auto', choices=['auto', 'gp']) +parser.add_argument('--optimizer', type=str, default='auto', choices=['auto', 'scipy', 'local']) +parser.add_argument('--rep', type=int, default=1) +parser.add_argument('--start_id', type=int, default=0) + +args = parser.parse_args() +problem_str = args.problem +max_runs = args.n +num_constraints = args.nc +initial_runs = args.init +init_strategy = args.init_strategy +surrogate_type = args.surrogate +if args.optimizer == 'scipy': + acq_optimizer_type = 'random_scipy' +elif args.optimizer == 'local': + acq_optimizer_type = 'local_random' +elif args.optimizer == 'auto': + acq_optimizer_type = 'auto' +else: + raise ValueError('Unknown optimizer %s' % args.optimizer) +rep = args.rep +start_id = args.start_id +mth = 'openbox' + +problem = get_problem(problem_str) +cs = problem.get_configspace(optimizer='smac') +max_runtime_per_trial = 600 +task_id = '%s_%s' % (mth, problem_str) + + +def evaluate(mth, run_i, seed): + print(mth, run_i, seed, '===== start =====', flush=True) + + def objective_function(config): + y = problem.evaluate_config(config) + return y + + bo = Optimizer( + objective_function, + cs, + num_constraints=num_constraints, + surrogate_type=surrogate_type, # default: auto: gp + acq_optimizer_type=acq_optimizer_type, # default: auto: random_scipy + initial_runs=initial_runs, # default: 3 + init_strategy=init_strategy, # default: random_explore_first + max_runs=max_runs + initial_runs, task_id=task_id, random_state=seed, + ) + + # bo.run() + config_list = [] + perf_list = [] + time_list = [] + global_start_time = time.time() + for i in range(max_runs): + observation = bo.iterate(bo.time_left) + config, trial_state, constraints, objectives = observation.config, observation.trial_state, observation.constraints, observation.objectives + global_time = 
time.time() - global_start_time + bo.time_left -= global_time + origin_perf = objectives[0] + if any(c > 0 for c in constraints): + perf = 9999999.0 + else: + perf = origin_perf + print(seed, i, perf, config, constraints, trial_state, 'time=', global_time) + config_list.append(config) + perf_list.append(perf) + time_list.append(global_time) + + history = bo.get_history() + + return config_list, perf_list, time_list, history + +if __name__ == '__main__': + with timeit('%s all' % (mth,)): + for run_i in range(start_id, start_id + rep): + seed = seeds[run_i] + with timeit('%s %d %d' % (mth, run_i, seed)): + # Evaluate + config_list, perf_list, time_list, history = evaluate(mth, run_i, seed) + + # Save result + print('=' * 20) + print(seed, mth, config_list, perf_list, time_list) + print(seed, mth, 'best perf', np.min(perf_list)) + + timestamp = time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime(time.time())) + dir_path = 'logs/soc_benchmark_%s_%d/%s/' % (problem_str, max_runs, mth) + file = 'benchmark_%s_%04d_%s.pkl' % (mth, seed, timestamp) + os.makedirs(dir_path, exist_ok=True) + with open(os.path.join(dir_path, file), 'wb') as f: + save_item = (config_list, perf_list, time_list) + pkl.dump(save_item, f) + print(dir_path, file, 'saved!', flush=True) + + history.save_json(os.path.join(dir_path, 'benchmark_%s_%04d_%s.json' % (mth, seed, timestamp))) diff --git a/test/reproduction/soc/soc_benchmark_function.py b/test/reproduction/soc/soc_benchmark_function.py new file mode 100644 index 00000000..6bcf3202 --- /dev/null +++ b/test/reproduction/soc/soc_benchmark_function.py @@ -0,0 +1,228 @@ +import numpy as np + +# from openbox.utils.config_space import ConfigurationSpace, UniformFloatHyperparameter, UniformIntegerHyperparameter, Constant +from ConfigSpace import ConfigurationSpace, UniformFloatHyperparameter, UniformIntegerHyperparameter, \ + Constant, CategoricalHyperparameter, InCondition, EqualsCondition, UnParametrizedHyperparameter, \ + ForbiddenEqualsClause, ForbiddenInClause, ForbiddenAndConjunction + + +def get_problem(problem_str, **kwargs): + # problem_str = problem_str.lower() # dataset name may be uppercase + if problem_str == 'townsend': + problem = townsend + elif problem_str == 'keane': + problem = keane + elif problem_str == 'ackley': + problem = ackley + elif problem_str == 'mishra': + problem = mishra + else: + raise ValueError('Unknown problem_str %s.' 
% problem_str) + return problem(**kwargs) + + +class BaseConstrainedSingleObjectiveProblem: + def __init__(self, dim, **kwargs): + self.dim = dim + + def evaluate_config(self, config, optimizer='smac'): + raise NotImplementedError + + def evaluate(self, X: np.ndarray): + raise NotImplementedError + + @staticmethod + def get_config_dict(config, optimizer='smac'): + if optimizer == 'smac': + config_dict = config.get_dictionary().copy() + elif optimizer in ['tpe', 'hypermapper']: + config_dict = config + else: + raise ValueError('Unknown optimizer %s' % optimizer) + return config_dict + + @staticmethod + def checkX(X: np.ndarray): + X = np.atleast_2d(X) + assert len(X.shape) == 2 and X.shape[0] == 1 + X = X.flatten() + return X + + def get_configspace(self, optimizer='smac'): + raise NotImplementedError + + +class keane(BaseConstrainedSingleObjectiveProblem): + def __init__(self, **kwargs): + super().__init__(dim=10, **kwargs) + self.lb = 0 + self.ub = 10 + self.bounds = [(self.lb, self.ub)] * self.dim + self.num_constraints = 2 + + def evaluate_config(self, config, optimizer='smac'): + config_dict = self.get_config_dict(config, optimizer) + X = np.array([config_dict['x%s' % i] for i in range(1, 10 + 1)]) + return self.evaluate(X) + + def evaluate(self, X: np.ndarray): + X = self.checkX(X) + result = dict() + cosX2 = np.cos(X) ** 2 + up = np.abs(np.sum(cosX2 ** 2) - 2 * np.prod(cosX2)) + down = np.sqrt(np.sum(np.arange(1, 10 + 1) * X ** 2)) + result['objectives'] = [-up / down, ] + result['constraints'] = [0.75 - np.prod(X), np.sum(X) - 7.5 * 10, ] + return result + + def get_configspace(self, optimizer='smac'): + if optimizer == 'smac': + cs = ConfigurationSpace() + cs.add_hyperparameters( + [UniformFloatHyperparameter("x%s" % i, self.lb, self.ub) for i in range(1, 1 + 10)]) + return cs + elif optimizer == 'gpflowopt': + import gpflowopt + domain = gpflowopt.domain.ContinuousParameter('x1', self.lb, self.ub) + \ + gpflowopt.domain.ContinuousParameter('x2', self.lb, self.ub) + \ + gpflowopt.domain.ContinuousParameter('x3', self.lb, self.ub) + \ + gpflowopt.domain.ContinuousParameter('x4', self.lb, self.ub) + \ + gpflowopt.domain.ContinuousParameter('x5', self.lb, self.ub) + \ + gpflowopt.domain.ContinuousParameter('x6', self.lb, self.ub) + \ + gpflowopt.domain.ContinuousParameter('x7', self.lb, self.ub) + \ + gpflowopt.domain.ContinuousParameter('x8', self.lb, self.ub) + \ + gpflowopt.domain.ContinuousParameter('x9', self.lb, self.ub) + \ + gpflowopt.domain.ContinuousParameter('x10', self.lb, self.ub) + return domain + elif optimizer == 'hypermapper': + input_parameters = {} + for i in range(1, 1 + 10): + input_parameters['x%d' % i] = { + "parameter_type": "real", + "values": [self.lb, self.ub] + } + return input_parameters + else: + raise ValueError('Unknown optimizer %s when getting configspace' % optimizer) + + +class ackley(BaseConstrainedSingleObjectiveProblem): + def __init__(self, lb=-5, ub=10, **kwargs): # -15, 30? 
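+        # note: the unconstrained Ackley in so_benchmark_function.py defaults to lb=-15, ub=30;
+        # this constrained variant uses the narrower default range lb=-5, ub=10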
+        super().__init__(dim=2, **kwargs)
+        self.lb = lb
+        self.ub = ub
+        self.bounds = [(self.lb, self.ub)] * self.dim
+        self.num_constraints = 1
+
+    def evaluate_config(self, config, optimizer='smac'):
+        config_dict = self.get_config_dict(config, optimizer)
+        x1 = config_dict['x1']
+        x2 = config_dict['x2']
+        X = np.array([x1, x2])
+        return self.evaluate(X)
+
+    def evaluate(self, X: np.ndarray):
+        X = self.checkX(X)
+        result = dict()
+        a = 20
+        b = 0.2
+        c = 2 * np.pi
+        t1 = -a * np.exp(-b * np.sqrt(np.mean(X ** 2)))
+        t2 = -np.exp(np.mean(np.cos(c * X)))
+        t3 = a + np.exp(1)
+        result['objectives'] = [t1 + t2 + t3, ]
+        result['constraints'] = [np.sign(np.sum(X)) + np.sign(np.sum(X ** 2) - 25) + 1.5, ]
+        return result
+
+    def get_configspace(self, optimizer='smac'):
+        if optimizer == 'smac':
+            cs = ConfigurationSpace()
+            cs.add_hyperparameters(
+                [UniformFloatHyperparameter("x%s" % i, self.lb, self.ub) for i in range(1, 1 + 2)])
+            return cs
+        elif optimizer == 'gpflowopt':
+            import gpflowopt
+            domain = gpflowopt.domain.ContinuousParameter('x1', self.lb, self.ub) + \
+                     gpflowopt.domain.ContinuousParameter('x2', self.lb, self.ub)
+            return domain
+        else:
+            raise ValueError('Unknown optimizer %s when getting configspace' % optimizer)
+
+
+class mishra(BaseConstrainedSingleObjectiveProblem):
+    def __init__(self, **kwargs):
+        super().__init__(dim=2, **kwargs)
+        self.lb = -2 * 3.14
+        self.ub = 2 * 3.14
+        self.bounds = [(self.lb, self.ub)] * self.dim
+        self.num_constraints = 1
+
+    def evaluate_config(self, config, optimizer='smac'):
+        config_dict = self.get_config_dict(config, optimizer)
+        x1 = config_dict['x1']
+        x2 = config_dict['x2']
+        X = np.array([x1, x2])
+        return self.evaluate(X)
+
+    def evaluate(self, X: np.ndarray):
+        X = self.checkX(X)
+        result = dict()
+        x, y = X[0], X[1]
+        t1 = np.sin(y) * np.exp((1 - np.cos(x)) ** 2)
+        t2 = np.cos(x) * np.exp((1 - np.sin(y)) ** 2)
+        t3 = (x - y) ** 2
+        result['objectives'] = (t1 + t2 + t3,)
+        result['constraints'] = ((X[0] + 5) ** 2 + (X[1] + 5) ** 2 - 25,)
+        return result
+
+    def get_configspace(self, optimizer='smac'):
+        if optimizer == 'smac':
+            cs = ConfigurationSpace()
+            cs.add_hyperparameters(
+                [UniformFloatHyperparameter("x%s" % i, self.lb, self.ub) for i in range(1, 1 + 2)])
+            return cs
+        elif optimizer == 'gpflowopt':
+            import gpflowopt
+            domain = gpflowopt.domain.ContinuousParameter('x1', self.lb, self.ub) + \
+                     gpflowopt.domain.ContinuousParameter('x2', self.lb, self.ub)
+            return domain
+        else:
+            raise ValueError('Unknown optimizer %s when getting configspace' % optimizer)
+
+
+class townsend(BaseConstrainedSingleObjectiveProblem):
+
+    def __init__(self, **kwargs):
+        super().__init__(dim=2, **kwargs)
+        self.bounds = [(-2.25, 2.5), (-2.5, 1.75)]
+        self.num_constraints = 1
+
+    def evaluate_config(self, config, optimizer='smac'):
+        config_dict = self.get_config_dict(config, optimizer)
+        x1 = config_dict['x1']
+        x2 = config_dict['x2']
+        X = np.array([x1, x2])
+        return self.evaluate(X)
+
+    def evaluate(self, X: np.ndarray):
+        X = self.checkX(X)
+        res = dict()
+        res['objectives'] = (-(np.cos((X[0] - 0.1) * X[1]) ** 2 + X[0] * np.sin(3 * X[0] + X[1])),)
+        res['constraints'] = (
+            -(-np.cos(1.5 * X[0] + np.pi) * np.cos(1.5 * X[1]) + np.sin(1.5 * X[0] + np.pi) * np.sin(1.5 * X[1])),)
+        return res
+
+    def get_configspace(self, optimizer='smac'):
+        if optimizer == 'smac':
+            cs = ConfigurationSpace()
+            x1 = UniformFloatHyperparameter("x1", -2.25, 2.5)
+            x2 = UniformFloatHyperparameter("x2", -2.5, 1.75)
+            cs.add_hyperparameters([x1, x2])
+            return cs
+        elif optimizer == 'gpflowopt':
+            import gpflowopt
+            domain = gpflowopt.domain.ContinuousParameter('x1', -2.25, 2.5) + \
+                     gpflowopt.domain.ContinuousParameter('x2', -2.5, 1.75)
+            return domain
+        else:
+            raise ValueError('Unknown optimizer %s when getting configspace' % optimizer)
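The classes above all follow the same contract: get_configspace() builds the search space for the chosen optimizer interface, and evaluate_config() returns a dict with 'objectives' (a single value to minimize) and 'constraints' (feasible iff every entry is <= 0, matching the penalty check in the benchmark script). A minimal usage sketch, assuming it is run from test/reproduction/soc/ so that soc_benchmark_function resolves by plain module name, and using the 'smac'-style ConfigSpace branch that OpenBox consumes:

from soc_benchmark_function import get_problem

# Any registered problem name works here: 'townsend', 'keane', 'ackley' or 'mishra'.
problem = get_problem('townsend')
cs = problem.get_configspace(optimizer='smac')
cs.seed(1)
config = cs.sample_configuration()

result = problem.evaluate_config(config, optimizer='smac')
print(result['objectives'])    # objective value to be minimized
print(result['constraints'])   # feasible iff every constraint value is <= 0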
diff --git a/test/reproduction/test_utils.py b/test/reproduction/test_utils.py
new file mode 100644
index 00000000..856296cb
--- /dev/null
+++ b/test/reproduction/test_utils.py
@@ -0,0 +1,133 @@
+import os
+import pandas as pd
+import numpy as np
+import contextlib
+import time
+
+
+seeds = [4774, 3711, 7238, 3203, 4254, 2137, 1188, 4356, 517, 5887,
+         9082, 4702, 4801, 8242, 7391, 1893, 4400, 1192, 5553, 9039]
+
+
+# timer tool
+@contextlib.contextmanager
+def timeit(name=''):
+    print("[%s]Start." % name, flush=True)
+    start = time.time()
+    yield
+    end = time.time()
+    m, s = divmod(end - start, 60)
+    h, m = divmod(m, 60)
+    print("[%s]Total time = %d hours, %d minutes, %d seconds." % (name, h, m, s), flush=True)
+
+
+def check_datasets(datasets, data_dir):
+    for _dataset in datasets:
+        try:
+            _ = load_data(_dataset, data_dir)
+        except Exception as e:
+            raise ValueError('Dataset - %s does not exist!' % _dataset) from e
+
+
+def load_data(dataset, data_dir):
+    """
+    todo: not finished: label encoding...
+    """
+    data_path = os.path.join(data_dir, "%s.csv" % dataset)
+
+    # Load train data.
+    if dataset in ['higgs', 'amazon_employee', 'spectf', 'usps', 'vehicle_sensIT', 'codrna']:
+        label_col = 0
+    elif dataset in ['rmftsa_sleepdata(1)']:
+        label_col = 1
+    else:
+        label_col = -1
+
+    if dataset in ['spambase', 'messidor_features']:
+        header = None
+    else:
+        header = 'infer'
+
+    if dataset in ['winequality_white', 'winequality_red']:
+        sep = ';'
+    else:
+        sep = ','
+
+    na_values = ["n/a", "na", "--", "-", "?"]
+    keep_default_na = True
+    df = pd.read_csv(data_path, keep_default_na=keep_default_na,
+                     na_values=na_values, header=header, sep=sep)
+
+    # Drop rows where all values are NaN.
+    df.dropna(how='all', inplace=True)
+
+    # Clean the data where the label columns have nans.
+    columns_missed = df.columns[df.isnull().any()].tolist()
+
+    label_colname = df.columns[label_col]
+
+    if label_colname in columns_missed:
+        labels = df[label_colname].values
+        row_idx = [idx for idx, val in enumerate(labels) if np.isnan(val)]
+        # Delete rows with NaN labels.
+        df.drop(df.index[row_idx], inplace=True)
+
+    train_y = df[label_colname].values
+
+    # Delete the label column.
+    df.drop(label_colname, axis=1, inplace=True)
+
+    train_X = df
+    return train_X, train_y
+
+
+# for plot
+def descending(x):
+    y = [x[0]]
+    for i in range(1, len(x)):
+        y.append(min(y[-1], x[i]))
+    return y
+
+
+def create_point(x, stats, default=0.0):
+    """
+    get the closest perf of time point x where timestamp < x
+    :param x:
+        the time point
+    :param stats:
+        list of func. func is tuple of timestamp list and perf list
+    :param default:
+        init value of perf
+    :return:
+        list of perf of funcs at time point x
+    """
+    perf_list = []
+    for func in stats:
+        timestamp, perf = func
+        last_p = default
+        for t, p in zip(timestamp, perf):
+            if t > x:
+                break
+            last_p = p
+        perf_list.append(last_p)
+    return perf_list
+
+
+def create_plot_points(stats, start_time, end_time, point_num=500):
+    """
+
+    :param stats:
+        list of func. func is tuple of timestamp list and perf list
+    :param start_time:
+    :param end_time:
+    :param point_num:
+    :return:
+    """
+    x = np.linspace(start_time, end_time, num=point_num)
+    _mean, _std = list(), list()
+    for i, stage in enumerate(x):
+        perf_list = create_point(stage, stats)
+        _mean.append(np.mean(perf_list))
+        _std.append(np.std(perf_list))
+    # Used to plot errorbar.
+    return x, np.array(_mean), np.array(_std)
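The helpers above are meant to be driven by a separate plotting script. A minimal sketch, assuming the .pkl files were produced by the soc benchmark script in this patch (each storing (config_list, perf_list, time_list), with time_list holding cumulative wall-clock time per trial), that it is run from the repository root, and that the 'logs/soc_benchmark_townsend_200/openbox/' path is adjusted to the problem, budget and method actually run:

import glob
import pickle as pkl

import matplotlib.pyplot as plt
import numpy as np

from test.reproduction.test_utils import descending, create_plot_points

# One (timestamps, best-so-far perf) pair per repetition/seed.
stats = []
for path in glob.glob('logs/soc_benchmark_townsend_200/openbox/*.pkl'):
    with open(path, 'rb') as f:
        config_list, perf_list, time_list = pkl.load(f)
    stats.append((time_list, descending(perf_list)))

end_time = max(timestamps[-1] for timestamps, _ in stats)
x, mean, std = create_plot_points(stats, 0, end_time, point_num=500)

plt.plot(x, mean, label='openbox')
plt.fill_between(x, mean - std, mean + std, alpha=0.2)
plt.xlabel('wall-clock time (s)')
plt.ylabel('best objective value')
plt.legend()
plt.show()

Note that create_point() falls back to its default value (0.0) for any repetition that has not finished a trial by a given time point, so the left end of the averaged curve is only meaningful once every run has produced at least one result.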