Introduction to Bayesian optimization
Real application in a particle accelerator
Tutorial by Dr. Andrea Santamaria Garcia and Chenran Xu
Download the repository¶
Once you have Git installed open your terminal, go to your desired directory, and type:
git clone https://github.com/machine-learning-tutorial/bayesian-optimization
cd bayesian-optimization
Or get the repository with direct download:
wget https://github.com/machine-learning-tutorial/bayesian-optimization/archive/refs/heads/main.zip
unzip main.zip
cd bayesian-optimization
Install dependencies¶
You need to install the dependencies before running the notebooks.
Using conda¶
If you don't have conda installed already and want to use conda for environment management, you can install Miniconda as described here.
Then run the following commands:
conda create -n bo-tutorial python=3.10
conda activate bo-tutorial
pip install -r requirements.txt
jupyter contrib nbextension install --user
jupyter nbextension enable varInspector/main
- After the tutorial you can remove your environment with
conda remove -n bo-tutorial --all
Using venv only¶
If you do not have conda installed, you can alternatively create the virtual environment with venv from the standard library:
python -m venv bo-tutorial
and activate it with source bo-tutorial/bin/activate. Then, install the packages with pip within the activated environment:
python -m pip install -r requirements.txt
jupyter contrib nbextension install --user
jupyter nbextension enable varInspector/main
Running the tutorial¶
You can start the notebook server from the terminal, and it will open a browser automatically:
jupyter notebook
Alternatively, you can use a supported editor to run the Jupyter notebooks, e.g. VS Code.
Imports and modules¶
from config import *
from importlib import reload
import time
import config
%matplotlib inline
Today!
In this tutorial notebook we will implement all the basic components of Bayesian optimization (BO) to find the global maximum of an unknown function, and see an example applied to accelerators.
Abbreviations used in this notebook
- BO: Bayesian optimization
- GP: Gaussian process
Jargon
These terms are used interchangeably:
- Objective, metric, target function: $f(x)$, an unknown (black-box) function whose value is to be optimized (here: maximized)
- Observation, function evaluation, function query, data point: $y=f(x)$, the value of the function $f$ at a particular value of $x$
- Feature, tuning parameter, "knob", dimension, actuator: $x_i = x_0, x_1, \dots$, the dimensions of your problem, i.e. the (possibly correlated) parameters the algorithm will vary
- Search space, bounds, optimization range: a (continuous) parameter space where the input parameters are allowed to be varied in the optimization
Part I: Bayesian Optimization Theory
What is Bayesian optimization?¶
- BO is a sequential algorithm for global optimization of an unknown function.
- It uses Bayes’ theorem to update the posterior belief on the objective function and direct the optimization steps.
image from Bayesian optimization and other search algorithms
Bayes' Theorem
Bayes' theorem is used for statistical inference, i.e. the process of using data analysis to infer properties of an underlying probability distribution, or in other words, the process of drawing conclusions from data subject to random variations. Bayes' theorem reads:
$$ P(A|B) = \frac{P(B|A)\,P(A)}{P(B)} $$
image from Bayes' Rule – Explained For Beginners
- $A, B$ are events, where $A$ can be interpreted as our hypothesis and $B$ as the observed evidence
- $P(A|B)$ is the posterior probability of event $A$ happening given that event $B$ is observed, or the probability that the hypothesis is true given the evidence.
- $P(B|A)$ is the likelihood, i.e. the conditional probability of observing $B$ given $A$, or the probability of seeing the evidence if the hypothesis is true.
- $P(A), P(B)$ are the independent probabilities of observing $A$ and $B$, where:
  - $P(A)$ is known as the prior probability, the probability that the hypothesis is true before any evidence (observation) is present
  - $P(B)$ is known as the marginal probability, the probability of observing the evidence
Bayes' Theorem
Usually, when we apply Bayesian optimization to a problem of our choice, we make sure that when we query $f(x)$ we always get an observation $y$ back, so the probability of getting new samples is always 100%, i.e. $P(B) = 1$.
Thus the Bayes' theorem reads $$P(A|B) \propto P(B|A) P(A),$$
i.e. the posterior probability is proportional to the prior times the likelihood probabilities.
Bayes’ theorem – a classical example: the medical test
- $A$: some rare disease that only 1% of people have: $P(A) = 1\%$
- $B$: the medical test shows positive.
  - If one has disease $A$, the test has a 90% probability of showing positive: $P(B|A) = 90\%$
  - If one does not have disease $A$, the test has a 90% probability of showing negative: $P(B|\sim A) = 10\%$
  - The overall probability of the test showing positive is $P(B) = 10.8\%$:
$$ P(B) = P(B|A)P(A) + P(B|\sim A) P(\sim A) = 0.9 \times 0.01 + 0.1 \times 0.99 = 0.108 $$
- Now if one did the test, and the result is positive: 😨
- What is the probability that one actually has the disease?
$$ P(A|B) = \frac{P(B|A)P(A)}{P(B)} = \frac{90\% \times 1\%}{10.8\%} = 8.3\% $$
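We can quickly verify this arithmetic in Python (a small check of the numbers above):
# Numeric check of the medical-test example
p_A = 0.01             # prior: probability of having the disease
p_B_given_A = 0.90     # probability of a positive test given the disease
p_B_given_notA = 0.10  # probability of a positive test without the disease
p_B = p_B_given_A * p_A + p_B_given_notA * (1 - p_A)  # marginal: 0.108
p_A_given_B = p_B_given_A * p_A / p_B                 # posterior
print(f"P(B) = {p_B:.3f}, P(A|B) = {p_A_given_B:.3f}")  # P(B) = 0.108, P(A|B) = 0.083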
Motivation
We want to solve the global optimization problem:
$$ \max_{x \in \mathcal{X}} f(x) $$
where $\mathcal{X}$ is the search space, i.e. the bounds within which the input parameters are allowed to vary.
Why use it?
Bayesian inference links the degree of belief in a hypothesis before and after accounting for evidence (observations), so it's ideal for building a probabilistic model and updating it sequentially as new data is gathered:
image from Bayes’ theorem
When to use it?
- Unknown objective: $f$ is not necessarily concave and no derivative information is available, so gradient-based methods cannot be applied.
- Expensive evaluation: it takes a lot of time or has a monetary cost, number of function evaluations needs to be minimized.
- Continuous: the objective function can be approximated by a surrogate model.
- Moderate number of inputs: dimension $d$ of input $x$ is not too high, usually, $d \leq 20$
BO algorithm
Bayesian optimization (BO) is a sequential algorithm to globally optimize an unknown function $f(x)$. In other words, it solves problems of the form
$$ \max_{x \in \mathcal{X}} f(x) $$
where $\mathcal{X}$ is the search space.
The steps taken by a BO algorithm are the following:
- Bayesian optimization treats $f(x)$ as a random function and places a prior over it. The prior might contain previous observations (informed prior) or no information at all. These probability distributions are drawn from a probabilistic model like Gaussian Processes (more information here).
- We gather new observations (getting the value of $f(x)$ at particular $x$ points).
- The posterior distribution is obtained by applying Bayes' theorem.
- The posterior distribution is used to build an acquisition function that determines the next query point (more information here).
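To make these steps concrete, below is a minimal, self-contained sketch of such a loop on a 1D toy objective, using the same BoTorch components that are introduced step by step later in this notebook (the settings here are illustrative, not the tutorial's exact configuration):
import torch
from botorch.models import SingleTaskGP
from botorch.fit import fit_gpytorch_mll
from botorch.acquisition import UpperConfidenceBound
from botorch.optim import optimize_acqf
from gpytorch.mlls import ExactMarginalLogLikelihood

def f(x):  # the "unknown" objective (here we can peek: a sine)
    return torch.sin(2 * torch.pi * x)

X = torch.rand(3, 1, dtype=torch.double)  # a few initial observations
Y = f(X)
for _ in range(10):
    model = SingleTaskGP(X, Y)  # steps 1 & 3: GP prior, posterior given the data
    fit_gpytorch_mll(ExactMarginalLogLikelihood(model.likelihood, model))
    acq = UpperConfidenceBound(model, beta=2.0)  # step 4: acquisition function
    x_next, _ = optimize_acqf(acq, bounds=torch.tensor([[0.0], [1.0]], dtype=torch.double),
                              q=1, num_restarts=8, raw_samples=64)
    X = torch.cat([X, x_next])  # step 2: gather a new observation
    Y = torch.cat([Y, f(x_next)])
print(f"best x = {X[Y.argmax()].item():.3f}, best y = {Y.max().item():.3f}")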
BO algorithm
BO components explained: Gaussian process (GP)
Introduction
A Gaussian Process (GP):
- is a way to construct your prior and posterior distributions.
- is a collection of random variables, where every finite linear combination of them is normally distributed.
- can be used as a probabilistic model (surrogate model) of the objective function $f(x)$.
- can be thought of as a function generator, where all functions drawn from the model follow specific statistical properties.
A GP can be fully described by its mean $\mu$ and covariance function $k(\cdot,\cdot)$:
$$ f(x) \sim \mathcal{GP}(\mu(x), k(x,x')) $$
where $x$ and $x'$ are points in the input space.
Note: in GP regression, one usually understands $x'$ as the observed points, and $x$ as the continuous variable
BO components explained: Gaussian process (GP)
Definitions
Kernel or covariance function $k(\cdot,\cdot)$, is just a general name for a function $k$ of two arguments mapping a pair of inputs $x \in \mathcal{X}, x' \in \mathcal{X}$ into $\mathbb{R}$
- is the basic building block of GPs
- encodes the assumptions about the function we wish to learn
- is a measure of how much two random variables (features) vary together (a measure of similarity)
Covariance matrix : given a set of points $\{x_i\}$ we can compute the covariance matrix $K$ whose entries are $K_{ij}=k(x_i, x_j)$, where $k$ is the covariance function.
Prior mean $\mu(x)$: prior belief on the averaged objective function values, usually set to a constant if the function behavior is unknown.
Gaussian process regression (GPR), also known as kriging: a method to interpolate an unknown objective function.
BO components explained: Gaussian process (GP)
Covariance function (kernel): Radial basis function
It is also known as the squared exponential (SE) kernel, since it resembles an (unnormalized) Gaussian distribution. It is more or less the default choice of kernel for GPs if one does not have special assumptions about the objective function.
$$ k_{RBF}(x, x') = \exp{\left(-\frac{1}{2}\frac{|x-x'|^2}{l^2}\right)} $$
- $k \rightarrow 0$ for distant $x, x'$
- Lengthscale $l$: controls the scaling of the input (how far the GP can extrapolate from observed points).
Some of the other commonly used kernels can be found here.
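As a minimal sketch of this formula (plain torch for 1D inputs; later we will use the equivalent GPyTorch RBFKernel class instead):
import torch

def rbf_kernel(x1, x2, lengthscale=1.0):
    # Plain RBF (squared exponential) kernel matrix K[i, j] = k(x1[i], x2[j])
    sq_dist = (x1.reshape(-1, 1) - x2.reshape(1, -1)) ** 2  # pairwise squared distances
    return torch.exp(-0.5 * sq_dist / lengthscale**2)

x = torch.linspace(0, 1, 5)
K = rbf_kernel(x, x, lengthscale=0.2)
print(K[0])  # k -> 1 for identical points, k -> 0 for distant points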
BO components explained: Gaussian process (GP)
Covariance function (kernel): Radial basis function
Functions sampled from the GP with different lengthscales $l$.
- Large $l$: slow varying functions.
- Small $l$: fast oscillating functions.
BO components explained: Gaussian process (GP)
Covariance function (kernel): Radial basis function
The characteristics of a GP, i.e. its ability to approximate the unknown function, depend both on the choice of the covariance function and on the values of its hyperparameters. The hyperparameters are usually either chosen manually based on the physics, or obtained from a maximum (log-)likelihood or maximum a posteriori fit during the optimization.
$$ k_{RBF}(x, x') = \sigma^2\exp{\left(-\frac{1}{2}\frac{|x-x'|^2}{l^2}\right)} + \sigma^2_n\,\delta_{x,x'} $$
- Lengthscale $l$: controls the scaling of the different input dimensions, i.e. how fast the objective function is expected to change away from observed points (how smooth the function is).
- small lengthscale = function values can change quickly
- large lengthscale = function changes slowly
- Signal variance $\sigma^2$: a scaling factor multiplied with the kernel function; it is essentially equivalent to normalizing/scaling the objective function.
- small signal variance = functions stay close to their mean value
- Noise variance $\sigma_\mathrm{n}^2$: magnitude of the noise in the observed values, how much noise is expected to be present in the data.
BO components explained: Gaussian process (GP)
Posterior
A GP posterior distribution can be calculated after observing the objective function at sampled points.
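For reference, with noisy observations $y$ at points $X$, prior mean $\mu$, covariance matrix $K$ (as defined above), and noise variance $\sigma_n^2$, the standard GP regression posterior mean and variance at a new point $x$ are:
$$ \mu_\text{post}(x) = \mu(x) + k(x, X)\left[K + \sigma_n^2 I\right]^{-1}\left(y - \mu(X)\right) $$
$$ \sigma^2_\text{post}(x) = k(x, x) - k(x, X)\left[K + \sigma_n^2 I\right]^{-1} k(X, x) $$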
BO components explained: Gaussian process (GP)
Bonus: would an arbitrary function of input pairs $x$ and $x'$ be a valid covariance function?
The answer is no: a valid covariance function has to be positive semidefinite, so that every covariance matrix $K$ built from it is a valid covariance matrix of a Gaussian distribution. More information and a geometrical interpretation here.
BO components explained: acquisition function
The acquisition function $\alpha$:
- is built on the GP posterior,
- controls the behavior of the optimization by balancing exploration and exploitation, so as to minimize the number of function queries (observations).
- For the standard version of BO, the next sample point is chosen at $\mathrm{argmax}(\alpha)$.
- In this tutorial we will introduce two widely used acquisition functions: the expected improvement (EI) and the upper confidence bound (UCB).
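For reference, their standard definitions are (with $f^+$ denoting the best objective value observed so far, and $\mu$, $\sigma$ the GP posterior mean and standard deviation):
$$ \alpha_\text{EI}(x) = \mathbb{E}\left[\max\left(f(x) - f^+,\, 0\right)\right], \qquad \alpha_\text{UCB}(x) = \mu(x) + \sqrt{\beta}\,\sigma(x) $$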
BO components explained: acquisition function
Caveats of Bayesian optimization
Assumption of a static problem, which is not (always) the case in real life: dynamic systems, drifting components, ...
- Contextual optimization
- Adaptive optimization: spatio-temporal kernel, ...
Scaling poorly to high-dimensional problems
- Combining local-searches, assumption of low-dimensional structure, ...
- Choosing meaningful priors: include domain knowledge, NN-surrogate model as prior
Part II: Bayesian Optimization Algorithm Implementation
BoTorch: Bayesian Optimization in PyTorch
- BoTorch is the leading Python library for Bayesian optimization. It is built on top of PyTorch, an optimized tensor library for deep learning on GPUs and CPUs.
- A tensor is an algebraic object that may map between different objects such as vectors, scalars, and even other tensors. It can be easily understood as a multidimensional matrix/array.
- These objects make it easy to carry out machine learning computations in problems with many features, weights, etc.
- In PyTorch, a tensor is a multi-dimensional matrix containing elements of a single data type.
image from Working with PyTorch tensors
BoTorch: Bayesian Optimization in PyTorch
- Accelerated computing = when we add extra hardware to accelerate computation, like GPUs (needed in deep machine learning).
- GPU: many "not-so-intelligent" cores that are parallelizable. They can carry out specific operations in a very efficient way, e.g. tensor cores perform very efficient sparse tensor multiplication.
- We will be working with torch tensors in this notebook instead of the usual NumPy arrays! This means you could execute this code on a GPU, if you have access to one, simply by setting the device with torch.device("cuda").
Build an objective function and create some random observations
Let's assume we want to find the global maximum of this function:
$$f(x) = \sin (2 \pi x) + \epsilon$$
Let's start by setting a random seed for reproducibility:
random_seed = 3
rng = torch.random.manual_seed(random_seed)
Create some observation points with noise
xmin, xmax = 0, 1
observations_tot = 10
noise_level = 0.2 # add some white noise to observations
observations_x = torch.rand(observations_tot, 1, generator=rng) * (xmax - xmin) + xmin
observations_y = (torch.sin(observations_x * 2 * np.pi) +
torch.randn(size=observations_x.size(), generator=rng) * noise_level)
Create the objective function (200 samples)
samples = 200
objective_x = torch.linspace(xmin, xmax, samples).reshape(-1, 1)
objective_y = torch.sin(objective_x * 2 * np.pi)
test_X = torch.linspace(xmin, xmax, samples)
plt.plot(observations_x, observations_y, "*", markersize=12, color='black', label="Observations (data)")
plt.plot(objective_x, objective_y, color='orange', label="Objective function (unknown)")
plt.xlabel("X feature")
plt.ylabel("Y target")
plt.legend()
Build your first Gaussian process model
For the covariance function (kernel), we choose a scaled radial basis function (RBF) kernel $$K_\text{Scaled-RBF} (\mathbf{x_1},\mathbf{x_2})= \theta_\text{scale} \exp\left( - \frac{1}{2}(\mathbf{x_1} - \mathbf{x_2})^\top \Sigma^{-2} (\mathbf{x_1} - \mathbf{x_2}) \right),$$
where $\theta_\text{scale}$ is the outputscale parameter and $\Sigma$ is the lengthscale matrix; in the simple case it is diagonal, with one lengthscale per input dimension.
Cf. the GPyTorch documentation for more details.
Define the desired kernel, in this case RBF:
kernel = ScaleKernel(RBFKernel())
Build your GP model with previous observations and selected kernel:
model = SingleTaskGP(train_X=observations_x, train_Y=observations_y, covar_module=kernel)
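If you want to inspect the model directly, here is a small sketch of how to query its posterior mean and uncertainty at the test points (the plotting helpers used in the next cells do this internally):
# Query the GP posterior at the test points
with torch.no_grad():
    post = model.posterior(test_X.reshape(-1, 1))
    mean = post.mean.squeeze()            # posterior mean mu(x)
    std = post.variance.sqrt().squeeze()  # posterior standard deviation sigma(x)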
Hyperparameters
Change the hyperparameters below to the following:
- lengthscale: 0.5
- signal variance: 0.5
- model noise: 0.5
# You can change the GP hyperparameters here
model.covar_module.base_kernel.lengthscale = # fill here!
model.covar_module.outputscale = # fill here! # signal variance
model.likelihood.noise_covar.noise = # fill here!
Visualization of the prior of the GP model
The GP just has a constant mean with some uncertainty.
# Visualize the prior
ax = sample_gp_prior_plot(model, test_X)
ax.set_ylim(-2.5, 2.5);
How do the hyperparameter values affect the drawn samples from the model?
$\implies$ Change the GP hyperparameters that you set before, in the previous cells.
$\implies$ How do you predict that the lengthscale, signal variance, and model noise will affect the shape of the samples? Test your theories!
Visualization of the posterior of the GP model
- With the observations we generated previously we can build the posterior distribution
- Observe the current status of our statistical model compared to our objective function
# You can change the GP hyperparameters here again
model.covar_module.base_kernel.lengthscale = 0.1
model.covar_module.outputscale = 0.5 # signal variance
model.likelihood.noise_covar.noise = 0.02
# Visualize the posterior
sample_gp_posterior_plot(model, test_X, y_lim=(-2,2), n_samples=0, show_true_f=True,
true_f_x= objective_x, true_f_y=objective_y);
Change the number of samples visualized
$\implies$ Do this by changing the value of the n_samples argument of the sample_gp_posterior_plot function in the cell above
$\implies$ Can you understand better how the posterior is built?
Change the hyperparameters again to fit the data by hand (two cells above)
$\implies$ Avoid under- or overfitting
$\implies$ The GP mean should fit the data points as well as possible
We will save the hyperparameters you found for comparison later:
manual_lengthscale = float(model.covar_module.base_kernel.lengthscale)
manual_signal_variance = float(model.covar_module.outputscale)
manual_model_noise = float(model.likelihood.noise_covar.noise)
Guide your hyperparameter setting!
- One can use prior knowledge (experience, archive data, ...) to constrain or even fix the Gaussian process hyperparameters.
- Another approach is to dynamically adapt / fit the hyperparameters to the data.
In BoTorch there is a convenient helper function fit_gpytorch_mll to fit the hyperparameters of the Gaussian process model to the data using a marginal log-likelihood fit.
- In this approach, the hyperparameters are varied until the likelihood is maximized
In the cell below, the ExactMarginalLogLikelihood function takes a likelihood object as an argument, which is an attribute of the model we defined before:
mll = ExactMarginalLogLikelihood(model.likelihood, model)
Now let's perform the fit of the hyperparameters:
fit_gpytorch_mll(mll);  # carries out the fit
# fit_gpytorch_model(mll);  # deprecated name in older BoTorch versions
# Show the results
sample_gp_posterior_plot(model, test_X, y_lim=(-2,2), n_samples=0,
show_true_f=True, true_f_x= objective_x, true_f_y=objective_y);
Now the GP model nicely fits the data!
$\implies$ What causes the model to not follow the true objective function exactly?
Let's compare manual and automatically fitted hyperparameters
$\implies$ Execute the cell below
$\implies$ Are the values very different?
print('Manual hyperparameters')
print('- Lengthscale: ', manual_lengthscale)
print('- Signal variance: ', manual_signal_variance)
print('- Model noise: ', manual_model_noise)
print('')
print('Fitted hyperparameters')
print('- Lengthscale: ', np.round(float(model.covar_module.base_kernel.lengthscale), 2))
print('- Signal variance: ', np.round(float(model.covar_module.outputscale), 2))
print('- Model noise: ', np.round(float(model.likelihood.noise_covar.noise), 2))
Build an acquisition function
In Bayesian optimization, one uses an acquisition function to measure how interesting it would be to sample the function $f$ at a point $x$.
The acquisition function $\alpha$ is built based on the GP posterior, i.e. a probabilistic surrogate model of the underlying function.
BoTorch has implemented a variety of common acquisition functions, see documentation.
In this tutorial we will use the Upper Confidence Bound (UCB) function.
$$ \alpha_\text{UCB} = \mu (x) + \sqrt{\beta} \sigma(x),$$
where $\mu$ and $\sigma$ are the GP posterior mean and standard deviation respectively.
$\implies$ $\beta$ is a hyperparameter controlling the exploration-exploitation trade-off
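For intuition, $\alpha_\text{UCB}$ can also be computed by hand from the GP posterior (a small sketch; the BoTorch UpperConfidenceBound class used below does this internally):
beta = 2.0
with torch.no_grad():
    post = model.posterior(test_X.reshape(-1, 1))
    ucb_manual = (post.mean + np.sqrt(beta) * post.variance.sqrt()).squeeze()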
Explore how the UCB acquisition function behaves
$\implies$ Change the beta argument of the UpperConfidenceBound function in the cell below
$\implies$ What does a larger beta yield?
acq_UCB = UpperConfidenceBound(model, beta=100)
plot_acq_with_gp(model, observations_x, observations_y, acq_UCB, test_X, show_true_f=True,
true_f_x= objective_x, true_f_y=objective_y)
Try other acquisition functions
$\implies$ Try expected improvement and probability of improvement by executing the cells below
acq_EI = ExpectedImprovement(model, best_f=float(model.train_targets.max()))
plot_acq_with_gp(model, observations_x, observations_y, acq_EI, test_X, show_true_f=True,
true_f_x= objective_x, true_f_y=objective_y)
acq_PI = ProbabilityOfImprovement(model, best_f=float(model.train_targets.max()))
plot_acq_with_gp(model, observations_x, observations_y, acq_PI, test_X, show_true_f=True,
true_f_x= objective_x, true_f_y=objective_y)
What are the different sampling strategies of different acquisition functions?
$\implies$ Which one is closer to finding the global maximum?
Bonus: Bayesian exploration
- Instead of doing optimization, we can tweak the acquisition function to only learn about the objective function
- For example, instead of using $\mu(x) + \sqrt{\beta}\sigma(x)$ as in UCB, we can choose the acquisition to be $\alpha(x)= \sigma(x)$.
- In this way, we will only sample the function where the uncertainty is large.
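One way to sketch this in BoTorch is a tiny custom analytic acquisition function (an assumption: this subclassing pattern follows the BoTorch API as I know it; newer BoTorch releases also ship a ready-made PosteriorStandardDeviation acquisition):
from botorch.acquisition.analytic import AnalyticAcquisitionFunction
from botorch.utils.transforms import t_batch_mode_transform

class PureExploration(AnalyticAcquisitionFunction):
    # alpha(x) = sigma(x): sample where the model is most uncertain
    @t_batch_mode_transform(expected_q=1)
    def forward(self, X):
        posterior = self.model.posterior(X)
        return posterior.variance.sqrt().squeeze(-1).squeeze(-1)

# Usage, analogous to the UCB cell above:
# acq_explore = PureExploration(model)
# plot_acq_with_gp(model, observations_x, observations_y, acq_explore, test_X)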
Part III: Bayesian Optimization for Beam Positioning and Focusing in a Linac
Beam positioning and focusing task at ARES Experimental Area (EA)
We would like to focus and center the electron beam on a diagnostic screen using 2 corrector and 3 quadrupole magnets.
Look at the ARESEA environment
We formulated the ARESEA task as an OpenAI Gym environment, which is a common approach in reinforcement learning projects. This allows our algorithm to easily interface with both the simulation and real machine backends.
In this part, you will get familiar with the environment for the beam focusing and positioning at ARES accelerator.
First, let's create the environment.
# Create the environment
env = ARESEA()
# Wrap the environment with some utilities:
env = RescaleAction(env, -1, 1) # Normalize the action space to [-1,1]^n
env.reset()
Get familiar with the Gym environment
$\implies$ Change the magnet values, i.e. the actions
$\implies$ The actions are normalized to 1, so valid values are in the [-1, 1] interval
$\implies$ The values of the action list in the cell below follow this magnet order: [Q1, Q2, CV, Q3, CH]
$\implies$ Observe the plot below: what beam does that magnet configuration yield? Can you center and focus the beam by hand?
action = # fill here
action = np.array(action) # [Q1, Q2, CV, Q3, CH]
Perform one step: update the env, observe new beam!
env.reset()
observation, reward, done, info = env.step(action)
fig = plt.figure()
fig.set_size_inches(16, 4)
ax = plt.Axes(fig, [0.0, 0.0, 1.0, 1.0])
ax.set_axis_off()
fig.add_axes(ax)
img = env.render(mode="rgb_array")
ax.imshow(img)
Take several steps in the environment
$\implies$ Run the cell below, which will perform a linear scan of the values corresponding to the vertical corrector CV
$\implies$ What influence does CV have on the beam?
env.reset()
steps = 10
for i in range(steps):
    env.step(np.array([0.2, -0.2, -0.5 + 1 / steps * i, 0.3, 0]))
    img = env.render(mode="rgb_array")
    ax.imshow(img)
    display(fig)
    clear_output(wait=True)
    time.sleep(0.5)
Set a target beam you want to achieve
$\implies$ Let's define the position $(\mu_x, \mu_y)$ and size $(\sigma_x, \sigma_y)$ of the beam on the screen
$\implies$ Modify the target_beam list below, where the order of the arguments is $[\mu_x,\sigma_x,\mu_y,\sigma_y]$
$\implies$ Take into account the dimensions of the screen ($\pm$ 2e-3 m)
$\implies$ The target beam will be represented by a green circle on the screen
target_beam = [0, 1e-4, 0, 1e-4]  # fill here!
env = ARESEA(target_beam_mode="constant",target_beam_values=target_beam,
magnet_init_mode="constant", magnet_init_values=[15,-5,1e-3,5,2e-3])
env = RescaleAction(env, -1, 1) # Normalize the action space to [-1,1]^n
observation, _ = env.reset()
env.render()
Implementation of a full Bayesian optimization loop
def bayesian_optimize(env: gym.Env, last_observation, init_mode="current", n_steps=50,
                      acquisition="UCB", beta=2, n_init=5, max_step_size: float = 0.5,
                      set_to_best=True, random_seed=None, show_plot=False,
                      proximal=None, time_sleep=0.2):
    ##################################################################
    # Some preliminary settings
    if random_seed is None:
        random_seed = torch.random.seed()  # random
    rng = torch.random.manual_seed(random_seed)
    # Initialization: some initial samples are needed to build a GP model
    # First sample from the reset observation
    initial_action = scale_action(env, last_observation)
    if init_mode == "current":
        X = torch.tensor([initial_action], dtype=torch.double).reshape(1, -1)
        while len(X) < n_init:
            # sample randomly within max_step_size around the initial action
            last_action = X[0].detach().numpy()
            bounds = get_new_bound(env, last_action, max_step_size)
            new_action = np.random.uniform(low=bounds[0], high=bounds[1])
            new_action_tensor = torch.tensor(new_action, dtype=torch.double).reshape(1, -1)
            X = torch.cat([X, new_action_tensor])
    else:  # sample purely randomly
        X = torch.tensor([], dtype=torch.double)
        while len(X) < n_init:
            new_action = env.action_space.sample()
            new_action_tensor = torch.tensor(new_action, dtype=torch.double).reshape(1, -1)
            X = torch.cat([X, new_action_tensor])
    Y = torch.zeros(n_init, 1, dtype=torch.double)
    # sample initial points
    for i, x in enumerate(X):
        _, reward, _, _ = env.step(x.numpy())
        Y[i] = reward
    if show_plot:
        fig = plt.figure()
        fig.set_size_inches(16, 4)
        ax_progress = plt.Axes(fig, [0.0, 0.0, 0.25, 1.0])
        ax = plt.Axes(fig, [0.25, 0.0, 1.0, 1.0])
        ax.set_axis_off()
        fig.add_axes(ax_progress)
        fig.add_axes(ax)
        ax_progress.set_xlabel("Steps")
        ax_progress.set_ylabel(r"log(MAE($b_\mathrm{current}, b_\mathrm{target}$))")
        ax_progress.set_title(f"Best objective: {float(Y.max())}")
    ##################################################################
    # Actual BO logic
    for i in range(n_steps):
        # Fit GP model to the observed data
        kernel = ScaleKernel(MaternKernel())
        model = SingleTaskGP(X, Y, covar_module=kernel, outcome_transform=Standardize(m=1))
        # model.likelihood.noise = 1e-2
        # model.likelihood.noise_covar.raw_noise.requires_grad_(False)
        mll = ExactMarginalLogLikelihood(model.likelihood, model)
        fit_gpytorch_mll(mll)  # use fit_gpytorch_model(mll) with older BoTorch versions
        # Build acquisition
        if acquisition == "UCB":
            acq = UpperConfidenceBound(model, beta=beta)
        elif acquisition == "EI":
            ymax = float(Y.max())
            acq = ExpectedImprovement(model, best_f=ymax)
        if proximal is not None:
            acq = ProximalAcquisitionFunction(acq, proximal_weights=proximal)
        # Choose next action by maximizing the acquisition within the step-size bounds
        new_bound = get_new_bound(env, X[-1].detach().numpy(), max_step_size)
        x_next, _ = optimize_acqf(acq, bounds=torch.tensor(new_bound), q=1,
                                  num_restarts=16, raw_samples=256,
                                  options={"maxiter": 200})
        # Apply the action
        observation, reward, done, _ = env.step(x_next.numpy().flatten())
        # Append data (with correct shape and dtype)
        Y = torch.cat([Y, torch.tensor([[reward]], dtype=torch.double)])
        X = torch.cat([X, x_next.reshape(1, -1)])
        # Plotting
        if show_plot:
            img = env.render(mode="rgb_array")
            ax.imshow(img)
            ax_progress.clear()
            ax_progress.plot(Y.detach().numpy().flatten())
            ax_progress.set_title(f"Best objective: {float(Y.max()):.2f}")
            ax_progress.set_xlabel("Steps")
            ax_progress.set_ylabel(r"log(MAE($b_\mathrm{current}, b_\mathrm{target}$))")
            display(fig)
            clear_output(wait=True)
            time.sleep(time_sleep)
        # Check termination
        if done:
            print("Target beam is reached")
            set_to_best = False  # no need to reset
            break
    # Set to best observed if not reaching target in the allowed steps
    if set_to_best:
        x_best = X[Y.flatten().argmax()].numpy()
        env.step(x_best)
        # Plotting
        if show_plot:
            img = env.render(mode="rgb_array")
            ax.imshow(img)
            display(fig)
            clear_output(wait=True)
            time.sleep(time_sleep)
    # Return some information
    opt_info = {
        "X": X,
        "Y": Y,
        "best": Y.max(),
    }
    return opt_info
Let's apply Bayesian optimization to this problem
- We will use the loop implemented in the cell above
- In order to quantify how the algorithm is performing, we will use the negative log mean absolute error (L1 error) as the metric (see the small sketch after this list):
$$\begin{aligned} f(x) &= -\log(\mathrm{MAE}(b_\mathrm{current},b_\mathrm{target})) \\ &= - \log \frac{1}{N} \sum_i |b_{\mathrm{current},i}-b_{\mathrm{target},i}| \end{aligned}$$
- We take the logarithm because the raw difference between the target and the actual beam parameters is very small (sub-mm)
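A hypothetical helper mirroring this metric (in the tutorial, the ARESEA environment computes this reward internally from the observed beam parameters):
import numpy as np

def log_mae_objective(beam_current, beam_target):
    # negative log of the mean absolute error between beam parameter vectors
    mae = np.mean(np.abs(np.asarray(beam_current) - np.asarray(beam_target)))
    return float(-np.log(mae))

# e.g. log_mae_objective([1e-4, 2e-4, 0.0, 1e-4], [0.0, 1e-4, 0.0, 1e-4])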
Running Bayesian optimization
$\implies$ Run the cell below and observe the optimization. Did it achieve the target? How did the performance metric evolve during the optimization?
$\implies$ Change the max_step_size
argument. This indicates how much each action changes per step. What do you observe if the steps are too large?
$\implies$ Do you think that adding more iteration steps will help?
$\implies$ Can you think of other ways of solving this focusing and steering problem automatically?
env = ARESEA(target_beam_mode="constant",target_beam_values=target_beam, magnet_init_mode="constant",
magnet_init_values=[15, -5, 1e-3, 5, 2e-3])
env = RescaleAction(env, -1, 1) # Normalize the action space to [-1,1]^n
observation, _ = env.reset()
opt_info = bayesian_optimize(env, last_observation=observation, n_steps=50, max_step_size=0.1,
show_plot=True, beta=0.2, time_sleep=0.1)
# Another advanced technique, "proximal biasing", uses a soft step-size limit
# opt_info = bayesian_optimize(env, last_observation=observation,n_steps=50,max_step_size=1,
# show_plot=True, beta=0.2, proximal=torch.ones(5)*0.5)
Exploration and exploitation with acquisition functions
$\implies$ Change the beta
argument of the bayesian_optimize
function in the cell below to 2. What is different in this optimization compared to the previous one? How does the performance metric evolve?
$\implies$ Use a different acquisition function, like expected improvement "EI"
env = ARESEA(target_beam_mode="constant",target_beam_values=target_beam, magnet_init_mode="constant",
magnet_init_values=[15, -5, 1e-3, 5, 2e-3])
env = RescaleAction(env, -1, 1) # Normalize the action space to [-1,1]^n
observation, _ = env.reset()
### Change the value of beta, see how it impacts the optimization process;
### Or switch to another acquisition
beta = 2.0
acquisition = "UCB"
opt_info = bayesian_optimize(env, observation, n_steps=50, acquisition=acquisition, beta=beta,
max_step_size=0.3, show_plot=True, time_sleep=0.05)
Summary¶
Bayesian Optimization is a numerical global optimization method for black-box functions.
BO uses a Gaussian process as a statistical surrogate model of the objective.
- GP is characterized by its mean and covariance (kernel) function.
- Hyperparameters can be dynamically fitted to the data.
- Prior knowledge can be included for setting the priors.
BO uses the acquisition function to guide the optimization
- One should choose an acquisition suitable for the task.
- Hyperparameters of the acquisition function also affect the optimization behaviour.
BO is an optimization method. It is not designed for a control task.
It is adequate for a moderate number of dimensions (~100)
Literature¶
Hopefully you have learned something about BO. If you want to try it yourself afterwards, below are some interesting resources.
Publication: Various applications of BO in accelerator physics¶
- Review of BO applications in accelerator physics: Bayesian Optimization Algorithms for Accelerator Physics
- KARA, KIT: Bayesian Optimization of the Beam Injection Process into a Storage Ring: beam injection optimization using BO.
- LCLS, SLAC: Bayesian optimization of FEL performance at LCLS, Bayesian Optimization of a Free-Electron Laser: FEL performance tuning with quadrupoles.
- LUX, DESY: Bayesian Optimization of a Laser-Plasma Accelerator: LPA tuning to improve bunch quality via laser energy, focus position, and gas flows.
- Central Laser Facility, Rutherford, UK: Automation and control of laser wakefield accelerators using Bayesian optimization: LWFA performance tuning with different objective functions.
- PSI, SwissFEL: Tuning particle accelerators with safety constraints using Bayesian optimization, Adaptive and Safe Bayesian Optimization in High Dimensions via One-Dimensional Subspaces: BO with safety constraints to protect the machine.
- SLAC, ANL: Multiobjective Bayesian optimization for online accelerator tuning: multiobjective optimization for accelerator tuning. Turn-key constrained parameter space exploration for particle accelerators using Bayesian active learning: Bayesian active learning for efficient exploration of the parameter space. Differentiable Preisach Modeling for Characterization and Optimization of Particle Accelerator Systems with Hysteresis: hysteresis modelling with GPs, and application of hysteresis-aware BO.
Books and papers on Bayesian optimization in general¶
- C. E. Rasmussen and C. K. I. Williams, Gaussian Processes for Machine Learning: the classic textbook on Gaussian processes.
- Eric Brochu, A Tutorial on Bayesian Optimization of Expensive Cost Functions, with Application to Active User Modeling and Hierarchical Reinforcement Learning: a comprehensive tutorial on Bayesian optimization with some application cases at that time (2010).
- Peter I. Frazier, A Tutorial on Bayesian Optimization: a more recent (2018) tutorial paper covering the most important aspects of BO, and some advanced variants of BO (parallel, multi-fidelity, multi-task).
Bayesian Optimization / Gaussian Process packages in python¶
Below is an incomplete selection of Python packages for BO and GP, each with its own strengths and drawbacks.
- scikit-learn Gaussian processes: recommended for sklearn users, but not as powerful as the other packages.
- GPyTorch: a relatively new package implemented natively in PyTorch, which makes it very performant. It also comes with a Bayesian optimization package, BoTorch, offering a variety of optimization methods (multi-objective, parallelization, ...). Both packages are actively developed and maintained; recommended as the state-of-the-art tool for BO practitioners.
- GPflow: a GP package implemented in TensorFlow; it also has a large community and is actively maintained. The new BO package Trieste is built on it.
- GPy from the Sheffield ML group: a common/classic choice for building GP models, including many advanced GP variants; however, in recent years it has not been actively maintained. It comes with the accompanying Bayesian optimization package GPyOpt, whose maintenance stopped in 2020.
- Dragonfly: an open-source BO package that also offers a command-line tool, making it easy to use for practitioners. However, one has less freedom to adapt and extend the code.
Cf. the Wikipedia page for a more comprehensive table of GP packages for other languages.
Other Resources¶
- Another similarly structured BO tutorial using the scikit-learn GP package, given at the 2022 MT ARD ST3 ML Workshop