Skip to content

Commit

Permalink
Refactor infra creation to improve the overall infra setup time (#1869)
Browse files Browse the repository at this point in the history
  • Loading branch information
dharaneeshvrd authored Aug 28, 2024
1 parent eea78e7 commit 3eb2b52
Show file tree
Hide file tree
Showing 2 changed files with 172 additions and 80 deletions.
20 changes: 10 additions & 10 deletions cloud/scope/powervs_cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -823,13 +823,13 @@ func (s *PowerVSClusterScope) createServiceInstance() (*resourcecontrollerv2.Res
func (s *PowerVSClusterScope) ReconcileNetwork() (bool, error) {
if s.GetDHCPServerID() != nil {
s.V(3).Info("DHCP server ID is set, fetching details", "id", s.GetDHCPServerID())
requeue, err := s.isDHCPServerActive()
active, err := s.isDHCPServerActive()
if err != nil {
return false, err
}
// if dhcp server exist and in active state, its assumed that dhcp network exist
// TODO(Phase 2): Verify that dhcp network is exist.
return requeue, nil
return active, nil
// TODO(karthik-k-n): If needed set dhcp status here
}
// check network exist in cloud
Expand All @@ -851,7 +851,7 @@ func (s *PowerVSClusterScope) ReconcileNetwork() (bool, error) {

s.Info("Created DHCP Server", "id", *dhcpServer)
s.SetStatus(infrav1beta2.ResourceTypeDHCPServer, infrav1beta2.ResourceReference{ID: dhcpServer, ControllerCreated: ptr.To(true)})
return true, nil
return false, nil
}

// checkNetwork checks the network exist in cloud.
Expand Down Expand Up @@ -913,25 +913,25 @@ func (s *PowerVSClusterScope) isDHCPServerActive() (bool, error) {
return false, err
}

requeue, err := s.checkDHCPServerStatus(*dhcpServer)
active, err := s.checkDHCPServerStatus(*dhcpServer)
if err != nil {
return false, err
}
return requeue, nil
return active, nil
}

// checkDHCPServerStatus checks the state of a DHCP server.
// If state is BUILD, true is returned indicating a requeue for reconciliation.
// If state is active, true is returned.
// In all other cases, it returns false.
func (s *PowerVSClusterScope) checkDHCPServerStatus(dhcpServer models.DHCPServerDetail) (bool, error) {
s.V(3).Info("Checking the status of DHCP server", "id", *dhcpServer.ID)
switch *dhcpServer.Status {
case string(infrav1beta2.DHCPServerStateActive):
s.V(3).Info("DHCP server is in active state")
return false, nil
return true, nil
case string(infrav1beta2.DHCPServerStateBuild):
s.V(3).Info("DHCP server is in build state")
return true, nil
return false, nil
case string(infrav1beta2.DHCPServerStateError):
return false, fmt.Errorf("DHCP server creation failed and is in error state")
}
Expand Down Expand Up @@ -1917,9 +1917,9 @@ func (s *PowerVSClusterScope) ReconcileLoadBalancers() (bool, error) {
}
s.Info("Created VPC load balancer", "id", loadBalancerStatus.ID)
s.SetLoadBalancerStatus(loadBalancer.Name, *loadBalancerStatus)
return true, nil
return false, nil
}
return false, nil
return true, nil
}

// checkLoadBalancerStatus checks the state of a VPC load balancer.
Expand Down
232 changes: 162 additions & 70 deletions controllers/ibmpowervscluster_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,12 @@ import (
"context"
"fmt"
"strings"
"sync"
"time"

"github.com/pkg/errors"

corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/meta"
"k8s.io/apimachinery/pkg/runtime"
Expand Down Expand Up @@ -114,6 +116,116 @@ func (r *IBMPowerVSClusterReconciler) Reconcile(ctx context.Context, req ctrl.Re
return r.reconcile(clusterScope)
}

type powerVSCluster struct {
cluster *infrav1beta2.IBMPowerVSCluster
mu sync.Mutex
}

type reconcileResult struct {
reconcile.Result
error
}

func (update *powerVSCluster) updateCondition(condition bool, conditionArgs ...interface{}) {
update.mu.Lock()
defer update.mu.Unlock()
if condition {
conditions.MarkTrue(update.cluster, conditionArgs[0].(capiv1beta1.ConditionType))
return
}

conditions.MarkFalse(update.cluster, conditionArgs[0].(capiv1beta1.ConditionType), conditionArgs[1].(string), conditionArgs[2].(capiv1beta1.ConditionSeverity), conditionArgs[3].(string), conditionArgs[4:]...)
}

func (r *IBMPowerVSClusterReconciler) reconcilePowerVSResources(clusterScope *scope.PowerVSClusterScope, powerVSCluster *powerVSCluster, ch chan reconcileResult, wg *sync.WaitGroup) {
defer wg.Done()
powerVSLog := clusterScope.WithName("powervs")
// reconcile PowerVS service instance
powerVSLog.Info("Reconciling PowerVS service instance")
if requeue, err := clusterScope.ReconcilePowerVSServiceInstance(); err != nil {
powerVSLog.Error(err, "failed to reconcile PowerVS service instance")
powerVSCluster.updateCondition(false, infrav1beta2.ServiceInstanceReadyCondition, infrav1beta2.ServiceInstanceReconciliationFailedReason, capiv1beta1.ConditionSeverityError, err.Error())
ch <- reconcileResult{reconcile.Result{}, err}
return
} else if requeue {
powerVSLog.Info("PowerVS service instance creation is pending, requeuing")
ch <- reconcileResult{reconcile.Result{Requeue: true}, nil}
return
}
powerVSCluster.updateCondition(true, infrav1beta2.ServiceInstanceReadyCondition)

clusterScope.IBMPowerVSClient.WithClients(powervs.ServiceOptions{CloudInstanceID: clusterScope.GetServiceInstanceID()})

// reconcile network
powerVSLog.Info("Reconciling network")
if dhcpServerActive, err := clusterScope.ReconcileNetwork(); err != nil {
powerVSLog.Error(err, "failed to reconcile PowerVS network")
powerVSCluster.updateCondition(false, infrav1beta2.NetworkReadyCondition, infrav1beta2.NetworkReconciliationFailedReason, capiv1beta1.ConditionSeverityError, err.Error())
ch <- reconcileResult{reconcile.Result{}, err}
return
} else if dhcpServerActive {
powerVSCluster.updateCondition(true, infrav1beta2.NetworkReadyCondition)
return
}
// Do not want to block the reconciliation of other resources like setting up TG and COS, so skipping the requeue and only logging the info.
powerVSLog.Info("PowerVS network creation is pending")
}

func (r *IBMPowerVSClusterReconciler) reconcileVPCResources(clusterScope *scope.PowerVSClusterScope, powerVSCluster *powerVSCluster, ch chan reconcileResult, wg *sync.WaitGroup) {
defer wg.Done()
vpcLog := clusterScope.WithName("vpc")
vpcLog.Info("Reconciling VPC")
if requeue, err := clusterScope.ReconcileVPC(); err != nil {
clusterScope.Error(err, "failed to reconcile VPC")
powerVSCluster.updateCondition(false, infrav1beta2.VPCReadyCondition, infrav1beta2.VPCReconciliationFailedReason, capiv1beta1.ConditionSeverityError, err.Error())
ch <- reconcileResult{reconcile.Result{}, err}
return
} else if requeue {
vpcLog.Info("VPC creation is pending, requeuing")
ch <- reconcileResult{reconcile.Result{Requeue: true}, nil}
return
}
powerVSCluster.updateCondition(true, infrav1beta2.VPCReadyCondition)

// reconcile VPC Subnet
vpcLog.Info("Reconciling VPC subnets")
if requeue, err := clusterScope.ReconcileVPCSubnets(); err != nil {
vpcLog.Error(err, "failed to reconcile VPC subnets")
powerVSCluster.updateCondition(false, infrav1beta2.VPCSubnetReadyCondition, infrav1beta2.VPCSubnetReconciliationFailedReason, capiv1beta1.ConditionSeverityError, err.Error())
ch <- reconcileResult{reconcile.Result{}, err}
return
} else if requeue {
vpcLog.Info("VPC subnet creation is pending, requeuing")
ch <- reconcileResult{reconcile.Result{Requeue: true}, nil}
return
}
powerVSCluster.updateCondition(true, infrav1beta2.VPCSubnetReadyCondition)

// reconcile VPC security group
vpcLog.Info("Reconciling VPC security group")
if err := clusterScope.ReconcileVPCSecurityGroups(); err != nil {
vpcLog.Error(err, "failed to reconcile VPC security groups")
powerVSCluster.updateCondition(false, infrav1beta2.VPCSecurityGroupReadyCondition, infrav1beta2.VPCSecurityGroupReconciliationFailedReason, capiv1beta1.ConditionSeverityError, err.Error())
ch <- reconcileResult{reconcile.Result{}, err}
return
}
powerVSCluster.updateCondition(true, infrav1beta2.VPCSecurityGroupReadyCondition)

// reconcile LoadBalancer
vpcLog.Info("Reconciling VPC load balancers")
if loadBalancerReady, err := clusterScope.ReconcileLoadBalancers(); err != nil {
vpcLog.Error(err, "failed to reconcile VPC load balancers")
powerVSCluster.updateCondition(false, infrav1beta2.LoadBalancerReadyCondition, infrav1beta2.LoadBalancerReconciliationFailedReason, capiv1beta1.ConditionSeverityError, err.Error())
ch <- reconcileResult{reconcile.Result{}, err}
return
} else if loadBalancerReady {
powerVSCluster.updateCondition(true, infrav1beta2.LoadBalancerReadyCondition)
return
}
// Do not want to block the reconciliation of other resources like setting up TG and COS, so skipping the requeue and only logging the info.
vpcLog.Info("VPC load balancer creation is pending")
}

func (r *IBMPowerVSClusterReconciler) reconcile(clusterScope *scope.PowerVSClusterScope) (ctrl.Result, error) { //nolint:gocyclo
if controllerutil.AddFinalizer(clusterScope.IBMPowerVSCluster, infrav1beta2.IBMPowerVSClusterFinalizer) {
return ctrl.Result{}, nil
Expand All @@ -140,116 +252,96 @@ func (r *IBMPowerVSClusterReconciler) reconcile(clusterScope *scope.PowerVSClust
return reconcile.Result{}, err
}

powerVSCluster := clusterScope.IBMPowerVSCluster
// reconcile PowerVS service instance
clusterScope.Info("Reconciling PowerVS service instance")
if requeue, err := clusterScope.ReconcilePowerVSServiceInstance(); err != nil {
clusterScope.Error(err, "failed to reconcile PowerVS service instance")
conditions.MarkFalse(powerVSCluster, infrav1beta2.ServiceInstanceReadyCondition, infrav1beta2.ServiceInstanceReconciliationFailedReason, capiv1beta1.ConditionSeverityError, err.Error())
return reconcile.Result{}, err
} else if requeue {
clusterScope.Info("PowerVS service instance creation is pending, requeuing")
return reconcile.Result{RequeueAfter: 1 * time.Minute}, nil
powerVSCluster := &powerVSCluster{
cluster: clusterScope.IBMPowerVSCluster,
}
conditions.MarkTrue(powerVSCluster, infrav1beta2.ServiceInstanceReadyCondition)

clusterScope.IBMPowerVSClient.WithClients(powervs.ServiceOptions{CloudInstanceID: clusterScope.GetServiceInstanceID()})
var wg sync.WaitGroup
ch := make(chan reconcileResult)

// reconcile network
clusterScope.Info("Reconciling network")
if requeue, err := clusterScope.ReconcileNetwork(); err != nil {
clusterScope.Error(err, "failed to reconcile PowerVS network")
conditions.MarkFalse(powerVSCluster, infrav1beta2.NetworkReadyCondition, infrav1beta2.NetworkReconciliationFailedReason, capiv1beta1.ConditionSeverityError, err.Error())
return reconcile.Result{}, err
} else if requeue {
clusterScope.Info("PowerVS network creation is pending, requeuing")
return reconcile.Result{RequeueAfter: 1 * time.Minute}, nil
}
conditions.MarkTrue(powerVSCluster, infrav1beta2.NetworkReadyCondition)
// reconcile PowerVS resources
wg.Add(1)
go r.reconcilePowerVSResources(clusterScope, powerVSCluster, ch, &wg)

// reconcile VPC
clusterScope.Info("Reconciling VPC")
if requeue, err := clusterScope.ReconcileVPC(); err != nil {
clusterScope.Error(err, "failed to reconcile VPC")
conditions.MarkFalse(powerVSCluster, infrav1beta2.VPCReadyCondition, infrav1beta2.VPCReconciliationFailedReason, capiv1beta1.ConditionSeverityError, err.Error())
return reconcile.Result{}, err
} else if requeue {
clusterScope.Info("VPC creation is pending, requeuing")
return reconcile.Result{RequeueAfter: 15 * time.Second}, nil
}
conditions.MarkTrue(powerVSCluster, infrav1beta2.VPCReadyCondition)
wg.Add(1)
go r.reconcileVPCResources(clusterScope, powerVSCluster, ch, &wg)

// reconcile VPC Subnet
clusterScope.Info("Reconciling VPC subnets")
if requeue, err := clusterScope.ReconcileVPCSubnets(); err != nil {
clusterScope.Error(err, "failed to reconcile VPC subnets")
conditions.MarkFalse(powerVSCluster, infrav1beta2.VPCSubnetReadyCondition, infrav1beta2.VPCSubnetReconciliationFailedReason, capiv1beta1.ConditionSeverityError, err.Error())
return reconcile.Result{}, err
} else if requeue {
clusterScope.Info("VPC subnet creation is pending, requeuing")
return reconcile.Result{RequeueAfter: 15 * time.Second}, nil
// wait for above reconcile to complete and close the channel
go func() {
wg.Wait()
close(ch)
}()

var requeue bool
var errList []error
// receive return values from the channel and decide the requeue
for val := range ch {
if val.Requeue {
requeue = true
}
if val.error != nil {
errList = append(errList, val.error)
}
}
conditions.MarkTrue(powerVSCluster, infrav1beta2.VPCSubnetReadyCondition)

// reconcile VPC security group
clusterScope.Info("Reconciling VPC security group")
if err := clusterScope.ReconcileVPCSecurityGroups(); err != nil {
clusterScope.Error(err, "failed to reconcile VPC security groups")
conditions.MarkFalse(powerVSCluster, infrav1beta2.VPCSecurityGroupReadyCondition, infrav1beta2.VPCSecurityGroupReconciliationFailedReason, capiv1beta1.ConditionSeverityError, err.Error())
return reconcile.Result{}, err
if requeue && len(errList) > 1 {
return ctrl.Result{RequeueAfter: 30 * time.Second}, kerrors.NewAggregate(errList)
} else if requeue {
return ctrl.Result{RequeueAfter: 30 * time.Second}, nil
} else if len(errList) > 1 {
return ctrl.Result{}, kerrors.NewAggregate(errList)
}
conditions.MarkTrue(powerVSCluster, infrav1beta2.VPCSecurityGroupReadyCondition)

// reconcile Transit Gateway
clusterScope.Info("Reconciling Transit Gateway")
if requeue, err := clusterScope.ReconcileTransitGateway(); err != nil {
clusterScope.Error(err, "failed to reconcile transit gateway")
conditions.MarkFalse(powerVSCluster, infrav1beta2.TransitGatewayReadyCondition, infrav1beta2.TransitGatewayReconciliationFailedReason, capiv1beta1.ConditionSeverityError, err.Error())
conditions.MarkFalse(powerVSCluster.cluster, infrav1beta2.TransitGatewayReadyCondition, infrav1beta2.TransitGatewayReconciliationFailedReason, capiv1beta1.ConditionSeverityError, err.Error())
return reconcile.Result{}, err
} else if requeue {
clusterScope.Info("Transit gateway creation is pending, requeuing")
return reconcile.Result{RequeueAfter: 1 * time.Minute}, nil
}
conditions.MarkTrue(powerVSCluster, infrav1beta2.TransitGatewayReadyCondition)

// reconcile LoadBalancer
clusterScope.Info("Reconciling VPC load balancers")
if requeue, err := clusterScope.ReconcileLoadBalancers(); err != nil {
clusterScope.Error(err, "failed to reconcile VPC load balancers")
conditions.MarkFalse(powerVSCluster, infrav1beta2.LoadBalancerReadyCondition, infrav1beta2.LoadBalancerReconciliationFailedReason, capiv1beta1.ConditionSeverityError, err.Error())
return reconcile.Result{}, err
} else if requeue {
clusterScope.Info("VPC load balancer creation is pending, requeuing")
return reconcile.Result{RequeueAfter: 1 * time.Minute}, nil
}
conditions.MarkTrue(powerVSCluster.cluster, infrav1beta2.TransitGatewayReadyCondition)

// reconcile COSInstance
if clusterScope.IBMPowerVSCluster.Spec.Ignition != nil {
clusterScope.Info("Reconciling COS service instance")
if err := clusterScope.ReconcileCOSInstance(); err != nil {
conditions.MarkFalse(powerVSCluster, infrav1beta2.COSInstanceReadyCondition, infrav1beta2.COSInstanceReconciliationFailedReason, capiv1beta1.ConditionSeverityError, err.Error())
conditions.MarkFalse(powerVSCluster.cluster, infrav1beta2.COSInstanceReadyCondition, infrav1beta2.COSInstanceReconciliationFailedReason, capiv1beta1.ConditionSeverityError, err.Error())
return reconcile.Result{}, err
}
conditions.MarkTrue(powerVSCluster, infrav1beta2.COSInstanceReadyCondition)
conditions.MarkTrue(powerVSCluster.cluster, infrav1beta2.COSInstanceReadyCondition)
}

var networkReady, loadBalancerReady bool
for _, cond := range clusterScope.IBMPowerVSCluster.Status.Conditions {
if cond.Type == infrav1beta2.NetworkReadyCondition && cond.Status == corev1.ConditionTrue {
networkReady = true
}
if cond.Type == infrav1beta2.LoadBalancerReadyCondition && cond.Status == corev1.ConditionTrue {
loadBalancerReady = true
}
}

if !networkReady || !loadBalancerReady {
clusterScope.Info("Network or LoadBalancer still not ready, requeuing")
return ctrl.Result{RequeueAfter: 30 * time.Second}, nil
}

// update cluster object with loadbalancer host
loadBalancer := clusterScope.PublicLoadBalancer()
if loadBalancer == nil {
return reconcile.Result{}, fmt.Errorf("failed to fetch public loadbalancer")
}
if clusterScope.GetLoadBalancerState(loadBalancer.Name) == nil || *clusterScope.GetLoadBalancerState(loadBalancer.Name) != infrav1beta2.VPCLoadBalancerStateActive {
clusterScope.Info("LoadBalancer state is not active")
return reconcile.Result{RequeueAfter: time.Minute}, nil
}

clusterScope.Info("Getting load balancer host")
hostName := clusterScope.GetLoadBalancerHostName(loadBalancer.Name)
if hostName == nil || *hostName == "" {
clusterScope.Info("LoadBalancer hostname is not yet available, requeuing")
return reconcile.Result{RequeueAfter: time.Minute}, nil
}
conditions.MarkTrue(powerVSCluster, infrav1beta2.LoadBalancerReadyCondition)

clusterScope.IBMPowerVSCluster.Spec.ControlPlaneEndpoint.Host = *clusterScope.GetLoadBalancerHostName(loadBalancer.Name)
clusterScope.IBMPowerVSCluster.Spec.ControlPlaneEndpoint.Port = clusterScope.APIServerPort()
Expand Down

0 comments on commit 3eb2b52

Please sign in to comment.