From 8a0d7eac6678e174145875368e41e5aadf9cd19e Mon Sep 17 00:00:00 2001 From: Michael Weibel Date: Mon, 7 Oct 2024 16:22:08 +0200 Subject: [PATCH] fix(machinepool): clear VirtualMachineProfile if no model/custom data update Scale up/down with MachinePool always updates the VM image model to use. This change sets the VirtualMachineProfile to nil when no change is necessary and ensures therefore less churn on scale up/downs leading to model updates which may require manual fixing Fixes some linting errors related to unnecessary params, too. --- azure/scope/machinepool.go | 26 ++-- azure/scope/machinepool_test.go | 149 +++++++++++++++++++++ azure/services/scalesets/scalesets_test.go | 4 +- azure/services/scalesets/spec.go | 12 ++ azure/services/scalesets/spec_test.go | 103 ++++++++++---- 5 files changed, 254 insertions(+), 40 deletions(-) diff --git a/azure/scope/machinepool.go b/azure/scope/machinepool.go index 611a6aa5c57..58d49c1c1dc 100644 --- a/azure/scope/machinepool.go +++ b/azure/scope/machinepool.go @@ -80,6 +80,7 @@ type ( capiMachinePoolPatchHelper *patch.Helper vmssState *azure.VMSS cache *MachinePoolCache + skuCache *resourceskus.Cache } // NodeStatus represents the status of a Kubernetes node. @@ -163,12 +164,15 @@ func (m *MachinePoolScope) InitMachinePoolCache(ctx context.Context) error { return err } - skuCache, err := resourceskus.GetCache(m, m.Location()) - if err != nil { - return err + if m.skuCache == nil { + skuCache, err := resourceskus.GetCache(m, m.Location()) + if err != nil { + return errors.Wrap(err, "failed to init resourceskus cache") + } + m.skuCache = skuCache } - m.cache.VMSKU, err = skuCache.Get(ctx, m.AzureMachinePool.Spec.Template.VMSize, resourceskus.VirtualMachines) + m.cache.VMSKU, err = m.skuCache.Get(ctx, m.AzureMachinePool.Spec.Template.VMSize, resourceskus.VirtualMachines) if err != nil { return errors.Wrapf(err, "failed to get VM SKU %s in compute api", m.AzureMachinePool.Spec.Template.VMSize) } @@ -221,10 +225,8 @@ func (m *MachinePoolScope) ScaleSetSpec(ctx context.Context) azure.ResourceSpecG } if m.cache != nil { - if m.HasReplicasExternallyManaged(ctx) { - spec.ShouldPatchCustomData = m.cache.HasBootstrapDataChanges - log.V(4).Info("has bootstrap data changed?", "shouldPatchCustomData", spec.ShouldPatchCustomData) - } + spec.ShouldPatchCustomData = m.cache.HasBootstrapDataChanges + log.V(4).Info("has bootstrap data changed?", "shouldPatchCustomData", spec.ShouldPatchCustomData) spec.VMSSExtensionSpecs = m.VMSSExtensionSpecs() spec.SKU = m.cache.VMSKU spec.VMImage = m.cache.VMImage @@ -705,11 +707,9 @@ func (m *MachinePoolScope) Close(ctx context.Context) error { if err := m.updateReplicasAndProviderIDs(ctx); err != nil { return errors.Wrap(err, "failed to update replicas and providerIDs") } - if m.HasReplicasExternallyManaged(ctx) { - if err := m.updateCustomDataHash(ctx); err != nil { - // ignore errors to calculating the custom data hash since it's not absolutely crucial. - log.V(4).Error(err, "unable to update custom data hash, ignoring.") - } + if err := m.updateCustomDataHash(ctx); err != nil { + // ignore errors to calculating the custom data hash since it's not absolutely crucial. + log.V(4).Error(err, "unable to update custom data hash, ignoring.") } } diff --git a/azure/scope/machinepool_test.go b/azure/scope/machinepool_test.go index 83d8b5d740c..f985d596da7 100644 --- a/azure/scope/machinepool_test.go +++ b/azure/scope/machinepool_test.go @@ -18,13 +18,17 @@ package scope import ( "context" + "crypto/sha256" + "encoding/base64" "fmt" + "io" "reflect" "testing" "time" "github.com/Azure/azure-sdk-for-go/sdk/azidentity" "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/authorization/armauthorization/v2" + "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v5" azureautorest "github.com/Azure/go-autorest/autorest/azure" "github.com/Azure/go-autorest/autorest/azure/auth" . "github.com/onsi/gomega" @@ -1576,3 +1580,148 @@ func TestMachinePoolScope_applyAzureMachinePoolMachines(t *testing.T) { }) } } + +func TestBootstrapDataChanges(t *testing.T) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + scheme := runtime.NewScheme() + _ = clusterv1.AddToScheme(scheme) + _ = infrav1.AddToScheme(scheme) + _ = infrav1exp.AddToScheme(scheme) + _ = corev1.AddToScheme(scheme) + + var ( + g = NewWithT(t) + mockCtrl = gomock.NewController(t) + cb = fake.NewClientBuilder().WithScheme(scheme) + cluster = &clusterv1.Cluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "cluster1", + Namespace: "default", + }, + Spec: clusterv1.ClusterSpec{ + InfrastructureRef: &corev1.ObjectReference{ + Name: "azCluster1", + }, + }, + Status: clusterv1.ClusterStatus{ + InfrastructureReady: true, + }, + } + azureCluster = &infrav1.AzureCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "azCluster1", + Namespace: "default", + }, + Spec: infrav1.AzureClusterSpec{ + AzureClusterClassSpec: infrav1.AzureClusterClassSpec{ + Location: "test", + }, + }, + } + mp = &expv1.MachinePool{ + ObjectMeta: metav1.ObjectMeta{ + Name: "mp1", + Namespace: "default", + OwnerReferences: []metav1.OwnerReference{ + { + Name: "cluster1", + Kind: "Cluster", + APIVersion: clusterv1.GroupVersion.String(), + }, + }, + }, + Spec: expv1.MachinePoolSpec{ + Template: clusterv1.MachineTemplateSpec{ + Spec: clusterv1.MachineSpec{ + Bootstrap: clusterv1.Bootstrap{ + DataSecretName: ptr.To("mp-secret"), + }, + Version: ptr.To("v1.31.0"), + }, + }, + }, + } + bootstrapData = "test" + bootstrapDataHash = sha256Hash(base64.StdEncoding.EncodeToString([]byte(bootstrapData))) + bootstrapSecret = corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "mp-secret", + Namespace: "default", + }, + Data: map[string][]byte{"value": []byte(bootstrapData)}, + } + amp = &infrav1exp.AzureMachinePool{ + ObjectMeta: metav1.ObjectMeta{ + Name: "amp1", + Namespace: "default", + OwnerReferences: []metav1.OwnerReference{ + { + Name: "mp1", + Kind: "MachinePool", + APIVersion: expv1.GroupVersion.String(), + }, + }, + Annotations: map[string]string{ + azure.CustomDataHashAnnotation: fmt.Sprintf("%x", bootstrapDataHash), + }, + }, + Spec: infrav1exp.AzureMachinePoolSpec{ + Template: infrav1exp.AzureMachinePoolMachineTemplate{ + Image: &infrav1.Image{}, + NetworkInterfaces: []infrav1.NetworkInterface{ + { + SubnetName: "test", + }, + }, + VMSize: "VM_SIZE", + }, + }, + } + vmssState = &azure.VMSS{} + ) + defer mockCtrl.Finish() + + s := &MachinePoolScope{ + client: cb. + WithObjects(&bootstrapSecret). + Build(), + ClusterScoper: &ClusterScope{ + Cluster: cluster, + AzureCluster: azureCluster, + }, + skuCache: resourceskus.NewStaticCache([]armcompute.ResourceSKU{ + { + Name: ptr.To("VM_SIZE"), + }, + }, "test"), + MachinePool: mp, + AzureMachinePool: amp, + vmssState: vmssState, + } + + g.Expect(s.InitMachinePoolCache(ctx)).NotTo(HaveOccurred()) + + spec := s.ScaleSetSpec(ctx) + sSpec := spec.(*scalesets.ScaleSetSpec) + g.Expect(sSpec.ShouldPatchCustomData).To(Equal(false)) + + amp.Annotations[azure.CustomDataHashAnnotation] = "old" + + // reset cache to be able to build up the cache again + s.cache = nil + g.Expect(s.InitMachinePoolCache(ctx)).NotTo(HaveOccurred()) + + spec = s.ScaleSetSpec(ctx) + sSpec = spec.(*scalesets.ScaleSetSpec) + g.Expect(sSpec.ShouldPatchCustomData).To(Equal(true)) +} + +func sha256Hash(text string) []byte { + h := sha256.New() + _, err := io.WriteString(h, text) + if err != nil { + panic(err) + } + return h.Sum(nil) +} diff --git a/azure/services/scalesets/scalesets_test.go b/azure/services/scalesets/scalesets_test.go index 459feb25b8d..c0b0ba4b1da 100644 --- a/azure/services/scalesets/scalesets_test.go +++ b/azure/services/scalesets/scalesets_test.go @@ -742,8 +742,8 @@ func newWindowsVMSSSpec() ScaleSetSpec { return vmss } -func newDefaultExistingVMSS(vmSize string) armcompute.VirtualMachineScaleSet { - vmss := newDefaultVMSS(vmSize) +func newDefaultExistingVMSS() armcompute.VirtualMachineScaleSet { + vmss := newDefaultVMSS("VM_SIZE") vmss.ID = ptr.To("subscriptions/1234/resourceGroups/my_resource_group/providers/Microsoft.Compute/virtualMachines/my-vm") return vmss } diff --git a/azure/services/scalesets/spec.go b/azure/services/scalesets/spec.go index c014d7db5c2..cc9c0c10898 100644 --- a/azure/services/scalesets/spec.go +++ b/azure/services/scalesets/spec.go @@ -92,6 +92,9 @@ func (s *ScaleSetSpec) OwnerResourceName() string { } func (s *ScaleSetSpec) existingParameters(ctx context.Context, existing interface{}) (parameters interface{}, err error) { + ctx, log, done := tele.StartSpanWithLogger(ctx, "scalesets.ScaleSetSpec.existingParameters") + defer done() + existingVMSS, ok := existing.(armcompute.VirtualMachineScaleSet) if !ok { return nil, errors.Errorf("%T is not an armcompute.VirtualMachineScaleSet", existing) @@ -131,6 +134,15 @@ func (s *ScaleSetSpec) existingParameters(ctx context.Context, existing interfac return nil, nil } + // if there are no model changes and no change in custom data, remove VirtualMachineProfile to avoid unnecessary VMSS model + // updates. + if !hasModelChanges && !s.ShouldPatchCustomData { + log.V(4).Info("removing virtual machine profile from parameters", "hasModelChanges", hasModelChanges, "shouldPatchCustomData", s.ShouldPatchCustomData) + vmss.Properties.VirtualMachineProfile = nil + } else { + log.V(4).Info("has changes, not removing virtual machine profile from parameters", "hasModelChanges", hasModelChanges, "shouldPatchCustomData", s.ShouldPatchCustomData) + } + return vmss, nil } diff --git a/azure/services/scalesets/spec_test.go b/azure/services/scalesets/spec_test.go index 361741dab7b..b8dc3e76891 100644 --- a/azure/services/scalesets/spec_test.go +++ b/azure/services/scalesets/spec_test.go @@ -32,26 +32,28 @@ import ( ) var ( - defaultSpec, defaultVMSS = getDefaultVMSS() - windowsSpec, windowsVMSS = getDefaultWindowsVMSS() - acceleratedNetworkingSpec, acceleratedNetworkingVMSS = getAcceleratedNetworkingVMSS() - customSubnetSpec, customSubnetVMSS = getCustomSubnetVMSS() - customNetworkingSpec, customNetworkingVMSS = getCustomNetworkingVMSS() - spotVMSpec, spotVMVMSS = getSpotVMVMSS() - ephemeralSpec, ephemeralVMSS = getEPHVMSSS() - resourceDiskSpec, resourceDiskVMSS = getResourceDiskVMSS() - evictionSpec, evictionVMSS = getEvictionPolicyVMSS() - maxPriceSpec, maxPriceVMSS = getMaxPriceVMSS() - encryptionSpec, encryptionVMSS = getEncryptionVMSS() - userIdentitySpec, userIdentityVMSS = getUserIdentityVMSS() - hostEncryptionSpec, hostEncryptionVMSS = getHostEncryptionVMSS() - hostEncryptionUnsupportedSpec = getHostEncryptionUnsupportedSpec() - ephemeralReadSpec, ephemeralReadVMSS = getEphemeralReadOnlyVMSS() - defaultExistingSpec, defaultExistingVMSS, defaultExistingVMSSClone = getExistingDefaultVMSS() - userManagedStorageAccountDiagnosticsSpec, userManagedStorageAccountDiagnosticsVMSS = getUserManagedAndStorageAcccountDiagnosticsVMSS() - managedDiagnosticsSpec, managedDiagnoisticsVMSS = getManagedDiagnosticsVMSS() - disabledDiagnosticsSpec, disabledDiagnosticsVMSS = getDisabledDiagnosticsVMSS() - nilDiagnosticsProfileSpec, nilDiagnosticsProfileVMSS = getNilDiagnosticsProfileVMSS() + defaultSpec, defaultVMSS = getDefaultVMSS() + windowsSpec, windowsVMSS = getDefaultWindowsVMSS() + acceleratedNetworkingSpec, acceleratedNetworkingVMSS = getAcceleratedNetworkingVMSS() + customSubnetSpec, customSubnetVMSS = getCustomSubnetVMSS() + customNetworkingSpec, customNetworkingVMSS = getCustomNetworkingVMSS() + spotVMSpec, spotVMVMSS = getSpotVMVMSS() + ephemeralSpec, ephemeralVMSS = getEPHVMSSS() + resourceDiskSpec, resourceDiskVMSS = getResourceDiskVMSS() + evictionSpec, evictionVMSS = getEvictionPolicyVMSS() + maxPriceSpec, maxPriceVMSS = getMaxPriceVMSS() + encryptionSpec, encryptionVMSS = getEncryptionVMSS() + userIdentitySpec, userIdentityVMSS = getUserIdentityVMSS() + hostEncryptionSpec, hostEncryptionVMSS = getHostEncryptionVMSS() + hostEncryptionUnsupportedSpec = getHostEncryptionUnsupportedSpec() + ephemeralReadSpec, ephemeralReadVMSS = getEphemeralReadOnlyVMSS() + defaultExistingSpec, defaultExistingVMSS, defaultExistingVMSSClone = getExistingDefaultVMSS() + defaultExistingSpecOnlyCapacityChange, defaultExistingVMSSOnlyCapacityChange, defaultExistingVMSSResultOnlyCapacityChange = getExistingDefaultVMSSOnlyCapacityChange() + defaultExistingSpecOnlyCapacityChangeWithCustomDataChange, defaultExistingVMSSOnlyCapacityChangeWithCustomDataChange, defaultExistingVMSSResultOnlyCapacityChangeWithCustomDataChange = getExistingDefaultVMSSOnlyCapacityChangeWithCustomDataChange() + userManagedStorageAccountDiagnosticsSpec, userManagedStorageAccountDiagnosticsVMSS = getUserManagedAndStorageAcccountDiagnosticsVMSS() + managedDiagnosticsSpec, managedDiagnoisticsVMSS = getManagedDiagnosticsVMSS() + disabledDiagnosticsSpec, disabledDiagnosticsVMSS = getDisabledDiagnosticsVMSS() + nilDiagnosticsProfileSpec, nilDiagnosticsProfileVMSS = getNilDiagnosticsProfileVMSS() ) func getDefaultVMSS() (ScaleSetSpec, armcompute.VirtualMachineScaleSet) { @@ -426,22 +428,57 @@ func getExistingDefaultVMSS() (s ScaleSetSpec, existing armcompute.VirtualMachin }, } - existingVMSS := newDefaultExistingVMSS("VM_SIZE") + existingVMSS := newDefaultExistingVMSS() existingVMSS.Properties.AdditionalCapabilities = &armcompute.AdditionalCapabilities{UltraSSDEnabled: ptr.To(true)} existingVMSS.SKU.Capacity = ptr.To[int64](2) - existingVMSS.Properties.AdditionalCapabilities = &armcompute.AdditionalCapabilities{UltraSSDEnabled: ptr.To(true)} - clone := newDefaultExistingVMSS("VM_SIZE") + clone := newDefaultExistingVMSS() clone.SKU.Capacity = ptr.To[int64](3) clone.Properties.AdditionalCapabilities = &armcompute.AdditionalCapabilities{UltraSSDEnabled: ptr.To(true)} clone.Properties.VirtualMachineProfile.NetworkProfile = nil clone.Properties.VirtualMachineProfile.StorageProfile.ImageReference.Version = ptr.To("2.0") - clone.Properties.VirtualMachineProfile.NetworkProfile = nil return spec, existingVMSS, clone } +func getExistingDefaultVMSSOnlyCapacityChange() (s ScaleSetSpec, existing armcompute.VirtualMachineScaleSet, result armcompute.VirtualMachineScaleSet) { + spec := newDefaultVMSSSpec() + spec.Capacity = 3 + + existingVMSS := newDefaultExistingVMSS() + + result = newDefaultExistingVMSS() + result.Properties.VirtualMachineProfile = nil + result.SKU.Capacity = ptr.To[int64](3) + + return spec, existingVMSS, result +} + +func getExistingDefaultVMSSOnlyCapacityChangeWithCustomDataChange() (s ScaleSetSpec, existing armcompute.VirtualMachineScaleSet, result armcompute.VirtualMachineScaleSet) { + spec := newDefaultVMSSSpec() + spec.Capacity = 3 + spec.DataDisks = append(spec.DataDisks, infrav1.DataDisk{ + NameSuffix: "my_disk_with_ultra_disks", + DiskSizeGB: 128, + Lun: ptr.To[int32](3), + ManagedDisk: &infrav1.ManagedDiskParameters{ + StorageAccountType: "UltraSSD_LRS", + }, + }) + spec.ShouldPatchCustomData = true + + existingVMSS := newDefaultExistingVMSS() + existingVMSS.Properties.AdditionalCapabilities = &armcompute.AdditionalCapabilities{UltraSSDEnabled: ptr.To(true)} + + result = newDefaultExistingVMSS() + result.SKU.Capacity = ptr.To[int64](3) + result.Properties.AdditionalCapabilities = &armcompute.AdditionalCapabilities{UltraSSDEnabled: ptr.To(true)} + result.Properties.VirtualMachineProfile.NetworkProfile = nil + + return spec, existingVMSS, result +} + func getUserManagedAndStorageAcccountDiagnosticsVMSS() (ScaleSetSpec, armcompute.VirtualMachineScaleSet) { storageURI := "https://fakeurl" spec := newDefaultVMSSSpec() @@ -711,6 +748,20 @@ func TestScaleSetParameters(t *testing.T) { expected: nilDiagnosticsProfileVMSS, expectedError: "", }, + { + name: "update for existing vmss with only capacity change", + spec: defaultExistingSpecOnlyCapacityChange, + existing: defaultExistingVMSSOnlyCapacityChange, + expected: defaultExistingVMSSResultOnlyCapacityChange, + expectedError: "", + }, + { + name: "update for existing vmss with only capacity change but should patch custom data", + spec: defaultExistingSpecOnlyCapacityChangeWithCustomDataChange, + existing: defaultExistingVMSSOnlyCapacityChangeWithCustomDataChange, + expected: defaultExistingVMSSResultOnlyCapacityChangeWithCustomDataChange, + expectedError: "", + }, } for _, tc := range testcases { tc := tc @@ -731,7 +782,9 @@ func TestScaleSetParameters(t *testing.T) { if !ok { t.Fatalf("expected type VirtualMachineScaleSet, got %T", param) } - result.Properties.VirtualMachineProfile.OSProfile.AdminPassword = nil // Override this field as it's randomly generated. We can't set anything in tc.expected to match it. + if result.Properties.VirtualMachineProfile != nil { + result.Properties.VirtualMachineProfile.OSProfile.AdminPassword = nil // Override this field as it's randomly generated. We can't set anything in tc.expected to match it. + } if !reflect.DeepEqual(tc.expected, result) { t.Errorf("Diff between actual result and expected result:\n%s", cmp.Diff(result, tc.expected))