Skip to content

Commit

Permalink
Add metrics for leaked ENI cleanup routine (#328)
Browse files Browse the repository at this point in the history
Add metrics for leaked ENI cleanup routine
  • Loading branch information
sushrk authored Nov 2, 2023
1 parent c43b62a commit 1e82e2a
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 2 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ MAKEFILE_PATH = $(dir $(realpath -s $(firstword $(MAKEFILE_LIST))))
VERSION ?= $(GIT_VERSION)
IMAGE ?= $(REPO):$(VERSION)
BASE_IMAGE ?= public.ecr.aws/eks-distro-build-tooling/eks-distro-minimal-base-nonroot:latest.2
BUILD_IMAGE ?= public.ecr.aws/bitnami/golang:1.20.5
BUILD_IMAGE ?= public.ecr.aws/bitnami/golang:1.21.3
GOARCH ?= amd64
PLATFORM ?= linux/amd64

Expand Down
32 changes: 32 additions & 0 deletions pkg/aws/ec2/api/eni_cleanup.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ import (

"github.com/aws/amazon-vpc-resource-controller-k8s/pkg/config"
rcHealthz "github.com/aws/amazon-vpc-resource-controller-k8s/pkg/healthz"
"github.com/prometheus/client_golang/prometheus"
"golang.org/x/exp/slices"

"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/service/ec2"
Expand All @@ -39,6 +41,21 @@ type ENICleaner struct {
ctx context.Context
}

// vpcCniLeakedENICleanupCnt is the Prometheus counter for leaked ENIs that were
// created by VPC-CNI. The ENI cleanup loop increments it when an available
// ENI's owner tag carries the VPC-CNI value.
var vpcCniLeakedENICleanupCnt = prometheus.NewCounter(prometheus.CounterOpts{
	Name: "vpc_cni_created_leaked_eni_cleanup_count",
	Help: "The number of leaked ENIs created by VPC-CNI that is cleaned up by the controller",
})

// vpcrcLeakedENICleanupCnt is the Prometheus counter for leaked ENIs that were
// created by VPC-RC (this controller). The ENI cleanup loop increments it when
// an available ENI's owner tag carries the VPC-RC value.
var vpcrcLeakedENICleanupCnt = prometheus.NewCounter(prometheus.CounterOpts{
	Name: "vpc_rc_created_leaked_eni_cleanup_count",
	Help: "The number of leaked ENIs created by VPC-RC that is cleaned up by the controller",
})

func (e *ENICleaner) SetupWithManager(ctx context.Context, mgr ctrl.Manager, healthzHandler *rcHealthz.HealthzHandler) error {
e.clusterNameTagKey = fmt.Sprintf(config.ClusterNameTagKeyFormat, e.ClusterName)
e.availableENIs = make(map[string]struct{})
Expand Down Expand Up @@ -113,6 +130,21 @@ func (e *ENICleaner) cleanUpAvailableENIs() {

for _, networkInterface := range describeNetworkInterfaceOp.NetworkInterfaces {
if _, exists := e.availableENIs[*networkInterface.NetworkInterfaceId]; exists {
// Increment Prometheus metrics for number of leaked ENIs cleaned up
if tagIdx := slices.IndexFunc(networkInterface.TagSet, func(tag *ec2.Tag) bool {
return *tag.Key == config.NetworkInterfaceOwnerTagKey
}); tagIdx != -1 {
switch *networkInterface.TagSet[tagIdx].Value {
case config.NetworkInterfaceOwnerTagValue:
vpcrcLeakedENICleanupCnt.Inc()
case config.NetworkInterfaceOwnerVPCCNITagValue:
vpcCniLeakedENICleanupCnt.Inc()
default:
// We will not hit this case as we only filter for above two tag values, adding it for any future use cases
e.Log.Info("found available ENI not created by VPC-CNI/VPC-RC")
}
}

// The ENI in available state has been sitting for at least the eni clean up interval and it should
// be removed
_, err := e.EC2Wrapper.DeleteNetworkInterface(&ec2.DeleteNetworkInterfaceInput{
Expand Down
5 changes: 4 additions & 1 deletion pkg/aws/ec2/api/wrapper.go
Original file line number Diff line number Diff line change
Expand Up @@ -344,7 +344,10 @@ func prometheusRegister() {
ec2describeTrunkInterfaceAssociationAPIErrCnt,
ec2modifyNetworkInterfaceAttributeAPICallCnt,
ec2modifyNetworkInterfaceAttributeAPIErrCnt,
ec2APICallLatencies)
ec2APICallLatencies,
vpcCniLeakedENICleanupCnt,
vpcrcLeakedENICleanupCnt,
)

prometheusRegistered = true
}
Expand Down

0 comments on commit 1e82e2a

Please sign in to comment.