Skip to content

Commit

Permalink
Merge pull request #1086 from klihub/fixes/relay-container-events
Browse files Browse the repository at this point in the history
release-0.9: pkg/cri: implement container event relaying.
  • Loading branch information
marquiz authored Jan 11, 2024
2 parents db28d55 + 5db9a45 commit 82ba7bc
Show file tree
Hide file tree
Showing 3 changed files with 113 additions and 14 deletions.
18 changes: 12 additions & 6 deletions pkg/cri/relay/relay.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ import (
"os"
"sync"

criv1 "k8s.io/cri-api/pkg/apis/runtime/v1"

"github.com/intel/cri-resource-manager/pkg/cri/client"
"github.com/intel/cri-resource-manager/pkg/cri/server"
logger "github.com/intel/cri-resource-manager/pkg/log"
Expand Down Expand Up @@ -60,19 +62,23 @@ type Relay interface {
// relay is the implementation of Relay.
type relay struct {
logger.Logger
sync.Mutex // hmm... do *we* need to be lockable, or the upper layer(s) ?
options Options // relay options
client client.Client // relay CRI client
server server.Server // relay CRI server
sync.Mutex
options Options // relay options
client client.Client // relay CRI client
server server.Server // relay CRI server

evtClient criv1.RuntimeService_GetContainerEventsClient
evtChans map[*criv1.GetEventsRequest]chan *criv1.ContainerEventResponse
}

// NewRelay creates a new relay instance.
func NewRelay(options Options) (Relay, error) {
var err error

r := &relay{
Logger: logger.NewLogger("cri/relay"),
options: options,
Logger: logger.NewLogger("cri/relay"),
options: options,
evtChans: map[*criv1.GetEventsRequest]chan *criv1.ContainerEventResponse{},
}

imageSocket := r.options.ImageSocket
Expand Down
100 changes: 96 additions & 4 deletions pkg/cri/relay/runtime-service.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@ package relay

import (
"context"
"fmt"
"time"

"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
criv1 "k8s.io/cri-api/pkg/apis/runtime/v1"

"github.com/intel/cri-resource-manager/pkg/dump"
Expand Down Expand Up @@ -188,8 +188,23 @@ func (r *relay) CheckpointContainer(ctx context.Context, req *criv1.CheckpointCo
return r.client.CheckpointContainer(ctx, req)
}

func (r *relay) GetContainerEvents(_ *criv1.GetEventsRequest, _ criv1.RuntimeService_GetContainerEventsServer) error {
return status.Errorf(codes.Unimplemented, "method GetContainerEvents not implemented")
func (r *relay) GetContainerEvents(req *criv1.GetEventsRequest, srv criv1.RuntimeService_GetContainerEventsServer) error {
evtC := r.addEventServer(req)

if err := r.startEventRelay(req); err != nil {
r.delEventServer(req)
return err
}

for evt := range evtC {
if err := srv.Send(evt); err != nil {
r.Errorf("failed to relay/send container event: %v", err)
r.delEventServer(req)
return err
}
}

return nil
}

func (r *relay) ListMetricDescriptors(ctx context.Context, req *criv1.ListMetricDescriptorsRequest) (*criv1.ListMetricDescriptorsResponse, error) {
Expand All @@ -206,3 +221,80 @@ func (r *relay) RuntimeConfig(ctx context.Context, req *criv1.RuntimeConfigReque
r.dump("RuntimeConfig", req)
return r.client.RuntimeConfig(ctx, req)
}

const (
eventRelayTimeout = 1 * time.Second
)

func (r *relay) addEventServer(req *criv1.GetEventsRequest) chan *criv1.ContainerEventResponse {
r.Lock()
defer r.Unlock()

evtC := make(chan *criv1.ContainerEventResponse, 128)
r.evtChans[req] = evtC

return evtC
}

func (r *relay) delEventServer(req *criv1.GetEventsRequest) chan *criv1.ContainerEventResponse {
r.Lock()
defer r.Unlock()

evtC := r.evtChans[req]
delete(r.evtChans, req)

return evtC
}

func (r *relay) startEventRelay(req *criv1.GetEventsRequest) error {
r.Lock()
defer r.Unlock()

if r.evtClient != nil {
return nil
}

c, err := r.client.GetContainerEvents(context.Background(), req)
if err != nil {
return fmt.Errorf("failed to create container event client: %w", err)
}

r.evtClient = c
go r.relayEvents()

return nil
}

func (r *relay) relayEvents() {
for {
evt, err := r.evtClient.Recv()
if err != nil {
r.Errorf("failed to relay/receive container event: %v", err)
}

r.Lock()

if err != nil {
for req, evtC := range r.evtChans {
delete(r.evtChans, req)
close(evtC)
}
r.evtClient = nil
} else {
for req, evtC := range r.evtChans {
select {
case evtC <- evt:
case _ = <-time.After(eventRelayTimeout):
delete(r.evtChans, req)
close(evtC)
}
}
}

r.Unlock()

if err != nil {
return
}
}
}
9 changes: 5 additions & 4 deletions pkg/cri/server/services.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,6 @@ import (

"go.opencensus.io/trace"
"google.golang.org/grpc"
grpccodes "google.golang.org/grpc/codes"
grpcstatus "google.golang.org/grpc/status"

criv1 "k8s.io/cri-api/pkg/apis/runtime/v1"
)
Expand Down Expand Up @@ -61,6 +59,7 @@ const (
updateRuntimeConfig = "UpdateRuntimeConfig"
status = "Status"
checkpointContainer = "CheckpointContainer"
getContainerEvents = "GetContainerEvents"
listMetricDescriptors = "ListMetricDescriptors"
listPodSandboxMetrics = "ListPodSandboxMetrics"
runtimeConfig = "RuntimeConfig"
Expand Down Expand Up @@ -501,8 +500,10 @@ func (s *server) CheckpointContainer(ctx context.Context, req *criv1.CheckpointC
return rsp.(*criv1.CheckpointContainerResponse), err
}

func (s *server) GetContainerEvents(_ *criv1.GetEventsRequest, _ criv1.RuntimeService_GetContainerEventsServer) error {
return grpcstatus.Errorf(grpccodes.Unimplemented, "GetContainerEvents not implemented")
func (s *server) GetContainerEvents(req *criv1.GetEventsRequest, srv criv1.RuntimeService_GetContainerEventsServer) error {
// TODO(klihub): interceptRequest is a unary interceptor. It can't handle streaming
// requests so for now we short-circuit the call to the server here.
return (*s.runtime).GetContainerEvents(req, srv)
}

func (s *server) ListMetricDescriptors(ctx context.Context, req *criv1.ListMetricDescriptorsRequest) (*criv1.ListMetricDescriptorsResponse, error) {
Expand Down

0 comments on commit 82ba7bc

Please sign in to comment.