Skip to content

Commit

Permalink
feat(interactive): support adding a store pod as a backup (#3703)
Browse files Browse the repository at this point in the history
  • Loading branch information
siyuan0322 authored Apr 11, 2024
1 parent d0e9dca commit 60e3e31
Show file tree
Hide file tree
Showing 16 changed files with 362 additions and 32 deletions.
1 change: 0 additions & 1 deletion charts/graphscope-store-one-pod/templates/configmap.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,6 @@ data:
pegasus.timeout={{ .Values.pegasus.timeout }}
pegasus.batch.size=1024
pegasus.output.capacity=16
pegasus.hosts=localhost:8080
## Kafka Config
kafka.servers=KAFKA_SERVERS
Expand Down
8 changes: 8 additions & 0 deletions charts/graphscope-store-one-pod/templates/statefulset.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,14 @@ spec:
{{- if .Values.store.resources }}
resources: {{- toYaml .Values.store.resources | nindent 12 }}
{{- end }}
readinessProbe:
failureThreshold: 5
tcpSocket:
port: gaia-rpc
initialDelaySeconds: 30
periodSeconds: 30
successThreshold: 1
timeoutSeconds: 1
volumeMounts:
- name: data
mountPath: {{ .Values.storeDataPath }}
Expand Down
7 changes: 7 additions & 0 deletions charts/graphscope-store/templates/_helpers.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -157,3 +157,10 @@ Get full broker list.
{{- end }}
{{- join "," $brokerList | printf "%s" -}}
{{- end -}}
{{/*
Create a default fully qualified zookeeper name of the form "<release name>-zookeeper".
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
A trailing "-" left over from the truncation is removed.
NOTE(review): the statefulset templates append "-headless" to this value after it has been
truncated, so the resulting ZK_CONNECT host can exceed 63 chars for long release names - confirm.
NOTE(review): presumably this has to match the name produced by the zookeeper subchart - verify.
*/}}
{{- define "graphscope-store.zookeeper.fullname" -}}
{{- printf "%s-%s" .Release.Name "zookeeper" | trunc 63 | trimSuffix "-" -}}
{{- end -}}
20 changes: 8 additions & 12 deletions charts/graphscope-store/templates/configmap.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,10 @@ data:
kafka.producer.custom.configs={{ .Values.kafkaProducerCustomConfigs }}
kafka.test.cluster.enable=false
## Zk Config
zk.base.path={{ .Values.zkBasePath }}
zk.connect.string=ZK_CONNECT
## Frontend Config
gremlin.server.port=12312
## disable neo4j when launching groot server by default
Expand All @@ -72,13 +76,15 @@ data:
pegasus.timeout={{ .Values.pegasus.timeout }}
pegasus.batch.size=1024
pegasus.output.capacity=16
pegasus.hosts=PEGASUS_HOSTS
## Secondary config
secondary.instance.enabled={{ .Values.secondary.enabled }}
store.data.secondary.path={{ .Values.secondary.storeDataPath }}
store.gc.interval.ms={{ .Values.storeGcIntervalMs }}
write.ha.enabled={{ .Values.backup.enabled }}
## Extra Config
{{- if .Values.extraConfig }}
{{- $config_list := regexSplit ";" .Values.extraConfig -1 }}
Expand All @@ -95,24 +101,14 @@ data:
[[ `hostname` =~ -([0-9]+)$ ]] || exit 1
ordinal=${BASH_REMATCH[1]}
pegasus_hosts=""
i=0
while [ $i -ne $STORE_COUNT ]; do
pod=`echo $DNS_NAME_PREFIX_STORE | sed -e "s/{}/$i/g"`
# 60001 is fixed gaia engine port
pegasus_hosts="${pegasus_hosts},${pod}:60001"
i=$(($i+1))
done
pegasus_hosts=${pegasus_hosts:1}
sudo sed -e "s/GRAPH_NAME/${GRAPH_NAME}/g" \
-e "s/ROLE/${ROLE}/g" \
-e "s/INDEX/${ordinal}/g" \
-e "s/ZK_CONNECT/${ZK_CONNECT}/g" \
-e "s/KAFKA_SERVERS/${KAFKA_SERVERS}/g" \
-e "s/FRONTEND/${DNS_NAME_PREFIX_FRONTEND}/g" \
-e "s/COORDINATOR/${DNS_NAME_PREFIX_COORDINATOR}/g" \
-e "s/STORE/${DNS_NAME_PREFIX_STORE}/g" \
-e "s/PEGASUS_HOSTS/${pegasus_hosts}/g" \
-e "s@LOG4RS_CONFIG@${GRAPHSCOPE_HOME}/groot/conf/log4rs.yml@g" \
/etc/groot/groot.config.tpl | sudo tee -a /etc/groot/groot.config
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,8 @@ spec:
value: {{ $kafkaFullname}}-headless.{{ $releaseNamespace }}
- name: KAFKA_SERVERS
value: {{ include "graphscope-store.kafka.brokerlist" . }}
- name: ZK_CONNECT
value: {{ printf "%s-headless" (include "graphscope-store.zookeeper.fullname" .) | quote }}
ports:
- name: port
containerPort: 55555
Expand Down
7 changes: 7 additions & 0 deletions charts/graphscope-store/templates/frontend/statefulset.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,8 @@ spec:
value: {{ $kafkaFullname}}-headless.{{ $releaseNamespace }}
- name: KAFKA_SERVERS
value: {{ include "graphscope-store.kafka.brokerlist" . }}
- name: ZK_CONNECT
value: {{ printf "%s-headless" (include "graphscope-store.zookeeper.fullname" .) | quote }}
ports:
- name: service-port
containerPort: 55556
Expand All @@ -139,6 +141,11 @@ spec:
readinessProbe: {{- include "common.tplvalues.render" (dict "value" (omit .Values.readinessProbe "enabled") "context" $) | nindent 12 }}
tcpSocket:
port: gremlin
failureThreshold: 5
initialDelaySeconds: 30
periodSeconds: 30
successThreshold: 1
timeoutSeconds: 1
{{- end }}
{{- if .Values.startupProbe.enabled }}
startupProbe: {{- include "common.tplvalues.render" (dict "value" (omit .Values.startupProbe "enabled") "context" $) | nindent 12 }}
Expand Down
191 changes: 191 additions & 0 deletions charts/graphscope-store/templates/store/statefulset-backup.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,191 @@
{{- if .Values.backup.enabled }}
{{- $frontendFullname := include "graphscope-store.frontend.fullname" . }}
{{- $coordinatorFullname := include "graphscope-store.coordinator.fullname" . }}
{{- $storeFullname := include "graphscope-store.store.fullname" . }}
{{- $kafkaFullname := include "graphscope-store.kafka.fullname" . -}}
{{- $releaseNamespace := .Release.Namespace }}

# Backup store StatefulSet. Rendered only when backup.enabled is true.
# It is named "<store fullname>-backup" and runs store.replicaCount replicas,
# i.e. one backup pod per configured store replica.
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: {{ include "graphscope-store.store.fullname" . }}-backup
  namespace: {{ .Release.Namespace }}
  labels: {{- include "common.labels.standard" . | nindent 4 }}
    app.kubernetes.io/component: store
    {{- if .Values.commonLabels }}
    {{- include "common.tplvalues.render" ( dict "value" .Values.commonLabels "context" $ ) | nindent 4 }}
    {{- end }}
  {{- if .Values.commonAnnotations }}
  annotations: {{- include "common.tplvalues.render" ( dict "value" .Values.commonAnnotations "context" $ ) | nindent 4 }}
  {{- end }}
spec:
  podManagementPolicy: {{ .Values.podManagementPolicy }}
  replicas: {{ .Values.store.replicaCount }}
  # NOTE(review): the component label is "store", presumably the same label the primary
  # store StatefulSet selects on - confirm that overlapping selectors are intended.
  selector:
    matchLabels: {{- include "common.labels.matchLabels" . | nindent 6 }}
      app.kubernetes.io/component: store
  # Governing service is "<fullname>-store-headless" (presumably the primary store's
  # headless service - verify).
  serviceName: {{ printf "%s-store-headless" (include "common.names.fullname" .) | trunc 63 | trimSuffix "-" }}
  updateStrategy: {{- include "common.tplvalues.render" (dict "value" .Values.updateStrategy "context" $ ) | nindent 4 }}
  template:
    metadata:
      labels: {{- include "common.labels.standard" . | nindent 8 }}
        app.kubernetes.io/component: store
        {{- if .Values.store.podLabels }}
        {{- include "common.tplvalues.render" (dict "value" .Values.store.podLabels "context" $) | nindent 8 }}
        {{- end }}
      annotations:
        {{- if (include "graphscope-store.createConfigmap" .) }}
        checksum/configuration: {{ include (print $.Template.BasePath "/configmap.yaml") . | sha256sum }}
        {{- end }}
        {{- if .Values.store.podAnnotations }}
        {{- include "common.tplvalues.render" (dict "value" .Values.store.podAnnotations "context" $) | nindent 8 }}
        {{- end }}
    spec:
      {{- include "graphscope-store.imagePullSecrets" . | nindent 6 }}
      {{- if .Values.store.hostAliases }}
      hostAliases: {{- include "common.tplvalues.render" (dict "value" .Values.store.hostAliases "context" $) | nindent 8 }}
      {{- end }}
      hostNetwork: {{ .Values.store.hostNetwork }}
      hostIPC: {{ .Values.store.hostIPC }}
      {{- if .Values.store.schedulerName }}
      schedulerName: {{ .Values.store.schedulerName | quote }}
      {{- end }}
      {{- if .Values.affinity }}
      affinity: {{- include "common.tplvalues.render" (dict "value" .Values.affinity "context" $) | nindent 8 }}
      {{- else }}
      affinity:
        podAffinity: {{- include "common.affinities.pods" (dict "type" .Values.podAffinityPreset "component" "store" "context" $) | nindent 10 }}
        podAntiAffinity: {{- include "common.affinities.pods" (dict "type" .Values.podAntiAffinityPreset "component" "store" "context" $) | nindent 10 }}
        nodeAffinity: {{- include "common.affinities.nodes" (dict "type" .Values.nodeAffinityPreset.type "key" .Values.nodeAffinityPreset.key "values" .Values.nodeAffinityPreset.values) | nindent 10 }}
      {{- end }}
      {{- if .Values.nodeSelector }}
      nodeSelector: {{- include "common.tplvalues.render" (dict "value" .Values.nodeSelector "context" $) | nindent 8 }}
      {{- end }}
      {{- if .Values.dnsPolicy }}
      dnsPolicy: {{ .Values.dnsPolicy | quote }}
      {{- end }}
      {{- if .Values.dnsConfig }}
      dnsConfig: {{- include "common.tplvalues.render" (dict "value" .Values.dnsConfig "context" $) | nindent 8 }}
      {{- end }}
      {{- if .Values.tolerations }}
      tolerations: {{- include "common.tplvalues.render" (dict "value" .Values.tolerations "context" $) | nindent 8 }}
      {{- end }}
      {{- if .Values.topologySpreadConstraints }}
      topologySpreadConstraints: {{- include "common.tplvalues.render" (dict "value" .Values.topologySpreadConstraints "context" $) | nindent 8 }}
      {{- end }}
      {{- if .Values.terminationGracePeriodSeconds }}
      terminationGracePeriodSeconds: {{ .Values.terminationGracePeriodSeconds }}
      {{- end }}
      {{- if .Values.priorityClassName }}
      priorityClassName: {{ .Values.priorityClassName }}
      {{- end }}
      {{- if .Values.podSecurityContext.enabled }}
      securityContext: {{- omit .Values.podSecurityContext "enabled" | toYaml | nindent 8 }}
      {{- end }}
      serviceAccountName: {{ include "graphscope-store.serviceAccountName" . }}
      {{- if .Values.initContainers }}
      initContainers:
        {{- include "common.tplvalues.render" ( dict "value" .Values.initContainers "context" $ ) | nindent 8 }}
      {{- end }}
      containers:
        - name: store
          image: {{ include "graphscope-store.image" . }}
          imagePullPolicy: {{ .Values.image.pullPolicy | quote }}
          {{- if .Values.containerSecurityContext.enabled }}
          securityContext: {{- omit .Values.containerSecurityContext "enabled" | toYaml | nindent 12 }}
          {{- end }}
          command: {{- include "common.tplvalues.render" (dict "value" .Values.command "context" $) | nindent 12 }}
          {{- if .Values.args }}
          args: {{- include "common.tplvalues.render" (dict "value" .Values.args "context" $) | nindent 12 }}
          {{- end }}
          # Environment for the mounted setup.sh, which substitutes these values into
          # the groot.config.tpl placeholders. In the DNS_NAME_PREFIX_* values the
          # literal "{}" is a placeholder for a pod ordinal.
          env:
            - name: GRAPH_NAME
              value: {{ .Values.graphName | quote }}
            - name: GROOT_JAVA_OPTS
              value: {{ .Values.javaOpts | quote }}
            - name: ROLE
              value: "store"
            - name: FRONTEND_COUNT
              value: {{ .Values.frontend.replicaCount | quote }}
            - name: COORDINATOR_COUNT
              value: {{ .Values.coordinator.replicaCount | quote }}
            - name: STORE_COUNT
              value: {{ .Values.store.replicaCount | quote }}
            - name: DNS_NAME_PREFIX_FRONTEND
              value: {{ $frontendFullname }}-{}.{{ $frontendFullname }}-headless
            - name: DNS_NAME_PREFIX_COORDINATOR
              value: {{ $coordinatorFullname }}-{}.{{ $coordinatorFullname }}-headless
            - name: DNS_NAME_PREFIX_STORE
              value: {{ $storeFullname }}-{}.{{ $storeFullname }}-headless
            - name: DNS_NAME_SERVICE_KAFKA
              value: {{ $kafkaFullname}}-headless.{{ $releaseNamespace }}
            - name: KAFKA_SERVERS
              value: {{ include "graphscope-store.kafka.brokerlist" . }}
            - name: ZK_CONNECT
              value: {{ printf "%s-headless" (include "graphscope-store.zookeeper.fullname" .) | quote }}
          ports:
            - name: port
              containerPort: 55555
            - name: gaia-rpc
              containerPort: 60000
            - name: gaia-engine
              containerPort: 60001
          {{- if .Values.store.resources }}
          resources: {{- toYaml .Values.store.resources | nindent 12 }}
          {{- end }}
          # The pod is reported ready once a TCP connection to the gaia-rpc port (60000) succeeds.
          readinessProbe:
            failureThreshold: 5
            tcpSocket:
              port: gaia-rpc
            initialDelaySeconds: 30
            periodSeconds: 30
            successThreshold: 1
            timeoutSeconds: 1
          volumeMounts:
            - name: data
              mountPath: {{ .Values.storeDataPath }}
            - name: config
              mountPath: /etc/groot/groot.config.tpl
              subPath: groot.config
            - name: config
              mountPath: /etc/groot/setup.sh
              subPath: setup.sh
      volumes:
        - name: config
          configMap:
            name: {{ include "graphscope-store.configmapName" . }}
            defaultMode: 0755
        # The "data" volume comes from exactly one of: an existing claim, an emptyDir
        # (persistence disabled), or a per-pod claim created from volumeClaimTemplates.
        {{- if and .Values.store.persistence.enabled .Values.store.persistence.existingClaim }}
        - name: data
          persistentVolumeClaim:
            claimName: {{ tpl .Values.store.persistence.existingClaim . }}
        {{- else if not .Values.store.persistence.enabled }}
        - name: data
          emptyDir: {}
        {{- else if and .Values.store.persistence.enabled (not .Values.store.persistence.existingClaim) }}
  # NOTE(review): annotations, labels and accessModes below are read from
  # .Values.persistence, while size and selector are read from .Values.store.persistence -
  # confirm both value trees exist and that the split is intended.
  volumeClaimTemplates:
    - metadata:
        name: data
        {{- if .Values.persistence.annotations }}
        annotations: {{- include "common.tplvalues.render" (dict "value" .Values.persistence.annotations "context" $) | nindent 10 }}
        {{- end }}
        {{- if .Values.persistence.labels }}
        labels: {{- include "common.tplvalues.render" (dict "value" .Values.persistence.labels "context" $) | nindent 10 }}
        {{- end }}
      spec:
        accessModes:
          {{- range .Values.persistence.accessModes }}
          - {{ . | quote }}
          {{- end }}
        resources:
          requests:
            storage: {{ .Values.store.persistence.size | quote }}
        {{ include "graphscope-store.storageClass" . | nindent 8 }}
        {{- if .Values.store.persistence.selector }}
        selector: {{- include "graphscope-store.tplvalues.render" (dict "value" .Values.store.persistence.selector "context" $) | nindent 10 }}
        {{- end -}}
  {{- end }}
{{- end }}
10 changes: 10 additions & 0 deletions charts/graphscope-store/templates/store/statefulset.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,8 @@ spec:
value: {{ $kafkaFullname}}-headless.{{ $releaseNamespace }}
- name: KAFKA_SERVERS
value: {{ include "graphscope-store.kafka.brokerlist" . }}
- name: ZK_CONNECT
value: {{ printf "%s-headless" (include "graphscope-store.zookeeper.fullname" .) | quote }}
ports:
- name: port
containerPort: 55555
Expand All @@ -133,6 +135,14 @@ spec:
{{- if .Values.store.resources }}
resources: {{- toYaml .Values.store.resources | nindent 12 }}
{{- end }}
readinessProbe:
failureThreshold: 5
tcpSocket:
port: gaia-rpc
initialDelaySeconds: 30
periodSeconds: 30
successThreshold: 1
timeoutSeconds: 1
volumeMounts:
- name: data
mountPath: {{ .Values.storeDataPath }}
Expand Down
6 changes: 6 additions & 0 deletions charts/graphscope-store/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -441,6 +441,9 @@ kafka:
port: 9092
socketRequestMaxBytes: _1048576000

## Zk Config
zkBasePath: "/graphscope/groot"

## This value is only used when kafka.enabled is set to false
##
externalKafka:
Expand Down Expand Up @@ -494,3 +497,6 @@ pegasus:
secondary:
enabled: false
storeDataPath: "./data_secondary"

backup:
enabled: false
4 changes: 4 additions & 0 deletions docs/storage_engine/groot.md
Original file line number Diff line number Diff line change
Expand Up @@ -692,3 +692,7 @@ And use a different `zk.base.path` for each secondary instance to avoid conflict
### Traces

use `--set otel.enabled=true` to enable trace export.

### Write High-availability

Use `--set backup.enabled=true` in multi-pod deployment mode to create a backup store pod for each store pod. This sets `write.ha.enabled=true` in the generated Groot configuration.
Original file line number Diff line number Diff line change
Expand Up @@ -74,12 +74,11 @@ public class CommonConfig {

public static final Config<Boolean> SECONDARY_INSTANCE_ENABLED =
Config.boolConfig("secondary.instance.enabled", false);
public static final Config<Boolean> TRACING_ENABLED =
Config.boolConfig("tracing.enabled", false);

// Create an extra store pod for each original store pod for backup.
// Only available in multi pod mode.
public static final Config<Boolean> WRITE_HIGH_AVAILABILITY_ENABLED =
Config.boolConfig("write.high.availability.enabled", false);

public static final Config<Boolean> TRACING_ENABLED =
Config.boolConfig("tracing.enabled", false);
public static final Config<Boolean> WRITE_HA_ENABLED =
Config.boolConfig("write.ha.enabled", false);
}
Loading

0 comments on commit 60e3e31

Please sign in to comment.