Query microservices round 2 #73

Open · wants to merge 17 commits into main
2 changes: 2 additions & 0 deletions audit/values.yaml
@@ -1,4 +1,6 @@
 global:
+  security:
+    userid: 1000
   dockerRegistry:
     url: ghcr.io/nationalsecurityagency
   volumes:

2 changes: 2 additions & 0 deletions authorization/values.yaml
@@ -1,4 +1,6 @@
 global:
+  security:
+    userid: 1000
   dockerRegistry:
     url: ghcr.io/nationalsecurityagency
   volumes:

2 changes: 2 additions & 0 deletions cache/values.yaml
@@ -1,4 +1,6 @@
 global:
+  security:
+    userid: 1000
   dockerRegistry:
     url: ghcr.io/nationalsecurityagency
   volumes:

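These three values.yaml changes (audit, authorization, cache) add the same global.security.userid key that the common deployment template below now consumes. As a sketch, a site whose containers must run under a different UID could override the default with a values file passed via -f, or directly with --set global.security.userid=1500 (the 1500 UID is a made-up example):

# hypothetical site-override.yaml; 1500 is an example UID
global:
  security:
    userid: 1500
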
8 changes: 7 additions & 1 deletion common-service-library/templates/_deployment.yaml
@@ -78,9 +78,15 @@ spec:
       # Containers Associated with this Deployment #
       ##############################################
       securityContext:
-        runAsUser: 1000
+        runAsUser: {{ .Values.global.security.userid }}
       containers:
         - name: "{{ .Values.meta.name }}"
+          command:
+            {{ if .Values.command }}
+            {{- range $i, $value := .Values.command }}
+            - {{ $value }}
+            {{- end }}
+            {{- end }}
           args:
             {{- range $i, $value := .Values.args }}
             - {{ $value }}

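With the command block added above, a chart can override its container's entrypoint from values. A minimal sketch of a per-service values.yaml entry, assuming an image that needs an explicit wrapper script (the script path is invented for illustration; args continue to come from .Values.args as before):

# hypothetical per-service values.yaml entry; the wrapper path is an example
command:
  - "/usr/local/bin/entrypoint.sh"

Since the {{ if }} guard wraps only the list items, an unset .Values.command should still render a bare command: key, which YAML reads as null; Kubernetes treats a null command as unset and falls back to the image's default entrypoint.
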
23 changes: 23 additions & 0 deletions configuration/configMapFiles/accumulo.yml
@@ -0,0 +1,23 @@
+warehouse-cluster:
+  accumulo:
+    zookeepers: "{{ .Values.global.zookeeper.quorum }}"
+    instanceName: '{{ .Values.global.accumulo.instanceName }}'
+    username: '{{ .Values.global.accumulo.user }}'
+    password: '{{ .Values.global.accumulo.password }}'
+
+accumulo:
+  lookup:
+    audit:
+      defaultAuditType: 'ACTIVE'
+  stats:
+    enabled: true
+
+audit-client:
+  discovery:
+    enabled: false
+  uri: '${AUDIT_SERVER_URL:http://audit-service:8080/audit}'
+
+datawave:
+  swagger:
+    title: "Accumulo Service"
+    description: "REST API provided by the Accumulo Service"

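The audit-client.uri value above is a Spring property placeholder: AUDIT_SERVER_URL is read from the environment, with the in-cluster default as a fallback. A sketch of redirecting audits through the container spec, assuming the chart passes pod environment variables through (hostname and namespace are examples):

# hypothetical container env override; host and namespace are examples
env:
  - name: AUDIT_SERVER_URL
    value: "http://audit.other-namespace.svc.cluster.local:8080/audit"
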
79 changes: 79 additions & 0 deletions configuration/configMapFiles/application-cachedresults.yml
@@ -0,0 +1,79 @@
+spring:
+  datasource:
+    cachedResults:
+      url: 'jdbc:mysql://{{.Values.global.serviceDns.mysql.name}}:3306/cachedresults?zeroDateTimeBehavior=convertToNull'
+      username: '{{ .Values.global.mysql.user }}'
+      password: 'secret'
+      driver-class-name: 'com.mysql.cj.jdbc.Driver'
+      hikari:
+        # default: 30000
+        connection-timeout: 5000
+        # default: 600000
+        idle-timeout: 900000
+        # default: maximum-pool-size
+        minimum-idle: {{ .Values.global.mysql.minIdle }}
+        # default: 10
+        maximum-pool-size: {{ .Values.global.mysql.maxSize }}
+
+datawave:
+  mysql:
+    host: 'mysql'
+    dbname: 'cachedresults'
+    pool:
+      min-size: '5'
+      max-size: '20'
+    username: 'datawave'
+    password: 'secret'
+  query:
+    cachedResults:
+      enabled: ${CACHED_RESULTS:false}
+      remoteQuery:
+        queryServiceUri: "https://dwv-web-query:8443/query/v1/query"
+      # unlimited
+      maxBytesToBuffer: -1
+      numFields: 900
+      statementTemplates:
+        createTableTemplate: |
+          CREATE TABLE IF NOT EXISTS template (
+            _user_ VARCHAR(200) NOT NULL,
+            _queryId_ VARCHAR(200) NOT NULL,
+            _logicName_ VARCHAR(200) NOT NULL,
+            _datatype_ VARCHAR(35) NOT NULL,
+            _eventId_ VARCHAR(50) NOT NULL,
+            _row_ LONGTEXT NOT NULL,
+            _colf_ LONGTEXT NOT NULL,
+            _markings_ VARCHAR(400) NOT NULL,
+            _column_markings_ LONGTEXT NOT NULL,
+            _column_timestamps_ LONGTEXT NOT NULL,
+            %FIELD_DEFINITIONS%
+          ) ENGINE = MyISAM
+        createTable: "CREATE TABLE %TABLE% LIKE template"
+        dropTable: "DROP TABLE %TABLE%"
+        dropView: "DROP VIEW %TABLE%"
+        insert: |
+          INSERT INTO %TABLE% (
+            _user_,
+            _queryId_,
+            _logicName_,
+            _datatype_,
+            _eventId_,
+            _row_,
+            _colf_,
+            _markings_,
+            _column_markings_,
+            _column_timestamps_,
+            %PREPARED_FIELDS%
+          ) VALUES (
+            ?,
+            ?,
+            ?,
+            ?,
+            ?,
+            ?,
+            ?,
+            ?,
+            ?,
+            ?,
+            %PREPARED_VALUES%
+          )
+        createView: "CREATE VIEW %VIEW%(%VIEW_COLS%) AS SELECT %TABLE_COLS% FROM %TABLE%"

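Cached results stay off until the CACHED_RESULTS environment variable flips the ${CACHED_RESULTS:false} placeholder; once enabled, the statement templates expand %TABLE%, %FIELD_DEFINITIONS%, and the other placeholders at runtime, cloning a per-query table from template via CREATE TABLE ... LIKE. A sketch of enabling the feature through the pod spec:

# hypothetical pod env entry to enable cached results
env:
  - name: CACHED_RESULTS
    value: "true"
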
15 changes: 15 additions & 0 deletions configuration/configMapFiles/application-federation.yml
@@ -0,0 +1,15 @@
+# This serves as a set of sensible defaults for authorization and query federation.
+datawave:
+  authorization:
+    federation:
+      # Each entry in the following map will be registered as a FederatedAuthorizationService bean, named after the key.
+      services:
+        FederatedAuthorizationService:
+          federatedAuthorizationUri: "https://{{ .Values.global.serviceDns.authorization.name }}:{{ .Values.global.serviceDns.authorization.port }}/{{ .Values.global.serviceDns.authorization.endpoint }}"
+  query:
+    federation:
+      # Each entry in the following map will be registered as a FederatedQueryService (RemoteQueryService) bean, named after the key.
+      services:
+        FederatedQueryService:
+          queryServiceUri: 'https://{{ .Values.global.serviceDns.query.name }}:{{ .Values.global.serviceDns.query.port }}/query/v1'
+          queryMetricServiceUri: 'https://{{ .Values.global.serviceDns.metrics.name }}:{{ .Values.global.serviceDns.metrics.port }}/querymetric/v1/id'

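Because every key under services becomes the name of a registered bean (per the comments in this file), federating with an additional remote endpoint is just another map entry. A sketch, with the PartnerQueryService name and hostnames invented for illustration:

datawave:
  query:
    federation:
      services:
        # hypothetical second remote query service; name and hosts are examples
        PartnerQueryService:
          queryServiceUri: 'https://partner-query:8443/query/v1'
          queryMetricServiceUri: 'https://partner-metrics:8443/querymetric/v1/id'
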
28 changes: 27 additions & 1 deletion configuration/configMapFiles/application-k8s.yml
@@ -1,4 +1,30 @@
 hazelcast:
   client:
+    clusterName: cache
     k8s:
-      service-dns-name: {{ .Values.global.serviceDns.cache.name }}.{{ .Release.Namespace }}.svc.cluster.local
+      service-dns-name: {{ .Values.global.serviceDns.cache.name }}.{{ .Release.Namespace }}.svc.cluster.local
+# This template is for configuring your site-specific properties for all microservices.
+system.name: CONTAINERS
+# Define the client certificates (in lower-case subjectDN<issuerDN> form) that are allowed to call a service. Note that you only need to
+# specify one value in the list below.
+# Since enforce-allowed-callers is false, you can skip configuring this section if you want.
+spring:
+  kafka:
+    bootstrap-servers: {{ .Values.global.serviceDns.kafka.name }}.{{ .Release.Namespace }}.svc.cluster.local:{{ .Values.global.serviceDns.kafka.port }}
+    consumer:
+      autoOffsetReset: earliest
+      enableAutoCommit: false
+      properties:
+        allow.auto.create.topics: false
+# This is the accumulo configuration we use in services. These don't map directly to a properties class, but
+# they are here to be used as a reference for other properties.
+accumulo:
+  zookeepers: '{{ .Values.global.zookeeper.quorum }}'
+  instanceName: '{{ .Values.global.accumulo.instanceName }}'
+  username: '{{ .Values.global.accumulo.user }}'
+  password: '{{ .Values.global.accumulo.password }}'
+# Configuration placeholders which 1) determine what backend will be used for transmitting query results
+# and 2) determine the message size limit before claim checks are used with RabbitMQ messaging.
+messaging:
+  backend: kafka
+  maxMessageSizeBytes: 536870912

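The messaging block picks the transport for query results, and per the comment, maxMessageSizeBytes only acts as a claim-check threshold when RabbitMQ is the backend. A sketch of a site override that switches transports (the 1 MiB threshold is an arbitrary example):

# hypothetical override; the 1 MiB threshold is an example value
messaging:
  backend: rabbitmq
  maxMessageSizeBytes: 1048576
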
23 changes: 23 additions & 0 deletions configuration/configMapFiles/application-metricssource.yml
@@ -0,0 +1,23 @@
+# This profile should be added to your service if you depend on the
+# query metric starter to send metrics to the query metric service.
+spring:
+  cloud:
+    stream:
+      bindings:
+        queryMetricSource-out-0:
+          destination: queryMetricChannel
+          producer:
+            requiredGroups: queryMetricService
+            errorChannelEnabled: true
+    # NOTE: When defining your functions, be sure to include busConsumer, or else spring cloud bus will not work.
+    function:
+      definition: queryMetricSource;busConsumer
+
+datawave:
+  query:
+    metric:
+      client:
+        confirmAckTimeoutMillis: 30000
+        # To send metrics via REST, uncomment the following:
+        # host: metrics
+        # transport: HTTPS

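For a deployment that prefers REST delivery over messaging, uncommenting the last two properties as the file itself suggests would yield a client block like this:

datawave:
  query:
    metric:
      client:
        confirmAckTimeoutMillis: 30000
        host: metrics
        transport: HTTPS
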
54 changes: 54 additions & 0 deletions configuration/configMapFiles/application-mrquery.yml
@@ -0,0 +1,54 @@
+datawave:
+  query:
+    mapreduce:
+      fsConfigResources:
+        - ${HADOOP_CONF_DIR:/etc/hadoop/conf}/core-site.xml
+        - ${HADOOP_CONF_DIR:/etc/hadoop/conf}/hdfs-site.xml
+        - ${HADOOP_CONF_DIR:/etc/hadoop/conf}/mapred-site.xml
+        - ${HADOOP_CONF_DIR:/etc/hadoop/conf}/yarn-site.xml
+      callbackServletURL: "http://query:8080/query/v1/mapreduce/updateState"
+      mapReduceBaseDirectory: "/datawave/MapReduceService"
+      restrictInputFormats: true
+      validInputFormats:
+        - "org.apache.accumulo.core.client.mapreduce.AccumuloInputFormat"
+        - "datawave.mr.bulk.BulkInputFormat"
+      jobs:
+        'BulkResultsJob':
+          startingClass: datawave.microservice.query.mapreduce.MapReduce
+          jobJarName: "MapReduceQueryCoreJob.jar"
+          description: "MapReduce job that runs a query and either puts the results into a table or files in HDFS"
+          hdfsUri: "hdfs://${HADOOP_HOST}:9000/"
+          jobTracker: "${HADOOP_HOST}:8021"
+          requiredRuntimeParameters:
+            queryId: java.lang.String
+            format: datawave.microservice.mapreduce.bulkresults.map.SerializationFormat
+          optionalRuntimeParameters:
+            outputTableName: java.lang.String
+            outputFormat: java.lang.String
+          jobConfigurationProperties:
+            "mapreduce.map.speculative": "false"
+            "mapreduce.map.output.compress": "false"
+            "mapreduce.output.fileoutputformat.compress": "false"
+            "mapreduce.job.user.classpath.first": "true"
+          # NOTE: Disable spring components which should not be run in a map-reduce context.
+          jobSystemProperties:
+            "datawave.table.cache.enabled": "false"
+            "spring.profiles.active": "query,mrquery"
+            "spring.cloud.bus.enabled": "false"
+            "spring.cloud.discovery.enabled": "false"
+            "spring.cloud.consul.enabled": "false"
+            "spring.rabbitmq.discovery.enabled": "false"
+            "datawave.query.messaging.backend": "none"
+            "datawave.query.messaging.claimCheck.enabled": "false"
+            "datawave.query.storage.cache.enabled": "false"
+            "hazelcast.client.enabled": "false"
+            "spring.cloud.config.enabled": "false"
+            "datawave.query.metric.client.enabled": "false"
+          accumulo:
+            zookeepers: '${accumulo.zookeepers}'
+            instanceName: '${accumulo.instanceName}'
+            username: '${accumulo.username}'
+            password: '${accumulo.password}'
+        'OozieJob':
+          hdfsUri: "hdfs://${HADOOP_HOST}:9000/"
+          jobTracker: "${HADOOP_HOST}:8021"

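Both job definitions lean on runtime placeholders: ${HADOOP_HOST} for the HDFS and job-tracker endpoints, and ${HADOOP_CONF_DIR} for the config resources (defaulting to /etc/hadoop/conf). A sketch of the container environment a deployment might supply (the namenode hostname is an example):

# hypothetical container env; "hdfs-namenode" is an example hostname
env:
  - name: HADOOP_HOST
    value: "hdfs-namenode"
  - name: HADOOP_CONF_DIR
    value: "/etc/hadoop/conf"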