Query microservices round 2 #73

Open · wants to merge 17 commits into main
2 changes: 2 additions & 0 deletions audit/values.yaml
@@ -1,4 +1,6 @@
 global:
+  security:
+    userid: 1000
   dockerRegistry:
     url: ghcr.io/nationalsecurityagency
   volumes:

2 changes: 2 additions & 0 deletions authorization/values.yaml
@@ -1,4 +1,6 @@
 global:
+  security:
+    userid: 1000
   dockerRegistry:
     url: ghcr.io/nationalsecurityagency
   volumes:

2 changes: 2 additions & 0 deletions cache/values.yaml
@@ -1,4 +1,6 @@
 global:
+  security:
+    userid: 1000
   dockerRegistry:
     url: ghcr.io/nationalsecurityagency
   volumes:

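These three values.yaml changes (audit, authorization, cache) add the same global.security.userid key that the common deployment template below now consumes. As a sketch, a site whose containers must run under a different UID could override the default with a values file passed via -f, or directly with --set global.security.userid=1500 (the 1500 UID is a made-up example):

# hypothetical site-override.yaml; 1500 is an example UID
global:
  security:
    userid: 1500
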
8 changes: 7 additions & 1 deletion common-service-library/templates/_deployment.yaml
@@ -78,9 +78,15 @@ spec:
       # Containers Associated with this Deployment #
       ##############################################
       securityContext:
-        runAsUser: 1000
+        runAsUser: {{ .Values.global.security.userid }}
       containers:
         - name: "{{ .Values.meta.name }}"
+          command:
+            {{ if .Values.command }}
+            {{- range $i, $value := .Values.command }}
+            - {{ $value }}
+            {{- end }}
+            {{- end }}
           args:
             {{- range $i, $value := .Values.args }}
             - {{ $value }}

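With the command block added above, a chart can override its container's entrypoint from values. A minimal sketch of a per-service values.yaml entry, assuming an image that needs an explicit wrapper script (the script path is invented for illustration; args continue to come from .Values.args as before):

# hypothetical per-service values.yaml entry; the wrapper path is an example
command:
  - "/usr/local/bin/entrypoint.sh"

Since the {{ if }} guard wraps only the list items, an unset .Values.command should still render a bare command: key, which YAML reads as null; Kubernetes treats a null command as unset and falls back to the image's default entrypoint.
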
23 changes: 23 additions & 0 deletions configuration/configMapFiles/accumulo.yml
@@ -0,0 +1,23 @@
+warehouse-cluster:
+  accumulo:
+    zookeepers: "{{ .Values.global.zookeeper.quorum }}"
+    instanceName: '{{ .Values.global.accumulo.instanceName }}'
+    username: '{{ .Values.global.accumulo.user }}'
+    password: '{{ .Values.global.accumulo.password }}'
+
+accumulo:
+  lookup:
+    audit:
+      defaultAuditType: 'ACTIVE'
+  stats:
+    enabled: true
+
+audit-client:
+  discovery:
+    enabled: false
+  uri: '${AUDIT_SERVER_URL:http://audit-service:8080/audit}'
+
+datawave:
+  swagger:
+    title: "Accumulo Service"
+    description: "REST API provided by the Accumulo Service"

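The audit-client.uri value above is a Spring property placeholder: AUDIT_SERVER_URL is read from the environment, with the in-cluster default as a fallback. A sketch of redirecting audits through the container spec, assuming the chart passes pod environment variables through (hostname and namespace are examples):

# hypothetical container env override; host and namespace are examples
env:
  - name: AUDIT_SERVER_URL
    value: "http://audit.other-namespace.svc.cluster.local:8080/audit"
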
79 changes: 79 additions & 0 deletions configuration/configMapFiles/application-cachedresults.yml
@@ -0,0 +1,79 @@
+spring:
+  datasource:
+    cachedResults:
+      url: 'jdbc:mysql://{{.Values.global.serviceDns.mysql.name}}:3306/cachedresults?zeroDateTimeBehavior=convertToNull'
+      username: '{{ .Values.global.mysql.user }}'
+      password: 'secret'
+      driver-class-name: 'com.mysql.cj.jdbc.Driver'
+      hikari:
+        # default: 30000
+        connection-timeout: 5000
+        # default: 600000
+        idle-timeout: 900000
+        # default: maximum-pool-size
+        minimum-idle: {{ .Values.global.mysql.minIdle }}
+        # default: 10
+        maximum-pool-size: {{ .Values.global.mysql.maxSize }}
+
+datawave:
+  mysql:
+    host: 'mysql'
+    dbname: 'cachedresults'
+    pool:
+      min-size: '5'
+      max-size: '20'
+    username: 'datawave'
+    password: 'secret'
+  query:
+    cachedResults:
+      enabled: ${CACHED_RESULTS:false}
+      remoteQuery:
+        queryServiceUri: "https://dwv-web-query:8443/query/v1/query"
+      # unlimited
+      maxBytesToBuffer: -1
+      numFields: 900
+      statementTemplates:
+        createTableTemplate: |
+          CREATE TABLE IF NOT EXISTS template (
+            _user_ VARCHAR(200) NOT NULL,
+            _queryId_ VARCHAR(200) NOT NULL,
+            _logicName_ VARCHAR(200) NOT NULL,
+            _datatype_ VARCHAR(35) NOT NULL,
+            _eventId_ VARCHAR(50) NOT NULL,
+            _row_ LONGTEXT NOT NULL,
+            _colf_ LONGTEXT NOT NULL,
+            _markings_ VARCHAR(400) NOT NULL,
+            _column_markings_ LONGTEXT NOT NULL,
+            _column_timestamps_ LONGTEXT NOT NULL,
+            %FIELD_DEFINITIONS%
+          ) ENGINE = MyISAM
+        createTable: "CREATE TABLE %TABLE% LIKE template"
+        dropTable: "DROP TABLE %TABLE%"
+        dropView: "DROP VIEW %TABLE%"
+        insert: |
+          INSERT INTO %TABLE% (
+            _user_,
+            _queryId_,
+            _logicName_,
+            _datatype_,
+            _eventId_,
+            _row_,
+            _colf_,
+            _markings_,
+            _column_markings_,
+            _column_timestamps_,
+            %PREPARED_FIELDS%
+          ) VALUES (
+            ?,
+            ?,
+            ?,
+            ?,
+            ?,
+            ?,
+            ?,
+            ?,
+            ?,
+            ?,
+            %PREPARED_VALUES%
+          )
+        createView: "CREATE VIEW %VIEW%(%VIEW_COLS%) AS SELECT %TABLE_COLS% FROM %TABLE%"

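Cached results stay off until the CACHED_RESULTS environment variable flips the ${CACHED_RESULTS:false} placeholder; once enabled, the statement templates expand %TABLE%, %FIELD_DEFINITIONS%, and the other placeholders at runtime, cloning a per-query table from template via CREATE TABLE ... LIKE. A sketch of enabling the feature through the pod spec:

# hypothetical pod env entry to enable cached results
env:
  - name: CACHED_RESULTS
    value: "true"
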
15 changes: 15 additions & 0 deletions configuration/configMapFiles/application-federation.yml
@@ -0,0 +1,15 @@
+# This serves as a set of sensible defaults for authorization and query federation.
+datawave:
+  authorization:
+    federation:
+      # Each entry in the following map will be registered as a FederatedAuthorizationService bean, named after the key.
+      services:
+        FederatedAuthorizationService:
+          federatedAuthorizationUri: "https://{{ .Values.global.serviceDns.authorization.name }}:{{ .Values.global.serviceDns.authorization.port }}/{{ .Values.global.serviceDns.authorization.endpoint }}"
+  query:
+    federation:
+      # Each entry in the following map will be registered as a FederatedQueryService (RemoteQueryService) bean, named after the key.
+      services:
+        FederatedQueryService:
+          queryServiceUri: 'https://{{ .Values.global.serviceDns.query.name }}:{{ .Values.global.serviceDns.query.port }}/query/v1'
+          queryMetricServiceUri: 'https://{{ .Values.global.serviceDns.metrics.name }}:{{ .Values.global.serviceDns.metrics.port }}/querymetric/v1/id'

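Because every key under services becomes the name of a registered bean (per the comments in this file), federating with an additional remote endpoint is just another map entry. A sketch, with the PartnerQueryService name and hostnames invented for illustration:

datawave:
  query:
    federation:
      services:
        # hypothetical second remote query service; name and hosts are examples
        PartnerQueryService:
          queryServiceUri: 'https://partner-query:8443/query/v1'
          queryMetricServiceUri: 'https://partner-metrics:8443/querymetric/v1/id'
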
28 changes: 27 additions & 1 deletion configuration/configMapFiles/application-k8s.yml
@@ -1,4 +1,30 @@
 hazelcast:
   client:
+    clusterName: cache
     k8s:
-      service-dns-name: {{ .Values.global.serviceDns.cache.name }}.{{ .Release.Namespace }}.svc.cluster.local
+      service-dns-name: {{ .Values.global.serviceDns.cache.name }}.{{ .Release.Namespace }}.svc.cluster.local
+# This template is for configuring your site-specific properties for all microservices.
+system.name: CONTAINERS
+# Define the client certificates (in lower-case subjectDN<issuerDN> form) that are allowed to call a service. Note that you only need to
+# specify one value in the list below.
+# Since enforce-allowed-callers is false, you can skip configuring this section if you want.
+spring:
+  kafka:
+    bootstrap-servers: {{ .Values.global.serviceDns.kafka.name }}.{{ .Release.Namespace }}.svc.cluster.local:{{ .Values.global.serviceDns.kafka.port }}
+    consumer:
+      autoOffsetReset: earliest
+      enableAutoCommit: false
+      properties:
+        allow.auto.create.topics: false
+# This is the accumulo configuration we use in services. These don't map directly to a properties class, but
+# they are here to be used as a reference for other properties.
+accumulo:
+  zookeepers: '{{ .Values.global.zookeeper.quorum }}'
+  instanceName: '{{ .Values.global.accumulo.instanceName }}'
+  username: '{{ .Values.global.accumulo.user }}'
+  password: '{{ .Values.global.accumulo.password }}'
+# Configuration placeholders which 1) determine what backend will be used for transmitting query results
+# and 2) determine the message size limit before claim checks are used with RabbitMQ messaging.
+messaging:
+  backend: kafka
+  maxMessageSizeBytes: 536870912

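The messaging block picks the transport for query results, and per the comment, maxMessageSizeBytes only acts as a claim-check threshold when RabbitMQ is the backend. A sketch of a site override that switches transports (the 1 MiB threshold is an arbitrary example):

# hypothetical override; the 1 MiB threshold is an example value
messaging:
  backend: rabbitmq
  maxMessageSizeBytes: 1048576
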
23 changes: 23 additions & 0 deletions configuration/configMapFiles/application-metricssource.yml
@@ -0,0 +1,23 @@
+# This profile should be added to your service if you depend on the
+# query metric starter to send metrics to the query metric service.
+spring:
+  cloud:
+    stream:
+      bindings:
+        queryMetricSource-out-0:
+          destination: queryMetricChannel
+          producer:
+            requiredGroups: queryMetricService
+            errorChannelEnabled: true
+    # NOTE: When defining your functions, be sure to include busConsumer, or else spring cloud bus will not work.
+    function:
+      definition: queryMetricSource;busConsumer
+
+datawave:
+  query:
+    metric:
+      client:
+        confirmAckTimeoutMillis: 30000
+        # To send metrics via REST, uncomment the following:
+        # host: metrics
+        # transport: HTTPS

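For a deployment that prefers REST delivery over messaging, uncommenting the last two properties as the file itself suggests would yield a client block like this:

datawave:
  query:
    metric:
      client:
        confirmAckTimeoutMillis: 30000
        host: metrics
        transport: HTTPS
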
54 changes: 54 additions & 0 deletions configuration/configMapFiles/application-mrquery.yml
@@ -0,0 +1,54 @@
+datawave:
+  query:
+    mapreduce:
+      fsConfigResources:
+        - ${HADOOP_CONF_DIR:/etc/hadoop/conf}/core-site.xml
+        - ${HADOOP_CONF_DIR:/etc/hadoop/conf}/hdfs-site.xml
+        - ${HADOOP_CONF_DIR:/etc/hadoop/conf}/mapred-site.xml
+        - ${HADOOP_CONF_DIR:/etc/hadoop/conf}/yarn-site.xml
+      callbackServletURL: "http://query:8080/query/v1/mapreduce/updateState"
+      mapReduceBaseDirectory: "/datawave/MapReduceService"
+      restrictInputFormats: true
+      validInputFormats:
+        - "org.apache.accumulo.core.client.mapreduce.AccumuloInputFormat"
+        - "datawave.mr.bulk.BulkInputFormat"
+      jobs:
+        'BulkResultsJob':
+          startingClass: datawave.microservice.query.mapreduce.MapReduce
+          jobJarName: "MapReduceQueryCoreJob.jar"
+          description: "MapReduce job that runs a query and either puts the results into a table or files in HDFS"
+          hdfsUri: "hdfs://${HADOOP_HOST}:9000/"
+          jobTracker: "${HADOOP_HOST}:8021"
+          requiredRuntimeParameters:
+            queryId: java.lang.String
+            format: datawave.microservice.mapreduce.bulkresults.map.SerializationFormat
+          optionalRuntimeParameters:
+            outputTableName: java.lang.String
+            outputFormat: java.lang.String
+          jobConfigurationProperties:
+            "mapreduce.map.speculative": "false"
+            "mapreduce.map.output.compress": "false"
+            "mapreduce.output.fileoutputformat.compress": "false"
+            "mapreduce.job.user.classpath.first": "true"
+          # NOTE: Disable spring components which should not be run in a map-reduce context.
+          jobSystemProperties:
+            "datawave.table.cache.enabled": "false"
+            "spring.profiles.active": "query,mrquery"
+            "spring.cloud.bus.enabled": "false"
+            "spring.cloud.discovery.enabled": "false"
+            "spring.cloud.consul.enabled": "false"
+            "spring.rabbitmq.discovery.enabled": "false"
+            "datawave.query.messaging.backend": "none"
+            "datawave.query.messaging.claimCheck.enabled": "false"
+            "datawave.query.storage.cache.enabled": "false"
+            "hazelcast.client.enabled": "false"
+            "spring.cloud.config.enabled": "false"
+            "datawave.query.metric.client.enabled": "false"
+          accumulo:
+            zookeepers: '${accumulo.zookeepers}'
+            instanceName: '${accumulo.instanceName}'
+            username: '${accumulo.username}'
+            password: '${accumulo.password}'
+        'OozieJob':
+          hdfsUri: "hdfs://${HADOOP_HOST}:9000/"
+          jobTracker: "${HADOOP_HOST}:8021"

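Both job definitions lean on runtime placeholders: ${HADOOP_HOST} for the HDFS and job-tracker endpoints, and ${HADOOP_CONF_DIR} for the config resources (defaulting to /etc/hadoop/conf). A sketch of the container environment a deployment might supply (the namenode hostname is an example):

# hypothetical container env; "hdfs-namenode" is an example hostname
env:
  - name: HADOOP_HOST
    value: "hdfs-namenode"
  - name: HADOOP_CONF_DIR
    value: "/etc/hadoop/conf"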