-
Notifications
You must be signed in to change notification settings - Fork 29
/
muchos.props.example
179 lines (169 loc) · 7.22 KB
/
muchos.props.example
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
[general]
# General cluster settings. Values written as %(name)s are interpolation references
# to other keys in this section (Python configparser style).
# Cluster type (ec2 or existing)
cluster_type = ec2
# Cluster user name (the install command will SSH to the cluster as this user).
# Leave the default below if launching the cluster in AWS.
cluster_user = centos
# Cluster user group (defaults to the same value as cluster_user)
cluster_group = %(cluster_user)s
# Cluster user home directory
user_home = /home/%(cluster_user)s
# Install directory where Hadoop, Accumulo, etc. will be installed
install_dir = %(user_home)s/install
# Hostname of the proxy node that Muchos will use to direct installation of the cluster. It will be
# given a public IP if launching in EC2. If not launching in EC2, the node must have a public IP that
# can be reached from your machine. The hostname can be chosen from the [nodes] section below.
proxy_hostname = leader1
# If set, a SOCKS proxy will be created on the specified port when connecting to the proxy using 'muchos ssh <cluster>'
#proxy_socks_port = 38585
# Accumulo instance name
accumulo_instance = muchos
# Accumulo password
# NOTE(review): 'secret' looks like an example placeholder - presumably it should be changed for a real cluster.
accumulo_password = secret
# Software versions (set the matching sha-256 checksums in conf/checksums)
hadoop_version = 2.9.2
zookeeper_version = 3.4.14
spark_version = 2.3.2
fluo_version = 1.2.0
fluo_yarn_version = 1.0.0
accumulo_version = 1.9.3
# Specifies whether software should be downloaded. If 'False', tarballs of the software above should be in conf/upload/
download_software = True
# Install Hub (for GitHub)
install_hub = True
# The Java package to install
java_package=java-1.8.0-openjdk-devel
# Alternative package for Java 11 (commented out; use it in place of the java_package line above)
# java_package=java-11-openjdk-devel
[ec2]
# Settings for launching the cluster in AWS EC2.
# AWS machine image to use. The default below is for a CentOS 7 image (in us-east-1).
# You may need to change this value if a new image has been released or you are running in a different region.
# Before using this AMI, subscribe to it on the CentOS product page below or launching will fail:
# https://aws.amazon.com/marketplace/pp/B00O7WM7QW
aws_ami = ami-9887c6e7
# Type of AWS instance launched by default
default_instance_type = m5d.2xlarge
# Type of AWS instance launched for any node running the 'worker' service.
# Leave the default below to use the same instance type as set by the 'default_instance_type' property.
worker_instance_type = %(default_instance_type)s
# Enable template mode by selecting a template from conf/templates, in order to leverage your own
# custom EC2 launch requests (optional). See conf/templates/README.md for more information.
#cluster_template = example
# VPC to launch instances in (optional)
#vpc_id = vpc-xxxxx
# VPC subnet to launch instances in (optional)
#subnet_id = subnet-xxxxxx
# Security group ID to launch in (optional)
#security_group_id = sg-xxxxxx
# Name of the public key that will be loaded by Amazon onto your EC2 instances.
# You can upload and name your public key using the EC2 Management Console.
# Only the user with this key will be able to SSH to the cluster.
# The name below should be your 'Key pair name' in EC2, not the name of your public key file.
key_name = my_aws_key
# Type of filesystem to format instance storage as.
fstype = ext3
# Force formatting of instance devices, even when they already have a filesystem.
force_format = no
# Tags to add to instances (comma-separated key:value pairs, as in the example below)
#instance_tags = key1:value1,key2:value2
# Nodes will be given public IP addresses if true
associate_public_ip = true
# Path to a file containing user data that will be executed at launch
#user_data_path = /path/to/user_data
# Shut down instances after a delay (in minutes). If 0, no shutdown will occur.
shutdown_delay_minutes = 0
# Shutdown behavior of EC2 instances: terminate or stop
shutdown_behavior = stop
[existing]
# Settings for the 'existing' cluster type.
# Root of the data dirs
mount_root = /var/data
# Data directories on all nodes (comma-separated list)
data_dirs = /var/data1,/var/data2,/var/data3
# Identifies the drives for metrics (comma-separated list)
metrics_drive_ids = var-data1,var-data2,var-data3
[performance]
# Automatically tune Accumulo, Yarn, and Fluo performance settings by selecting or
# creating a performance profile. Try not to use more memory than each node has,
# and leave some space for the OS.
# The value is the name of one of the profile sections below (e.g. perf-small).
profile=perf-large
# Below are the different performance profiles that can be selected. Each profile
# has the same properties with different values.
[perf-small]
# Amount of JVM heap for each tserver
accumulo_tserv_mem=2G
# Amount of data cache for each tserver. Only applies when using Accumulo 1.x
accumulo_dcache_size=768M
# Amount of index cache for each tserver. Only applies when using Accumulo 1.x
accumulo_icache_size=256M
# In-memory map size for each tserver. Only applies when using Accumulo 1.x
accumulo_imap_size=512M
# Amount of memory (in MB) for each Fluo worker
fluo_worker_mem_mb=2048
# Determines the gap (in MB) between the Yarn memory limit and the java -Xmx setting.
# For example, if fluo_worker_mem_mb is set to 2048 and twill_reserve_mem_mb is
# set to 256, then for workers the java -Xmx setting will be 2048-256 = 1792.
# If Yarn is killing worker processes because they are using too much memory,
# then consider increasing this setting.
twill_reserve_mem_mb=256
# Number of threads for each Fluo worker
fluo_worker_threads=20
# Number of workers to run per node
fluo_worker_instances_multiplier=1
# Max amount of memory (in MB) for YARN per node
yarn_nm_mem_mb=4096
[perf-medium]
# Medium profile: same properties as [perf-small], where each property is described.
accumulo_tserv_mem=3G
# The Accumulo cache and in-memory map settings below only apply when using Accumulo 1.x
accumulo_dcache_size=1536M
accumulo_icache_size=512M
accumulo_imap_size=512M
fluo_worker_mem_mb=4096
twill_reserve_mem_mb=512
fluo_worker_threads=64
fluo_worker_instances_multiplier=1
yarn_nm_mem_mb=8192
[perf-large]
# Large profile (the one selected by 'profile' in [performance] above).
# Same properties as [perf-small], where each property is described.
accumulo_tserv_mem=4G
# The Accumulo cache and in-memory map settings below only apply when using Accumulo 1.x
accumulo_dcache_size=2G
accumulo_icache_size=1G
accumulo_imap_size=512M
fluo_worker_mem_mb=4096
twill_reserve_mem_mb=512
fluo_worker_threads=64
fluo_worker_instances_multiplier=2
yarn_nm_mem_mb=16384
[ansible-vars]
# This section is used to override Ansible variables. Any variable set below
# will be placed in the hosts file created by Muchos.
# Expected format: variable = value
[nodes]
# Describes the nodes in the cluster, one per line, in the following format:
#   <Hostname> = <Service1>[,<Service2>,<Service3>]
# Where:
#   Hostname = Must be unique. Will be used as the hostname in EC2, or should match the hostname
#              on your own cluster.
#   Service  = Service to run on the node. Possible values: zookeeper, namenode, resourcemanager,
#              accumulomaster, client, swarmmanager, mesosmaster, worker, fluo, metrics, spark.
# The following services are required: namenode, resourcemanager, accumulomaster, zookeeper and worker.
# leader1 is also the node named by proxy_hostname in the [general] section.
leader1 = namenode,resourcemanager
leader2 = accumulomaster,zookeeper
worker1 = worker
worker2 = worker
worker3 = worker
worker4 = worker
worker5 = worker
webserver1 = client
ingest1 = client