Commit 6ec46ee4 authored by Jérome Perrin

WIP grafana

parent 4c4f04ce
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
[instance-profile] [instance-profile]
filename = instance.cfg.in filename = instance.cfg.in
md5sum = 39a1ee09ca7a12995703ff2a6a869637 md5sum = bdc556fa76a3f5a763391797c7f3e342
[influxdb-config-file] [influxdb-config-file]
filename = influxdb-config-file.cfg.in filename = influxdb-config-file.cfg.in
...@@ -23,7 +23,7 @@ md5sum = a28972ced3e0f4aa776e43a9c44717c0 ...@@ -23,7 +23,7 @@ md5sum = a28972ced3e0f4aa776e43a9c44717c0
[telegraf-config-file] [telegraf-config-file]
filename = telegraf-config-file.cfg.in filename = telegraf-config-file.cfg.in
md5sum = 6de1faa34842e1eda095a51edecc2083 md5sum = 016d0163ca3dbabe538a8feeee745c60
[grafana-config-file] [grafana-config-file]
filename = grafana-config-file.cfg.in filename = grafana-config-file.cfg.in
...@@ -39,8 +39,4 @@ md5sum = 5616679a9c5c2757540175ead3f5500a ...@@ -39,8 +39,4 @@ md5sum = 5616679a9c5c2757540175ead3f5500a
[loki-config-file] [loki-config-file]
filename = loki-config-file.cfg.in filename = loki-config-file.cfg.in
md5sum = 19a7f5cb904b3287b0bc7cb3e8a27429 md5sum = e4917bae2a07598ce5305520e9a58b6d
[loki-nginx-config-file]
filename = loki-nginx-config-file.cfg.in
md5sum = b08ce1e4abb34eb79e26133459c27c3a
{% import "caucase" as caucase with context %}
[buildout] [buildout]
parts = parts =
promises promises
...@@ -45,17 +47,13 @@ grafana-dashboards-dir = ${:grafana-dir}/dashboards ...@@ -45,17 +47,13 @@ grafana-dashboards-dir = ${:grafana-dir}/dashboards
telegraf-dir = ${:srv}/telegraf telegraf-dir = ${:srv}/telegraf
telegraf-extra-config-dir = ${:telegraf-dir}/extra-config telegraf-extra-config-dir = ${:telegraf-dir}/extra-config
loki-dir = ${:srv}/loki loki-dir = ${:srv}/loki
loki-boltdb-shipper-active-index-directory = ${:loki-dir}/index
loki-boltdb-shipper-cache-location = ${:loki-dir}/index-cache
loki-compactor-working-directory = ${:loki-dir}/compactor
loki-storage-filesystem-directory = ${:loki-dir}/chunks loki-storage-filesystem-directory = ${:loki-dir}/chunks
loki-nginx-dir = ${:srv}/loki-nginx caucase-updater-loki = ${:srv}/caucase-updater/loki/
loki-nginx-logs-dir = ${:loki-nginx-dir}/logs
promtail-dir = ${:srv}/promtail promtail-dir = ${:srv}/promtail
# macros # macros
[generate-certificate] [generate-insecure-self-signed-certificate]
# TODO: stop using this, use caucase
recipe = plone.recipe.command recipe = plone.recipe.command
command = command =
if [ ! -e ${:key-file} ] if [ ! -e ${:key-file} ]
...@@ -117,7 +115,7 @@ recipe = slapos.cookbook:generate.password ...@@ -117,7 +115,7 @@ recipe = slapos.cookbook:generate.password
username = influxdb username = influxdb
[influxdb-certificate] [influxdb-certificate]
<= generate-certificate <= generate-insecure-self-signed-certificate
[influxdb-listen-promise] [influxdb-listen-promise]
<= check-port-listening-promise <= check-port-listening-promise
...@@ -163,17 +161,15 @@ ssl-cert-file = ${grafana-certificate:cert-file} ...@@ -163,17 +161,15 @@ ssl-cert-file = ${grafana-certificate:cert-file}
recipe = slapos.cookbook:wrapper recipe = slapos.cookbook:wrapper
command-line = command-line =
{{ grafana_bin }} -config ${grafana-config-file:output} -homepath {{ grafana_homepath }} {{ grafana_bin }} server -config ${grafana-config-file:output} -homepath {{ grafana_homepath }}
wrapper-path = ${directory:service}/grafana wrapper-path = ${directory:service}/grafana
[grafana-certificate] [grafana-certificate]
<= generate-certificate <= generate-insecure-self-signed-certificate
[grafana-password] [grafana-password]
# TODO recipe = slapos.cookbook:generate.password
#recipe = slapos.cookbook:generate.password
username = admin username = admin
passwd = admin
[grafana-secret-key] [grafana-secret-key]
recipe = slapos.cookbook:generate.password recipe = slapos.cookbook:generate.password
...@@ -220,6 +216,7 @@ context = ...@@ -220,6 +216,7 @@ context =
section influxdb influxdb section influxdb influxdb
section telegraf telegraf section telegraf telegraf
section extra telegraf-config-file-extra section extra telegraf-config-file-extra
section slap_configuration slap-configuration
[telegraf-config-file-extra] [telegraf-config-file-extra]
recipe = slapos.recipe.build recipe = slapos.recipe.build
...@@ -267,7 +264,12 @@ init = ...@@ -267,7 +264,12 @@ init =
"name_override": f"{partition['name']}-mysql", "name_override": f"{partition['name']}-mysql",
"servers": [dsn], "servers": [dsn],
"gather_innodb_metrics": True, "gather_innodb_metrics": True,
"tags": dict(partition.get("static-tags", {}), app=application["name"]), "tags": dict(
partition.get("static-tags", {}),
app=application["name"],
name=partition["name"],
partition=partition["reference"],
),
} }
) )
if partition["type"] == "erp5/mariadb": if partition["type"] == "erp5/mariadb":
...@@ -278,39 +280,44 @@ init = ...@@ -278,39 +280,44 @@ init =
"dsn": dsn, "dsn": dsn,
"query": [ "query": [
{ {
"query": "select count(*) as message_count from message", "query": """
select 'message' as cmf_activity_queue, count(*) as message_count from message
union all select 'message_queue' as cmf_activity_queue, count(*) as message_count from message_queue
""",
"field_columns_include": ["message_count"], "field_columns_include": ["message_count"],
}, "tag_columns_include": ["cmf_activity_queue"],
{
"query": "select count(*) as message_queue_count from message_queue",
"field_columns_include": ["message_queue_count"],
},
{
"query": "select count(*) as message_failed_count from message where processing_node=-2",
"field_columns_include": ["message_failed_count"],
},
{
"query": "select count(*) as message_queue_failed_count from message_queue where processing_node=-2",
"field_columns_include": ["message_queue_failed_count"],
}, },
{ {
"query": """ "query": """
select cast(coalesce(max(UNIX_TIMESTAMP(now()) - UNIX_TIMESTAMP(message.date)), 0) as int) select 'message' as cmf_activity_queue, count(*) as failed_message_count
as message_waiting_time from message from message where processing_node between -2 and -10
where processing_node in (-1, 0) and message not like '%after_tag%' union all select 'message_queue' as cmf_activity_queue, count(*) as failed_message_count
from message_queue where processing_node between -2 and -10
""", """,
"field_columns_include": ["message_waiting_time"], "field_columns_include": ["failed_message_count"],
"tag_columns_include": ["cmf_activity_queue"],
}, },
{ {
"query": """ "query": """
select cast(coalesce(max(UNIX_TIMESTAMP(now()) - UNIX_TIMESTAMP(message.date)), 0) as int)
as waiting_time, 'message' as cmf_activity_queue
from message where processing_node in (-1, 0) and message.message not like '%after_tag%'
union all
select cast(coalesce(max(UNIX_TIMESTAMP(now()) - UNIX_TIMESTAMP(message_queue.date)), 0) as int) select cast(coalesce(max(UNIX_TIMESTAMP(now()) - UNIX_TIMESTAMP(message_queue.date)), 0) as int)
as message_queue_waiting_time from message_queue as waiting_time, 'message_queue' as cmf_activity_queue
where processing_node in (-1, 0) and message not like '%after_tag%' from message_queue where processing_node in (-1, 0) and message_queue.message not like '%after_tag%'
""", """,
"field_columns_include": ["message_queue_waiting_time"], "field_columns_include": ["waiting_time"],
} "tag_columns_include": ["cmf_activity_queue"],
},
], ],
"tags": dict(partition.get("static-tags", {}), app=application["name"]), "tags": dict(
partition.get("static-tags", {}),
app=application["name"],
name=partition["name"],
partition=partition["reference"],
),
} }
) )
...@@ -326,7 +333,12 @@ init = ...@@ -326,7 +333,12 @@ init =
], ],
"grok_timezone": "Local", "grok_timezone": "Local",
"name_override": f"{partition['name']}", "name_override": f"{partition['name']}",
"tags": dict(partition.get("static-tags", {}), app=application["name"]), "tags": dict(
partition.get("static-tags", {}),
app=application["name"],
name=partition["name"],
partition=partition["reference"],
),
} }
) )
urls = application.get("urls", []) urls = application.get("urls", [])
...@@ -350,6 +362,7 @@ init = ...@@ -350,6 +362,7 @@ init =
"interval": "5h", "interval": "5h",
"tags": {"app": application["name"]}, "tags": {"app": application["name"]},
}) })
# TODO some kind of GET request every 5 minutes ?
if application.get("type") == "SlapOS": if application.get("type") == "SlapOS":
telegraf_slapos_input_config_file = os.path.join( telegraf_slapos_input_config_file = os.path.join(
...@@ -360,8 +373,6 @@ init = ...@@ -360,8 +373,6 @@ init =
"slapos": [{ "slapos": [{
"instance_root": application["instance-root"]}]}}) "instance_root": application["instance-root"]}]}})
# TODO: supervisor process finder for
# https://github.com/influxdata/telegraf/tree/master/plugins/inputs/procstat ?
telegraf_slapos_input_command = self.options['telegraf-input-slapos-bin'] telegraf_slapos_input_command = self.options['telegraf-input-slapos-bin']
inputs["execd"].append({ inputs["execd"].append({
"name_override": f"{application['name']}-processes", "name_override": f"{application['name']}-processes",
...@@ -389,20 +400,20 @@ init = ...@@ -389,20 +400,20 @@ init =
processors["enum"].append({ processors["enum"].append({
"namepass": [ f"{application['name']}-processes"], "namepass": [ f"{application['name']}-processes"],
"mapping": [{ "mapping": [{
# "tag": "group", # TODO: rename this in input plugin # XXX I don't remember what this means "tag": "reference",
"tag": "slappart", "dest": "name",
"dest": "partition",
"value_mappings": partition_mapping, "value_mappings": partition_mapping,
}]}) }]})
# TODOs: # TODOs:
# - [ ] use tags partition-id and partition-reference with consistency
# - [ ] slapos input # - [ ] slapos input
# - [x] friendly name of slappart # - [x] friendly name of slappart
# - [x] strip hashes from -on-watch # - [x] strip hashes from -on-watch
# - [ ] process name is incorrect for zope
# - [x] activity metrics # - [x] activity metrics
# - [ ] alert dashboard # - [?] alert dashboard
# - [ ] inclu "jerome-dev" partout ??? # - [?] apdex
# - [ ] apdex
# - [ ] "job" is bad name in Explore # - [ ] "job" is bad name in Explore
options["extra-config"] = toml.dumps({ options["extra-config"] = toml.dumps({
...@@ -410,6 +421,10 @@ init = ...@@ -410,6 +421,10 @@ init =
"processors": processors}) "processors": processors})
# import pdb; pdb.set_trace() # import pdb; pdb.set_trace()
# tips:
# LogQL to graph total slow query time
# sum(rate({partition="mariadb"} | regexp `(Query_time:\s(?P<query_time>\d+)\.)` | unwrap query_time [$__interval]))
# apdex # apdex
# SELECT sum("success") / sum("all") FROM # SELECT sum("success") / sum("all") FROM
# (SELECT count("duration") AS "all" FROM "jerome-dev-balancer" WHERE $timeFilter GROUP BY time($__interval) fill(null)), # (SELECT count("duration") AS "all" FROM "jerome-dev-balancer" WHERE $timeFilter GROUP BY time($__interval) fill(null)),
...@@ -428,179 +443,131 @@ install = ...@@ -428,179 +443,131 @@ install =
f.write(content) f.write(content)
[loki] [loki-server]
boltdb-shipper-active-index-directory = ${directory:loki-boltdb-shipper-active-index-directory}
boltdb-shipper-cache-location = ${directory:loki-boltdb-shipper-cache-location}
compactor-working-directory = ${directory:loki-compactor-working-directory}
storage-filesystem-directory = ${directory:loki-storage-filesystem-directory} storage-filesystem-directory = ${directory:loki-storage-filesystem-directory}
path-prefix = ${directory:loki-dir}
ip = ${instance-parameter:ipv4-random} http-port = 3100
read-1-http-port = 3101 url = https://${:ip6}:${:http-port}
read-1-grpc-port = 9096 ipv4 = ${instance-parameter:ipv4-random}
read-1-memberlist-port = 7947
read-2-http-port = 3102
read-2-grpc-port = 9097
read-2-memberlist-port = 7948
write-http-port = 3103
write-grpc-port = 9098
write-memberlist-port = 7949
query-frontend-http-port = 3104
query-frontend-grpc-port = 9099
query-frontend-memberlist-port = 7950
querier-http-port = 3105
querier-grpc-port = 9100
querier-memberlist-port = 7951
index-gateway-http-port = 3106
index-gateway-grpc-port = 9101
index-gateway-memberlist-port = 7952
query-scheduler-http-port = 3107
query-scheduler-grpc-port = 9102
query-scheduler-memberlist-port = 7953
# compactor
nginx-port = 3100
url = http://${:ip}:${:nginx-port}
ipv6 = ${instance-parameter:ipv6-random} ipv6 = ${instance-parameter:ipv6-random}
ca-file = ${loki-server-certificate:ca-file}
cert-file = ${loki-server-certificate:cert-file}
key-file = ${loki-server-certificate:key-file}
# TODO: CRL
[loki-service]
[loki-service-macro]
recipe = slapos.cookbook:wrapper recipe = slapos.cookbook:wrapper
command-line = command-line =
bash -c 'nice -19 chrt --idle 0 ionice -c3 {{ loki_bin }} \ bash -c 'nice -19 chrt --idle 0 ionice -c3 {{ loki_bin }} \
-config.file=${loki-config-file:output} \ -config.file=${loki-server-config-file:output} \
\
-boltdb.shipper.compactor.ring.instance-addr=${loki:ip} \
-boltdb.shipper.compactor.ring.instance-id=${:_buildout_section_name_} \
-common.embedded-cachering.instance-addr=${loki:ip} \
-common.embedded-cachering.instance-id=${:_buildout_section_name_} \
-distributor.ring.instance-addr=${loki:ip} \
-distributor.ring.instance-id=${:_buildout_section_name_} \
-frontend.instance-addr=${loki:ip} \
-frontend.instance-port=${loki:query-frontend-grpc-port} \
-index-gateway.ring.instance-addr=${loki:ip} \
-index-gateway.ring.instance-id=${:_buildout_section_name_} \
-memberlist.advertise-port=${:memberlist-port} \
-memberlist.bind-port=${:memberlist-port} \
-memberlist.nodename=${:_buildout_section_name_} \
-query-scheduler.ring.instance-addr=${loki:ip} \
-query-scheduler.ring.instance-id=${:_buildout_section_name_} \
-ruler.ring.instance-addr=${loki:ip} \
-ruler.ring.instance-id=${:_buildout_section_name_} \
-server.grpc-listen-port=${:grpc-port} \
-server.http-listen-port=${:http-port} \
${:extra-command-line}'
wrapper-path = ${directory:service}/${:_buildout_section_name_}
extra-command-line =
wrapper-path = ${directory:service}/${:_buildout_section_name_}
[loki-listen-promise-macro] [loki-server-config-file]
<= check-url-available-promise
url = http://${loki:ip}:${:port}/ready
[loki-read-1-service]
<= loki-service-macro
extra-command-line = -target=read -querier.scheduler-address=${loki:ip}:${loki:read-2-grpc-port} -query-scheduler.ring.instance-port=${loki:read-1-grpc-port}
http-port = ${loki:read-1-http-port}
grpc-port = ${loki:read-1-grpc-port}
memberlist-port = ${loki:read-1-memberlist-port}
[loki-read-1-listen-promise]
<= loki-listen-promise-macro
port = ${loki-read-1-service:http-port}
[loki-read-2-service]
<= loki-service-macro
extra-command-line = -target=read -querier.scheduler-address=${loki:ip}:${loki:read-1-grpc-port} -query-scheduler.ring.instance-port=${loki:read-2-grpc-port}
http-port = ${loki:read-2-http-port}
grpc-port = ${loki:read-2-grpc-port}
memberlist-port = ${loki:read-2-memberlist-port}
[loki-read-2-listen-promise]
<= loki-listen-promise-macro
port = ${loki-read-2-service:http-port}
[loki-write-service]
<= loki-service-macro
extra-command-line = -target=write
http-port = ${loki:write-http-port}
grpc-port = ${loki:write-grpc-port}
memberlist-port = ${loki:write-memberlist-port}
[loki-write-listen-promise]
<= loki-listen-promise-macro
port = ${loki-write-service:http-port}
[loki-querier-service]
<= loki-service-macro
extra-command-line = -target=querier -querier.scheduler-address=${loki:ip}:${loki:query-scheduler-grpc-port} -query-scheduler.ring.instance-port=${loki:querier-grpc-port}
http-port = ${loki:querier-http-port}
grpc-port = ${loki:querier-grpc-port}
memberlist-port = ${loki:querier-memberlist-port}
[loki-querier-listen-promise]
<= loki-listen-promise-macro
port = ${loki-querier-service:http-port}
[loki-index-gateway-service]
<= loki-service-macro
extra-command-line = -target=index-gateway -boltdb.shipper.query-ready-num-days=30
# XXX -boltdb.shipper.query-ready-num-days=30 useful ?
http-port = ${loki:index-gateway-http-port}
grpc-port = ${loki:index-gateway-grpc-port}
memberlist-port = ${loki:index-gateway-memberlist-port}
[loki-index-gateway-listen-promise]
<= loki-listen-promise-macro
port = ${loki-index-gateway-service:http-port}
[loki-query-frontend-service]
<= loki-service-macro
extra-command-line = -target=query-frontend -frontend.scheduler-address=${loki:ip}:${loki:query-scheduler-grpc-port}
http-port = ${loki:query-frontend-http-port}
grpc-port = ${loki:query-frontend-grpc-port}
memberlist-port = ${loki:query-frontend-memberlist-port}
[loki-query-frontend-listen-promise]
<= loki-listen-promise-macro
port = ${loki-query-frontend-service:http-port}
[loki-query-scheduler-service]
<= loki-service-macro
extra-command-line = -target=query-scheduler
http-port = ${loki:query-scheduler-http-port}
grpc-port = ${loki:query-scheduler-grpc-port}
memberlist-port = ${loki:query-scheduler-memberlist-port}
[loki-query-scheduler-listen-promise]
<= loki-listen-promise-macro
port = ${loki-query-scheduler-service:http-port}
[loki-config-file]
<= config-file <= config-file
context = context =
section loki loki section loki-server loki
[loki-nginx-service] [loki-server-certificate-init-certificate]
recipe = slapos.cookbook:wrapper recipe = slapos.recipe.build
command-line = init =
{{ nginx_bin }} -p ${directory:loki-nginx-dir} -c ${loki-nginx-config-file:output} # pre-create a file at the path of the certificate,
wrapper-path = ${directory:service}/${:_buildout_section_name_} # so that we can use hash-existing-files options
url = http://${loki:ip}:${loki:nginx-port} import pathlib
cert_file = pathlib.Path(self.buildout['loki-server-certificate']['cert-file'])
if not cert_file.parent.exists():
cert_file.parent.mkdir()
if not cert_file.exists():
cert_file.touch()
[loki-server-certificate]
init = ${loki-server-certificate-init-certificate:init}
key-file = ${directory:etc}/${:_buildout_section_name_}.key
cert-file = ${directory:etc}/${:_buildout_section_name_}.crt
common-name = ${:_buildout_section_name_}
ca-file = ${directory:etc}/${:_buildout_section_name_}.ca.crt
crl-file = ${directory:etc}/${:_buildout_section_name_}.crl
{{
caucase.updater(
prefix='loki-server-certificate',
buildout_bin_directory=buildout['bin-directory'],
updater_path='${directory:service}/loki-server-certificate-updater',
url='${caucased:url}',
data_dir='${directory:caucase-updater-loki}',
crt_path='${loki-server-certificate:cert-file}',
ca_path='${loki-server-certificate:ca-file}',
crl_path='${loki-server-certificate:crl-file}',
key_path='${loki-server-certificate:key-file}',
template_csr='${loki-server-certificate-prepare-csr:csr}',
openssl=openssl_bin,
)}}
[loki-server-certificate-csr-config]
recipe = slapos.recipe.template
inline =
[req]
prompt = no
req_extensions = req_ext
distinguished_name = dn
[ dn ]
CN = loki-server
[ req_ext ]
subjectAltName = @alt_names
[ alt_names ]
IP.1 = ${loki-server:ipv4}
IP.2 = ${loki-server:ipv6}
output = ${buildout:parts-directory}/${:_buildout_section_name_}/${:_buildout_section_name_}
[loki-server-certificate-prepare-csr]
recipe = plone.recipe.command
command =
if [ ! -f '${:csr}' ] ; then
{{ openssl_bin }} req \
-newkey rsa \
-batch \
-new \
-sha256 \
-nodes \
-keyout /dev/null \
-config '${loki-server-certificate-csr-config:output}' \
-out '${:csr}'
fi
stop-on-error = true
csr = ${directory:srv}/${:_buildout_section_name_}.csr.pem
[loki-nginx-listen-promise] [loki-server-listen-promise]
<= check-url-available-promise <= check-url-available-promise
url = ${loki-nginx-service:url} url = https://${loki-server:ipv6}:${loki-server:http-port}/ready
ca-cert-file = ${loki-server:ca-file}
cert-file = ${:cert-file}
key-file = ${:key-file}
[loki-caucased]
port = 18080
ip = ${instance-parameter:ipv6-random}
netloc = [${:ip}]:${:port}
url = http://${:netloc}/
{{
caucase.caucased(
prefix='loki-caucased',
buildout_bin_directory=buildout['bin-directory'],
caucased_path='${directory:service}/caucased',
backup_dir='${directory:backup-caucased}',
data_dir='${directory:srv}/caucased',
netloc='${caucased:netloc}',
tmp='${directory:tmp}',
# server: loki clients: grafana, promtail
service_auto_approve_count=5,
user_auto_approve_count=1,
key_len=2048,
)}}
[loki-nginx-config-file]
<= config-file
context =
section loki loki
[promtail] [promtail]
recipe = slapos.cookbook:wrapper recipe = slapos.cookbook:wrapper
...@@ -790,9 +757,13 @@ install = ...@@ -790,9 +757,13 @@ install =
"stages": [ "stages": [
{ {
"multiline": { "multiline": {
# TODO # between each slow query, slow query log has a first line like:
#"firstline": "^# Time: \\d{2}\\d{2}\\d{2}\\s\\d{1,2}\\:\\d{2}\\:\\d{2}", # # Time: 231008 16:29:01
"firstline": r"^# Time: \d{2}.*", # and then a second like:
# # User@Host: user[user] @ [10.0.71.207]
# but the first line is not repeated for subsequent queries that happen
# at the same second
"firstline": r"(^# Time: \d{2}.*\n^# User@Host:.*|^# User@Host:.*)",
"max_wait_time": "3s" "max_wait_time": "3s"
} }
}, },
...@@ -947,14 +918,7 @@ instance-promises = ...@@ -947,14 +918,7 @@ instance-promises =
${influxdb-password-promise:wrapper-path} ${influxdb-password-promise:wrapper-path}
${influxdb-database-ready-promise:wrapper-path} ${influxdb-database-ready-promise:wrapper-path}
${grafana-listen-promise:path} ${grafana-listen-promise:path}
${loki-query-frontend-listen-promise:path} ${loki-listen-promise:path}
${loki-query-scheduler-listen-promise:path}
# ${loki-index-gateway-listen-promise:path}
${loki-querier-listen-promise:path}
# ${loki-read-1-listen-promise:path}
# ${loki-read-2-listen-promise:path}
${loki-write-listen-promise:path}
${loki-nginx-listen-promise:path}
${promtail-listen-promise:path} ${promtail-listen-promise:path}
${apache-frontend-available-promise:path} ${apache-frontend-available-promise:path}
...@@ -969,6 +933,6 @@ telegraf-extra-config-dir = ${telegraf:extra-config-dir} ...@@ -969,6 +933,6 @@ telegraf-extra-config-dir = ${telegraf:extra-config-dir}
grafana-url = ${grafana:url} grafana-url = ${grafana:url}
grafana-username = ${grafana:admin-user} grafana-username = ${grafana:admin-user}
grafana-password = ${grafana:admin-password} grafana-password = ${grafana:admin-password}
loki-url = ${loki:url} loki-internal-url = ${loki:url}
promtail-url = ${promtail:url} promtail-url = ${promtail:url}
url = ${apache-frontend:connection-secure_access} url = ${apache-frontend:connection-secure_access}
# inspired by
# https://github.com/grafana/loki/blob/1489c1731277c327e3661da182bfc6c90d4559f4/tools/dev/loki-boltdb-storage-s3/docker-compose.yml
# and other configuration examples with microservices, because the single binary
# mode assumes running on 127.0.0.1, but in slapos we want to bind on partition's
# addresses
auth_enabled: false auth_enabled: false
http_prefix:
server: server:
http_listen_address: {{ loki['ip'] }} http_listen_address: {{ loki['ipv6'] }}
grpc_listen_address: {{ loki['ip'] }} http_listen_port: {{ loki['http-port'] }}
http_tls_ca_path: {{ loki['ca-file'] }}
http_tls_cert_path: {{ loki['cert-file'] }}
http_tls_key_path: {{ loki['key-file'] }}
http_tls_client_auth_type: RequireAndVerifyClientCert
grpc_listen_address: {{ loki['ipv4'] }}
grpc_server_max_recv_msg_size: 1.048576e+08 grpc_server_max_recv_msg_size: 1.048576e+08
grpc_server_max_send_msg_size: 1.048576e+08 grpc_server_max_send_msg_size: 1.048576e+08
# # TODO ?
# wal:
# enabled: true
# dir: /loki/wal
common: common:
compactor_address: http://{{ loki['ip'] }}:{{ loki['write-http-port'] }} ring:
instance_addr: {{ loki['ipv4'] }}
kvstore:
store: inmemory
replication_factor: 1
path_prefix: {{ loki['path-prefix'] }}
schema_config: schema_config:
configs: configs:
- from: 2020-05-15 - from: 2020-05-15
store: boltdb-shipper store: tsdb
object_store: filesystem object_store: filesystem
schema: v11 schema: v13
index: index:
prefix: index_ prefix: index_
period: 24h period: 24h
storage_config: storage_config:
boltdb_shipper:
active_index_directory: {{ loki['boltdb-shipper-active-index-directory'] }}
cache_location: {{ loki['boltdb-shipper-cache-location'] }}
filesystem: filesystem:
directory: {{ loki['storage-filesystem-directory'] }} directory: {{ loki['storage-filesystem-directory'] }}
...@@ -45,42 +41,8 @@ limits_config: ...@@ -45,42 +41,8 @@ limits_config:
ingestion_rate_mb: 1024 ingestion_rate_mb: 1024
ingestion_burst_size_mb: 1024 ingestion_burst_size_mb: 1024
# https://github.com/grafana/loki/issues/5143#issuecomment-1697196679
ingester:
lifecycler:
address: {{ loki['ip'] }}
ring:
kvstore:
store: memberlist
replication_factor: 1
compactor:
compaction_interval: 1m
retention_enabled: true
working_directory: {{ loki['compactor-working-directory'] }}
frontend:
log_queries_longer_than: 5s
compress_responses: true
max_outstanding_per_tenant: 2048
tail_proxy_url: http://{{ loki['ip'] }}:{{ loki['querier-http-port']}}
frontend_worker: frontend_worker:
scheduler_address: {{ loki['ip'] }}:{{ loki['query-scheduler-grpc-port'] }} grpc_client_config:
#testERP5Type max_send_msg_size: 268435456
memberlist:
bind_addr:
- {{ loki['ip'] }}
join_members:
# - {{ loki['ip'] }}:{{ loki['read-1-memberlist-port'] }}
- {{ loki['ip'] }}:{{ loki['querier-memberlist-port'] }}
# - {{ loki['ip'] }}:{{ loki['write-memberlist-port'] }}
query_scheduler:
max_outstanding_requests_per_tenant: 1024
querier:
query_ingesters_within: 2h
daemon off;
events {
worker_connections 1024;
}
error_log /dev/stdout;
http {
default_type application/octet-stream;
access_log /dev/stdout;
sendfile on;
tcp_nopush on;
upstream read {
server {{ loki['ip'] }}:{{ loki['query-frontend-http-port'] }};
}
upstream write {
server {{ loki['ip'] }}:{{ loki['write-http-port'] }};
}
upstream cluster {
server {{ loki['ip'] }}:{{ loki['write-http-port'] }};
server {{ loki['ip'] }}:{{ loki['query-frontend-http-port'] }};
server {{ loki['ip'] }}:{{ loki['querier-http-port'] }};
}
upstream query-frontend {
server {{ loki['ip'] }}:{{ loki['query-frontend-http-port'] }};
}
server {
listen {{ loki['ip'] }}:{{ loki['nginx-port'] }};
# XXX while debugging
listen [{{ loki['ipv6'] }}]:{{ loki['nginx-port'] }};
location / {
return 200 'OK';
}
location = /ring {
proxy_pass http://cluster$request_uri;
}
location = /memberlist {
proxy_pass http://cluster$request_uri;
}
location = /config {
proxy_pass http://cluster$request_uri;
}
location = /metrics {
proxy_pass http://cluster$request_uri;
}
location = /ready {
proxy_pass http://cluster$request_uri;
}
location = /loki/api/v1/push {
proxy_pass http://write$request_uri;
}
location = /loki/api/v1/tail {
proxy_pass http://read$request_uri;
proxy_set_header Upgrade $http_upgrade;
proxy_set_header Connection "upgrade";
}
location ~ /loki/api/.* {
proxy_pass http://query-frontend$request_uri;
}
}
}
[buildout] [buildout]
extends = extends =
../../stack/slapos.cfg ../../stack/slapos.cfg
../../stack/caucase/buildout.cfg
../../stack/nodejs.cfg ../../stack/nodejs.cfg
../../component/make/buildout.cfg ../../component/make/buildout.cfg
../../component/golang/buildout.cfg ../../component/golang/buildout.cfg
../../component/openssl/buildout.cfg ../../component/openssl/buildout.cfg
../../component/curl/buildout.cfg ../../component/curl/buildout.cfg
../../component/dash/buildout.cfg ../../component/dash/buildout.cfg
../../component/nginx/buildout.cfg ../../component/systemd/buildout.cfg
buildout.hash.cfg buildout.hash.cfg
parts = parts =
...@@ -20,16 +21,7 @@ parts = ...@@ -20,16 +21,7 @@ parts =
grafana-provisioning-datasources-config-file grafana-provisioning-datasources-config-file
grafana-provisioning-dashboards-config-file grafana-provisioning-dashboards-config-file
loki-config-file loki-config-file
loki-nginx-config-file
; [nodejs]
; <= nodejs-16.19.0
[gowork]
# XXX speed up development cycle by not rebuilding workspace on every software run
# XXX does not work ?
update-command =
[go_github.com_grafana_grafana] [go_github.com_grafana_grafana]
<= go-git-package <= go-git-package
...@@ -41,7 +33,7 @@ revision = v10.1.2-0-g8e428858dd ...@@ -41,7 +33,7 @@ revision = v10.1.2-0-g8e428858dd
<= go-git-package <= go-git-package
go.importpath = github.com/grafana/loki go.importpath = github.com/grafana/loki
repository = https://github.com/grafana/loki repository = https://github.com/grafana/loki
revision = v2.9.1-0-gd9d5ed4a1 revision = v3.0.0-0-gb4f7181c7
[go_github.com_influxdata_influxdb] [go_github.com_influxdata_influxdb]
<= go-git-package <= go-git-package
...@@ -59,7 +51,7 @@ revision = v1.28.1-0-g3ea9ffbe2 ...@@ -59,7 +51,7 @@ revision = v1.28.1-0-g3ea9ffbe2
<= go-git-package <= go-git-package
go.importpath = github.com/perrinjerome/telegraf-input-slapos go.importpath = github.com/perrinjerome/telegraf-input-slapos
repository = https://github.com/perrinjerome/telegraf-input-slapos repository = https://github.com/perrinjerome/telegraf-input-slapos
revision = v0.0.1-0-gf8981f3 revision = v0.0.2-0-gd4c5221
[go_github.com_prometheus_prometheus] [go_github.com_prometheus_prometheus]
<= go-git-package <= go-git-package
...@@ -84,15 +76,18 @@ install = ...@@ -84,15 +76,18 @@ install =
${go_github.com_perrinjerome_slapos_telegraf_input:location}:./... ${go_github.com_perrinjerome_slapos_telegraf_input:location}:./...
${go_github.com_prometheus_prometheus:location}:./cmd/... ${go_github.com_prometheus_prometheus:location}:./cmd/...
# disable cgo, to prevent loki/promtail from using go-systemd
environment = environment =
CGO_ENABLED = 0 CGO_ENABLED=1
CGO_CFLAGS=-I${systemd:location}/include
buildflags =
-tags promtail_journal_enabled
cpkgpath =
${systemd:location}
telegraf-bin = ${:bin}/telegraf telegraf-bin = ${:bin}/telegraf
telegraf-input-slapos-bin = ${:bin}/telegraf-input-slapos telegraf-input-slapos-bin = ${:bin}/telegraf-input-slapos
influx-bin = ${:bin}/influx influx-bin = ${:bin}/influx
influxd-bin = ${:bin}/influxd influxd-bin = ${:bin}/influxd
grafana-bin = ${:bin}/grafana-server grafana-bin = ${grafana:binpath}/grafana
grafana-homepath = ${grafana:homepath} grafana-homepath = ${grafana:homepath}
loki-bin = ${:bin}/loki loki-bin = ${:bin}/loki
promtail-bin = ${:bin}/promtail promtail-bin = ${:bin}/promtail
...@@ -105,8 +100,6 @@ command = bash -ce " ...@@ -105,8 +100,6 @@ command = bash -ce "
. ${gowork:env.sh} && \ . ${gowork:env.sh} && \
go install github.com/google/wire/cmd/wire@v0.5.0 && \ go install github.com/google/wire/cmd/wire@v0.5.0 && \
wire gen -tags oss ./pkg/server ./pkg/cmd/grafana-cli/runner && \ wire gen -tags oss ./pkg/server ./pkg/cmd/grafana-cli/runner && \
# Unlike loki, grafana _needs_ CGO_ENABLED, so we override here
export CGO_ENABLED=1 && \
go run build.go setup && \ go run build.go setup && \
go run build.go build && \ go run build.go build && \
export NODE_OPTIONS=--max_old_space_size=8192 && \ export NODE_OPTIONS=--max_old_space_size=8192 && \
...@@ -119,6 +112,8 @@ command = bash -ce " ...@@ -119,6 +112,8 @@ command = bash -ce "
rm -rf ${buildout:directory}/.cache/yarn/ rm -rf ${buildout:directory}/.cache/yarn/
" "
homepath = ${go_github.com_grafana_grafana:location} homepath = ${go_github.com_grafana_grafana:location}
# XXX "linux-amd64" is not portable here
binpath = ${go_github.com_grafana_grafana:location}/bin/linux-amd64
stop-on-error = true stop-on-error = true
[download-file-base] [download-file-base]
...@@ -143,9 +138,6 @@ url = ${:_profile_base_location_}/${:filename} ...@@ -143,9 +138,6 @@ url = ${:_profile_base_location_}/${:filename}
[loki-config-file] [loki-config-file]
<= download-file-base <= download-file-base
[loki-nginx-config-file]
<= download-file-base
[instance-eggs] [instance-eggs]
recipe = zc.recipe.egg recipe = zc.recipe.egg
eggs = eggs =
...@@ -167,13 +159,14 @@ context = ...@@ -167,13 +159,14 @@ context =
key grafana_bin gowork:grafana-bin key grafana_bin gowork:grafana-bin
key grafana_homepath gowork:grafana-homepath key grafana_homepath gowork:grafana-homepath
key loki_bin gowork:loki-bin key loki_bin gowork:loki-bin
raw nginx_bin ${nginx:location}/sbin/nginx
key promtail_bin gowork:promtail-bin key promtail_bin gowork:promtail-bin
key curl_bin :curl-bin key curl_bin :curl-bin
key dash_bin :dash-bin key dash_bin :dash-bin
curl-bin = ${curl:location}/bin/curl curl-bin = ${curl:location}/bin/curl
dash-bin = ${dash:location}/bin/dash dash-bin = ${dash:location}/bin/dash
depends = ${instance-eggs:eggs} depends = ${instance-eggs:eggs}
import-list =
file caucase caucase-jinja2-library:target
[versions] [versions]
inotifyx = 0.2.2 inotifyx = 0.2.2
......
...@@ -21,7 +21,7 @@ ...@@ -21,7 +21,7 @@
# Tags can also be specified via a normal map, but only one form at a time: # Tags can also be specified via a normal map, but only one form at a time:
[tags] [tags]
# dc = "us-east-1" computer_id = "{{ slap_configuration['computer'] }}"
# Configuration for telegraf agent # Configuration for telegraf agent
[agent] [agent]
...@@ -42,8 +42,7 @@ ...@@ -42,8 +42,7 @@
# Run telegraf in debug mode # Run telegraf in debug mode
debug = false debug = false
# Override default hostname, if empty use os.Hostname() # Override default hostname, if empty use os.Hostname()
hostname = "" hostname =
############################################################################### ###############################################################################
# OUTPUTS # # OUTPUTS #
...@@ -97,6 +96,7 @@ ...@@ -97,6 +96,7 @@
[system] [system]
# TODO: generate this full config file in toml
{{ extra['extra-config'] }} {{ extra['extra-config'] }}
############################################################################### ###############################################################################
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment