Commit aa2178ec authored by Jérome Perrin

software/grafana: WIP generate telegraf and loki config

parent 041aef9e
......@@ -15,7 +15,7 @@
[instance-profile]
filename = instance.cfg.in
md5sum = 8c9dc41c176ba01116de5b71aaa704de
md5sum = d44f6a26e36db9ecc863f3111c0075cb
[influxdb-config-file]
filename = influxdb-config-file.cfg.in
......@@ -23,20 +23,24 @@ md5sum = a28972ced3e0f4aa776e43a9c44717c0
[telegraf-config-file]
filename = telegraf-config-file.cfg.in
md5sum = a1a9c22c2a7829c66a49fc2504604d21
md5sum = 6de1faa34842e1eda095a51edecc2083
[grafana-config-file]
filename = grafana-config-file.cfg.in
md5sum = e255dcca466f5de51698d24cbd114577
md5sum = 83a8445858eab21a12f1769c23424bea
[grafana-provisioning-config-file]
filename = grafana-provisioning-config-file.cfg.in
[grafana-provisioning-datasources-config-file]
filename = grafana-provisioning-datasources-config-file.cfg.in
md5sum = 3aa0f1ed752b2a59ea2b5e7c1733daf3
[grafana-provisioning-dashboards-config-file]
filename = grafana-provisioning-dashboards-config-file.cfg.in
md5sum = 5616679a9c5c2757540175ead3f5500a
[loki-config-file]
filename = loki-config-file.cfg.in
md5sum = ad2baf4599a937d7352034a41fa24814
md5sum = 19a7f5cb904b3287b0bc7cb3e8a27429
[promtail-config-file]
filename = promtail-config-file.cfg.in
md5sum = 5f1b3a1a3d3f98daeab4780106452d71
[loki-nginx-config-file]
filename = loki-nginx-config-file.cfg.in
md5sum = b08ce1e4abb34eb79e26133459c27c3a
......@@ -154,7 +154,7 @@ reporting_enabled = true
# in some UI views to notify that grafana or plugin update exists
# This option does not cause any auto updates, nor send any information
# only a GET request to https://grafana.com to get latest versions
check_for_updates = true
check_for_updates = false
# Google Analytics universal tracking code, only enabled if you specify an id here
google_analytics_ua_id =
......@@ -345,11 +345,8 @@ user = {{ slapparameter_dict.get('smtp-username', '') }}
password = {{ slapparameter_dict.get('smtp-password', '') and '"""%s"""' % slapparameter_dict['smtp-password'] or ""}}
cert_file =
key_file =
#skip_verify = false
skip_verify = {{ slapparameter_dict.get('smtp-verify-ssl', 'true').lower() == 'true' and 'false' or 'true' }}
#from_address = admin@grafana.localhost
# skip_verify is the inverse of the boolean smtp-verify-ssl parameter:
# certificates are verified unless the parameter is explicitly set to false.
skip_verify = {{ slapparameter_dict.get('smtp-verify-ssl', True) and 'false' or 'true' }}
from_address = {{ slapparameter_dict.get('email-from-address', '') }}
#from_name = Grafana
from_name = {{ slapparameter_dict.get('email-from-name', 'Grafana') }}
ehlo_identity =
......
# https://grafana.com/docs/grafana/latest/administration/provisioning/#dashboards
# Declares a single file-based dashboard provider named "SlapOS"; the
# dashboards are read from the directory given as `path` below.
apiVersion: 1
providers:
- name: SlapOS
# Empty folder name: no dedicated folder is named for these dashboards.
folder: ''
# Interval, in seconds, between two scans of `path` for changed dashboards.
updateIntervalSeconds: 10
# Provisioned dashboards cannot be saved from the Grafana UI.
allowUiUpdates: false
options:
# Filled in by the instance template from the `dashboards_dir` variable.
path: {{ dashboards_dir }}
{
"$schema": "http://json-schema.org/draft-04/schema#",
"$schema": "http://json-schema.org/draft-07/schema#",
"description": "Parameters to instantiate Grafana",
"type": "object",
"additionalProperties": false,
......@@ -18,11 +18,7 @@
},
"smtp-verify-ssl": {
"description": "Verify SSL certificate of SMTP server",
"type": "string",
"enum": [
"true",
"false"
]
"type": "boolean"
},
"email-from-address": {
"description": "Email address used in From: header of emails",
......@@ -33,6 +29,133 @@
"default": "Grafana",
"type": "string"
},
"applications": {
"description": "Applications to monitor",
"type": "array",
"items": {
"type": "object",
"required": [
"name",
"instance-root",
"partitions"
],
"properties": {
"name": {
"description": "Name of this application",
"type": "string"
},
"instance-root": {
"description": "Directory containing SlapOS partitions.",
"type": "string"
},
"urls": {
"description": "URLs to monitor for availability and certificate lifetime",
"type": "array",
"items": {
"type": "string"
}
},
"partitions": {
"description": "SlapOS partitions to monitor",
"type": "array",
"items": {
"type": "object",
"properties": {
"name": {
"type": "string",
"description": "Friendly name of the partition",
"examples": [
"mariadb",
"zope-activity"
]
},
"reference": {
"type": "string",
"description": "Reference of the partition",
"examples": [
"slappart1",
"slappart2"
]
},
"type": {
"type": "string",
"description": "Type of the partition. Known types have extra metrics and logs collected",
"enum": [
"erp5/mariadb",
"erp5/balancer",
"erp5/zope-activity",
"erp5/zope-front",
"erp5/zeo",
"mariadb",
"default"
]
},
"file-path": {
"type": "string",
"description": "Glob for the files to watch. This mostly makes sense for `default` type"
},
"static-tags": {
"type": "object",
"description": "Static tags for this partition",
"examples": [
{
"region": "eu",
"data-center": "abc123"
}
]
}
},
"anyOf": [
{
"properties": {
"type": {
"const": "default"
}
},
"required": [
"name",
"file-path"
]
},
{
"properties": {
"type": {
"not": {
"const": "default"
}
}
},
"required": [
"name",
"reference"
]
}
],
"examples": [
{
"name": "zope-backoffice",
"type": "erp5/zope-front",
"reference": "slappart1",
"static-tags": {
"instance": "instance-name"
}
},
{
"name": "mariadb",
"type": "erp5/mariadb",
"reference": "slappart2"
},
{
"name": "syslog",
"type": "default",
"file-path": "/var/log/syslog"
}
]
}
}
}
}
},
"promtail-extra-scrape-config": {
"description": "Raw promtail config (experimental parameter, see https://github.com/grafana/loki/blob/v0.3.0/docs/promtail.md#scrape-configs for detail)",
"default": "",
......
This diff is collapsed.
# inspired by
# https://github.com/grafana/loki/blob/1489c1731277c327e3661da182bfc6c90d4559f4/tools/dev/loki-boltdb-storage-s3/docker-compose.yml
# and other configuration examples with microservices, because the single binary
# mode assumes running on 127.0.0.1, but in slapos we want to bind on partition's
# addresses
auth_enabled: false
http_prefix:
server:
http_listen_address: {{ loki['ip'] }}
http_listen_port: {{ loki['port'] }}
grpc_listen_address: {{ loki['ip'] }}
grpc_listen_port: {{ loki['grpc-port'] }}
grpc_server_max_recv_msg_size: 1.048576e+08
grpc_server_max_send_msg_size: 1.048576e+08
ingester:
lifecycler:
address: {{ loki['ip'] }}
ring:
kvstore:
store: inmemory
replication_factor: 1
chunk_idle_period: 15m
# # TODO ?
# wal:
# enabled: true
# dir: /loki/wal
common:
compactor_address: http://{{ loki['ip'] }}:{{ loki['write-http-port'] }}
schema_config:
configs:
- from: 2018-04-15
store: boltdb
- from: 2020-05-15
store: boltdb-shipper
object_store: filesystem
schema: v9
schema: v11
index:
prefix: index_
period: 168h
period: 24h
storage_config:
boltdb:
directory: {{ loki['storage-boltdb-dir'] }}
boltdb_shipper:
active_index_directory: {{ loki['boltdb-shipper-active-index-directory'] }}
cache_location: {{ loki['boltdb-shipper-cache-location'] }}
filesystem:
directory: {{ loki['storage-filesystem-dir'] }}
directory: {{ loki['storage-filesystem-directory'] }}
limits_config:
reject_old_samples: false
enforce_metric_name: false
reject_old_samples: true
reject_old_samples_max_age: 168h
chunk_store_config:
max_look_back_period: 0
table_manager:
chunk_tables_provisioning:
inactive_read_throughput: 0
inactive_write_throughput: 0
provisioned_read_throughput: 0
provisioned_write_throughput: 0
index_tables_provisioning:
inactive_read_throughput: 0
inactive_write_throughput: 0
provisioned_read_throughput: 0
provisioned_write_throughput: 0
retention_deletes_enabled: false
retention_period: 0
ingestion_rate_mb: 1024
ingestion_burst_size_mb: 1024
ingester:
lifecycler:
address: {{ loki['ip'] }}
ring:
kvstore:
store: memberlist
replication_factor: 1
compactor:
compaction_interval: 1m
retention_enabled: true
working_directory: {{ loki['compactor-working-directory'] }}
frontend:
log_queries_longer_than: 5s
compress_responses: true
max_outstanding_per_tenant: 2048
tail_proxy_url: http://{{ loki['ip'] }}:{{ loki['querier-http-port']}}
frontend_worker:
scheduler_address: {{ loki['ip'] }}:{{ loki['query-scheduler-grpc-port'] }}
#testERP5Type
memberlist:
bind_addr:
- {{ loki['ip'] }}
join_members:
# - {{ loki['ip'] }}:{{ loki['read-1-memberlist-port'] }}
- {{ loki['ip'] }}:{{ loki['querier-memberlist-port'] }}
# - {{ loki['ip'] }}:{{ loki['write-memberlist-port'] }}
query_scheduler:
max_outstanding_requests_per_tenant: 1024
querier:
query_ingesters_within: 2h
# Reverse proxy placed in front of the Loki services: every request path is
# forwarded to one of the upstream groups declared below. All addresses and
# ports are filled in from the `loki` template variable.
# Stay in the foreground instead of daemonizing.
daemon off;
events {
worker_connections 1024;
}
# Error and access logs both go to standard output.
error_log /dev/stdout;
http {
default_type application/octet-stream;
access_log /dev/stdout;
sendfile on;
tcp_nopush on;
# Upstream groups used by the `proxy_pass` directives of the server block.
upstream read {
server {{ loki['ip'] }}:{{ loki['query-frontend-http-port'] }};
}
upstream write {
server {{ loki['ip'] }}:{{ loki['write-http-port'] }};
}
# `cluster` groups the write, query-frontend and querier HTTP ports; it serves
# the status endpoints (/ring, /memberlist, /config, /metrics, /ready).
upstream cluster {
server {{ loki['ip'] }}:{{ loki['write-http-port'] }};
server {{ loki['ip'] }}:{{ loki['query-frontend-http-port'] }};
server {{ loki['ip'] }}:{{ loki['querier-http-port'] }};
}
upstream query-frontend {
server {{ loki['ip'] }}:{{ loki['query-frontend-http-port'] }};
}
server {
listen {{ loki['ip'] }}:{{ loki['nginx-port'] }};
# XXX while debugging
listen [{{ loki['ipv6'] }}]:{{ loki['nginx-port'] }};
# Fallback for any path not matched by a more specific location below.
location / {
return 200 'OK';
}
# Exact-match status endpoints, forwarded to the `cluster` group.
location = /ring {
proxy_pass http://cluster$request_uri;
}
location = /memberlist {
proxy_pass http://cluster$request_uri;
}
location = /config {
proxy_pass http://cluster$request_uri;
}
location = /metrics {
proxy_pass http://cluster$request_uri;
}
location = /ready {
proxy_pass http://cluster$request_uri;
}
# Log ingestion goes to the write upstream.
location = /loki/api/v1/push {
proxy_pass http://write$request_uri;
}
# Tail requests go to the read upstream; the Upgrade/Connection headers are
# passed on so that the connection can be upgraded.
location = /loki/api/v1/tail {
proxy_pass http://read$request_uri;
proxy_set_header Upgrade $http_upgrade;
proxy_set_header Connection "upgrade";
}
# Every other /loki/api/ path goes to the query frontend (the exact-match
# locations above take precedence over this regular expression).
location ~ /loki/api/.* {
proxy_pass http://query-frontend$request_uri;
}
}
}
server:
http_listen_address: {{ promtail['ip'] }}
http_listen_port: {{ promtail['http-port'] }}
grpc_listen_address: {{ promtail['ip'] }}
grpc_listen_port: {{ promtail['grpc-port'] }}
external_url: {{ promtail['url'] }}
positions:
filename: {{ promtail['dir'] }}/positions.yaml
clients:
- url: {{ loki['url'] }}/api/prom/push
scrape_configs:
- job_name: test
static_configs:
- targets:
- localhost
labels:
job: grafanalogs
__path__: ./var/log/*log
{{ slapparameter_dict.get('promtail-extra-scrape-config', '') }}
......@@ -7,9 +7,9 @@ extends =
../../component/openssl/buildout.cfg
../../component/curl/buildout.cfg
../../component/dash/buildout.cfg
../../component/nginx/buildout.cfg
buildout.hash.cfg
versions = versions
parts =
slapos-cookbook
instance-profile
......@@ -17,24 +17,33 @@ parts =
influxdb-config-file
telegraf-config-file
grafana-config-file
grafana-provisioning-config-file
grafana-provisioning-datasources-config-file
grafana-provisioning-dashboards-config-file
loki-config-file
promtail-config-file
loki-nginx-config-file
[nodejs]
<= nodejs-14.16.0
<= nodejs-16.14.0
[gowork]
golang = ${golang1.19:location}
# XXX speed up development cycle by not rebuilding workspace on every software run
# XXX does not work ?
update-command =
[go_github.com_grafana_grafana]
<= go-git-package
go.importpath = github.com/grafana/grafana
repository = https://github.com/grafana/grafana
revision = v7.5.2-0-gca413c612f
revision = v9.1.5-0-gdf015a9301
[go_github.com_grafana_loki]
<= go-git-package
go.importpath = github.com/grafana/loki
repository = https://github.com/perrinjerome/loki
revision = v2.2.1-1-gda6d45f2
repository = https://github.com/grafana/loki
revision = v2.1.0-2075-gafd63c598
# tag helm-loki-3.1.0 which supports golang 1.19
[go_github.com_influxdata_influxdb]
<= go-git-package
......@@ -46,7 +55,7 @@ revision = v1.8.4-0-gbc8ec4384e
<= go-git-package
go.importpath = github.com/influxdata/telegraf
repository = https://github.com/influxdata/telegraf
revision = v1.20.2-0-gf721f53d
revision = v1.24.0-0-g3c4a6516e
[go_github.com_perrinjerome_slapos_telegraf_input]
<= go-git-package
......@@ -54,12 +63,18 @@ go.importpath = github.com/perrinjerome/telegraf-input-slapos
repository = https://github.com/perrinjerome/telegraf-input-slapos
revision = v0.0.1-0-gf8981f3
# [go_github.com_jaegertracking_jaeger]
# <= go-git-package
# go.importpath = github.com/jaegertracking/jaeger
# repository = https://github.com/jaegertracking/jaeger
# revision = v1.20.0-623-gcac21f82
[gowork]
# Fails with current default golang1.18
golang = ${golang1.17:location}
install =
${go_github.com_grafana_loki:location}:./cmd/loki
${go_github.com_grafana_loki:location}:./cmd/promtail
${go_github.com_grafana_loki:location}:./clients/cmd/promtail
${go_github.com_grafana_loki:location}:./cmd/logcli
${go_github.com_influxdata_telegraf:location}:./cmd/...
${go_github.com_influxdata_influxdb:location}:./cmd/...
......@@ -70,6 +85,7 @@ environment =
CGO_ENABLED = 0
telegraf-bin = ${:bin}/telegraf
telegraf-input-slapos-bin = ${:bin}/telegraf-input-slapos
influx-bin = ${:bin}/influx
influxd-bin = ${:bin}/influxd
grafana-bin = ${:bin}/grafana-server
......@@ -80,14 +96,18 @@ promtail-bin = ${:bin}/promtail
[grafana]
recipe = plone.recipe.command
command = bash -c "
cd ${:homepath} &&
. ${gowork:env.sh} &&
command = bash -ce "
cd ${:homepath} && \
. ${gowork:env.sh} && \
go install github.com/google/wire/cmd/wire@v0.5.0 && \
wire gen -tags oss ./pkg/server ./pkg/cmd/grafana-cli/runner && \
# Unlike the loki, grafana _needs_ CGO_ENABLED, so we override here
export CGO_ENABLED=1 &&
export CGO_ENABLED=1 && \
go run build.go setup && \
go run build.go build && \
${yarn:location}/bin/yarn install --pure-lockfile && \
export NODE_OPTIONS=--max_old_space_size=8192 && \
${yarn:location}/bin/yarn install --immutable && \
${yarn:location}/bin/yarn run themes:generate && \
${yarn:location}/bin/yarn run build && \
${yarn:location}/bin/yarn run plugins:build-bundled && \
# Cleanup yarn and Cypress caches
......@@ -110,15 +130,24 @@ url = ${:_profile_base_location_}/${:filename}
[grafana-config-file]
<= download-file-base
[grafana-provisioning-config-file]
[grafana-provisioning-datasources-config-file]
<= download-file-base
[grafana-provisioning-dashboards-config-file]
<= download-file-base
[loki-config-file]
<= download-file-base
[promtail-config-file]
[loki-nginx-config-file]
<= download-file-base
[instance-eggs]
recipe = zc.recipe.egg
eggs =
${python-PyYAML:egg}
toml
[instance-profile]
recipe = slapos.recipe.template:jinja2
url = ${:_profile_base_location_}/${:filename}
......@@ -128,16 +157,20 @@ context =
section buildout buildout
key openssl_bin openssl-output:openssl
key telegraf_bin gowork:telegraf-bin
key telegraf_input_slapos_bin gowork:telegraf-input-slapos-bin
key influxd_bin gowork:influxd-bin
key influx_bin gowork:influx-bin
key grafana_bin gowork:grafana-bin
key grafana_homepath gowork:grafana-homepath
key loki_bin gowork:loki-bin
raw nginx_bin ${nginx:location}/sbin/nginx
key promtail_bin gowork:promtail-bin
key curl_bin :curl-bin
key dash_bin :dash-bin
curl-bin = ${curl:location}/bin/curl
dash-bin = ${dash:location}/bin/dash
depends = ${instance-eggs:eggs}
[versions]
inotifyx = 0.2.2
toml = 0.10.2
{
"name": "Grafana",
"description": "Grafana, Telegraf and Influxdb",
"serialisation": "xml",
"serialisation": "json-in-xml",
"software-type": {
"default": {
"title": "Default",
......
......@@ -55,9 +55,6 @@
[outputs.influxdb]
# The full HTTP or UDP endpoint URL for your InfluxDB instance
# Multiple urls can be specified for InfluxDB cluster support.
# urls = ["udp://localhost:8089"] # UDP endpoint example
# XXX XXX XXX
#urls = ["http://localhost:8086"] # required
urls = ["{{ influxdb['url'] }}"]
insecure_skip_verify = true # because we are using a self signed certificate
# The target database for metrics (telegraf will create it if not exists)
......@@ -100,32 +97,9 @@
[system]
{{ extra['extra-config'] }}
###############################################################################
# ERP5 - PLUGINS #
###############################################################################
#
# Left here as example, don't edit this file directly, but place your config
# To add ad-hoc config, don't edit this file directly, but place your config
# files in {{ telegraf['extra-config-dir'] }}
#
#[mysql]
# servers = ["root@unix(/srv/slapgrid/slappart12/srv/runner/instance/slappart1/var/run/mariadb.sock)/erp5"]
#[memcached]
# # XXX kumofs does not support memcached's stat command
# servers = ["10.0.248.233:2013", "10.0.248.233:2003"]
#[haproxy]
# servers = ["http://10.0.121.162:2150/haproxy", "http://10.0.121.162:2152/haproxy"]
#[[inputs.exec]]
# commands = ["/srv/slapgrid/slappart0/bin/slapsensor /srv/slapgrid/slappart0/srv/runner/instance/etc/supervisord.conf"]
# name_suffix = "_slapos"
# interval = "5s"
###############################################################################
# SERVICE PLUGINS #
###############################################################################
......@@ -32,6 +32,7 @@ import os
import tempfile
import textwrap
import time
import json
import psutil
import requests
......@@ -102,7 +103,7 @@ class TestGrafana(GrafanaTestCase):
with open(
os.path.join(self.computer_partition_root_path, 'etc',
'grafana-config-file.cfg')) as f:
config.readfp(io.StringIO('[default]\n' + f.read()))
config.read_file(io.StringIO('[default]\n' + f.read()))
self.assertEqual(config.get('smtp', 'enabled'), 'false')
......@@ -185,8 +186,109 @@ class TestTelegraf(GrafanaTestCase):
class TestLoki(GrafanaTestCase):
instance_max_retry = 2
@classmethod
def getInstanceParameterDict(cls):
# Build the instance parameters used by the Loki tests: an "applications"
# list plus a raw promtail scrape config, returned JSON-encoded under the
# "_" key.
# The temporary file created here is the one promtail is told to watch
# (see `__path__` in the scrape config below).
cls._logfile = tempfile.NamedTemporaryFile(suffix='log')
# NOTE(review): the paths and partition references below look specific to a
# development environment (work in progress) — confirm before relying on them.
parameter_dict = {
"applications": [
{
"name": "System",
"instance-root": "/",
"partitions": [
{
# no slapos for system application
# XXX example
"name": "syslog",
"reference": "syslog",
# NOTE(review): the parameter schema in this commit declares "file-path",
# not "files" — confirm which key is intended.
"files": [
"/srv/slapgrid/slappart15/grosgzip/bench.log",
]
},
]
},
{
"name": "ERP5",
"instance-root": "/srv/slapgrid/slappart15/srv/runner/instance/",
"urls": [
# TODO
# "https://XXX.host.vifib.net/erp5/",
],
"partitions": [
{
"name": "jerome-dev-mariadb",
"reference": "slappart3",
"type": "erp5/mariadb",
#"static-tags": {
# "XXX": "needed?"
#}
},
{
"name": "jerome-dev-zodb",
"reference": "slappart4",
"type": "erp5/zeo",
#"static-tags": {
# "XXX": "needed?"
#}
},
{
"name": "jerome-dev-balancer",
"reference": "slappart6",
"type": "erp5/balancer",
#"static-tags": {
# "XXX": "needed?"
#}
},
{
"name": "jerome-dev-zope-front",
"reference": "slappart5",
"type": "erp5/zope-front",
#"static-tags": {
# "XXX": "needed?"
#}
},
# {
# "name": "jerome-dev-zope-front",
# "reference": "slappart13",
# "type": "erp5/zope-activity",
# #"static-tags": {
# # "XXX": "needed?"
# #}
# }
]
}
],
# TODO: drop this
# The text below goes through str.format(**locals()): {cls...} placeholders
# are substituted, and literal braces are written doubled ({{ and }}).
'promtail-extra-scrape-config':
textwrap.dedent(r'''
- job_name: {cls.__name__}
pipeline_stages:
- match:
selector: '{{job="{cls.__name__}"}}'
stages:
- multiline:
firstline: '^\d{{4}}-\d{{2}}-\d{{2}}\s\d{{1,2}}\:\d{{2}}\:\d{{2}}\,\d{{3}}'
max_wait_time: 3s
- regex:
expression: '^(?P<timestamp>.*) - (?P<name>\S+) - (?P<level>\S+) - (?P<message>.*)'
- timestamp:
format: 2006-01-02T15:04:05Z00:00
source: timestamp
- labels:
level:
name:
static_configs:
- targets:
- localhost
labels:
job: {cls.__name__}
__path__: {cls._logfile.name}
''').format(**locals())
}
# The whole parameter dict is serialised as one JSON string under "_".
return {'_': json.dumps(parameter_dict)}
def xgetInstanceParameterDict(cls):
cls._logfile = tempfile.NamedTemporaryFile(suffix='log')
return {
'promtail-extra-scrape-config':
......@@ -227,9 +329,10 @@ class TestLoki(GrafanaTestCase):
)['loki-url']
def test_loki_available(self):
  """Check that Loki answers HTTP 200 on its ``/ready`` endpoint."""
  # The leftover debugger line (`import pdb;pdb; set_trace()`) was removed:
  # `set_trace` was never bound as a bare name, so it raised NameError
  # before the request was even sent.
  # verify=False turns off TLS certificate verification for this request.
  self.assertEqual(
      requests.codes.ok,
      requests.get(f'{self.loki_url}/ready',
                   verify=False).status_code)
def test_log_ingested(self):
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment