Commit f39f1597 authored by Jérome Perrin's avatar Jérome Perrin

WIP grafana

parent 862073fb
......@@ -15,32 +15,17 @@
[instance-profile]
filename = instance.cfg.in
md5sum = 39a1ee09ca7a12995703ff2a6a869637
md5sum = e4d5ac3e6ad239d3bf48c2b3172919b5
[influxdb-config-file]
filename = influxdb-config-file.cfg.in
md5sum = a28972ced3e0f4aa776e43a9c44717c0
[telegraf-config-file]
filename = telegraf-config-file.cfg.in
md5sum = 6de1faa34842e1eda095a51edecc2083
[grafana-config-file]
filename = grafana-config-file.cfg.in
md5sum = 83a8445858eab21a12f1769c23424bea
[grafana-provisioning-datasources-config-file]
filename = grafana-provisioning-datasources-config-file.cfg.in
md5sum = 3aa0f1ed752b2a59ea2b5e7c1733daf3
[grafana-provisioning-dashboards-config-file]
filename = grafana-provisioning-dashboards-config-file.cfg.in
md5sum = 5616679a9c5c2757540175ead3f5500a
[loki-config-file]
filename = loki-config-file.cfg.in
md5sum = 19a7f5cb904b3287b0bc7cb3e8a27429
[loki-nginx-config-file]
filename = loki-nginx-config-file.cfg.in
md5sum = b08ce1e4abb34eb79e26133459c27c3a
# https://grafana.com/docs/administration/provisioning/#example-datasource-config-file
apiVersion: 1
datasources:
- name: telegraf
type: influxdb
access: proxy
url: {{ influxdb['url'] }}
user: {{ influxdb['auth-username'] }}
database: telegraf
isDefault: true
jsonData:
tlsSkipVerify: true
secureJsonData:
password: {{ influxdb['auth-password'] }}
version: 1
editable: false
- name: loki
type: loki
access: proxy
url: {{ loki['url'] }}
version: 1
editable: false
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"description": "Parameters to instantiate Grafana",
"$schema": "http://json-schema.org/draft-04/schema",
"description": "Parameters to instantiate an agent collecting logs and metrics",
"type": "object",
"additionalProperties": false,
"$defs": {
......@@ -42,32 +42,12 @@
]
}
},
"required": [
"applications",
"influxdb",
"loki"
],
"properties": {
"smtp-server": {
"description": "SMTP server used by Grafana to send emails (in host:port format). Leaving this empty will disable email sending.",
"type": "string"
},
"smtp-username": {
"description": "Username to connect to SMTP server",
"type": "string"
},
"smtp-password": {
"description": "Password to connect to SMTP server",
"type": "string"
},
"smtp-verify-ssl": {
"description": "Verify SSL certificate of SMTP server",
"type": "boolean"
},
"email-from-address": {
"description": "Email address used in From: header of emails",
"type": "string"
},
"email-from-name": {
"description": "Name used in From: header of emails",
"default": "Grafana",
"type": "string"
},
"applications": {
"description": "Applications to monitor",
"type": "array",
......@@ -107,6 +87,7 @@
"name",
"reference"
],
"additionalProperties": false,
"properties": {
"name": {
"type": "string",
......@@ -174,6 +155,7 @@
},
{
"type": "object",
"additionalProperties": false,
"description": "Configuration for `system` type application",
"required": [
"type",
......@@ -194,6 +176,7 @@
"type": "array",
"items": {
"type": "object",
"additionalProperties": false,
"properties": {
"name": {
"type": "string",
......@@ -237,6 +220,57 @@
}
]
}
},
"influxdb": {
"description": "Connection information for influxdb",
"type": "object",
"additionalProperties": false,
"required": [
"url",
"database",
"username",
"password"
],
"properties": {
"url": {
"description": "IPv6 URL of influxdb HTTP endpoint",
"format": "uri",
"type": "string"
},
"database": {
"description": "database created in influxdb",
"type": "string"
},
"username": {
"description": "username for influxdb",
"type": "string"
},
"password": {
"description": "password for influxdb user",
"type": "string"
}
}
},
"loki": {
"description": "Connection information for loki",
"type": "object",
"additionalProperties": false,
"required": [
"url",
"caucase-url"
],
"properties": {
"url": {
"description": "Base URL of Loki",
"format": "uri",
"type": "string"
},
"caucase-url": {
"description": "URL of the caucase service used by Loki",
"format": "uri",
"type": "string"
}
}
}
}
}
{
"$schema": "http://json-schema.org/draft-07/schema#",
"description": "Values returned by agent instantiation",
"additionalProperties": false,
"properties": {
"telegraf-extra-config-dir": {
"description": "Directory in telegraf partition from which extra configuration files will be loaded. These files must match the *.conf pattern",
"type": "string"
}
},
"type": "object"
}
{
"$schema": "http://json-schema.org/draft-07/schema",
"description": "Parameters to instantiate Grafana",
"type": "object",
"additionalProperties": false,
"properties": {
"smtp-server": {
"description": "SMTP server used by Grafana to send emails (in host:port format). Leaving this empty will disable email sending.",
"type": "string"
},
"smtp-username": {
"description": "Username to connect to SMTP server",
"type": "string"
},
"smtp-password": {
"description": "Password to connect to SMTP server",
"type": "string"
},
"smtp-verify-ssl": {
"description": "Verify SSL certificate of SMTP server",
"type": "boolean"
},
"email-from-address": {
"description": "Email address used in From: header of emails",
"type": "string"
},
"email-from-name": {
"description": "Name used in From: header of emails",
"default": "Grafana",
"type": "string"
},
"caucase-url": {
"description": "URL of a caucase instance to manage all server and clients certificates",
"type": "string",
"format": "uri"
},
"influxdb": {
"description": "Fine tuning influxdb parameters",
"type": "object",
"additionalProperties": false,
"properties": {
"default-retention-policy-days": {
"description": "Number of days to keep metrics data",
"default": 720,
"type": "integer"
}
}
},
"loki": {
"description": "Fine tuning loki parameters",
"type": "object",
"additionalProperties": false,
"properties": {
"retention-period-days": {
"description": "Number of days to keep log data",
"default": 60,
"type": "integer"
}
}
},
"agent": {
"type": "object",
"properties": {
"applications": {
"$ref": "./instance-agent-input-schema.json#properties/applications"
}
}
}
}
}
{
"$schema": "http://json-schema.org/draft-04/schema#",
"$schema": "http://json-schema.org/draft-07/schema#",
"description": "Values returned by Grafana instantiation",
"additionalProperties": false,
"properties": {
"url": {
"description": "Shared frontend for this Grafana instance",
"pattern": "^https://",
"format": "uri",
"type": "string"
},
"grafana-username": {
......@@ -18,12 +18,12 @@
},
"grafana-url": {
"description": "IPv6 URL to access grafana",
"pattern": "^https://",
"format": "uri",
"type": "string"
},
"influxdb-url": {
"description": "IPv6 URL of influxdb HTTP endpoint",
"pattern": "^https://",
"format": "uri",
"type": "string"
},
"influxdb-database": {
......@@ -38,8 +38,14 @@
"description": "password for influxdb user",
"type": "string"
},
"telegraf-extra-config-dir": {
"description": "Directory in telegraf partition where extra configuration file will be loaded. These files must match *.conf pattern",
"loki-url": {
"description": "Base URL of Loki",
"format": "uri",
"type": "string"
},
"loki-caucase-url": {
"description": "URL of the caucase service used by Loki",
"format": "uri",
"type": "string"
}
},
......
This diff is collapsed.
# inspired by
# https://github.com/grafana/loki/blob/1489c1731277c327e3661da182bfc6c90d4559f4/tools/dev/loki-boltdb-storage-s3/docker-compose.yml
# and other configuration examples with microservices, because the single binary
# mode assumes running on 127.0.0.1, but in slapos we want to bind on partition's
# addresses
# Loki configuration template. Every {{ loki[...] }} expression is filled in
# from the `loki` mapping of the template context when the file is rendered.
# Authentication is switched off at the Loki level.
auth_enabled: false
http_prefix:
# HTTP and gRPC listeners both bind to the partition address.
server:
http_listen_address: {{ loki['ip'] }}
grpc_listen_address: {{ loki['ip'] }}
# 1.048576e+08 == 104857600 == 100 * 1024 * 1024 (presumably bytes - TODO confirm)
grpc_server_max_recv_msg_size: 1.048576e+08
grpc_server_max_send_msg_size: 1.048576e+08
# # TODO ?
# wal:
# enabled: true
# dir: /loki/wal
# The compactor is addressed over HTTP on the write component's port.
common:
compactor_address: http://{{ loki['ip'] }}:{{ loki['write-http-port'] }}
# A single schema period: boltdb-shipper index with filesystem object store.
schema_config:
configs:
- from: 2020-05-15
store: boltdb-shipper
object_store: filesystem
schema: v11
index:
prefix: index_
period: 24h
# All storage locations are provided by the template context.
storage_config:
boltdb_shipper:
active_index_directory: {{ loki['boltdb-shipper-active-index-directory'] }}
cache_location: {{ loki['boltdb-shipper-cache-location'] }}
filesystem:
directory: {{ loki['storage-filesystem-directory'] }}
# Old samples are accepted; ingestion rate and burst limits are both set to 1024 MB.
limits_config:
reject_old_samples: false
enforce_metric_name: false
ingestion_rate_mb: 1024
ingestion_burst_size_mb: 1024
# Ingester ring uses memberlist as key-value store, with a replication factor of 1.
ingester:
lifecycler:
address: {{ loki['ip'] }}
ring:
kvstore:
store: memberlist
replication_factor: 1
# Compaction runs every minute, with retention enabled.
compactor:
compaction_interval: 1m
retention_enabled: true
working_directory: {{ loki['compactor-working-directory'] }}
# Query frontend; tail requests are proxied to the querier's HTTP port.
frontend:
log_queries_longer_than: 5s
compress_responses: true
max_outstanding_per_tenant: 2048
tail_proxy_url: http://{{ loki['ip'] }}:{{ loki['querier-http-port']}}
# Workers connect to the query scheduler on its gRPC port.
frontend_worker:
scheduler_address: {{ loki['ip'] }}:{{ loki['query-scheduler-grpc-port'] }}
# NOTE(review): the marker below looks like a stray leftover - confirm whether it can be removed.
#testERP5Type
# Only the querier memberlist endpoint is listed as a join member;
# the read-1 and write entries are commented out.
memberlist:
bind_addr:
- {{ loki['ip'] }}
join_members:
# - {{ loki['ip'] }}:{{ loki['read-1-memberlist-port'] }}
- {{ loki['ip'] }}:{{ loki['querier-memberlist-port'] }}
# - {{ loki['ip'] }}:{{ loki['write-memberlist-port'] }}
query_scheduler:
max_outstanding_requests_per_tenant: 1024
querier:
query_ingesters_within: 2h
# nginx front end for the Loki components. Every {{ loki[...] }} expression is
# filled in from the `loki` mapping of the template context at render time.
# Stay in the foreground instead of daemonizing.
daemon off;
events {
worker_connections 1024;
}
# Error and access logs are both written to stdout.
error_log /dev/stdout;
http {
default_type application/octet-stream;
access_log /dev/stdout;
sendfile on;
tcp_nopush on;
# "read" targets the query-frontend HTTP port.
upstream read {
server {{ loki['ip'] }}:{{ loki['query-frontend-http-port'] }};
}
# "write" targets the write component's HTTP port.
upstream write {
server {{ loki['ip'] }}:{{ loki['write-http-port'] }};
}
# "cluster" pools the write, query-frontend and querier HTTP ports.
upstream cluster {
server {{ loki['ip'] }}:{{ loki['write-http-port'] }};
server {{ loki['ip'] }}:{{ loki['query-frontend-http-port'] }};
server {{ loki['ip'] }}:{{ loki['querier-http-port'] }};
}
# "query-frontend" targets the same port as "read".
upstream query-frontend {
server {{ loki['ip'] }}:{{ loki['query-frontend-http-port'] }};
}
server {
listen {{ loki['ip'] }}:{{ loki['nginx-port'] }};
# XXX while debugging
listen [{{ loki['ipv6'] }}]:{{ loki['nginx-port'] }};
# Fallback: any path not matched below answers 200 'OK'.
location / {
return 200 'OK';
}
# Exact-match status/introspection endpoints are sent to the "cluster" pool.
location = /ring {
proxy_pass http://cluster$request_uri;
}
location = /memberlist {
proxy_pass http://cluster$request_uri;
}
location = /config {
proxy_pass http://cluster$request_uri;
}
location = /metrics {
proxy_pass http://cluster$request_uri;
}
location = /ready {
proxy_pass http://cluster$request_uri;
}
# Log pushes go to the "write" upstream.
location = /loki/api/v1/push {
proxy_pass http://write$request_uri;
}
# Tail requests go to "read"; Upgrade/Connection headers are forwarded so the
# connection can be upgraded (presumably to a websocket - TODO confirm).
location = /loki/api/v1/tail {
proxy_pass http://read$request_uri;
proxy_set_header Upgrade $http_upgrade;
proxy_set_header Connection "upgrade";
}
# Remaining paths matching the /loki/api/ regex go to the query frontend.
location ~ /loki/api/.* {
proxy_pass http://query-frontend$request_uri;
}
}
}
[buildout]
extends =
../../stack/slapos.cfg
../../stack/caucase/buildout.cfg
../../stack/nodejs.cfg
../../component/make/buildout.cfg
../../component/golang/buildout.cfg
../../component/openssl/buildout.cfg
../../component/curl/buildout.cfg
../../component/dash/buildout.cfg
../../component/nginx/buildout.cfg
../../component/jq/buildout.cfg
../../component/systemd/buildout.cfg
../../component/fluent-bit/buildout.cfg
buildout.hash.cfg
parts =
......@@ -15,21 +18,10 @@ parts =
instance-profile
gowork
influxdb-config-file
telegraf-config-file
grafana-config-file
grafana-provisioning-datasources-config-file
grafana-provisioning-dashboards-config-file
loki-config-file
loki-nginx-config-file
fluent-bit
; [nodejs]
; <= nodejs-16.19.0
[gowork]
# XXX speed up development cycle by not rebuilding workspace on every software run
# XXX does not work ?
update-command =
[go_github.com_grafana_grafana]
<= go-git-package
......@@ -41,7 +33,7 @@ revision = v10.1.2-0-g8e428858dd
<= go-git-package
go.importpath = github.com/grafana/loki
repository = https://github.com/grafana/loki
revision = v2.9.1-0-gd9d5ed4a1
revision = v3.1.0-0-g935aee77e
[go_github.com_influxdata_influxdb]
<= go-git-package
......@@ -59,7 +51,7 @@ revision = v1.28.1-0-g3ea9ffbe2
<= go-git-package
go.importpath = github.com/perrinjerome/telegraf-input-slapos
repository = https://github.com/perrinjerome/telegraf-input-slapos
revision = v0.0.1-0-gf8981f3
revision = v0.0.2-0-gd4c5221
[go_github.com_prometheus_prometheus]
<= go-git-package
......@@ -84,15 +76,18 @@ install =
${go_github.com_perrinjerome_slapos_telegraf_input:location}:./...
${go_github.com_prometheus_prometheus:location}:./cmd/...
# disable cgo, to prevent loki/promtail from using go-systemd
environment =
CGO_ENABLED = 0
CGO_ENABLED=1
CGO_CFLAGS=-I${systemd:location}/include
buildflags =
-tags promtail_journal_enabled
cpkgpath =
${systemd:location}
telegraf-bin = ${:bin}/telegraf
telegraf-input-slapos-bin = ${:bin}/telegraf-input-slapos
influx-bin = ${:bin}/influx
influxd-bin = ${:bin}/influxd
grafana-bin = ${:bin}/grafana-server
grafana-bin = ${grafana:binpath}/grafana
grafana-homepath = ${grafana:homepath}
loki-bin = ${:bin}/loki
promtail-bin = ${:bin}/promtail
......@@ -105,8 +100,6 @@ command = bash -ce "
. ${gowork:env.sh} && \
go install github.com/google/wire/cmd/wire@v0.5.0 && \
wire gen -tags oss ./pkg/server ./pkg/cmd/grafana-cli/runner && \
# Unlike loki, grafana _needs_ CGO_ENABLED, so we override here
export CGO_ENABLED=1 && \
go run build.go setup && \
go run build.go build && \
export NODE_OPTIONS=--max_old_space_size=8192 && \
......@@ -119,6 +112,8 @@ command = bash -ce "
rm -rf ${buildout:directory}/.cache/yarn/
"
homepath = ${go_github.com_grafana_grafana:location}
# XXX "linux-amd64" is not portable here
binpath = ${go_github.com_grafana_grafana:location}/bin/linux-amd64
stop-on-error = true
[download-file-base]
......@@ -128,24 +123,15 @@ url = ${:_profile_base_location_}/${:filename}
[influxdb-config-file]
<= download-file-base
[telegraf-config-file]
<= download-file-base
[grafana-config-file]
<= download-file-base
[grafana-provisioning-datasources-config-file]
<= download-file-base
[grafana-provisioning-dashboards-config-file]
<= download-file-base
[loki-config-file]
<= download-file-base
[loki-nginx-config-file]
<= download-file-base
[instance-eggs]
recipe = zc.recipe.egg
eggs =
......@@ -167,13 +153,16 @@ context =
key grafana_bin gowork:grafana-bin
key grafana_homepath gowork:grafana-homepath
key loki_bin gowork:loki-bin
raw nginx_bin ${nginx:location}/sbin/nginx
key promtail_bin gowork:promtail-bin
key curl_bin :curl-bin
key dash_bin :dash-bin
key jq_bin :jq-bin
curl-bin = ${curl:location}/bin/curl
dash-bin = ${dash:location}/bin/dash
depends = ${instance-eggs:eggs}
jq-bin = ${jq:location}/bin/jq
depends = ${instance-eggs:eggs} ${caucase-eggs:eggs}
import-list =
file caucase caucase-jinja2-library:target
[versions]
inotifyx = 0.2.2
......
{
"name": "Grafana",
"description": "Grafana, Telegraf and Influxdb",
"description": "Grafana, Influxdb, Loki and Telegraf",
"serialisation": "json-in-xml",
"software-type": {
"default": {
"title": "Default",
"description": "Grafana, Telegraf and Influxdb in same partition",
"request": "instance-input-schema.json",
"response": "instance-output-schema.json",
"description": "Grafana, Influxdb and Loki",
"request": "instance-default-input-schema.json",
"response": "instance-default-output-schema.json",
"index": 0
},
"agent": {
"title": "Agent",
"description": "Telegraf agent sending metrics to Influxdb and Promtail agent sending logs to Loki",
"request": "instance-agent-input-schema.json",
"response": "instance-agent-output-schema.json",
"index": 0
}
}
......
# Telegraf configuration
# Telegraf is entirely plugin driven. All metrics are gathered from the
# declared plugins.
# Even if a plugin has no configuration, it must be declared in here
# to be active. Declaring a plugin means just specifying the name
# as a section with no variables. To deactivate a plugin, comment
# out the name and any variables.
# Use 'telegraf -config telegraf.toml -test' to see what metrics a config
# file would generate.
# One rule that plugins conform to is wherever a connection string
# can be passed, the values '' and 'localhost' are treated specially.
# They indicate to the plugin to use their own builtin configuration to
# connect to the local system.
# NOTE: The configuration has a few required parameters. They are marked
# with 'required'. Be sure to edit those to make this configuration work.
# Tags can also be specified via a normal map, but only one form at a time:
[tags]
# dc = "us-east-1"
# Configuration for telegraf agent
[agent]
# Default data collection interval for all plugins
interval = "10s"
# Rounds collection interval to 'interval'
# ie, if interval="10s" then always collect on :00, :10, :20, etc.
round_interval = true
# Default data flushing interval for all outputs. You should not set this below
# interval. Maximum flush_interval will be flush_interval + flush_jitter
flush_interval = "10s"
# Jitter the flush interval by a random amount. This is primarily to avoid
# large write spikes for users running a large number of telegraf instances.
# ie, a jitter of 5s and interval 10s means flushes will happen every 10-15s
flush_jitter = "0s"
# Run telegraf in debug mode
debug = false
# Override default hostname, if empty use os.Hostname()
hostname = ""
###############################################################################
# OUTPUTS #
###############################################################################
[outputs]
# Configuration for influxdb server to send metrics to
[outputs.influxdb]
# The full HTTP or UDP endpoint URL for your InfluxDB instance
# Multiple urls can be specified for InfluxDB cluster support.
urls = ["{{ influxdb['url'] }}"]
insecure_skip_verify = true # because we are using a self signed certificate
# The target database for metrics (telegraf will create it if not exists)
database = "{{ influxdb['database'] }}" # required
# Precision of writes, valid values are n, u, ms, s, m, and h
# note: using second precision greatly helps InfluxDB compression
precision = "s"
# Connection timeout (for the connection with InfluxDB), formatted as a string.
# If not provided, will default to 0 (no timeout)
# timeout = "5s"
username = "{{ influxdb['auth-username'] }}"
password = "{{ influxdb['auth-password'] }}"
# Set the user agent for HTTP POSTs (can be useful for log differentiation)
# user_agent = "telegraf"
# Set UDP payload size, defaults to InfluxDB UDP Client default (512 bytes)
# udp_payload = 512
###############################################################################
# PLUGINS #
###############################################################################
# Read metrics about cpu usage
[cpu]
# Whether to report per-cpu stats or not
percpu = true
# Whether to report total system cpu stats or not
totalcpu = true
# Comment this line if you want the raw CPU time metrics
drop = ["cpu_time"]
# Read metrics about memory usage
[mem]
# no configuration
[disk]
[io]
[system]
{{ extra['extra-config'] }}
###############################################################################
# To add ad-hoc config, don't edit this file directly, but place your config
# files in {{ telegraf['extra-config-dir'] }}
###############################################################################
This diff is collapsed.
......@@ -3,12 +3,16 @@ import argparse
import json
import os.path
import urllib
from urlparse import urlparse, urlunparse, ParseResult
from urllib.parse import urlparse, urlunparse, ParseResult
import jsonschema
# Adapted from slapos.core.git/slapos/slap/util.py
from lxml import etree
def xml2dict(infile):
import json
d = json.load(infile)
d.pop('$schema', None)
return d
from lxml import etree
result_dict = {}
for element in etree.parse(infile).iter(tag=etree.Element):
if element.tag == 'parameter':
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment