Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
slapos
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Titouan Soulard
slapos
Commits
321734be
Commit
321734be
authored
6 years ago
by
Alain Takoudjou
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
monitor: fix cpu-load-threshold parameter, make cpu load promise more robust
parent
42b8cba0
No related merge requests found
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
15 additions
and
149 deletions
+15
-149
software/monitor/instance-monitor.cfg.jinja2
software/monitor/instance-monitor.cfg.jinja2
+13
-15
software/monitor/instance.cfg
software/monitor/instance.cfg
+0
-1
software/monitor/script/check_system_health.py
software/monitor/script/check_system_health.py
+0
-124
software/monitor/software.cfg
software/monitor/software.cfg
+2
-9
No files found.
software/monitor/instance-monitor.cfg.jinja2
View file @
321734be
...
...
@@ -89,11 +89,6 @@ command-line =
${monitor-directory:bin}/python {{ monitor_collect_csv_dump }} --output_folder ${monitor-directory:consumption}
wrapper-path = ${monitor-directory:reports}/monitor-collect-csv-dump
[monitor-check-cpu-usage]
recipe = slapos.cookbook:wrapper
command-line = ${monitor-directory:bin}/python {{ monitor_check_system_health }} cpu ${init-monitor-parameters:cpu-load-file}
wrapper-path = ${directory:promises}/system-CPU-load-check
[monitor-check-memory-usage]
recipe = slapos.cookbook:wrapper
command-line = {{ buildout_bin}}/check-computer-memory
...
...
@@ -102,20 +97,22 @@ command-line = {{ buildout_bin}}/check-computer-memory
--unit percent
wrapper-path = ${directory:promises}/check-computer-memory-usage
[monitor-check-cpu-usage]
recipe = slapos.cookbook:promise.plugin
eggs =
slapos.toolbox
file = ${monitor-conf-parameters:promise-output-file}
content =
from slapos.promise.plugin.check_server_cpu_load import RunPromise
output = ${directory:plugins}/system-CPU-load-check.py
mode = 600
config-cpu-load-threshold = ${slap-parameter:cpu-load-threshold}
[publish-connection-information]
recipe = slapos.cookbook:publish
monitor-setup-url = https://monitor.app.officejs.com/#page=settings_configurator&url=${monitor-publish-parameters:monitor-url}&username=${monitor-publish-parameters:monitor-user}&password=${monitor-publish-parameters:monitor-password}
server_log_url = ${monitor-publish-parameters:monitor-base-url}/${slap-configuration:private-hash}/
[init-monitor-parameters]
recipe = plone.recipe.command
cpu-load-file = ${directory:monitor}/cpu-load-tolerance
mem-free-file = ${directory:monitor}/mem-free-limit
command =
if [ ! -s "${:cpu-load-file}" ]; then
echo ${slap-parameter:cpu-load-threshold} > ${:cpu-load-file}
fi
[slap-configuration]
recipe = slapos.cookbook:slapconfiguration.serialised
computer = ${slap-connection:computer-id}
...
...
@@ -127,5 +124,6 @@ private-hash = ${pwgen:passwd}${pwgen32:passwd}
frontend-domain =
[slap-parameter]
cpu-load-threshold = 2.0
# Max cpu load for one core on server
cpu-load-threshold = 3.0
memory-percent-threshold = 96
This diff is collapsed.
Click to expand it.
software/monitor/instance.cfg
View file @
321734be
...
...
@@ -22,7 +22,6 @@ context = key develop_eggs_directory buildout:develop-eggs-directory
raw monitor_template_output ${monitor-template:output}
raw network_benck_cfg_output ${network-bench-cfg:output}
raw monitor_collect_csv_dump ${monitor-collect-csv-dump:output}
raw monitor_check_system_health ${monitor-system-health:output}
mode = 0644
[instance-base-distributor]
...
...
This diff is collapsed.
Click to expand it.
software/monitor/script/check_system_health.py
deleted
100644 → 0
View file @
42b8cba0
#!/usr/bin/env python
import json
import os
import re
import subprocess
import sys
# Commands run through subprocess by the checks below.

# `top` invocation whose output cpu_usage() pipes into head_command_list.
cpu_command_list = ['top', '-n', '1', '-b']
# `free` invocation whose output memory_usage() parses and appends to its
# messages.
mem_command_list = ['free', '-m']
# Keeps only the first 5 lines of whatever is piped into it.
head_command_list = ['head', '-n', '5']
# Its output is converted with int() and used as the core count in
# cpu_usage().
cpu_core_cmd_list = ['nproc']
def cpu_usage(tolerance=1.5):
    """Report an overloaded CPU based on the 15-minute load average.

    The load is compared with ``core_count * tolerance``, where the core
    count is the integer printed by ``cpu_core_cmd_list``.

    :param tolerance: accepted load per core; 1.5 accepts up to 150% load.
    :returns: a message with the three load averages followed by the output
        of ``cpu_command_list`` piped through ``head_command_list`` when the
        load is above the limit, otherwise ``None``.
    :raises OSError: if the load average cannot be obtained.
    :raises subprocess.CalledProcessError: if a helper command fails.
    """
    # os.getloadavg() returns floats directly, so nothing depends on how the
    # current locale makes `uptime` print decimal separators.
    load, load5, long_load = os.getloadavg()
    core_count = int(subprocess.check_output(cpu_core_cmd_list).strip())
    threshold = core_count * tolerance
    if long_load > threshold:
        # display top statistics
        top = subprocess.Popen(cpu_command_list, stdout=subprocess.PIPE)
        try:
            # universal_newlines=True yields text on both Python 2 and 3, so
            # the output can be appended to the message below.
            result = subprocess.check_output(
                head_command_list,
                stdin=top.stdout,
                universal_newlines=True)
        finally:
            # Release our end of the pipe and reap the producer so that no
            # descriptor or zombie process is left behind.
            top.stdout.close()
            top.wait()
        message = "CPU load is high: %s %s %s\n\n" % (load, load5, long_load)
        message += result
        return message
    return None
def check_last_result(file, last_value, threshold=7.0, elt_count=5):
    """Record ``last_value`` in a history file and average the recent values.

    The history is stored in ``file`` as space-separated numbers. The new
    value is appended, and once at least ``elt_count`` earlier values were
    already stored, only the latest ``elt_count`` entries are kept and
    averaged.

    :param file: path of the history file; created when missing.
    :param last_value: newest measurement to record.
    :param threshold: an average is only reported when it is below this.
    :param elt_count: number of entries kept and averaged.
    :returns: the average rounded to 2 decimals when enough history exists
        and the average is below ``threshold``; ``0.0`` otherwise.
    """
    mem_average = 0.0
    value_list = []
    if os.path.exists(file):
        with open(file) as f:
            # split() without argument ignores repeated blanks and trailing
            # newlines, so an empty file simply yields no history.
            for token in f.read().split():
                try:
                    float(token)
                except ValueError:
                    # Drop a corrupted entry instead of failing later when
                    # the average is computed.
                    continue
                value_list.append(token)
    size = len(value_list)
    value_list.append(str(last_value))
    if size >= elt_count:
        # Keep only the newest elt_count entries.
        value_list = value_list[len(value_list) - elt_count:]
        if value_list:
            # Divide by the number of values actually summed; the stored
            # history may have been longer than elt_count before trimming.
            average = sum(float(v) for v in value_list) / len(value_list)
            if average < threshold:
                mem_average = round(average, 2)
    with open(file, 'w') as f:
        f.write(' '.join(value_list))
    return mem_average
def memory_usage(storage_file, threshold=7.0, elt_count=5):
    """Report low available memory or swap from the output of ``free``.

    The percentage of available memory is recorded through
    ``check_last_result`` and an alarm is raised on its recent average, so
    a single low reading does not trigger a message. Swap is checked on the
    current reading only, against ``threshold * 1.7``.

    :param storage_file: history file handed to ``check_last_result``.
    :param threshold: minimum accepted percentage of available memory.
    :param elt_count: number of readings averaged by ``check_last_result``.
    :returns: a message followed by the raw ``free`` output when memory or
        swap is low, otherwise ``None``.
    :raises subprocess.CalledProcessError: if ``mem_command_list`` fails.
    """
    # universal_newlines=True yields text on both Python 2 and 3.
    mem_stats = subprocess.check_output(
        mem_command_list, universal_newlines=True)
    # Skip the header row and blank lines; each remaining row becomes its
    # list of whitespace-separated columns (label first).
    rows = [line.split() for line in mem_stats.splitlines()[1:]
            if line.strip()]
    mem_row = rows[0]
    # Legacy `free` prints a "-/+ buffers/cache" row between the memory and
    # swap rows and its last column is the really free memory. Without that
    # row, the last column of the memory row is used instead.
    # NOTE(review): on current procps that column is "available" - confirm
    # for the `free` implementation deployed.
    real_row = rows[1] if len(rows) > 2 else mem_row
    mem_total = float(mem_row[1])
    mem_free = float(real_row[-1])
    if mem_total:
        mem_available = round(mem_free * 100 / mem_total, 2)
    else:
        mem_available = 0.0

    average = check_last_result(
        storage_file,
        mem_available,
        threshold=threshold,
        elt_count=elt_count)
    # check_last_result() returns 0.0 when it has no average to report.
    if average != 0.0 and average < threshold:
        # mem used at (threshold)% at least
        message = ("Memory usage is high. %s%% is available "
                   "(%s%% for last %s minutes).\n\n" % (
                       mem_available, average, elt_count))
        message += mem_stats
        return message

    if len(rows) >= 2:
        # The swap row is the last one in both layouts.
        swap_row = rows[-1]
        swap_total = float(swap_row[1])
        swap_free = float(swap_row[3])
        if swap_total > 1:
            # Already a percentage: it must not be scaled by 100 again,
            # otherwise the comparison below could practically never match.
            swap_available = round(swap_free * 100 / swap_total, 2)
            if swap_available < threshold * 1.7:
                message = ("Memory SWAP usage is high. "
                           "%s%% is available.\n\n" % swap_available)
                message += mem_stats
                return message
    return None
if __name__ == '__main__':
    # Command line: [cpu | mem] CONFIG_FILE [BASE_DIR]
    # Exit status: 0 = check passed, 1 = alarm (message printed),
    # 2 = missing arguments, 3 = unknown check type.
    if len(sys.argv) < 2:
        # Single-argument print() behaves the same on Python 2 and 3.
        print("Usage: %s [cpu | mem] CONFIG_FILE [BASE_DIR]"
              % os.path.basename(sys.argv[0]))
        sys.exit(2)

    check_type = sys.argv[1]
    threshold = None
    if len(sys.argv) >= 3:
        config_file = sys.argv[2]
        if os.path.exists(config_file):
            # The file holds a single number; anything unparsable or not
            # strictly positive falls back to the per-check default.
            with open(config_file) as f:
                try:
                    threshold = float(f.read())
                except ValueError:
                    threshold = None
            if threshold is not None and not threshold > 0:
                threshold = None

    if check_type == "cpu":
        result = cpu_usage(threshold or 1.5)
        if result:
            print(result)
            sys.exit(1)
    elif check_type == "mem":
        directory = ""
        if len(sys.argv) >= 4:
            directory = sys.argv[3]
        # isdir() is already False for a missing path.
        if not os.path.isdir(directory):
            directory = os.getcwd()
        storage_file = os.path.join(directory, 'mem-usage.mo')
        result = memory_usage(
            storage_file, threshold=(threshold or 4.0), elt_count=10)
        if result:
            print(result)
            sys.exit(1)
    else:
        sys.exit(3)
    sys.exit(0)
\ No newline at end of file
This diff is collapsed.
Click to expand it.
software/monitor/software.cfg
View file @
321734be
...
...
@@ -21,14 +21,14 @@ parts =
recipe = slapos.recipe.template
url = ${:_profile_base_location_}/instance.cfg
output = ${buildout:directory}/template.cfg
md5sum =
641c5916739f78171c616af00fe974a2
md5sum =
1b7d2d097f208f6641bf98a17df079c8
mode = 0644
[template-monitor]
recipe = slapos.recipe.build:download
url = ${:_profile_base_location_}/instance-monitor.cfg.jinja2
destination = ${buildout:directory}/template-base-monitor.cfg
md5sum =
79125819f20f4f18a301b806daed2ceb
md5sum =
ef3297619e1fc2a5a8d1b0546c1a0db2
mode = 0644
[template-monitor-distributor]
...
...
@@ -59,13 +59,6 @@ filename = collect_csv_dump.py
output = ${:destination}/${:filename}
md5sum = cad2402bbd21907cfed6bc5af8c5d3ab
[monitor-system-health]
<= monitor-template-script
url = ${:_profile_base_location_}/script/${:filename}
filename = check_system_health.py
output = ${:destination}/${:filename}
md5sum = 7eb74a0be4995c6a1015a9a1eb6874c6
[extra-eggs]
<= monitor-eggs
interpreter = pythonwitheggs
...
...
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment