Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
S
slapos.toolbox
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Xavier Thompson
slapos.toolbox
Commits
61cd5f2b
Commit
61cd5f2b
authored
Dec 15, 2022
by
Joanne Hugé
Committed by
Xavier Thompson
Dec 16, 2022
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
promise/plugin: Add check_cpu_temperature promise
Co-authored-by:
Xaver Thompson
<
xavier.thompson@nexedi.com
>
parent
e7224743
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
89 additions
and
0 deletions
+89
-0
slapos/promise/plugin/check_cpu_temperature.py
slapos/promise/plugin/check_cpu_temperature.py
+89
-0
No files found.
slapos/promise/plugin/check_cpu_temperature.py
0 → 100644
View file @
61cd5f2b
import
json
import
os
import
psutil
import
time
from
.util
import
JSONPromise
from
zope.interface
import
implementer
from
slapos.grid.promise
import
interface
@
implementer
(
interface
.
IPromise
)
class
RunPromise
(
JSONPromise
):
def
__init__
(
self
,
config
):
super
(
RunPromise
,
self
).
__init__
(
config
)
self
.
setPeriodicity
(
minute
=
2
)
self
.
avg_flag_file
=
self
.
getConfig
(
'last-avg-computation-file'
,
'last_avg'
)
self
.
max_spot_temp
=
float
(
self
.
getConfig
(
'max-spot-temp'
,
90
))
self
.
max_avg_temp
=
float
(
self
.
getConfig
(
'max-avg-temp'
,
80
))
avg_temp_duration_sec
=
int
(
self
.
getConfig
(
'avg-temp-duration-sec'
,
0
))
if
avg_temp_duration_sec
:
self
.
avg_temp_duration
=
avg_temp_duration_sec
else
:
self
.
avg_temp_duration
=
60
*
int
(
self
.
getConfig
(
'avg-temp-duration'
,
5
))
def
sense
(
self
):
success
=
True
# Get current temperature
try
:
cpu_temp
=
psutil
.
sensors_temperatures
()[
'coretemp'
][
0
][
1
]
except
(
KeyError
,
IndexError
)
as
e
:
self
.
logger
.
error
(
"Could not read core temperature"
)
return
# Check spot temperature
if
cpu_temp
>
self
.
max_spot_temp
:
success
=
False
self
.
logger
.
error
(
"Temperature reached critical threshold: %s °C"
" (threshold is %s °C)"
,
cpu_temp
,
self
.
max_spot_temp
)
# Log temperature
data
=
json
.
dumps
({
'cpu_temperature'
:
cpu_temp
})
self
.
json_logger
.
info
(
"Temperature data"
,
extra
=
{
'data'
:
data
})
# TODO: promise should compute average only with logs between interval
# Computer average temperature
avg_computation_period
=
self
.
avg_temp_duration
/
4
try
:
t
=
os
.
path
.
getmtime
(
self
.
avg_flag_file
)
except
OSError
:
t
=
0
if
(
time
.
time
()
-
t
)
>
avg_computation_period
:
open
(
self
.
avg_flag_file
,
'w'
).
close
()
temp_list
=
self
.
getJsonLogDataInterval
(
self
.
avg_temp_duration
)
if
temp_list
:
avg_temp
=
sum
(
x
[
'cpu_temperature'
]
for
x
in
temp_list
)
/
len
(
temp_list
)
if
avg_temp
>
self
.
max_avg_temp
:
success
=
False
self
.
logger
.
error
(
"Average temperature over the last %ds reached threshold: %s °C"
" (threshold is %s °C)"
,
self
.
avg_temp_duration
,
avg_temp
,
self
.
max_avg_temp
)
else
:
success
=
False
self
.
logger
.
error
(
"Couldn't read temperature from log"
)
if
success
:
self
.
logger
.
info
(
"Temperature OK (%s °C)"
,
cpu_temp
)
def
test
(
self
):
"""
Called after sense() if the instance is still converging.
Returns success or failure based on sense results.
In this case, fail if the previous sensor result is negative.
"""
return
self
.
_test
(
result_count
=
1
,
failure_amount
=
1
)
def
anomaly
(
self
):
"""
Called after sense() if the instance has finished converging.
Returns success or failure based on sense results.
Failure signals the instance has diverged.
In this case, fail if two out of the last three results are negative.
"""
return
self
.
_anomaly
(
result_count
=
3
,
failure_amount
=
2
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment