Commit 5f92ce8a authored by Rafael Monnerat's avatar Rafael Monnerat

apache-mpm_watchdog: Initial commit

  Implement a watchdog to kill processes which are held in a deadlock
  state after a graceful reload.
parent d1ed4bf0
......@@ -11,3 +11,4 @@
.eggs/
config.json
slapos/test/promise/data/SOFTINST-0_*
slapos/test/promise/data/write_db.json
......@@ -69,6 +69,7 @@ setup(name=name,
entry_points={
'console_scripts': [
'agent = slapos.agent.agent:main',
'apache-mpm-watchdog = slapos.promise.apache_mpm_watchdog:main',
'check-web-page-http-cache-hit = slapos.promise.check_web_page_http_cache_hit:main',
'check-feed-as-promise = slapos.checkfeedaspromise:main',
'check-error-on-apache-log = slapos.promise.check_error_on_apache_log:main',
......
import argparse
import json
import os
import re
import signal
import time

import psutil
import requests
# Matches a table row fragment of the server-status page and captures the
# content of the cell that immediately precedes a cell reading exactly
# "yes (old gen)" -- in the status table that is the PID column placed right
# before the "Stopping" column.
search_pid_regex = r"</td><td.*?>(.+?)</td><td>yes \(old gen\)</td>"
def loadJSONFile(db_path):
    """Load the JSON object stored at ``db_path``.

    :param db_path: path of the JSON database, or None when no database
        is configured.
    :return: the decoded dict; an empty dict when ``db_path`` is None, when
        the file does not exist, when its content is not valid JSON, or
        when the decoded value is not a JSON object.
    """
    # None means "no database" (writeJSONFile accepts None the same way);
    # os.path.exists(None) would raise TypeError instead of returning False.
    if db_path is None or not os.path.exists(db_path):
        return {}
    with open(db_path) as json_file:
        try:
            content = json.load(json_file)
        except ValueError:
            # Corrupted database: start again from an empty state.
            return {}
    # Callers use dict methods on the result, so any other JSON value
    # (list, number, string...) is treated like a corrupted database.
    if not isinstance(content, dict):
        return {}
    return content
def writeJSONFile(pid_dict, db_path):
    """Drop vanished PIDs from ``pid_dict`` and save it as JSON.

    Every key of ``pid_dict`` is converted to an int and looked up with
    psutil; keys for which psutil reports no such process are removed from
    ``pid_dict`` in place. The remaining mapping is then written to
    ``db_path``. When ``db_path`` is None there is no place to save, so the
    function returns without touching ``pid_dict``.
    """
    if db_path is not None:
        # Walk a snapshot of the keys: entries are removed while looping.
        for known_pid in list(pid_dict):
            try:
                psutil.Process(int(known_pid))
            except psutil.NoSuchProcess:
                pid_dict.pop(known_pid)
        with open(db_path, "w") as db_file:
            serialized = json.dumps(pid_dict)
            db_file.write(serialized)
def getServerStatus(url, user, password, request_timeout=60):
    """Fetch the server-status page and return its body as text.

    :param url: address of the status page.
    :param user: user name sent as HTTP Basic credentials, or None to send
        no credentials at all.
    :param password: password sent together with ``user``.
    :param request_timeout: seconds to wait for the server before giving
        up. Without a limit the request could block forever on a server
        that accepts the connection but never answers.
    :return: the response text when the server answers with status 200;
        None when it answers with any other status, cannot be reached, or
        does not answer within ``request_timeout``.
    """
    auth = None if user is None else (user, password)
    try:
        response = requests.get(url, auth=auth, timeout=request_timeout)
    except (requests.exceptions.ConnectionError,
            requests.exceptions.Timeout):
        return None
    if response.status_code == 200:
        return response.text
    return None
def watchServerStatus(pid_dict, server_status, timeout=600):
    """Track stopping httpd processes and kill those past their deadline.

    Every PID extracted from ``server_status`` with ``search_pid_regex`` that
    belongs to a running process whose executable path ends with "/httpd"
    gets a deadline of "now + timeout" the first time it is seen. On later
    calls, a PID whose stored deadline is already in the past is sent
    SIGKILL.

    :param pid_dict: mapping of PID (str) to deadline (epoch seconds) as
        remembered from previous runs. It is updated in place.
    :param server_status: text of the server-status page.
    :param timeout: seconds a process may stay in the stopping state before
        it is killed.
    :return: ``pid_dict`` itself, including the deadlines recorded by this
        call, so that the caller can persist them.
    """
    now = time.time()
    for pid in re.findall(search_pid_regex, server_status):
        try:
            cmdline = psutil.Process(int(pid)).cmdline()
        except (ValueError, psutil.NoSuchProcess, psutil.AccessDenied):
            # Not a number, already gone, or not inspectable: nothing to do.
            continue
        # Ensure the process is actually an apache. The command line can
        # be empty, so guard before looking at its first element.
        if not cmdline or not cmdline[0].endswith("/httpd"):
            continue
        deadline = pid_dict.setdefault(pid, now + timeout)
        if deadline < now:
            print("Sending signal -%s to %s" % (signal.SIGKILL, pid))
            os.kill(int(pid), signal.SIGKILL)
    return pid_dict


def main():
    """Command-line entry point of the watchdog.

    Loads the remembered deadlines, fetches the server-status page, kills
    the stopping httpd processes that outlived their deadline and saves
    the updated deadlines.

    :raises ValueError: when the server-status page cannot be retrieved.
    """
    parser = argparse.ArgumentParser()
    # Address of the server-status page
    parser.add_argument("-u", "--url", required=True)
    # Credentials for the status page; "-u" already belongs to --url, and
    # registering the same option string twice makes argparse fail.
    parser.add_argument("-U", "--user")
    parser.add_argument("-p", "--password")
    # JSON file used to remember deadlines between two runs
    parser.add_argument("-d", "--db")
    # Seconds a stopping process is tolerated before being killed
    parser.add_argument("-t", "--timeout", type=int, default=600)
    args = parser.parse_args()

    # Without --db there is nothing to load; start from an empty state.
    pid_dict = {} if args.db is None else loadJSONFile(args.db)
    server_status = getServerStatus(
        args.url, args.user, args.password)
    if server_status is None:
        raise ValueError("Couldn't connect to server status page")
    pid_dict = watchServerStatus(pid_dict, server_status,
                                 timeout=args.timeout)
    writeJSONFile(pid_dict, args.db)
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
<html><head>
<title>Apache Status</title>
</head><body>
<h1>Apache Server Status </h1>
<dl><dt>Server Version: Apache/2.4.25 (Unix) OpenSSL/1.0.2k mod_antiloris/0.5.1</dt>
<dt>Server MPM: event</dt>
<dt>Server Built: Apr 19 2017 12:41:47
</dt></dl><hr /><dl>
<dt>Current Time: Wednesday, 31-May-2017 14:57:52 CEST</dt>
<dt>Restart Time: Friday, 26-May-2017 00:24:30 CEST</dt>
<dt>Parent Server Config. Generation: 498</dt>
<dt>Parent Server MPM Generation: 497</dt>
<dt>Server uptime: 5 days 14 hours 33 minutes 22 seconds</dt>
<dt>Server load: 2.32 1.98 1.94</dt>
<dt>Total accesses: 26754245 - Total Traffic: 249.0 GB</dt>
<dt>CPU Usage: u7129.39 s1016.88 cu0 cs0 - 1.68% CPU load</dt>
<dt>55.2 requests/sec - 0.5 MB/second - 9.8 kB/request</dt>
<dt>8 requests currently being processed, 117 idle workers</dt>
</dl>
<table rules="all" cellpadding="1%">
<tr><th rowspan="2">Slot</th><th rowspan="2">PID</th><th rowspan="2">Stopping</th><th colspan="2">Connections</th>
<th colspan="2">Threads</th><th colspan="3">Async connections</th></tr>
<tr><th>total</th><th>accepting</th><th>busy</th><th>idle</th><th>writing</th><th>keep-alive</th><th>closing</th></tr>
<tr><td>0</td><td>12345</td><td>yes (old gen)</td><td>3</td><td>no</td><td>0</td><td>0</td><td>0</td><td>0</td><td>0</td></tr>
<tr><td>1</td><td>12346</td><td>yes (old gen)</td><td>3</td><td>no</td><td>0</td><td>0</td><td>0</td><td>0</td><td>0</td></tr>
<tr><td>2</td><td>24443</td><td>no</td><td>8</td><td>yes</td><td>1</td><td>24</td><td>0</td><td>1</td><td>7</td></tr>
<tr><td>5</td><td>23019</td><td>no</td><td>11</td><td>yes</td><td>1</td><td>24</td><td>0</td><td>3</td><td>7</td></tr>
<tr><td>8</td><td>12348</td><td>yes (old gen)</td><td>1</td><td>no</td><td>0</td><td>0</td><td>0</td><td>0</td><td>0</td></tr>
<tr><td>9</td><td>23032</td><td>no</td><td>18</td><td>yes</td><td>2</td><td>23</td><td>0</td><td>2</td><td>14</td></tr>
<tr><td>10</td><td>23053</td><td>no</td><td>17</td><td>yes</td><td>2</td><td>23</td><td>0</td><td>2</td><td>14</td></tr>
<tr><td>11</td><td>23118</td><td>no</td><td>13</td><td>yes</td><td>2</td><td>23</td><td>0</td><td>7</td><td>3</td></tr>
<tr><td>Sum</td><td>8</td><td>3</td><td>74</td><td>&nbsp;</td><td>8</td><td>117</td><td>0</td><td>15</td><td>45</td></tr>
</table>
<pre>..............G..G.G.............G.......G.......G___R__________
___________.................................................._R_
______________________..........................................
...........G.....................______________R_____R__________
______________WC___________R________R_______....................
................................................................
................</pre>
<p>Scoreboard Key:<br />
"<b><code>_</code></b>" Waiting for Connection,
"<b><code>S</code></b>" Starting up,
"<b><code>R</code></b>" Reading Request,<br />
"<b><code>W</code></b>" Sending Reply,
"<b><code>K</code></b>" Keepalive (read),
"<b><code>D</code></b>" DNS Lookup,<br />
"<b><code>C</code></b>" Closing connection,
"<b><code>L</code></b>" Logging,
"<b><code>G</code></b>" Gracefully finishing,<br />
"<b><code>I</code></b>" Idle cleanup of worker,
"<b><code>.</code></b>" Open slot with no current process<br />
</p>
<table border="0"><tr><th>Srv</th><th>PID</th><th>Acc</th><th>M</th><th>CPU
</th><th>SS</th><th>Req</th><th>Conn</th><th>Child</th><th>Slot</th><th>Client</th><th>Protocol</th><th>VHost</th><th>Request</th></tr>
<tr><td><b>0-495</b></td><td>-</td><td>0/0/168016</td><td>.
</td><td>2094.10</td><td>8672</td><td>8</td><td>0.0</td><td>0.00</td><td>1344.54
</td><td>163.172.65.117</td><td>http/1.1</td><td nowrap></td><td nowrap></td></tr>
</table>
<hr /> <table>
<tr><th>Srv</th><td>Child Server number - generation</td></tr>
<tr><th>PID</th><td>OS process ID</td></tr>
<tr><th>Acc</th><td>Number of accesses this connection / this child / this slot</td></tr>
<tr><th>M</th><td>Mode of operation</td></tr>
<tr><th>CPU</th><td>CPU usage, number of seconds</td></tr>
<tr><th>SS</th><td>Seconds since beginning of most recent request</td></tr>
<tr><th>Req</th><td>Milliseconds required to process most recent request</td></tr>
<tr><th>Conn</th><td>Kilobytes transferred this connection</td></tr>
<tr><th>Child</th><td>Megabytes transferred this child</td></tr>
<tr><th>Slot</th><td>Total megabytes transferred this slot</td></tr>
</table>
<hr>
<table cellspacing=0 cellpadding=0>
<tr><td bgcolor="#000000">
<b><font color="#ffffff" face="Arial,Helvetica">SSL/TLS Session Cache Status:</font></b></td></tr>
<tr><td bgcolor="#ffffff">
cache type: <b>SHMCB</b>, shared memory: <b>512000</b> bytes, current entries: <b>642</b><br>subcaches: <b>32</b>, indexes per subcache: <b>88</b><br>time left on oldest entries' objects: avg: <b>15</b> seconds, (range: 0...45)<br>index usage: <b>22%</b>, cache usage: <b>28%</b><br>total entries stored since starting: <b>16275</b><br>total entries replaced since starting: <b>0</b><br>total entries expired since starting: <b>15633</b><br>total (pre-expiry) entries scrolled out of the cache: <b>0</b><br>total retrieves since starting: <b>3285</b> hit, <b>607</b> miss<br>total removes since starting: <b>0</b> hit, <b>0</b> miss<br></td></tr>
</table>
</body></html>
{"1234": 1496161635.514768, "4321": 1496161635.514768}
##############################################################################
#
# Copyright (c) 2017 Vifib SARL and Contributors. All Rights Reserved.
#
# WARNING: This program as such is intended to be used by professional
# programmers who take the whole responsibility of assessing all potential
# consequences resulting from its eventual inadequacies and bugs
# End users who are looking for a ready-to-use solution with commercial
# guarantees and support are strongly adviced to contract a Free Software
# Service Company
#
# This program is Free Software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 3
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
##############################################################################
import unittest
import os.path
import socket
import time
import psutil
from slapos.promise.apache_mpm_watchdog import watchServerStatus, \
loadJSONFile, writeJSONFile, getServerStatus, search_pid_regex
from slapos.test.promise import data
class TestApacheMPMWatchdog(unittest.TestCase):
    """Tests for the helpers of slapos.promise.apache_mpm_watchdog."""

    def setUp(self):
        # Fixture files are stored in the directory of the ``data`` package.
        self.base_path = os.path.dirname(data.__file__)

    def test_searchPidRegex(self):
        """The regex extracts the PID of every "yes (old gen)" row."""
        # Imported here because ``re`` is not among this module's imports.
        import re
        with open(os.path.join(self.base_path, "server_status.html")) as f:
            server_status = f.read()
        # The fixture lists three stopping processes.
        self.assertEqual(['12345', '12346', '12348'],
                         re.findall(search_pid_regex, server_status))

    def test_loadJSONFile(self):
        """Missing and corrupted files give {}, a valid file its content."""
        self.assertEqual({}, loadJSONFile("couscous"))
        self.assertEqual(
            {"1234": 1496161635.514768, "4321": 1496161635.514768},
            loadJSONFile(os.path.join(self.base_path, "test_db.json")))
        self.assertEqual(
            {},
            loadJSONFile(os.path.join(self.base_path, "corrupted_db.json")))

    def test_writeJSONFile(self):
        """Only PIDs of existing processes are written to the database."""
        # Check if don't raise.
        self.assertEqual(None, writeJSONFile({}, None))
        current_pid = os.getpid()
        db_path = os.path.join(self.base_path, "write_db.json")
        # NOTE(review): assumes no process with PID 123482 exists on the
        # machine running the test.
        self.assertEqual(
            None,
            writeJSONFile({"123482": 123, current_pid: 124}, db_path))
        with open(db_path) as f:
            json_content = f.read()
        self.assertEqual(json_content, '{"%s": 124}' % current_pid)

    def test_getServerStatus(self):
        """Unreachable servers give None, a reachable one gives text."""
        # NOTE(review): assumes nothing answers 200 on http://localhost/
        # and that https://www.erp5.com/ is reachable from the test host.
        self.assertEqual(
            None, getServerStatus("http://localhost/", None, None))
        self.assertEqual(
            None, getServerStatus("http://localhost/", "user", "password"))
        self.assertNotEqual(
            None, getServerStatus("https://www.erp5.com/", None, None))
# Run the test suite when this file is executed directly as a script.
if __name__ == '__main__':
unittest.main()
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment