Commit e335e8fb authored by Romain Courteaud

Add CONTACT configuration parameter.

Override the contact URL/email advertised in the bot's User-Agent header.
parent c3769d8b
......@@ -328,6 +328,7 @@ class WebBot:
timeout = int(self.config["TIMEOUT"])
elapsed_fast = float(self.config["ELAPSED_FAST"])
elapsed_moderate = float(self.config["ELAPSED_MODERATE"])
contact = self.config["CONTACT"]
# logPlatform(self._db, __version__, status_id)
# Get list of all domains
......@@ -420,6 +421,7 @@ class WebBot:
url,
ip,
__version__,
contact,
timeout,
elapsed_fast,
elapsed_moderate,
......
......@@ -64,6 +64,10 @@ def createConfiguration(
config[CONFIG_SECTION]["ELAPSED_FAST"] = "-1"
if "ELAPSED_MODERATE" not in config[CONFIG_SECTION]:
config[CONFIG_SECTION]["ELAPSED_MODERATE"] = "-1"
if "CONTACT" not in config[CONFIG_SECTION]:
config[CONFIG_SECTION][
"CONTACT"
] = "https://lab.nexedi.com/nexedi/surykatka"
if "RELOAD" not in config[CONFIG_SECTION]:
config[CONFIG_SECTION]["RELOAD"] = str(False)
......
......@@ -40,15 +40,22 @@ def getRootUrl(url):
return "%s://%s/" % (parsed_url.scheme, parsed_url.hostname)
def getUserAgent(version):
return "%s/%s (+%s)" % (
"SURYKATKA",
version,
"https://lab.nexedi.com/nexedi/surykatka",
)
def getUserAgent(version, contact=None):
    """Build the User-Agent header value sent by the bot.

    version: bot version, placed after the "SURYKATKA" product name.
    contact: optional contact URL or email. When truthy it is appended
        as a " (+contact)" comment; when None or empty, nothing is
        appended.

    Returns a string such as "SURYKATKA/0.0.3 (+foocontact)", or
    "SURYKATKA/0.0.4" when no contact is given.
    """
    # Build the suffix under its own name instead of rebinding the
    # `contact` argument, so the parameter keeps a single meaning.
    suffix = (" (+%s)" % contact) if contact else ""
    return "%s/%s%s" % ("SURYKATKA", version, suffix)
def request(url, timeout=TIMEOUT, headers=None, session=requests, version=0):
def request(
url,
timeout=TIMEOUT,
headers=None,
session=requests,
contact=None,
version=0,
):
if headers is None:
headers = {}
......@@ -56,7 +63,7 @@ def request(url, timeout=TIMEOUT, headers=None, session=requests, version=0):
headers["Accept"] = "%s;q=0.9,*/*;q=0.8" % PREFERRED_TYPE
if "User-Agent" not in headers:
# XXX user agent
headers["User-Agent"] = getUserAgent(version)
headers["User-Agent"] = getUserAgent(version, contact)
kwargs = {}
kwargs["stream"] = False
......@@ -188,6 +195,7 @@ def checkHttpStatus(
url,
ip,
bot_version,
contact,
timeout=TIMEOUT,
elapsed_fast=ELAPSED_FAST,
elapsed_moderate=ELAPSED_MODERATE,
......@@ -213,7 +221,11 @@ def checkHttpStatus(
raise NotImplementedError("Unhandled url: %s" % url)
response = request(
ip_url, headers={"Host": hostname}, version=bot_version, **request_kw
ip_url,
headers={"Host": hostname},
contact=contact,
version=bot_version,
**request_kw,
)
# Blacklisted, because of non stability
......
......@@ -58,11 +58,12 @@ class SurykatkaHttpTestCase(unittest.TestCase):
# getUserAgent
################################################
def test_getUserAgent_version(self):
result = getUserAgent("0.0.3")
assert (
result
== "SURYKATKA/0.0.3 (+https://lab.nexedi.com/nexedi/surykatka)"
)
result = getUserAgent("0.0.3", "foocontact")
assert result == "SURYKATKA/0.0.3 (+foocontact)"
def test_getUserAgent_no_contact(self):
    """Without a contact, the User-Agent is just product/version."""
    expected = "SURYKATKA/0.0.4"
    user_agent = getUserAgent("0.0.4")
    assert user_agent == expected
################################################
# request
......@@ -78,7 +79,7 @@ class SurykatkaHttpTestCase(unittest.TestCase):
allow_redirects=False,
headers={
"Accept": "text/html;q=0.9,*/*;q=0.8",
"User-Agent": "SURYKATKA/0 (+https://lab.nexedi.com/nexedi/surykatka)",
"User-Agent": "SURYKATKA/0",
},
stream=False,
timeout=2,
......@@ -97,10 +98,7 @@ class SurykatkaHttpTestCase(unittest.TestCase):
assert last_request.headers["Accept-Encoding"] == "gzip, deflate"
assert last_request.headers["Connection"] == "keep-alive"
assert last_request.headers["Host"] == "example.org"
assert (
last_request.headers["User-Agent"]
== "SURYKATKA/0 (+https://lab.nexedi.com/nexedi/surykatka)"
)
assert last_request.headers["User-Agent"] == "SURYKATKA/0"
assert len(last_request.body) == 0
assert response.status_code == 418
......@@ -898,6 +896,7 @@ class SurykatkaHttpTestCase(unittest.TestCase):
ip = "127.0.0.1"
url = "http://example.org/foo?bar=1"
bot_version = 1
bot_contact = "http://example.org/contact10"
httpretty.register_uri(
httpretty.GET,
"http://127.0.0.1/foo?bar=1",
......@@ -906,7 +905,7 @@ class SurykatkaHttpTestCase(unittest.TestCase):
)
status_id = logStatus(self.db, "foo")
checkHttpStatus(self.db, status_id, url, ip, bot_version)
checkHttpStatus(self.db, status_id, url, ip, bot_version, bot_contact)
last_request = httpretty.last_request()
assert len(last_request.headers) == 5, last_request.headers.keys()
......@@ -916,7 +915,7 @@ class SurykatkaHttpTestCase(unittest.TestCase):
assert last_request.headers["Host"] == "example.org"
assert (
last_request.headers["User-Agent"]
== "SURYKATKA/1 (+https://lab.nexedi.com/nexedi/surykatka)"
== "SURYKATKA/1 (+http://example.org/contact10)"
)
assert len(last_request.body) == 0
......@@ -935,6 +934,7 @@ class SurykatkaHttpTestCase(unittest.TestCase):
ip = "127.0.0.1"
url = "https://example.org/foo?bar=1"
bot_version = 2
bot_contact = "http://example.org/contact3"
status_id = logStatus(self.db, "foo")
with mock.patch("surykatka.http.request") as mock_request:
......@@ -943,20 +943,26 @@ class SurykatkaHttpTestCase(unittest.TestCase):
"Cache-Control": "public",
}
checkHttpStatus(self.db, status_id, url, ip, bot_version)
checkHttpStatus(
self.db, status_id, url, ip, bot_version, bot_contact
)
assert mock_request.call_count == 1
assert mock_request.call_args.args == (
"https://example.org/foo?bar=1",
)
assert (
len(mock_request.call_args.kwargs) == 4
len(mock_request.call_args.kwargs) == 5
), mock_request.call_args.kwargs
assert mock_request.call_args.kwargs["headers"] == {
"Host": "example.org"
}
assert mock_request.call_args.kwargs["session"] is not None
assert mock_request.call_args.kwargs["version"] == 2
assert (
mock_request.call_args.kwargs["contact"]
== "http://example.org/contact3"
)
assert mock_request.call_args.kwargs["timeout"] == 2
assert self.db.HttpCodeChange.select().count() == 1
......@@ -974,10 +980,13 @@ class SurykatkaHttpTestCase(unittest.TestCase):
ip = "127.0.0.1"
url = "foo?bar=1"
bot_version = 1
bot_contact = "http://example.org/contact"
status_id = logStatus(self.db, "foo")
try:
checkHttpStatus(self.db, status_id, url, ip, bot_version)
checkHttpStatus(
self.db, status_id, url, ip, bot_version, bot_contact
)
except NotImplementedError as err:
assert str(err) == "Unhandled url: foo?bar=1"
else:
......@@ -997,6 +1006,7 @@ class SurykatkaHttpTestCase(unittest.TestCase):
ip = "127.0.0.1"
url = "http://example.org/foo?bar=1"
bot_version = 1
bot_contact = "http://example.org/contact"
whitelist_header_list = [
# Redirect
"Location",
......@@ -1046,7 +1056,7 @@ class SurykatkaHttpTestCase(unittest.TestCase):
)
status_id = logStatus(self.db, "foo")
checkHttpStatus(self.db, status_id, url, ip, bot_version)
checkHttpStatus(self.db, status_id, url, ip, bot_version, bot_contact)
last_request = httpretty.last_request()
assert len(last_request.headers) == 5, last_request.headers.keys()
......@@ -1056,7 +1066,7 @@ class SurykatkaHttpTestCase(unittest.TestCase):
assert last_request.headers["Host"] == "example.org"
assert (
last_request.headers["User-Agent"]
== "SURYKATKA/1 (+https://lab.nexedi.com/nexedi/surykatka)"
== "SURYKATKA/1 (+http://example.org/contact)"
)
assert len(last_request.body) == 0
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment