1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
##############################################################################
#
# Copyright (c) 2019 Nexedi SA and Contributors. All Rights Reserved.
#
# WARNING: This program as such is intended to be used by professional
# programmers who take the whole responsibility of assessing all potential
# consequences resulting from its eventual inadequacies and bugs
# End users who are looking for a ready-to-use solution with commercial
# guarantees and support are strongly advised to contract a Free Software
# Service Company
#
# This program is Free Software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 3
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
##############################################################################
import os
from urllib.parse import urlparse
import requests
import time
from slapos.testing.testcase import makeModuleSetUpAndTestCaseClass
from slapos.testing.utils import ManagedHTTPServer
# Build the module-level setUpModule hook and the base test-case class,
# both bound to the software release described by ../software.cfg
# (resolved relative to this test file).
setUpModule, SlapOSInstanceTestCase = makeModuleSetUpAndTestCaseClass(
os.path.abspath(
os.path.join(os.path.dirname(__file__), '..', 'software.cfg')))
class TestJSCrawler(SlapOSInstanceTestCase):
    """Integration tests for the JS crawler instance.

    Two managed HTTP test servers are published to the instance as the
    ``urls`` parameter; the tests then check that the crawler's web UI
    answers and that a sitemap is produced for each crawled site.
    """

    @classmethod
    def getInstanceParameterDict(cls):
        """Return instance parameters: two test websites for the crawler.

        Each ``TestServer`` serves a minimal HTML page whose title embeds
        the managed resource name, so the two sites are distinguishable.
        """
        class TestServer(ManagedHTTPServer):
            def do_GET(self):
                # type: () -> None
                self.send_response(200)
                # BUG FIX: "application/html" is not a registered MIME
                # type; HTML must be served as "text/html" so the crawler
                # recognizes the response as an HTML document.
                self.send_header("Content-Type", "text/html")
                self.end_headers()
                self.wfile.write(f'<title>Hello {self._name}</title>'.encode())

        return {
            'urls': '\n'.join([
                cls.getManagedResource('website1', TestServer).url,
                cls.getManagedResource('website2', TestServer).url,
            ])
        }

    def setUp(self):
        # URL of the crawler's HTTP frontend, published by the partition.
        self.url = self.computer_partition.getConnectionParameterDict(
        )['url']

    def test_http_get(self):
        """The crawler frontend answers with 200 OK or 302 Found."""
        # verify=False: the partition uses a self-signed certificate.
        resp = requests.get(self.url, verify=False)
        # assertIn instead of assertTrue(... in ...): reports the actual
        # status code on failure instead of just "False is not true".
        self.assertIn(
            resp.status_code, [requests.codes.ok, requests.codes.found])

    def test_crawled_sitemap(self):
        """A sitemap XML is served for each of the two configured sites."""
        url_list = self.computer_partition.getInstanceParameterDict()['urls'].split('\n')
        time.sleep(70)  # wait until cron runs the crawler at least once

        website1 = urlparse(url_list[0]).netloc
        sitemap1 = requests.get(self.url + '/%s.xml' % website1, verify=False)
        self.assertEqual(sitemap1.status_code, requests.codes.ok)

        website2 = urlparse(url_list[1]).netloc
        sitemap2 = requests.get(self.url + '/%s.xml' % website2, verify=False)
        self.assertEqual(sitemap2.status_code, requests.codes.ok)