# Public snippet $346, authored by Tristan Cavelier
#
# url-content-checker.py (edited)
# Last-Modified: 2018-09-20
import pycurl
#import tempfile
from StringIO import StringIO
#from mimetools import Message as MimeMessage
import re

if 1:
  def request(url, resolve_list=None):
    """Fetch `url` with pycurl and return a dict describing the outcome.

    inspired by https://lab.nexedi.com/nexedi/slapos.toolbox/blob/f873f59d6b6cdd2e5911a45fb7f9818dfe3a9de2/slapos/promise/check_web_page_http_cache_hit/__init__.py

    Parameters:
      url -- the URL to fetch.
      resolve_list -- optional list handed unchanged to curl's RESOLVE
        option (libcurl documents "HOST:PORT:ADDRESS" entries).  None, the
        default, is treated as an empty list.

    Returns:
      When pycurl raises an error during the transfer, a dict with
        "http_code" (always 0), "status_text" (always "ERROR"),
        "error_message" and "pycurl_error" (the exception itself).
      Otherwise a dict with
        "http_code", "http_version", "status_text" (the reason phrase, may
        be empty), "response_header_data" (every header line after the
        status line) and "response_data" (the body).
    """
    if resolve_list is None:
      # Fresh list per call instead of a shared mutable default argument.
      resolve_list = []
    response_header_io = StringIO()
    output = StringIO()
    c = pycurl.Curl()
    try:
      c.setopt(c.URL, url)
      c.setopt(c.RESOLVE, resolve_list)
      c.setopt(c.WRITEFUNCTION, output.write)
      c.setopt(c.HEADERFUNCTION, response_header_io.write)
      try:
        c.perform()
      except pycurl.error as e:
        # Read the message through `args`: indexing the exception object
        # itself (`e[1]`) only works on Python 2.
        error_args = e.args
        return {
          "http_code": 0,
          "status_text": "ERROR",
          "error_message": error_args[1] if len(error_args) > 1 else str(e),
          "pycurl_error": e,
        }
      # getinfo() must be called while the handle is still open.
      http_code = c.getinfo(pycurl.HTTP_CODE)
    finally:
      # Release the curl handle on every path (success, error, exception).
      c.close()
    response_header_io.seek(0)
    status_data = response_header_io.readline()
    # Split at most twice so that a multi-word reason phrase ("Not Found")
    # stays in one piece; a missing reason phrase or an empty header buffer
    # yields empty strings instead of an unpacking error.
    status_parts = status_data.strip().split(None, 2)
    http_version = status_parts[0] if status_parts else ""
    status_text = status_parts[2] if len(status_parts) > 2 else ""
    result = {
      "http_code": http_code,
      "http_version": http_version,
      "status_text": status_text,
      # read() continues after the status line consumed by readline() above.
      "response_header_data": response_header_io.read(),
      "response_data": output.getvalue(),
    }
    return result

  def check(
    url,
    expected_content=None,
    expected_http_code=None,
    expect_200=True,
  ):
    """Fetch `url` through request() and report every unmet expectation.

    Parameters:
      url -- the URL to fetch.
      expected_content -- None to skip the body check; an object exposing a
        non-None `pattern` attribute (a compiled regexp) is searched for in
        the body; anything else is tested with `in` against the body.
      expected_http_code -- None to skip; otherwise the exact status code
        the response must carry.
      expect_200 -- when true, the status code must satisfy
        200 <= code < 300.

    Returns:
      The error message (not a list) when request() reports http_code 0;
      otherwise a list of report strings, empty when every check passed.
    """
    response = request(url)
    code = response["http_code"]
    if code == 0:
      # The transfer itself failed: hand back the error message directly.
      return response["error_message"]

    problems = []

    if expected_content is not None:
      body = response["response_data"]
      pattern = getattr(expected_content, "pattern", None)
      if pattern is None:
        # Plain value: substring test.
        if expected_content not in body:
          problems.append("String not found ({!r})".format(expected_content))
      elif not expected_content.search(body):
        # Regexp-like object: search anywhere in the body.
        problems.append("Pattern not found ({!r})".format(pattern))

    if expected_http_code is not None and code != expected_http_code:
      problems.append(
        "Expected http code {!r} instead of {!r}".format(expected_http_code, code)
      )

    if expect_200 and (code < 200 or code >= 300):
      problems.append("Expected 200 <= code < 300 instead of {!r}".format(code))

    return problems

# Run two checks against the ERP5 home page, one with a plain substring
# expectation and one with a compiled regexp, printing each report.
#print request("https://www.erp5.com/")
for expected in ("Most Powerful Open Source ERP", re.compile("develop")):
  print(check("https://www.erp5.com/", expected_content=expected))