##############################################################################
#
# Copyright (c) 2016 Nexedi SA and Contributors. All Rights Reserved.
#
# WARNING: This program as such is intended to be used by professional
# programmers who take the whole responsibility of assessing all potential
# consequences resulting from its eventual inadequacies and bugs
# End users who are looking for a ready-to-use solution with commercial
# garantees and support are strongly advised to contract a Free Software
# Service Company
#
# This program is Free Software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
#
##############################################################################

from HTMLParser import HTMLParser
class HtmlParseHelper(HTMLParser):
  """
  Listens to all the HTMLParser methods and push results in a list of tuple.
  Tuple contains every method arguments, for instance the `handle_starttag`
  method pushes `('starttag', tag, attrs)` to the tuple list.

  See https://docs.python.org/2/library/htmlparser.html
  """
  def __init__(self, *args, **kw):
    HTMLParser.__init__(self, *args, **kw)
    self.result = []
  def handle_starttag(self, tag, attrs):
    self.result.append(("starttag", tag, attrs))
  def handle_startendtag(self, tag, attrs):
    self.result.append(("startendtag", tag, attrs))
  def handle_endtag(self, tag):
    self.result.append(("endtag", tag))
  def handle_data(self, data):
    self.result.append(("data", data))
  def handle_entityref(self, name):
    self.result.append(("entityref", name))
  def handle_charref(self, name):
    self.result.append(("charref", name))
  def handle_comment(self, data):
    self.result.append(("comment", data))
  def handle_decl(self, decl):
    self.result.append(("decl", decl))
  def handle_pi(self, data):
    self.result.append(("pi", data))
  def unknown_decl(self, data):
    self.result.append(("unknown_decl", data))

def parseHtml(text):
  """
  Parses a string and returns html parts as tuple list.

  Example:
  input: 'Click <a href="destination">here</a> to see the documentation.'
  return: [
    ('data', 'Click '),
    ('starttag', 'a', ('href', 'destination')),
    ('data', 'here'),
    ('endtag', 'a'),
    ('data', ' to see the documentation'),
  ]
  """
  hr = HtmlParseHelper()
  hr.feed(text)
  hr.close()
  return hr.result

import re
def partition(text, separatorRegexp):
  """
  partition("abcba", re.compile("(b)")) -> [
    ("a",),
    ("b", "b"),
    ("c",),
    ("b", "b"),
    ("a",),
  ]
  """
  result = []
  lastIndex = 0
  for match in separatorRegexp.finditer(text):
    result.append((text[lastIndex:match.start()],))
    result.append((match.group(0),) + match.groups())
    lastIndex = match.end()
  result.append((text[lastIndex:],))
  return result

css_comment_filter_re = re.compile(r"/\*((?:[^\*]|\*[^/])*)\*/")
#css_url_re = re.compile(r"""(:[ \t]*url\()((")([^"]*)"|(')([^']*)'|([^\)]*))\)""")
css_url_re = re.compile(r"""(:[ \t]*url\()(\s*(")([^"]*)"\s*|\s*(')([^']*)'\s*|([^\)]*))\)""")
def parseCssForUrl(text):
  """
  return tuple list like: [
    ("data", ""),
    ("comment", "/* set body background image */", " set body background image "),
    ("data", "\nbody {\n  background-image: url("),
    ("url", "  'http://ima.ge/bg.png' ", "http://ima.ge/bg.png", "'"),
    ("data", ");\n}\n"),
  ]
  """
  result = []
  parts = partition(text, css_comment_filter_re)  # filter comments
  i = 0
  for part in parts:
    i += 1
    if i % 2 == 0:  # comment
      result.append(("comment", part[0], part[1]))
    else:  # non comment
      parts = partition(part[0], css_url_re)
      data = ""
      j = 0
      for part in parts:
        j += 1
        if j % 2 == 1:  # css data
          data += part[0]
        else:  # url
          result.append(("data", data + part[1]))
          result.append(("url", part[2], (part[4] or part[6] or part[7] or "").strip(), part[3] or part[5] or ""))
          data = ")"
      result.append(("data", data))
  return result