Commit 116420fe authored by Dirley Rodrigues

avoid naming problems

--HG--
branch : distribute
extra : rebase_source : 29eeb99013055b8d27cad7f7e8898d06a865b188
parent e3207bd6
...@@ -139,25 +139,25 @@ REL = re.compile("""<([^>]*\srel\s*=\s*['"]?([^'">]+)[^>]*)>""", re.I) ...@@ -139,25 +139,25 @@ REL = re.compile("""<([^>]*\srel\s*=\s*['"]?([^'">]+)[^>]*)>""", re.I)
def find_external_links(url, page): def find_external_links(url, page):
"""Find rel="homepage" and rel="download" links in `page`, yielding URLs""" """Find rel="homepage" and rel="download" links in `page`, yielding URLs"""
seen = set() seen_links = set()
for match in REL.finditer(page): for match in REL.finditer(page):
tag, rel = match.groups() tag, rel = match.groups()
rels = map(str.strip, rel.lower().split(',')) rels = map(str.strip, rel.lower().split(','))
if 'homepage' in rels or 'download' in rels: if 'homepage' in rels or 'download' in rels:
for match in HREF.finditer(tag): for match in HREF.finditer(tag):
url = urlparse.urljoin(url, htmldecode(match.group(1))) link = urlparse.urljoin(url, htmldecode(match.group(1)))
if not url in seen: if not link in seen_links:
yield url yield link
for tag in ("<th>Home Page", "<th>Download URL"): for tag in ("<th>Home Page", "<th>Download URL"):
pos = page.find(tag) pos = page.find(tag)
if pos!=-1: if pos!=-1:
match = HREF.search(page,pos) match = HREF.search(page,pos)
if match: if match:
url = urlparse.urljoin(url, htmldecode(match.group(1))) link = urlparse.urljoin(url, htmldecode(match.group(1)))
if not url in seen: if not link in seen_links:
yield url yield link
user_agent = "Python-urllib/%s distribute/%s" % ( user_agent = "Python-urllib/%s distribute/%s" % (
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment