Commit d7ba7ce3 authored by Dirley Rodrigues

actually filter the links

--HG--
branch : distribute
extra : rebase_source : cb6e3497e1f8594181f10110cbc833bd6c81f89e
parent 116420fe
...@@ -148,6 +148,7 @@ def find_external_links(url, page): ...@@ -148,6 +148,7 @@ def find_external_links(url, page):
for match in HREF.finditer(tag): for match in HREF.finditer(tag):
link = urlparse.urljoin(url, htmldecode(match.group(1))) link = urlparse.urljoin(url, htmldecode(match.group(1)))
if not link in seen_links: if not link in seen_links:
seen_links.add(link)
yield link yield link
for tag in ("<th>Home Page", "<th>Download URL"): for tag in ("<th>Home Page", "<th>Download URL"):
...@@ -157,6 +158,7 @@ def find_external_links(url, page): ...@@ -157,6 +158,7 @@ def find_external_links(url, page):
if match: if match:
link = urlparse.urljoin(url, htmldecode(match.group(1))) link = urlparse.urljoin(url, htmldecode(match.group(1)))
if not link in seen_links: if not link in seen_links:
seen_links.add(link)
yield link yield link
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment