Commit e7426010 authored by Andrew Gerrand's avatar Andrew Gerrand

misc/linkcheck: better redirect handling, use meaningful exit code

Prevent linkcheck from following redirects that lead outside the
root URL.

Return a non-zero exit code when there are problems.

Some minor refactoring for clarity.

R=golang-dev, bradfitz
CC=golang-dev
https://golang.org/cl/14425049
parent 2d6a1399
...@@ -8,11 +8,13 @@ ...@@ -8,11 +8,13 @@
package main package main
import ( import (
"errors"
"flag" "flag"
"fmt" "fmt"
"io/ioutil" "io/ioutil"
"log" "log"
"net/http" "net/http"
"os"
"regexp" "regexp"
"strings" "strings"
"sync" "sync"
...@@ -101,49 +103,71 @@ func crawl(url string, sourceURL string) { ...@@ -101,49 +103,71 @@ func crawl(url string, sourceURL string) {
func addProblem(url, errmsg string) { func addProblem(url, errmsg string) {
msg := fmt.Sprintf("Error on %s: %s (from %s)", url, errmsg, linkSources[url]) msg := fmt.Sprintf("Error on %s: %s (from %s)", url, errmsg, linkSources[url])
log.Print(msg) if *verbose {
log.Print(msg)
}
problems = append(problems, msg) problems = append(problems, msg)
} }
func crawlLoop() { func crawlLoop() {
for url := range urlq { for url := range urlq {
res, err := http.Get(url) if err := doCrawl(url); err != nil {
if err != nil { addProblem(url, err.Error())
addProblem(url, fmt.Sprintf("Error fetching: %v", err))
wg.Done()
continue
} }
if res.StatusCode != 200 { }
addProblem(url, fmt.Sprintf("Status code = %d", res.StatusCode)) }
wg.Done()
continue func doCrawl(url string) error {
} defer wg.Done()
slurp, err := ioutil.ReadAll(res.Body)
res.Body.Close() req, err := http.NewRequest("GET", url, nil)
if err != nil {
return err
}
res, err := http.DefaultTransport.RoundTrip(req)
if err != nil {
return err
}
// Handle redirects.
if res.StatusCode/100 == 3 {
newURL, err := res.Location()
if err != nil { if err != nil {
log.Fatalf("Error reading %s body: %v", url, err) return fmt.Errorf("resolving redirect: %v", err)
} }
if *verbose { if !strings.HasPrefix(newURL.String(), *root) {
log.Printf("Len of %s: %d", url, len(slurp)) // Skip off-site redirects.
return nil
} }
body := string(slurp) crawl(newURL.String(), url)
for _, ref := range localLinks(body) { return nil
if *verbose { }
log.Printf(" links to %s", ref) if res.StatusCode != 200 {
} return errors.New(res.Status)
dest := *root + ref }
linkSources[dest] = append(linkSources[dest], url) slurp, err := ioutil.ReadAll(res.Body)
crawl(dest, url) res.Body.Close()
if err != nil {
log.Fatalf("Error reading %s body: %v", url, err)
}
if *verbose {
log.Printf("Len of %s: %d", url, len(slurp))
}
body := string(slurp)
for _, ref := range localLinks(body) {
if *verbose {
log.Printf(" links to %s", ref)
} }
for _, id := range pageIDs(body) { dest := *root + ref
if *verbose { linkSources[dest] = append(linkSources[dest], url)
log.Printf(" url %s has #%s", url, id) crawl(dest, url)
} }
fragExists[urlFrag{url, id}] = true for _, id := range pageIDs(body) {
if *verbose {
log.Printf(" url %s has #%s", url, id)
} }
fragExists[urlFrag{url, id}] = true
wg.Done()
} }
return nil
} }
func main() { func main() {
...@@ -151,7 +175,6 @@ func main() { ...@@ -151,7 +175,6 @@ func main() {
go crawlLoop() go crawlLoop()
crawl(*root, "") crawl(*root, "")
crawl(*root+"/doc/go1.1.html", "")
wg.Wait() wg.Wait()
close(urlq) close(urlq)
...@@ -164,4 +187,7 @@ func main() { ...@@ -164,4 +187,7 @@ func main() {
for _, s := range problems { for _, s := range problems {
fmt.Println(s) fmt.Println(s)
} }
if len(problems) > 0 {
os.Exit(1)
}
} }
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment