Commit 4bad5c79 authored by AndreKR's avatar AndreKR Committed by Matt Holt

Simple rewrite regex captures (#2592)

* More informative rewrite test output

When running rewrite tests, the output in case of a test failure now
includes not only the rewritten URLs but also the from URL.

* Move re-escaping to regexpMatches

This commit moves the code to post-process the match replacements from
ComplexRule to regexpMatches, so it can later be re-used for SimpleRule.

Also changes the comment in an attempt to better explain the reasoning
behind that code.

The required strings.Replacer is now built only once.

* Support regex captures in simple rewrite rules

Closes #2586
parent 81430e4a
...@@ -95,9 +95,15 @@ func (s *SimpleRule) Match(r *http.Request) bool { ...@@ -95,9 +95,15 @@ func (s *SimpleRule) Match(r *http.Request) bool {
// Rewrite rewrites the internal location of the current request. // Rewrite rewrites the internal location of the current request.
func (s *SimpleRule) Rewrite(fs http.FileSystem, r *http.Request) Result { func (s *SimpleRule) Rewrite(fs http.FileSystem, r *http.Request) Result {
matches := regexpMatches(s.Regexp, "/", r.URL.Path)
replacer := newReplacer(r)
for i := 1; i < len(matches); i++ {
replacer.Set(fmt.Sprint(i), matches[i])
}
// attempt rewrite // attempt rewrite
return To(fs, r, s.To, newReplacer(r)) return To(fs, r, s.To, replacer)
} }
// ComplexRule is a rewrite rule based on a regular expression // ComplexRule is a rewrite rule based on a regular expression
...@@ -198,16 +204,7 @@ func (r ComplexRule) Rewrite(fs http.FileSystem, req *http.Request) (re Result) ...@@ -198,16 +204,7 @@ func (r ComplexRule) Rewrite(fs http.FileSystem, req *http.Request) (re Result)
default: default:
// set regexp match variables {1}, {2} ... // set regexp match variables {1}, {2} ...
// url escaped values of ? and #.
q, f := url.QueryEscape("?"), url.QueryEscape("#")
for i := 1; i < len(matches); i++ { for i := 1; i < len(matches); i++ {
// Special case of unescaped # and ? by stdlib regexp.
// Reverse the unescape.
if strings.ContainsAny(matches[i], "?#") {
matches[i] = strings.NewReplacer("?", q, "#", f).Replace(matches[i])
}
replacer.Set(fmt.Sprint(i), matches[i]) replacer.Set(fmt.Sprint(i), matches[i])
} }
} }
...@@ -246,6 +243,8 @@ func (r ComplexRule) matchExt(rPath string) bool { ...@@ -246,6 +243,8 @@ func (r ComplexRule) matchExt(rPath string) bool {
return !mustUse return !mustUse
} }
var escaper = strings.NewReplacer("?", url.QueryEscape("?"), "#", url.QueryEscape("#"))
func regexpMatches(regexp *regexp.Regexp, base, rPath string) []string { func regexpMatches(regexp *regexp.Regexp, base, rPath string) []string {
if regexp != nil { if regexp != nil {
// include trailing slash in regexp if present // include trailing slash in regexp if present
...@@ -253,7 +252,19 @@ func regexpMatches(regexp *regexp.Regexp, base, rPath string) []string { ...@@ -253,7 +252,19 @@ func regexpMatches(regexp *regexp.Regexp, base, rPath string) []string {
if strings.HasSuffix(base, "/") { if strings.HasSuffix(base, "/") {
start-- start--
} }
return regexp.FindStringSubmatch(rPath[start:])
matches := regexp.FindStringSubmatch(rPath[start:])
// When processing a rewrite rule, the matching is done with an unescaped
// version of the old path. However, when such a match contains a ? or #
// character, the match cannot be used verbatim in the "to" URL because
// there it would be interpreted as query/fragment marker, so we re-escape
// those characters:
for i := 1; i < len(matches); i++ {
matches[i] = escaper.Replace(matches[i])
}
return matches
} }
return nil return nil
} }
......
...@@ -33,6 +33,7 @@ func TestRewrite(t *testing.T) { ...@@ -33,6 +33,7 @@ func TestRewrite(t *testing.T) {
newSimpleRule(t, "^/from$", "/to"), newSimpleRule(t, "^/from$", "/to"),
newSimpleRule(t, "^/a$", "/b"), newSimpleRule(t, "^/a$", "/b"),
newSimpleRule(t, "^/b$", "/b{uri}"), newSimpleRule(t, "^/b$", "/b{uri}"),
newSimpleRule(t, "^/simplereggrp/([0-9]+)([a-z]*)$", "/{1}/{2}/{query}"),
}, },
FileSys: http.Dir("."), FileSys: http.Dir("."),
} }
...@@ -113,6 +114,7 @@ func TestRewrite(t *testing.T) { ...@@ -113,6 +114,7 @@ func TestRewrite(t *testing.T) {
{"/hashtest/a%20%23%20test", "/a%20%23%20test"}, {"/hashtest/a%20%23%20test", "/a%20%23%20test"},
{"/hashtest/a%20%3F%20test", "/a%20%3F%20test"}, {"/hashtest/a%20%3F%20test", "/a%20%3F%20test"},
{"/hashtest/a%20%3F%23test", "/a%20%3F%23test"}, {"/hashtest/a%20%3F%23test", "/a%20%3F%23test"},
{"/simplereggrp/123abc?q", "/123/abc/q?q"},
} }
for i, test := range tests { for i, test := range tests {
...@@ -129,7 +131,7 @@ func TestRewrite(t *testing.T) { ...@@ -129,7 +131,7 @@ func TestRewrite(t *testing.T) {
} }
if got, want := rec.Body.String(), test.expectedTo; got != want { if got, want := rec.Body.String(), test.expectedTo; got != want {
t.Errorf("Test %d: Expected URL to be '%s' but was '%s'", i, want, got) t.Errorf("Test %d: Expected URL '%s' to be rewritten to '%s' but was rewritten to '%s'", i, test.from, want, got)
} }
} }
} }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment