Commit c390f92e authored by Kirill Smelkov's avatar Kirill Smelkov

X longest-match ref works + cache cleanup

parent dabcc517
Pipeline #114 failed with stage
...@@ -2,93 +2,56 @@ ...@@ -2,93 +2,56 @@
Handler for raw blob downloads Handler for raw blob downloads
*/ */
/*
Cache-Control: private
ETag: "4c10677531b44f555ebbdaff24a9b2d6"
X-Content-Type-Options: nosniff
Content-Disposition: inline
Content-Transfer-Encoding: binary
Content-Type: text/plain; charset=utf-8
*/
package main

import (
	"bufio"
	"bytes"
	"fmt"
	"io"
	"log"
	"net/http"
	"net/http/httptest"
	"regexp"
	"strings"
	"time"
)
// Reply from auth backend for "download from repo" authorization request
// auth backend reply type AuthReply struct {
type backendAuthReply struct {
w *httptest.ResponseRecorder // output of backend/preAuthorizeHandler w *httptest.ResponseRecorder // output of backend/preAuthorizeHandler
authorizationResponse authorizationResponse // parsed auth response from preAuthorizeHandler
} }
// ask auth backend whether download is ok for project // Entry in authorization reply cache
func askAuthBackend(u *upstream, project string) backendAuthReply { type AuthCacheEntry struct {
authReply := backendAuthReply{ AuthReply
w: httptest.NewRecorder(),
}
Tauth int64 // in seconds XXX do we strictly need this?
// request to verify whether download is possible via asking as git fetch would do // how many times this entry was hit when quiering auth cache during
// XXX privateToken not propagated, etc ... // the last refresh period.
reqDownloadAccess, err := http.NewRequest("GET", project + ".git/info/refs?service=git-upload-pack", nil) Nhit int64
if err != nil {
fail500(authReply.w, "GET git-upload-pack", err)
//return false // XXX not cache as it is just we cannot create request
return authReply
}
// swap original request to 'verify-download' one XXX "swap" not correct
r := &gitRequest{
Request: reqDownloadAccess,
u: u,
}
// downloadOk := false
preAuthorizeHandler(
func(w http.ResponseWriter, r *gitRequest) {
// if we ever get to this point - auth handler approved
// access and thus it is ok to download
// downloadOk = true
}, "") (authReply.w, r)
return authReply
} }
// Authorization reply cache
// {} project -> AuthCacheEntry
//
// XXX should be keyed not only by project (privateToken etc...)
var authCache = make(map[string]*AuthCacheEntry)
// authorization info, as replied by authBackend for a request // Time period for refreshing / removing unused entires in authCache
type authInfo struct { const authCacheRefresh = 5 * time.Second // XXX -> 30 or 60
authReply backendAuthReply
// XXX no need? // Goroutine to refresh auth cache entry periodically while it is used.
Tauth int64 // in seconds // if the entry is detected to be not used - remove it from cache and stop rereshing.
func authRefreshEntry(u *upstream, project string) {
// XXX auth := authCache[project]
// and then use auth without authCache lookup ?
Naccess int64
}
// {} project -> authInfo
// FIXME should be not only project (privateToken etc...)
var authCache = make(map[string]*authInfo)
const authCacheRefresh = 10 * time.Second // XXX -> 30
// refresh auth cache entry periodically while it is used
// if the entry is detected to be not used - remove it from cache and stop rereshing
func authRefresh(u *upstream, project string) {
for ;; { for ;; {
log.Printf("AUTH refresh sleep ...") //log.Printf("AUTH refresh sleep ...")
time.Sleep(authCacheRefresh) time.Sleep(authCacheRefresh)
// XXX lock? // XXX lock?
...@@ -98,8 +61,9 @@ func authRefresh(u *upstream, project string) { ...@@ -98,8 +61,9 @@ func authRefresh(u *upstream, project string) {
break // need to further refresh XXX ok? break // need to further refresh XXX ok?
} }
log.Printf("AUTH refresh - %v Naccess: %v", project, auth.Naccess) log.Printf("AUTH refresh - %v #hit: %v", project, auth.Nhit)
if auth.Naccess == 0 { // not used - we can remove and stop refreshing if auth.Nhit == 0 { // not used - we can remove and stop refreshing
log.Printf("AUTH - removing %v", project)
// XXX lock? // XXX lock?
delete(authCache, project) delete(authCache, project)
break break
...@@ -109,44 +73,81 @@ func authRefresh(u *upstream, project string) { ...@@ -109,44 +73,81 @@ func authRefresh(u *upstream, project string) {
authReply := askAuthBackend(u, project) authReply := askAuthBackend(u, project)
// XXX lock ? // XXX lock ?
auth.authReply = authReply auth.AuthReply = authReply
auth.Tauth = time.Now().Unix() auth.Tauth = time.Now().Unix()
auth.Naccess = 0 auth.Nhit = 0
} }
} }
// verify that download access is authorized by auth backend
func verifyDownloadAccess(w http.ResponseWriter, r *gitRequest, project string) bool { // Ask auth backend about whether download is ok for a project
func askAuthBackend(u *upstream, project string) AuthReply {
authReply := AuthReply{
w: httptest.NewRecorder(),
}
// Request to auth backend to verify whether download is possible via
// asking as git fetch would do.
// XXX privateToken not propagated, etc ...
reqDownloadAccess, err := http.NewRequest("GET", project + ".git/info/refs?service=git-upload-pack", nil)
if err != nil {
fail500(authReply.w, "GET git-upload-pack", err)
return authReply
}
// prepare everything and go through preAuthorizeHandler that will send
// request to auth backend and analyze/parse the reply into r.authorizationResponse
r := &gitRequest{
Request: reqDownloadAccess,
u: u,
}
preAuthorizeHandler(
func(w http.ResponseWriter, r *gitRequest) {
// if we ever get to this point - auth handler approved
// access and thus it is ok to download
// downloadOk = true
// NOTE we can use authorizationResponse.RepoPath != "" as test for this
}, "") (authReply.w, r)
// propagate authorizationResponse back and we are done
authReply.authorizationResponse = r.authorizationResponse
return authReply
}
// Verify that download access is authorized by auth backend
func verifyDownloadAccess(w http.ResponseWriter, u *upstream, project string) AuthReply {
// XXX do we need mutex to lock authCache ? // XXX do we need mutex to lock authCache ?
auth, ok := authCache[project] auth, ok := authCache[project]
if ok { if ok {
auth.Naccess++ auth.Nhit++
log.Printf("authReply cached %v ago: %v (hits: %v)", log.Printf("authReply for %v cached ago: %v (hits: %v)",
project,
time.Since(time.Unix(auth.Tauth, 0)), time.Since(time.Unix(auth.Tauth, 0)),
auth.authReply.authorizationResponse, auth.Nhit)
auth.Naccess) return auth.AuthReply // XXX make pointer?
r.authorizationResponse = auth.authReply.authorizationResponse
return (auth.authReply.RepoPath != "") // XXX ok?
} }
authReply := askAuthBackend(r.u, project) authReply := askAuthBackend(u, project)
// XXX do we need to lock authCache ? // XXX do we need to lock authCache ?
authCache[project] = &authInfo{authReply, time.Now().Unix(), 0} // store in cache and start cache entry refresher
go authRefresh(r.u, project) authCache[project] = &AuthCacheEntry{authReply, time.Now().Unix(), 0}
return (authReply.RepoPath != "") go authRefreshEntry(u, project)
return authReply
} }
// HTTP handler for .../raw/<ref>/path
var projectRe = regexp.MustCompile(`^/[\w\.-]+/[\w\.-]+/`) var projectRe = regexp.MustCompile(`^/[\w\.-]+/[\w\.-]+/`)
func handleGetBlobRaw(w http.ResponseWriter, r *gitRequest) { func handleGetBlobRaw(w http.ResponseWriter, r *gitRequest) {
Tstart := time.Now() // Extract project & refpath
// <project>/raw/branch/file -> <project>, branch/file
// extract project & refpath
// /namespace/project/raw/branch/file -> /namespace/project, branch/file
project := projectRe.FindString(r.Request.URL.Path) project := projectRe.FindString(r.Request.URL.Path)
refpath := r.Request.URL.Path[len(project):] refpath := r.Request.URL.Path[len(project):]
if project == "" { if project == "" {
...@@ -156,68 +157,147 @@ func handleGetBlobRaw(w http.ResponseWriter, r *gitRequest) { ...@@ -156,68 +157,147 @@ func handleGetBlobRaw(w http.ResponseWriter, r *gitRequest) {
// assert project[-1] == "/" // assert project[-1] == "/"
project = project[:len(project)-1] project = project[:len(project)-1]
// assert refpath[:4] == "raw/"
if refpath[:4] != "raw/" { if refpath[:4] != "raw/" {
fail500(w, "refpath != raw/...", nil) fail500(w, "refpath != raw/...", nil)
return return
} }
refpath = refpath[4:] refpath = refpath[4:]
if !verifyDownloadAccess(w, r, project) { // Query download access auth for this project
// XXX verifyDownloadAccess already emitted 403 headers etc ... authReply := verifyDownloadAccess(w, r.u, project)
if authReply.RepoPath == "" {
// access denied - copy auth reply to client in full -
// there are HTTP code and other headers / body relevant for
// about why access was denied.
for k, v := range authReply.w.HeaderMap {
w.Header()[k] = v
}
w.WriteHeader(authReply.w.Code)
io.Copy(w, authReply.w.Body)
return return
} }
handleGetBlobRaw2(w, r, refpath) // Access granted - we can emit the blob
emitBlob(w, authReply.RepoPath, refpath)
}
Tend := time.Now() /*
Cache-Control: private
ETag: "4c10677531b44f555ebbdaff24a9b2d6"
log.Printf("Tall: %s", Tend.Sub(Tstart)) X-Content-Type-Options: nosniff
} Content-Disposition: inline
Content-Transfer-Encoding: binary
Content-Type: text/plain; charset=utf-8
*/
func handleGetBlobRaw2(w http.ResponseWriter, r *gitRequest, refpath string) { func emitBlob(w http.ResponseWriter, repopath string, refpath string) {
Tstart := time.Now() // Communicate with `git cat-file --batch` trying refs from longest
// XXX we assume <ref>/<path> format and ref not containing "/" // to shortest prefix in refpath. This way we find longest-match for
// XXX but gitlab allows ref with / and tries to do longest-match to existing refs // ref and get object content in the end.
// TODO use reqDownloadAccess respose body - it contain all refs queryCmd := gitCommand("", "git", "--git-dir="+repopath, "cat-file", "--batch")
s := strings.SplitN(refpath, "/", 2) queryStdin, err := queryCmd.StdinPipe()
if len(s) != 2 { if err != nil {
fail500(w, "refpath split", nil) fail500(w, "git cat-file --batch; stdin", err)
return return
} }
defer queryStdin.Close()
queryStdout, err := queryCmd.StdoutPipe()
if err != nil {
fail500(w, "git cat-file --batch; stdout", err)
return
}
defer queryStdout.Close()
queryReader := bufio.NewReader(queryStdout)
ref, path := s[0], s[1] err = queryCmd.Start()
//log.Printf("BLOB2 %v %v", ref, path)
blobCmd := gitCommand(""/*XXX GL_ID*/, "git", "--git-dir="+r.RepoPath, "cat-file", "blob", "--", ref + ":" + path)
blobStdout, err := blobCmd.StdoutPipe()
if err != nil { if err != nil {
fail500(w, "handleGetBlobRaw", err) fail500(w, "git cat-file --batch; start", err)
return return
} }
defer blobStdout.Close() defer cleanUpProcessGroup(queryCmd) // XXX do we really need this?
// refpath components as vector
refpathv := strings.Split(refpath, "/")
// scan from right to left and try to change '/' -> ':' and see if it
// creates a correct object name. If it does - we read object content
// which follows.
// TODO handle communication timeout
var sha1 string
var type_ string
var size int64
for i := len(refpathv); i > 0; i-- {
ref := strings.Join(refpathv[:i], "/")
path := strings.Join(refpathv[i:], "/")
log.Printf("Trying %v %v", ref, path)
_, err := fmt.Fprintf(queryStdin, "%s:%s\n", ref, path)
if err != nil {
fail500(w, "git cat-file --batch; write", err)
return
}
reply, err := queryReader.ReadBytes('\n')
if err != nil {
fail500(w, "git cat-file --batch; read", err)
return
}
log.Printf("<- %s", reply)
// <object> SP missing LF
if bytes.HasSuffix(reply, []byte(" missing\n")) { // XXX byte literal?
continue
}
// <sha1> SP <type> SP <size> LF
_, err = fmt.Sscanf(string(reply), "%s %s %d\n", &sha1, &type_, &size)
if err != nil {
fail500(w, "git cat-file --batch; reply parse", err)
return;
}
if type_ != "blob" {
// XXX -> 404
fail500(w, fmt.Sprintf("git cat-file --batch-check; %v is not blob (is %v)", sha1, type_), nil)
return
}
// so we found this blob object
break
}
if err:= blobCmd.Start(); err != nil { // was the blob found?
fail500(w, "handleGetBlobRaw", err) if sha1 == "" {
// XXX -> 404
fail400(w, "Blob not found", nil)
return return
} }
defer cleanUpProcessGroup(blobCmd) // XXX do we need to cleanup whole group
log.Printf("blob found, size: %d", size)
//setRawHeaders(...) //setRawHeaders(...)
w.WriteHeader(200) // XXX too early w.WriteHeader(200) // XXX too early
//_, err = io.Copy(os.Stdout, blobStdout) log.Printf("111")
//if err != nil { // XXX better use queryStdout instead of queryReader, but we could be
// panic(err) // holding some tail bytes in queryReader after chat phase
//} _, err = io.CopyN(w, queryReader, size)
if _, err := io.Copy(w, blobStdout); err != nil { if err != nil {
logContext("io.Copy", err) logContext("io.CopyN", err)
return return
} }
if err := blobCmd.Wait(); err != nil { log.Printf("222")
logContext("wait", err) err = queryStdin.Close()
if err != nil {
fail500(w, "queryStdin.Close", nil)
return return
} }
Tend := time.Now() log.Printf("333")
log.Printf("Tblob2: %s", Tend.Sub(Tstart)) err = queryCmd.Wait()
if err != nil {
logContext("wait", err)
return
}
} }
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment