fork from codeberg.org

This commit is contained in:
“xHuPo” 2024-09-12 18:35:01 +08:00
commit 50a258ea59
67 changed files with 4587 additions and 0 deletions

10
server/cache/interface.go vendored Normal file
View file

@ -0,0 +1,10 @@
package cache
import "time"
// ICache is an interface that defines how the pages server interacts with the cache.
type ICache interface {
Set(key string, value interface{}, ttl time.Duration) error
Get(key string) (interface{}, bool)
Remove(key string)
}

7
server/cache/memory.go vendored Normal file
View file

@ -0,0 +1,7 @@
package cache
import "github.com/OrlovEvgeny/go-mcache"
func NewInMemoryCache() ICache {
return mcache.New()
}

62
server/context/context.go Normal file
View file

@ -0,0 +1,62 @@
package context
import (
stdContext "context"
"net/http"
"pages-server/server/utils"
)
type Context struct {
RespWriter http.ResponseWriter
Req *http.Request
StatusCode int
}
func New(w http.ResponseWriter, r *http.Request) *Context {
return &Context{
RespWriter: w,
Req: r,
StatusCode: http.StatusOK,
}
}
func (c *Context) Context() stdContext.Context {
if c.Req != nil {
return c.Req.Context()
}
return stdContext.Background()
}
func (c *Context) Response() *http.Response {
if c.Req != nil && c.Req.Response != nil {
return c.Req.Response
}
return nil
}
func (c *Context) String(raw string, status ...int) {
code := http.StatusOK
if len(status) != 0 {
code = status[0]
}
c.RespWriter.WriteHeader(code)
_, _ = c.RespWriter.Write([]byte(raw))
}
func (c *Context) Redirect(uri string, statusCode int) {
http.Redirect(c.RespWriter, c.Req, uri, statusCode)
}
// Path returns the cleaned requested path.
func (c *Context) Path() string {
return utils.CleanPath(c.Req.URL.Path)
}
func (c *Context) Host() string {
return c.Req.URL.Host
}
func (c *Context) TrimHostPort() string {
return utils.TrimHostPort(c.Req.Host)
}

66
server/dns/dns.go Normal file
View file

@ -0,0 +1,66 @@
package dns
import (
"net"
"strings"
"time"
"github.com/hashicorp/golang-lru/v2/expirable"
)
const (
lookupCacheValidity = 30 * time.Second
defaultPagesRepo = "pages"
)
// TODO(#316): refactor to not use global variables
var lookupCache *expirable.LRU[string, string] = expirable.NewLRU[string, string](4096, nil, lookupCacheValidity)
// GetTargetFromDNS searches for CNAME or TXT entries on the request domain ending with MainDomainSuffix.
// If everything is fine, it returns the target data.
func GetTargetFromDNS(domain, mainDomainSuffix, firstDefaultBranch string) (targetOwner, targetRepo, targetBranch string) {
// Get CNAME or TXT
var cname string
var err error
if entry, ok := lookupCache.Get(domain); ok {
cname = entry
} else {
cname, err = net.LookupCNAME(domain)
cname = strings.TrimSuffix(cname, ".")
if err != nil || !strings.HasSuffix(cname, mainDomainSuffix) {
cname = ""
// TODO: check if the A record matches!
names, err := net.LookupTXT(domain)
if err == nil {
for _, name := range names {
name = strings.TrimSuffix(strings.TrimSpace(name), ".")
if strings.HasSuffix(name, mainDomainSuffix) {
cname = name
break
}
}
}
}
_ = lookupCache.Add(domain, cname)
}
if cname == "" {
return
}
cnameParts := strings.Split(strings.TrimSuffix(cname, mainDomainSuffix), ".")
targetOwner = cnameParts[len(cnameParts)-1]
if len(cnameParts) > 1 {
targetRepo = cnameParts[len(cnameParts)-2]
}
if len(cnameParts) > 2 {
targetBranch = cnameParts[len(cnameParts)-3]
}
if targetRepo == "" {
targetRepo = defaultPagesRepo
}
if targetBranch == "" && targetRepo != defaultPagesRepo {
targetBranch = firstDefaultBranch
}
// if targetBranch is still empty, the caller must find the default branch
return
}

127
server/gitea/cache.go Normal file
View file

@ -0,0 +1,127 @@
package gitea
import (
"bytes"
"fmt"
"io"
"net/http"
"time"
"github.com/rs/zerolog/log"
"pages-server/server/cache"
)
const (
// defaultBranchCacheTimeout specifies the timeout for the default branch cache. It can be quite long.
defaultBranchCacheTimeout = 15 * time.Minute
// branchExistenceCacheTimeout specifies the timeout for the branch timestamp & existence cache. It should be shorter
// than fileCacheTimeout, as that gets invalidated if the branch timestamp has changed. That way, repo changes will be
// picked up faster, while still allowing the content to be cached longer if nothing changes.
branchExistenceCacheTimeout = 5 * time.Minute
// fileCacheTimeout specifies the timeout for the file content cache - you might want to make this quite long, depending
// on your available memory.
// TODO: move as option into cache interface
fileCacheTimeout = 5 * time.Minute
// ownerExistenceCacheTimeout specifies the timeout for the existence of a repo/org
ownerExistenceCacheTimeout = 5 * time.Minute
// fileCacheSizeLimit limits the maximum file size that will be cached, and is set to 1 MB by default.
fileCacheSizeLimit = int64(1000 * 1000)
)
type FileResponse struct {
Exists bool
IsSymlink bool
ETag string
MimeType string
Body []byte
}
func (f FileResponse) IsEmpty() bool {
return len(f.Body) == 0
}
func (f FileResponse) createHttpResponse(cacheKey string) (header http.Header, statusCode int) {
header = make(http.Header)
if f.Exists {
statusCode = http.StatusOK
} else {
statusCode = http.StatusNotFound
}
if f.IsSymlink {
header.Set(giteaObjectTypeHeader, objTypeSymlink)
}
header.Set(ETagHeader, f.ETag)
header.Set(ContentTypeHeader, f.MimeType)
header.Set(ContentLengthHeader, fmt.Sprintf("%d", len(f.Body)))
header.Set(PagesCacheIndicatorHeader, "true")
log.Trace().Msgf("fileCache for %q used", cacheKey)
return header, statusCode
}
type BranchTimestamp struct {
Branch string
Timestamp time.Time
notFound bool
}
type writeCacheReader struct {
originalReader io.ReadCloser
buffer *bytes.Buffer
fileResponse *FileResponse
cacheKey string
cache cache.ICache
hasError bool
}
func (t *writeCacheReader) Read(p []byte) (n int, err error) {
log.Trace().Msgf("[cache] read %q", t.cacheKey)
n, err = t.originalReader.Read(p)
if err != nil && err != io.EOF {
log.Trace().Err(err).Msgf("[cache] original reader for %q has returned an error", t.cacheKey)
t.hasError = true
} else if n > 0 {
_, _ = t.buffer.Write(p[:n])
}
return
}
func (t *writeCacheReader) Close() error {
doWrite := !t.hasError
fc := *t.fileResponse
fc.Body = t.buffer.Bytes()
if fc.IsEmpty() {
log.Trace().Msg("[cache] file response is empty")
doWrite = false
}
if doWrite {
err := t.cache.Set(t.cacheKey, fc, fileCacheTimeout)
if err != nil {
log.Trace().Err(err).Msgf("[cache] writer for %q has returned an error", t.cacheKey)
}
}
log.Trace().Msgf("cacheReader for %q saved=%t closed", t.cacheKey, doWrite)
return t.originalReader.Close()
}
func (f FileResponse) CreateCacheReader(r io.ReadCloser, cache cache.ICache, cacheKey string) io.ReadCloser {
if r == nil || cache == nil || cacheKey == "" {
log.Error().Msg("could not create CacheReader")
return nil
}
return &writeCacheReader{
originalReader: r,
buffer: bytes.NewBuffer(make([]byte, 0)),
fileResponse: &f,
cache: cache,
cacheKey: cacheKey,
}
}

330
server/gitea/client.go Normal file
View file

@ -0,0 +1,330 @@
package gitea
import (
"bytes"
"errors"
"fmt"
"io"
"mime"
"net/http"
"net/url"
"path"
"strconv"
"strings"
"time"
"code.gitea.io/sdk/gitea"
"github.com/rs/zerolog/log"
"pages-server/config"
"pages-server/server/cache"
"pages-server/server/version"
)
var ErrorNotFound = errors.New("not found")
const (
// cache key prefixes
branchTimestampCacheKeyPrefix = "branchTime"
defaultBranchCacheKeyPrefix = "defaultBranch"
rawContentCacheKeyPrefix = "rawContent"
ownerExistenceKeyPrefix = "ownerExist"
// pages server
PagesCacheIndicatorHeader = "X-Pages-Cache"
symlinkReadLimit = 10000
// gitea
giteaObjectTypeHeader = "X-Gitea-Object-Type"
objTypeSymlink = "symlink"
// std
ETagHeader = "ETag"
ContentTypeHeader = "Content-Type"
ContentLengthHeader = "Content-Length"
)
type Client struct {
sdkClient *gitea.Client
responseCache cache.ICache
giteaRoot string
followSymlinks bool
supportLFS bool
forbiddenMimeTypes map[string]bool
defaultMimeType string
}
func NewClient(cfg config.ForgeConfig, respCache cache.ICache) (*Client, error) {
// url.Parse returns valid on almost anything...
rootURL, err := url.ParseRequestURI(cfg.Root)
if err != nil {
return nil, fmt.Errorf("invalid forgejo/gitea root url: %w", err)
}
giteaRoot := strings.TrimSuffix(rootURL.String(), "/")
stdClient := http.Client{Timeout: 10 * time.Second}
forbiddenMimeTypes := make(map[string]bool, len(cfg.ForbiddenMimeTypes))
for _, mimeType := range cfg.ForbiddenMimeTypes {
forbiddenMimeTypes[mimeType] = true
}
defaultMimeType := cfg.DefaultMimeType
if defaultMimeType == "" {
defaultMimeType = "application/octet-stream"
}
sdk, err := gitea.NewClient(
giteaRoot,
gitea.SetHTTPClient(&stdClient),
gitea.SetToken(cfg.Token),
gitea.SetUserAgent("pages-server/"+version.Version),
)
return &Client{
sdkClient: sdk,
responseCache: respCache,
giteaRoot: giteaRoot,
followSymlinks: cfg.FollowSymlinks,
supportLFS: cfg.LFSEnabled,
forbiddenMimeTypes: forbiddenMimeTypes,
defaultMimeType: defaultMimeType,
}, err
}
func (client *Client) ContentWebLink(targetOwner, targetRepo, branch, resource string) string {
return path.Join(client.giteaRoot, targetOwner, targetRepo, "src/branch", branch, resource)
}
func (client *Client) GiteaRawContent(targetOwner, targetRepo, ref, resource string) ([]byte, error) {
reader, _, _, err := client.ServeRawContent(targetOwner, targetRepo, ref, resource)
if err != nil {
return nil, err
}
defer reader.Close()
return io.ReadAll(reader)
}
func (client *Client) ServeRawContent(targetOwner, targetRepo, ref, resource string) (io.ReadCloser, http.Header, int, error) {
cacheKey := fmt.Sprintf("%s/%s/%s|%s|%s", rawContentCacheKeyPrefix, targetOwner, targetRepo, ref, resource)
log := log.With().Str("cache_key", cacheKey).Logger()
log.Trace().Msg("try file in cache")
// handle if cache entry exist
if cache, ok := client.responseCache.Get(cacheKey); ok {
cache := cache.(FileResponse)
cachedHeader, cachedStatusCode := cache.createHttpResponse(cacheKey)
// TODO: check against some timestamp mismatch?!?
if cache.Exists {
log.Debug().Msg("[cache] exists")
if cache.IsSymlink {
linkDest := string(cache.Body)
log.Debug().Msgf("[cache] follow symlink from %q to %q", resource, linkDest)
return client.ServeRawContent(targetOwner, targetRepo, ref, linkDest)
} else if !cache.IsEmpty() {
log.Debug().Msgf("[cache] return %d bytes", len(cache.Body))
return io.NopCloser(bytes.NewReader(cache.Body)), cachedHeader, cachedStatusCode, nil
} else if cache.IsEmpty() {
log.Debug().Msg("[cache] is empty")
}
}
}
log.Trace().Msg("file not in cache")
// not in cache, open reader via gitea api
reader, resp, err := client.sdkClient.GetFileReader(targetOwner, targetRepo, ref, resource, client.supportLFS)
if resp != nil {
switch resp.StatusCode {
case http.StatusOK:
// first handle symlinks
{
objType := resp.Header.Get(giteaObjectTypeHeader)
log.Trace().Msgf("server raw content object %q", objType)
if client.followSymlinks && objType == objTypeSymlink {
defer reader.Close()
// read limited chars for symlink
linkDestBytes, err := io.ReadAll(io.LimitReader(reader, symlinkReadLimit))
if err != nil {
return nil, nil, http.StatusInternalServerError, err
}
linkDest := strings.TrimSpace(string(linkDestBytes))
// handle relative links
// we first remove the link from the path, and make a relative join (resolve parent paths like "/../" too)
linkDest = path.Join(path.Dir(resource), linkDest)
// we store symlink not content to reduce duplicates in cache
fileResponse := FileResponse{
Exists: true,
IsSymlink: true,
Body: []byte(linkDest),
ETag: resp.Header.Get(ETagHeader),
}
log.Trace().Msgf("file response has %d bytes", len(fileResponse.Body))
if err := client.responseCache.Set(cacheKey, fileResponse, fileCacheTimeout); err != nil {
log.Error().Err(err).Msg("[cache] error on cache write")
}
log.Debug().Msgf("follow symlink from %q to %q", resource, linkDest)
return client.ServeRawContent(targetOwner, targetRepo, ref, linkDest)
}
}
// now we are sure it's content so set the MIME type
mimeType := client.getMimeTypeByExtension(resource)
resp.Response.Header.Set(ContentTypeHeader, mimeType)
if !shouldRespBeSavedToCache(resp.Response) {
return reader, resp.Response.Header, resp.StatusCode, err
}
// now we write to cache and respond at the same time
fileResp := FileResponse{
Exists: true,
ETag: resp.Header.Get(ETagHeader),
MimeType: mimeType,
}
return fileResp.CreateCacheReader(reader, client.responseCache, cacheKey), resp.Response.Header, resp.StatusCode, nil
case http.StatusNotFound:
if err := client.responseCache.Set(cacheKey, FileResponse{
Exists: false,
ETag: resp.Header.Get(ETagHeader),
}, fileCacheTimeout); err != nil {
log.Error().Err(err).Msg("[cache] error on cache write")
}
return nil, resp.Response.Header, http.StatusNotFound, ErrorNotFound
default:
return nil, resp.Response.Header, resp.StatusCode, fmt.Errorf("unexpected status code '%d'", resp.StatusCode)
}
}
return nil, nil, http.StatusInternalServerError, err
}
func (client *Client) GiteaGetRepoBranchTimestamp(repoOwner, repoName, branchName string) (*BranchTimestamp, error) {
cacheKey := fmt.Sprintf("%s/%s/%s/%s", branchTimestampCacheKeyPrefix, repoOwner, repoName, branchName)
if stamp, ok := client.responseCache.Get(cacheKey); ok && stamp != nil {
branchTimeStamp := stamp.(*BranchTimestamp)
if branchTimeStamp.notFound {
log.Trace().Msgf("[cache] use branch %q not found", branchName)
return &BranchTimestamp{}, ErrorNotFound
}
log.Trace().Msgf("[cache] use branch %q exist", branchName)
return branchTimeStamp, nil
}
branch, resp, err := client.sdkClient.GetRepoBranch(repoOwner, repoName, branchName)
if err != nil {
if resp != nil && resp.StatusCode == http.StatusNotFound {
log.Trace().Msgf("[cache] set cache branch %q not found", branchName)
if err := client.responseCache.Set(cacheKey, &BranchTimestamp{Branch: branchName, notFound: true}, branchExistenceCacheTimeout); err != nil {
log.Error().Err(err).Msg("[cache] error on cache write")
}
return &BranchTimestamp{}, ErrorNotFound
}
return &BranchTimestamp{}, err
}
if resp.StatusCode != http.StatusOK {
return &BranchTimestamp{}, fmt.Errorf("unexpected status code '%d'", resp.StatusCode)
}
stamp := &BranchTimestamp{
Branch: branch.Name,
Timestamp: branch.Commit.Timestamp,
}
log.Trace().Msgf("set cache branch [%s] exist", branchName)
if err := client.responseCache.Set(cacheKey, stamp, branchExistenceCacheTimeout); err != nil {
log.Error().Err(err).Msg("[cache] error on cache write")
}
return stamp, nil
}
func (client *Client) GiteaGetRepoDefaultBranch(repoOwner, repoName string) (string, error) {
cacheKey := fmt.Sprintf("%s/%s/%s", defaultBranchCacheKeyPrefix, repoOwner, repoName)
if branch, ok := client.responseCache.Get(cacheKey); ok && branch != nil {
return branch.(string), nil
}
repo, resp, err := client.sdkClient.GetRepo(repoOwner, repoName)
if err != nil {
return "", err
}
if resp.StatusCode != http.StatusOK {
return "", fmt.Errorf("unexpected status code '%d'", resp.StatusCode)
}
branch := repo.DefaultBranch
if err := client.responseCache.Set(cacheKey, branch, defaultBranchCacheTimeout); err != nil {
log.Error().Err(err).Msg("[cache] error on cache write")
}
return branch, nil
}
func (client *Client) GiteaCheckIfOwnerExists(owner string) (bool, error) {
cacheKey := fmt.Sprintf("%s/%s", ownerExistenceKeyPrefix, owner)
if exist, ok := client.responseCache.Get(cacheKey); ok && exist != nil {
return exist.(bool), nil
}
_, resp, err := client.sdkClient.GetUserInfo(owner)
if resp.StatusCode == http.StatusOK && err == nil {
if err := client.responseCache.Set(cacheKey, true, ownerExistenceCacheTimeout); err != nil {
log.Error().Err(err).Msg("[cache] error on cache write")
}
return true, nil
} else if resp.StatusCode != http.StatusNotFound {
return false, err
}
_, resp, err = client.sdkClient.GetOrg(owner)
if resp.StatusCode == http.StatusOK && err == nil {
if err := client.responseCache.Set(cacheKey, true, ownerExistenceCacheTimeout); err != nil {
log.Error().Err(err).Msg("[cache] error on cache write")
}
return true, nil
} else if resp.StatusCode != http.StatusNotFound {
return false, err
}
if err := client.responseCache.Set(cacheKey, false, ownerExistenceCacheTimeout); err != nil {
log.Error().Err(err).Msg("[cache] error on cache write")
}
return false, nil
}
func (client *Client) getMimeTypeByExtension(resource string) string {
mimeType := mime.TypeByExtension(path.Ext(resource))
mimeTypeSplit := strings.SplitN(mimeType, ";", 2)
if client.forbiddenMimeTypes[mimeTypeSplit[0]] || mimeType == "" {
mimeType = client.defaultMimeType
}
log.Trace().Msgf("probe mime of %q is %q", resource, mimeType)
return mimeType
}
func shouldRespBeSavedToCache(resp *http.Response) bool {
if resp == nil {
return false
}
contentLengthRaw := resp.Header.Get(ContentLengthHeader)
if contentLengthRaw == "" {
return false
}
contentLength, err := strconv.ParseInt(contentLengthRaw, 10, 64)
if err != nil {
log.Error().Err(err).Msg("could not parse content length")
}
// if content to big or could not be determined we not cache it
return contentLength > 0 && contentLength < fileCacheSizeLimit
}

114
server/handler/handler.go Normal file
View file

@ -0,0 +1,114 @@
package handler
import (
"net/http"
"strings"
"github.com/rs/zerolog/log"
"pages-server/config"
"pages-server/html"
"pages-server/server/cache"
"pages-server/server/context"
"pages-server/server/gitea"
)
const (
headerAccessControlAllowOrigin = "Access-Control-Allow-Origin"
headerAccessControlAllowMethods = "Access-Control-Allow-Methods"
defaultPagesRepo = "pages"
)
// Handler handles a single HTTP request to the web server.
func Handler(
cfg config.ServerConfig,
giteaClient *gitea.Client,
canonicalDomainCache, redirectsCache cache.ICache,
) http.HandlerFunc {
return func(w http.ResponseWriter, req *http.Request) {
log.Debug().Msg("\n----------------------------------------------------------")
log := log.With().Strs("Handler", []string{req.Host, req.RequestURI}).Logger()
ctx := context.New(w, req)
ctx.RespWriter.Header().Set("Server", "pages-server")
// Force new default from specification (since November 2020) - see https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Referrer-Policy#strict-origin-when-cross-origin
ctx.RespWriter.Header().Set("Referrer-Policy", "strict-origin-when-cross-origin")
// Enable browser caching for up to 10 minutes
ctx.RespWriter.Header().Set("Cache-Control", "public, max-age=600")
trimmedHost := ctx.TrimHostPort()
// Add HSTS for RawDomain and MainDomain
if hsts := getHSTSHeader(trimmedHost, cfg.MainDomain, cfg.RawDomain); hsts != "" {
ctx.RespWriter.Header().Set("Strict-Transport-Security", hsts)
}
// Handle all http methods
ctx.RespWriter.Header().Set("Allow", http.MethodGet+", "+http.MethodHead+", "+http.MethodOptions)
switch ctx.Req.Method {
case http.MethodOptions:
// return Allow header
ctx.RespWriter.WriteHeader(http.StatusNoContent)
return
case http.MethodGet,
http.MethodHead:
// end switch case and handle allowed requests
break
default:
// Block all methods not required for static pages
ctx.String("Method not allowed", http.StatusMethodNotAllowed)
return
}
// Block blacklisted paths (like ACME challenges)
for _, blacklistedPath := range cfg.BlacklistedPaths {
if strings.HasPrefix(ctx.Path(), blacklistedPath) {
html.ReturnErrorPage(ctx, "requested path is blacklisted", http.StatusForbidden)
return
}
}
// Allow CORS for specified domains
allowCors := false
for _, allowedCorsDomain := range cfg.AllowedCorsDomains {
if strings.EqualFold(trimmedHost, allowedCorsDomain) {
allowCors = true
break
}
}
if allowCors {
ctx.RespWriter.Header().Set(headerAccessControlAllowOrigin, "*")
ctx.RespWriter.Header().Set(headerAccessControlAllowMethods, http.MethodGet+", "+http.MethodHead)
}
// Prepare request information to Gitea
pathElements := strings.Split(strings.Trim(ctx.Path(), "/"), "/")
if cfg.RawDomain != "" && strings.EqualFold(trimmedHost, cfg.RawDomain) {
log.Debug().Msg("raw domain request detected")
handleRaw(log, ctx, giteaClient,
cfg.MainDomain,
trimmedHost,
pathElements,
canonicalDomainCache, redirectsCache)
} else if strings.HasSuffix(trimmedHost, cfg.MainDomain) {
log.Debug().Msg("subdomain request detected")
handleSubDomain(log, ctx, giteaClient,
cfg.MainDomain,
cfg.PagesBranches,
trimmedHost,
pathElements,
canonicalDomainCache, redirectsCache)
} else {
log.Debug().Msg("custom domain request detected")
handleCustomDomain(log, ctx, giteaClient,
cfg.MainDomain,
trimmedHost,
pathElements,
cfg.PagesBranches[0],
canonicalDomainCache, redirectsCache)
}
}
}

View file

@ -0,0 +1,73 @@
package handler
import (
"net/http"
"path"
"strings"
"pages-server/html"
"pages-server/server/cache"
"pages-server/server/context"
"pages-server/server/dns"
"pages-server/server/gitea"
"pages-server/server/upstream"
"github.com/rs/zerolog"
)
func handleCustomDomain(log zerolog.Logger, ctx *context.Context, giteaClient *gitea.Client,
mainDomainSuffix string,
trimmedHost string,
pathElements []string,
firstDefaultBranch string,
canonicalDomainCache, redirectsCache cache.ICache,
) {
// Serve pages from custom domains
targetOwner, targetRepo, targetBranch := dns.GetTargetFromDNS(trimmedHost, mainDomainSuffix, firstDefaultBranch)
if targetOwner == "" {
html.ReturnErrorPage(ctx,
"could not obtain repo owner from custom domain",
http.StatusFailedDependency)
return
}
pathParts := pathElements
canonicalLink := false
if strings.HasPrefix(pathElements[0], "@") {
targetBranch = pathElements[0][1:]
pathParts = pathElements[1:]
canonicalLink = true
}
// Try to use the given repo on the given branch or the default branch
log.Debug().Msg("custom domain preparations, now trying with details from DNS")
if targetOpt, works := tryBranch(log, ctx, giteaClient, &upstream.Options{
TryIndexPages: true,
TargetOwner: targetOwner,
TargetRepo: targetRepo,
TargetBranch: targetBranch,
TargetPath: path.Join(pathParts...),
}, canonicalLink); works {
canonicalDomain, valid := targetOpt.CheckCanonicalDomain(giteaClient, trimmedHost, mainDomainSuffix, canonicalDomainCache)
if !valid {
html.ReturnErrorPage(ctx, "domain not specified in <code>.domains</code> file", http.StatusMisdirectedRequest)
return
} else if canonicalDomain != trimmedHost {
// only redirect if the target is also a codeberg page!
targetOwner, _, _ = dns.GetTargetFromDNS(strings.SplitN(canonicalDomain, "/", 2)[0], mainDomainSuffix, firstDefaultBranch)
if targetOwner != "" {
ctx.Redirect("http://"+canonicalDomain+"/"+targetOpt.TargetPath, http.StatusTemporaryRedirect)
return
}
html.ReturnErrorPage(ctx, "target is no codeberg page", http.StatusFailedDependency)
return
}
log.Debug().Msg("tryBranch, now trying upstream 7")
tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, targetOpt, canonicalDomainCache, redirectsCache)
return
}
html.ReturnErrorPage(ctx, "could not find target for custom domain", http.StatusFailedDependency)
}

View file

@ -0,0 +1,71 @@
package handler
import (
"fmt"
"net/http"
"path"
"strings"
"github.com/rs/zerolog"
"pages-server/html"
"pages-server/server/cache"
"pages-server/server/context"
"pages-server/server/gitea"
"pages-server/server/upstream"
)
func handleRaw(log zerolog.Logger, ctx *context.Context, giteaClient *gitea.Client,
mainDomainSuffix string,
trimmedHost string,
pathElements []string,
canonicalDomainCache, redirectsCache cache.ICache,
) {
// Serve raw content from RawDomain
log.Debug().Msg("raw domain")
if len(pathElements) < 2 {
html.ReturnErrorPage(
ctx,
"a url in the form of <code>http://{domain}/{owner}/{repo}[/@{branch}]/{path}</code> is required",
http.StatusBadRequest,
)
return
}
// raw.codeberg.org/example/myrepo/@main/index.html
if len(pathElements) > 2 && strings.HasPrefix(pathElements[2], "@") {
log.Debug().Msg("raw domain preparations, now trying with specified branch")
if targetOpt, works := tryBranch(log, ctx, giteaClient, &upstream.Options{
ServeRaw: true,
TargetOwner: pathElements[0],
TargetRepo: pathElements[1],
TargetBranch: pathElements[2][1:],
TargetPath: path.Join(pathElements[3:]...),
}, true); works {
log.Trace().Msg("tryUpstream: serve raw domain with specified branch")
tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, targetOpt, canonicalDomainCache, redirectsCache)
return
}
log.Debug().Msg("missing branch info")
html.ReturnErrorPage(ctx, "missing branch info", http.StatusFailedDependency)
return
}
log.Debug().Msg("raw domain preparations, now trying with default branch")
if targetOpt, works := tryBranch(log, ctx, giteaClient, &upstream.Options{
TryIndexPages: false,
ServeRaw: true,
TargetOwner: pathElements[0],
TargetRepo: pathElements[1],
TargetPath: path.Join(pathElements[2:]...),
}, true); works {
log.Trace().Msg("tryUpstream: serve raw domain with default branch")
tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, targetOpt, canonicalDomainCache, redirectsCache)
} else {
html.ReturnErrorPage(ctx,
fmt.Sprintf("raw domain could not find repo <code>%s/%s</code> or repo is empty", targetOpt.TargetOwner, targetOpt.TargetRepo),
http.StatusNotFound)
}
}

View file

@ -0,0 +1,156 @@
package handler
import (
"fmt"
"net/http"
"path"
"strings"
"github.com/rs/zerolog"
"golang.org/x/exp/slices"
"pages-server/html"
"pages-server/server/cache"
"pages-server/server/context"
"pages-server/server/gitea"
"pages-server/server/upstream"
)
func handleSubDomain(log zerolog.Logger, ctx *context.Context, giteaClient *gitea.Client,
mainDomainSuffix string,
defaultPagesBranches []string,
trimmedHost string,
pathElements []string,
canonicalDomainCache, redirectsCache cache.ICache,
) {
// Serve pages from subdomains of MainDomainSuffix
log.Debug().Msg("main domain suffix")
targetOwner := strings.TrimSuffix(trimmedHost, mainDomainSuffix)
targetRepo := pathElements[0]
if targetOwner == "www" {
// www.codeberg.page redirects to codeberg.page // TODO: rm hardcoded - use cname?
ctx.Redirect("http://"+mainDomainSuffix[1:]+ctx.Path(), http.StatusPermanentRedirect)
return
}
// Check if the first directory is a repo with the second directory as a branch
// example.codeberg.page/myrepo/@main/index.html
if len(pathElements) > 1 && strings.HasPrefix(pathElements[1], "@") {
if targetRepo == defaultPagesRepo {
// example.codeberg.org/pages/@... redirects to example.codeberg.org/@...
ctx.Redirect("/"+strings.Join(pathElements[1:], "/"), http.StatusTemporaryRedirect)
return
}
log.Debug().Msg("main domain preparations, now trying with specified repo & branch")
if targetOpt, works := tryBranch(log, ctx, giteaClient, &upstream.Options{
TryIndexPages: true,
TargetOwner: targetOwner,
TargetRepo: pathElements[0],
TargetBranch: pathElements[1][1:],
TargetPath: path.Join(pathElements[2:]...),
}, true); works {
log.Trace().Msg("tryUpstream: serve with specified repo and branch")
tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, targetOpt, canonicalDomainCache, redirectsCache)
} else {
html.ReturnErrorPage(
ctx,
formatSetBranchNotFoundMessage(pathElements[1][1:], targetOwner, pathElements[0]),
http.StatusFailedDependency,
)
}
return
}
// Check if the first directory is a branch for the defaultPagesRepo
// example.codeberg.page/@main/index.html
if strings.HasPrefix(pathElements[0], "@") {
targetBranch := pathElements[0][1:]
// if the default pages branch can be determined exactly, it does not need to be set
if len(defaultPagesBranches) == 1 && slices.Contains(defaultPagesBranches, targetBranch) {
// example.codeberg.org/@pages/... redirects to example.codeberg.org/...
ctx.Redirect("/"+strings.Join(pathElements[1:], "/"), http.StatusTemporaryRedirect)
return
}
log.Debug().Msg("main domain preparations, now trying with specified branch")
if targetOpt, works := tryBranch(log, ctx, giteaClient, &upstream.Options{
TryIndexPages: true,
TargetOwner: targetOwner,
TargetRepo: defaultPagesRepo,
TargetBranch: targetBranch,
TargetPath: path.Join(pathElements[1:]...),
}, true); works {
log.Trace().Msg("tryUpstream: serve default pages repo with specified branch")
tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, targetOpt, canonicalDomainCache, redirectsCache)
} else {
html.ReturnErrorPage(
ctx,
formatSetBranchNotFoundMessage(targetBranch, targetOwner, defaultPagesRepo),
http.StatusFailedDependency,
)
}
return
}
for _, defaultPagesBranch := range defaultPagesBranches {
// Check if the first directory is a repo with a default pages branch
// example.codeberg.page/myrepo/index.html
// example.codeberg.page/{PAGES_BRANCHE}/... is not allowed here.
log.Debug().Msg("main domain preparations, now trying with specified repo")
if pathElements[0] != defaultPagesBranch {
if targetOpt, works := tryBranch(log, ctx, giteaClient, &upstream.Options{
TryIndexPages: true,
TargetOwner: targetOwner,
TargetRepo: pathElements[0],
TargetBranch: defaultPagesBranch,
TargetPath: path.Join(pathElements[1:]...),
}, false); works {
log.Debug().Msg("tryBranch, now trying upstream 5")
tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, targetOpt, canonicalDomainCache, redirectsCache)
return
}
}
// Try to use the defaultPagesRepo on an default pages branch
// example.codeberg.page/index.html
log.Debug().Msg("main domain preparations, now trying with default repo")
if targetOpt, works := tryBranch(log, ctx, giteaClient, &upstream.Options{
TryIndexPages: true,
TargetOwner: targetOwner,
TargetRepo: defaultPagesRepo,
TargetBranch: defaultPagesBranch,
TargetPath: path.Join(pathElements...),
}, false); works {
log.Debug().Msg("tryBranch, now trying upstream 6")
tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, targetOpt, canonicalDomainCache, redirectsCache)
return
}
}
// Try to use the defaultPagesRepo on its default branch
// example.codeberg.page/index.html
log.Debug().Msg("main domain preparations, now trying with default repo/branch")
if targetOpt, works := tryBranch(log, ctx, giteaClient, &upstream.Options{
TryIndexPages: true,
TargetOwner: targetOwner,
TargetRepo: defaultPagesRepo,
TargetPath: path.Join(pathElements...),
}, false); works {
log.Debug().Msg("tryBranch, now trying upstream 6")
tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, targetOpt, canonicalDomainCache, redirectsCache)
return
}
// Couldn't find a valid repo/branch
html.ReturnErrorPage(ctx,
fmt.Sprintf("could not find a valid repository or branch for repository: <code>%s</code>", targetRepo),
http.StatusNotFound)
}
func formatSetBranchNotFoundMessage(branch, owner, repo string) string {
return fmt.Sprintf("explicitly set branch <code>%q</code> does not exist at <code>%s/%s</code>", branch, owner, repo)
}

View file

@ -0,0 +1,59 @@
package handler
import (
"net/http"
"net/http/httptest"
"testing"
"time"
"pages-server/config"
"pages-server/server/cache"
"pages-server/server/gitea"
"github.com/rs/zerolog/log"
)
func TestHandlerPerformance(t *testing.T) {
cfg := config.ForgeConfig{
Root: "https://codeberg.org",
Token: "",
LFSEnabled: false,
FollowSymlinks: false,
}
giteaClient, _ := gitea.NewClient(cfg, cache.NewInMemoryCache())
serverCfg := config.ServerConfig{
MainDomain: "codeberg.page",
RawDomain: "raw.codeberg.page",
BlacklistedPaths: []string{
"/.well-known/acme-challenge/",
},
AllowedCorsDomains: []string{"raw.codeberg.org", "fonts.codeberg.org", "design.codeberg.org"},
PagesBranches: []string{"pages"},
}
testHandler := Handler(serverCfg, giteaClient, cache.NewInMemoryCache(), cache.NewInMemoryCache())
testCase := func(uri string, status int) {
t.Run(uri, func(t *testing.T) {
req := httptest.NewRequest("GET", uri, http.NoBody)
w := httptest.NewRecorder()
log.Printf("Start: %v\n", time.Now())
start := time.Now()
testHandler(w, req)
end := time.Now()
log.Printf("Done: %v\n", time.Now())
resp := w.Result()
if resp.StatusCode != status {
t.Errorf("request failed with status code %d", resp.StatusCode)
} else {
t.Logf("request took %d milliseconds", end.Sub(start).Milliseconds())
}
})
}
testCase("https://mondstern.codeberg.page/", 404) // TODO: expect 200
testCase("https://codeberg.page/", 404) // TODO: expect 200
testCase("https://example.momar.xyz/", 424)
}

15
server/handler/hsts.go Normal file
View file

@ -0,0 +1,15 @@
package handler
import (
"strings"
)
// getHSTSHeader returns a HSTS header with includeSubdomains & preload for MainDomainSuffix and RawDomain, or an empty
// string for custom domains.
func getHSTSHeader(host, mainDomainSuffix, rawDomain string) string {
if strings.HasSuffix(host, mainDomainSuffix) || strings.EqualFold(host, rawDomain) {
return "max-age=63072000; includeSubdomains; preload"
} else {
return ""
}
}

78
server/handler/try.go Normal file
View file

@ -0,0 +1,78 @@
package handler
import (
"fmt"
"net/http"
"strings"
"github.com/rs/zerolog"
"pages-server/html"
"pages-server/server/cache"
"pages-server/server/context"
"pages-server/server/gitea"
"pages-server/server/upstream"
)
// tryUpstream forwards the target request to the Gitea API, and shows an error page on failure.
func tryUpstream(ctx *context.Context, giteaClient *gitea.Client,
mainDomainSuffix, trimmedHost string,
options *upstream.Options,
canonicalDomainCache cache.ICache,
redirectsCache cache.ICache,
) {
// check if a canonical domain exists on a request on MainDomain
if strings.HasSuffix(trimmedHost, mainDomainSuffix) && !options.ServeRaw {
canonicalDomain, _ := options.CheckCanonicalDomain(giteaClient, "", mainDomainSuffix, canonicalDomainCache)
if !strings.HasSuffix(strings.SplitN(canonicalDomain, "/", 2)[0], mainDomainSuffix) {
canonicalPath := ctx.Req.RequestURI
if options.TargetRepo != defaultPagesRepo {
path := strings.SplitN(canonicalPath, "/", 3)
if len(path) >= 3 {
canonicalPath = "/" + path[2]
}
}
ctx.Redirect("http://"+canonicalDomain+canonicalPath, http.StatusTemporaryRedirect)
return
}
}
// Add host for debugging.
options.Host = trimmedHost
// Try to request the file from the Gitea API
if !options.Upstream(ctx, giteaClient, redirectsCache) {
html.ReturnErrorPage(ctx, fmt.Sprintf("Forge returned %d %s", ctx.StatusCode, http.StatusText(ctx.StatusCode)), ctx.StatusCode)
}
}
// tryBranch checks if a branch exists and populates the target variables. If canonicalLink is non-empty,
// it will also disallow search indexing and add a Link header to the canonical URL.
func tryBranch(log zerolog.Logger, ctx *context.Context, giteaClient *gitea.Client,
targetOptions *upstream.Options, canonicalLink bool,
) (*upstream.Options, bool) {
if targetOptions.TargetOwner == "" || targetOptions.TargetRepo == "" {
log.Debug().Msg("tryBranch: owner or repo is empty")
return nil, false
}
// Replace "~" to "/" so we can access branch that contains slash character
// Branch name cannot contain "~" so doing this is okay
targetOptions.TargetBranch = strings.ReplaceAll(targetOptions.TargetBranch, "~", "/")
// Check if the branch exists, otherwise treat it as a file path
branchExist, _ := targetOptions.GetBranchTimestamp(giteaClient)
if !branchExist {
log.Debug().Msg("tryBranch: branch doesn't exist")
return nil, false
}
if canonicalLink {
// Hide from search machines & add canonical link
ctx.RespWriter.Header().Set("X-Robots-Tag", "noarchive, noindex")
ctx.RespWriter.Header().Set("Link", targetOptions.ContentWebLink(giteaClient)+"; rel=\"canonical\"")
}
log.Debug().Msg("tryBranch: true")
return targetOptions, true
}

21
server/profiling.go Normal file
View file

@ -0,0 +1,21 @@
package server
import (
"net/http"
_ "net/http/pprof"
"github.com/rs/zerolog/log"
)
func StartProfilingServer(listeningAddress string) {
server := &http.Server{
Addr: listeningAddress,
Handler: http.DefaultServeMux,
}
log.Info().Msgf("Starting debug server on %s", listeningAddress)
go func() {
log.Fatal().Err(server.ListenAndServe()).Msg("Failed to start debug server")
}()
}

94
server/startup.go Normal file
View file

@ -0,0 +1,94 @@
package server
import (
"fmt"
"net"
"net/http"
"os"
"strings"
"github.com/rs/zerolog"
"github.com/rs/zerolog/log"
"github.com/urfave/cli/v2"
"pages-server/config"
"pages-server/server/cache"
"pages-server/server/gitea"
"pages-server/server/handler"
)
// Serve sets up and starts the web server.
func Serve(ctx *cli.Context) error {
// initialize logger with Trace, overridden later with actual level
log.Logger = zerolog.New(zerolog.ConsoleWriter{Out: os.Stderr}).With().Timestamp().Logger().Level(zerolog.TraceLevel)
cfg, err := config.ReadConfig(ctx)
if err != nil {
log.Error().Err(err).Msg("could not read config")
}
config.MergeConfig(ctx, cfg)
// Initialize the logger.
logLevel, err := zerolog.ParseLevel(cfg.LogLevel)
if err != nil {
return err
}
log.Logger = zerolog.New(zerolog.ConsoleWriter{Out: os.Stderr}).With().Timestamp().Logger().Level(logLevel)
listeningHTTPAddress := fmt.Sprintf("%s:%d", cfg.Server.Host, cfg.Server.Port)
if cfg.Server.RawDomain != "" {
cfg.Server.AllowedCorsDomains = append(cfg.Server.AllowedCorsDomains, cfg.Server.RawDomain)
}
// Make sure MainDomain has a leading dot
if !strings.HasPrefix(cfg.Server.MainDomain, ".") {
// TODO make this better
cfg.Server.MainDomain = "." + cfg.Server.MainDomain
}
if len(cfg.Server.PagesBranches) == 0 {
return fmt.Errorf("no default branches set (PAGES_BRANCHES)")
}
// canonicalDomainCache stores canonical domains
canonicalDomainCache := cache.NewInMemoryCache()
// redirectsCache stores redirects in _redirects files
redirectsCache := cache.NewInMemoryCache()
// clientResponseCache stores responses from the Gitea server
clientResponseCache := cache.NewInMemoryCache()
giteaClient, err := gitea.NewClient(cfg.Forge, clientResponseCache)
if err != nil {
return fmt.Errorf("could not create new gitea client: %v", err)
}
// Create listener
log.Info().Msgf("Create TCP listener on %s", listeningHTTPAddress)
listener, err := net.Listen("tcp", listeningHTTPAddress)
if err != nil {
return fmt.Errorf("couldn't create listener: %v", err)
}
// // Create listener for http and start listening
// go func() {
// log.Info().Msgf("Start HTTP server listening on %s", listeningHTTPAddress)
// err := http.ListenAndServe(listeningHTTPAddress, nil)
// if err != nil {
// log.Error().Err(err).Msg("Couldn't start HTTP server")
// }
// }()
if ctx.IsSet("enable-profiling") {
StartProfilingServer(ctx.String("profiling-address"))
}
// Create handler based on settings
httpHandler := handler.Handler(cfg.Server, giteaClient, canonicalDomainCache, redirectsCache)
// Start the listener
log.Info().Msgf("Start server using TCP listener on %s", listener.Addr())
return http.Serve(listener, httpHandler)
}

View file

@ -0,0 +1,70 @@
package upstream
import (
"errors"
"strings"
"time"
"github.com/rs/zerolog/log"
"pages-server/server/cache"
"pages-server/server/gitea"
)
// canonicalDomainCacheTimeout specifies the timeout for the canonical domain cache.
var canonicalDomainCacheTimeout = 15 * time.Minute
const canonicalDomainConfig = ".domains"
// CheckCanonicalDomain returns the canonical domain specified in the repo (using the `.domains` file).
func (o *Options) CheckCanonicalDomain(giteaClient *gitea.Client, actualDomain, mainDomainSuffix string, canonicalDomainCache cache.ICache) (domain string, valid bool) {
// Check if this request is cached.
if cachedValue, ok := canonicalDomainCache.Get(o.TargetOwner + "/" + o.TargetRepo + "/" + o.TargetBranch); ok {
domains := cachedValue.([]string)
for _, domain := range domains {
if domain == actualDomain {
valid = true
break
}
}
return domains[0], valid
}
body, err := giteaClient.GiteaRawContent(o.TargetOwner, o.TargetRepo, o.TargetBranch, canonicalDomainConfig)
if err != nil && !errors.Is(err, gitea.ErrorNotFound) {
log.Error().Err(err).Msgf("could not read %s of %s/%s", canonicalDomainConfig, o.TargetOwner, o.TargetRepo)
}
var domains []string
for _, domain := range strings.Split(string(body), "\n") {
domain = strings.ToLower(domain)
domain = strings.TrimSpace(domain)
domain = strings.TrimPrefix(domain, "http://")
domain = strings.TrimPrefix(domain, "http://")
if domain != "" && !strings.HasPrefix(domain, "#") && !strings.ContainsAny(domain, "\t /") && strings.ContainsRune(domain, '.') {
domains = append(domains, domain)
}
if domain == actualDomain {
valid = true
}
}
// Add [owner].[pages-domain] as valid domain.
domains = append(domains, o.TargetOwner+mainDomainSuffix)
if domains[len(domains)-1] == actualDomain {
valid = true
}
// If the target repository isn't called pages, add `/[repository]` to the
// previous valid domain.
if o.TargetRepo != "" && o.TargetRepo != "pages" {
domains[len(domains)-1] += "/" + o.TargetRepo
}
// Add result to cache.
_ = canonicalDomainCache.Set(o.TargetOwner+"/"+o.TargetRepo+"/"+o.TargetBranch, domains, canonicalDomainCacheTimeout)
// Return the first domain from the list and return if any of the domains
// matched the requested domain.
return domains[0], valid
}

28
server/upstream/header.go Normal file
View file

@ -0,0 +1,28 @@
package upstream
import (
"net/http"
"time"
"pages-server/server/context"
"pages-server/server/gitea"
)
// setHeader set values to response header
func (o *Options) setHeader(ctx *context.Context, header http.Header) {
if eTag := header.Get(gitea.ETagHeader); eTag != "" {
ctx.RespWriter.Header().Set(gitea.ETagHeader, eTag)
}
if cacheIndicator := header.Get(gitea.PagesCacheIndicatorHeader); cacheIndicator != "" {
ctx.RespWriter.Header().Set(gitea.PagesCacheIndicatorHeader, cacheIndicator)
}
if length := header.Get(gitea.ContentLengthHeader); length != "" {
ctx.RespWriter.Header().Set(gitea.ContentLengthHeader, length)
}
if mime := header.Get(gitea.ContentTypeHeader); mime == "" || o.ServeRaw {
ctx.RespWriter.Header().Set(gitea.ContentTypeHeader, rawMime)
} else {
ctx.RespWriter.Header().Set(gitea.ContentTypeHeader, mime)
}
ctx.RespWriter.Header().Set(headerLastModified, o.BranchTimestamp.In(time.UTC).Format(http.TimeFormat))
}

47
server/upstream/helper.go Normal file
View file

@ -0,0 +1,47 @@
package upstream
import (
"errors"
"fmt"
"github.com/rs/zerolog/log"
"pages-server/server/gitea"
)
// GetBranchTimestamp finds the default branch (if branch is "") and save branch and it's last modification time to Options
func (o *Options) GetBranchTimestamp(giteaClient *gitea.Client) (bool, error) {
log := log.With().Strs("BranchInfo", []string{o.TargetOwner, o.TargetRepo, o.TargetBranch}).Logger()
if o.TargetBranch == "" {
// Get default branch
defaultBranch, err := giteaClient.GiteaGetRepoDefaultBranch(o.TargetOwner, o.TargetRepo)
if err != nil {
log.Err(err).Msg("Couldn't fetch default branch from repository")
return false, err
}
log.Debug().Msgf("Successfully fetched default branch %q from Gitea", defaultBranch)
o.TargetBranch = defaultBranch
}
timestamp, err := giteaClient.GiteaGetRepoBranchTimestamp(o.TargetOwner, o.TargetRepo, o.TargetBranch)
if err != nil {
if !errors.Is(err, gitea.ErrorNotFound) {
log.Error().Err(err).Msg("Could not get latest commit timestamp from branch")
}
return false, err
}
if timestamp == nil || timestamp.Branch == "" {
return false, fmt.Errorf("empty response")
}
log.Debug().Msgf("Successfully fetched latest commit timestamp from branch: %#v", timestamp)
o.BranchTimestamp = timestamp.Timestamp
o.TargetBranch = timestamp.Branch
return true, nil
}
func (o *Options) ContentWebLink(giteaClient *gitea.Client) string {
return giteaClient.ContentWebLink(o.TargetOwner, o.TargetRepo, o.TargetBranch, o.TargetPath) + "; rel=\"canonical\""
}

View file

@ -0,0 +1,107 @@
package upstream
import (
"strconv"
"strings"
"time"
"pages-server/server/cache"
"pages-server/server/context"
"pages-server/server/gitea"
"github.com/rs/zerolog/log"
)
type Redirect struct {
From string
To string
StatusCode int
}
// rewriteURL returns the destination URL and true if r matches reqURL.
func (r *Redirect) rewriteURL(reqURL string) (dstURL string, ok bool) {
// check if from url matches request url
if strings.TrimSuffix(r.From, "/") == strings.TrimSuffix(reqURL, "/") {
return r.To, true
}
// handle wildcard redirects
if strings.HasSuffix(r.From, "/*") {
trimmedFromURL := strings.TrimSuffix(r.From, "/*")
if reqURL == trimmedFromURL || strings.HasPrefix(reqURL, trimmedFromURL+"/") {
if strings.Contains(r.To, ":splat") {
matched := strings.TrimPrefix(reqURL, trimmedFromURL)
matched = strings.TrimPrefix(matched, "/")
return strings.ReplaceAll(r.To, ":splat", matched), true
}
return r.To, true
}
}
return "", false
}
// redirectsCacheTimeout specifies the timeout for the redirects cache.
var redirectsCacheTimeout = 10 * time.Minute
const redirectsConfig = "_redirects"
// getRedirects returns redirects specified in the _redirects file.
func (o *Options) getRedirects(giteaClient *gitea.Client, redirectsCache cache.ICache) []Redirect {
var redirects []Redirect
cacheKey := o.TargetOwner + "/" + o.TargetRepo + "/" + o.TargetBranch
// Check for cached redirects
if cachedValue, ok := redirectsCache.Get(cacheKey); ok {
redirects = cachedValue.([]Redirect)
} else {
// Get _redirects file and parse
body, err := giteaClient.GiteaRawContent(o.TargetOwner, o.TargetRepo, o.TargetBranch, redirectsConfig)
if err == nil {
for _, line := range strings.Split(string(body), "\n") {
redirectArr := strings.Fields(line)
// Ignore comments and invalid lines
if strings.HasPrefix(line, "#") || len(redirectArr) < 2 {
continue
}
// Get redirect status code
statusCode := 301
if len(redirectArr) == 3 {
statusCode, err = strconv.Atoi(redirectArr[2])
if err != nil {
log.Info().Err(err).Msgf("could not read %s of %s/%s", redirectsConfig, o.TargetOwner, o.TargetRepo)
}
}
redirects = append(redirects, Redirect{
From: redirectArr[0],
To: redirectArr[1],
StatusCode: statusCode,
})
}
}
_ = redirectsCache.Set(cacheKey, redirects, redirectsCacheTimeout)
}
return redirects
}
func (o *Options) matchRedirects(ctx *context.Context, giteaClient *gitea.Client, redirects []Redirect, redirectsCache cache.ICache) (final bool) {
reqURL := ctx.Req.RequestURI
// remove repo and branch from request url
reqURL = strings.TrimPrefix(reqURL, "/"+o.TargetRepo)
reqURL = strings.TrimPrefix(reqURL, "/@"+o.TargetBranch)
for _, redirect := range redirects {
if dstURL, ok := redirect.rewriteURL(reqURL); ok {
// do rewrite if status code is 200
if redirect.StatusCode == 200 {
o.TargetPath = dstURL
o.Upstream(ctx, giteaClient, redirectsCache)
} else {
ctx.Redirect(dstURL, redirect.StatusCode)
}
return true
}
}
return false
}

View file

@ -0,0 +1,36 @@
package upstream
import (
"testing"
)
func TestRedirect_rewriteURL(t *testing.T) {
for _, tc := range []struct {
redirect Redirect
reqURL string
wantDstURL string
wantOk bool
}{
{Redirect{"/", "/dst", 200}, "/", "/dst", true},
{Redirect{"/", "/dst", 200}, "/foo", "", false},
{Redirect{"/src", "/dst", 200}, "/src", "/dst", true},
{Redirect{"/src", "/dst", 200}, "/foo", "", false},
{Redirect{"/src", "/dst", 200}, "/src/foo", "", false},
{Redirect{"/*", "/dst", 200}, "/", "/dst", true},
{Redirect{"/*", "/dst", 200}, "/src", "/dst", true},
{Redirect{"/src/*", "/dst/:splat", 200}, "/src", "/dst/", true},
{Redirect{"/src/*", "/dst/:splat", 200}, "/src/", "/dst/", true},
{Redirect{"/src/*", "/dst/:splat", 200}, "/src/foo", "/dst/foo", true},
{Redirect{"/src/*", "/dst/:splat", 200}, "/src/foo/bar", "/dst/foo/bar", true},
{Redirect{"/src/*", "/dst/:splatsuffix", 200}, "/src/foo", "/dst/foosuffix", true},
{Redirect{"/src/*", "/dst:splat", 200}, "/src/foo", "/dstfoo", true},
{Redirect{"/src/*", "/dst", 200}, "/srcfoo", "", false},
// This is the example from FEATURES.md:
{Redirect{"/articles/*", "/posts/:splat", 302}, "/articles/2022/10/12/post-1/", "/posts/2022/10/12/post-1/", true},
} {
if dstURL, ok := tc.redirect.rewriteURL(tc.reqURL); dstURL != tc.wantDstURL || ok != tc.wantOk {
t.Errorf("%#v.rewriteURL(%q) = %q, %v; want %q, %v",
tc.redirect, tc.reqURL, dstURL, ok, tc.wantDstURL, tc.wantOk)
}
}
}

225
server/upstream/upstream.go Normal file
View file

@ -0,0 +1,225 @@
package upstream
import (
"errors"
"fmt"
"io"
"net/http"
"strings"
"time"
"github.com/rs/zerolog"
"github.com/rs/zerolog/log"
"pages-server/html"
"pages-server/server/cache"
"pages-server/server/context"
"pages-server/server/gitea"
)
const (
headerLastModified = "Last-Modified"
headerIfModifiedSince = "If-Modified-Since"
rawMime = "text/plain; charset=utf-8"
)
var upstreamIndexPages = []string{
"index.html",
}
var upstreamNotFoundPages = []string{
"404.html",
}
type Options struct {
TargetOwner string
TargetRepo string
TargetBranch string
TargetPath string
Host string
TryIndexPages bool
BranchTimestamp time.Time
appendTrailingSlash bool
redirectIfExists string
ServeRaw bool
}
func (o *Options) Upstream(ctx *context.Context, giteaClient *gitea.Client, redirectsCache cache.ICache) bool {
log := log.With().Strs("upstream", []string{o.TargetOwner, o.TargetRepo, o.TargetBranch, o.TargetPath}).Logger()
log.Debug().Msg("Start")
if o.TargetOwner == "" || o.TargetRepo == "" {
html.ReturnErrorPage(ctx, "forge client: either repo owner or name info is missing", http.StatusBadRequest)
return false
}
if o.BranchTimestamp.IsZero() {
branchExist, err := o.GetBranchTimestamp(giteaClient)
if err != nil && errors.Is(err, gitea.ErrorNotFound) || !branchExist {
html.ReturnErrorPage(ctx,
fmt.Sprintf("branch <code>%q</code> for <code>%s/%s</code> not found", o.TargetBranch, o.TargetOwner, o.TargetRepo),
http.StatusNotFound)
return false
}
if err != nil {
html.ReturnErrorPage(ctx,
fmt.Sprintf("could not get timestamp of branch <code>%q</code>: '%v'", o.TargetBranch, err),
http.StatusFailedDependency)
return false
}
}
if ctx.Response() != nil {
ifModifiedSince, err := time.Parse(time.RFC1123, ctx.Response().Header.Get(headerIfModifiedSince))
if err == nil && ifModifiedSince.After(o.BranchTimestamp) {
ctx.RespWriter.WriteHeader(http.StatusNotModified)
log.Trace().Msg("check response against last modified: valid")
return true
}
log.Trace().Msg("check response against last modified: outdated")
}
reader, header, statusCode, err := giteaClient.ServeRawContent(o.TargetOwner, o.TargetRepo, o.TargetBranch, o.TargetPath)
if err != nil {
handleGiteaError(ctx, log, err, statusCode)
return false
}
defer func() {
if reader != nil {
reader.Close()
}
}()
if errors.Is(err, gitea.ErrorNotFound) {
handleNotFound(ctx, log, giteaClient, redirectsCache, o)
return false
}
if err != nil || reader == nil || statusCode != http.StatusOK {
handleUnexpectedError(ctx, log, err, statusCode)
return false
}
handleRedirects(ctx, log, o, redirectsCache)
setHeaders(ctx, header)
writeResponse(ctx, reader)
return true
}
func handleGiteaError(ctx *context.Context, log zerolog.Logger, err error, statusCode int) {
var msg string
if err != nil {
msg = "forge client: returned unexpected error"
log.Error().Err(err).Msg(msg)
msg = fmt.Sprintf("%s: '%v'", msg, err)
}
if statusCode != http.StatusOK {
msg = fmt.Sprintf("forge client: couldn't fetch contents: <code>%d - %s</code>", statusCode, http.StatusText(statusCode))
log.Error().Msg(msg)
}
html.ReturnErrorPage(ctx, msg, http.StatusInternalServerError)
}
func handleNotFound(ctx *context.Context, log zerolog.Logger, giteaClient *gitea.Client, redirectsCache cache.ICache, o *Options) {
log.Debug().Msg("Handling not found error")
redirects := o.getRedirects(giteaClient, redirectsCache)
if o.matchRedirects(ctx, giteaClient, redirects, redirectsCache) {
log.Trace().Msg("redirect")
return
}
if o.TryIndexPages {
log.Trace().Msg("try index page")
optionsForIndexPages := *o
optionsForIndexPages.TryIndexPages = false
optionsForIndexPages.appendTrailingSlash = true
for _, indexPage := range upstreamIndexPages {
optionsForIndexPages.TargetPath = strings.TrimSuffix(o.TargetPath, "/") + "/" + indexPage
if optionsForIndexPages.Upstream(ctx, giteaClient, redirectsCache) {
return
}
}
log.Trace().Msg("try html file with path name")
optionsForIndexPages.appendTrailingSlash = false
optionsForIndexPages.redirectIfExists = strings.TrimSuffix(ctx.Path(), "/") + ".html"
optionsForIndexPages.TargetPath = o.TargetPath + ".html"
if optionsForIndexPages.Upstream(ctx, giteaClient, redirectsCache) {
return
}
}
log.Trace().Msg("not found")
ctx.StatusCode = http.StatusNotFound
if o.TryIndexPages {
log.Trace().Msg("try not found page")
optionsForNotFoundPages := *o
optionsForNotFoundPages.TryIndexPages = false
optionsForNotFoundPages.appendTrailingSlash = false
for _, notFoundPage := range upstreamNotFoundPages {
optionsForNotFoundPages.TargetPath = "/" + notFoundPage
if optionsForNotFoundPages.Upstream(ctx, giteaClient, redirectsCache) {
return
}
}
log.Trace().Msg("not found page missing")
}
}
func handleUnexpectedError(ctx *context.Context, log zerolog.Logger, err error, statusCode int) {
var msg string
if err != nil {
msg = "forge client: returned unexpected error"
log.Error().Err(err).Msg(msg)
msg = fmt.Sprintf("%s: '%v'", msg, err)
}
if statusCode != http.StatusOK {
msg = fmt.Sprintf("forge client: couldn't fetch contents: <code>%d - %s</code>", statusCode, http.StatusText(statusCode))
log.Error().Msg(msg)
}
html.ReturnErrorPage(ctx, msg, http.StatusInternalServerError)
}
func handleRedirects(ctx *context.Context, log zerolog.Logger, o *Options, redirectsCache cache.ICache) {
if o.appendTrailingSlash && !strings.HasSuffix(ctx.Path(), "/") {
log.Trace().Msg("append trailing slash and redirect")
ctx.Redirect(ctx.Path()+"/", http.StatusTemporaryRedirect)
return
}
if strings.HasSuffix(ctx.Path(), "/index.html") && !o.ServeRaw {
log.Trace().Msg("remove index.html from path and redirect")
ctx.Redirect(strings.TrimSuffix(ctx.Path(), "index.html"), http.StatusTemporaryRedirect)
return
}
if o.redirectIfExists != "" {
ctx.Redirect(o.redirectIfExists, http.StatusTemporaryRedirect)
return
}
}
func setHeaders(ctx *context.Context, header http.Header) {
ctx.RespWriter.Header().Set("ETag", header.Get("ETag"))
ctx.RespWriter.Header().Set("Content-Type", header.Get("Content-Type"))
}
func writeResponse(ctx *context.Context, reader io.Reader) {
ctx.RespWriter.WriteHeader(ctx.StatusCode)
if reader != nil {
_, err := io.Copy(ctx.RespWriter, reader)
if err != nil {
log.Error().Err(err).Msgf("Couldn't write body for %q", ctx.Path())
html.ReturnErrorPage(ctx, "", http.StatusInternalServerError)
}
}
}

View file

@ -0,0 +1,220 @@
package upstream
import (
"errors"
"fmt"
"io"
"net/http"
"strings"
"time"
"github.com/rs/zerolog/log"
"pages-server/html"
"pages-server/server/cache"
"pages-server/server/context"
"pages-server/server/gitea"
)
const (
headerLastModified = "Last-Modified"
headerIfModifiedSince = "If-Modified-Since"
rawMime = "text/plain; charset=utf-8"
)
// upstreamIndexPages lists pages that may be considered as index pages for directories.
var upstreamIndexPages = []string{
"index.html",
}
// upstreamNotFoundPages lists pages that may be considered as custom 404 Not Found pages.
var upstreamNotFoundPages = []string{
"404.html",
}
// Options provides various options for the upstream request.
type Options struct {
TargetOwner string
TargetRepo string
TargetBranch string
TargetPath string
// Used for debugging purposes.
Host string
TryIndexPages bool
BranchTimestamp time.Time
// internal
appendTrailingSlash bool
redirectIfExists string
ServeRaw bool
}
// Upstream requests a file from the Gitea API at GiteaRoot and writes it to the request context.
func (o *Options) Upstream(ctx *context.Context, giteaClient *gitea.Client, redirectsCache cache.ICache) bool {
log := log.With().Strs("upstream", []string{o.TargetOwner, o.TargetRepo, o.TargetBranch, o.TargetPath}).Logger()
log.Debug().Msg("Start")
if o.TargetOwner == "" || o.TargetRepo == "" {
html.ReturnErrorPage(ctx, "forge client: either repo owner or name info is missing", http.StatusBadRequest)
return true
}
// Check if the branch exists and when it was modified
if o.BranchTimestamp.IsZero() {
branchExist, err := o.GetBranchTimestamp(giteaClient)
// handle 404
if err != nil && errors.Is(err, gitea.ErrorNotFound) || !branchExist {
html.ReturnErrorPage(ctx,
fmt.Sprintf("branch <code>%q</code> for <code>%s/%s</code> not found", o.TargetBranch, o.TargetOwner, o.TargetRepo),
http.StatusNotFound)
return true
}
// handle unexpected errors
if err != nil {
html.ReturnErrorPage(ctx,
fmt.Sprintf("could not get timestamp of branch <code>%q</code>: '%v'", o.TargetBranch, err),
http.StatusFailedDependency)
return true
}
}
// Check if the browser has a cached version
if ctx.Response() != nil {
if ifModifiedSince, err := time.Parse(time.RFC1123, ctx.Response().Header.Get(headerIfModifiedSince)); err == nil {
if ifModifiedSince.After(o.BranchTimestamp) {
ctx.RespWriter.WriteHeader(http.StatusNotModified)
log.Trace().Msg("check response against last modified: valid")
return true
}
}
log.Trace().Msg("check response against last modified: outdated")
}
log.Debug().Msg("Preparing")
reader, header, statusCode, err := giteaClient.ServeRawContent(o.TargetOwner, o.TargetRepo, o.TargetBranch, o.TargetPath)
if reader != nil {
defer reader.Close()
}
log.Debug().Msg("Aquisting")
// Handle not found error
if err != nil && errors.Is(err, gitea.ErrorNotFound) {
log.Debug().Msg("Handling not found error")
// Get and match redirects
redirects := o.getRedirects(giteaClient, redirectsCache)
if o.matchRedirects(ctx, giteaClient, redirects, redirectsCache) {
log.Trace().Msg("redirect")
return true
}
if o.TryIndexPages {
log.Trace().Msg("try index page")
// copy the o struct & try if an index page exists
optionsForIndexPages := *o
optionsForIndexPages.TryIndexPages = false
optionsForIndexPages.appendTrailingSlash = true
for _, indexPage := range upstreamIndexPages {
optionsForIndexPages.TargetPath = strings.TrimSuffix(o.TargetPath, "/") + "/" + indexPage
if optionsForIndexPages.Upstream(ctx, giteaClient, redirectsCache) {
return true
}
}
log.Trace().Msg("try html file with path name")
// compatibility fix for GitHub Pages (/example → /example.html)
optionsForIndexPages.appendTrailingSlash = false
optionsForIndexPages.redirectIfExists = strings.TrimSuffix(ctx.Path(), "/") + ".html"
optionsForIndexPages.TargetPath = o.TargetPath + ".html"
if optionsForIndexPages.Upstream(ctx, giteaClient, redirectsCache) {
return true
}
}
log.Trace().Msg("not found")
ctx.StatusCode = http.StatusNotFound
if o.TryIndexPages {
log.Trace().Msg("try not found page")
// copy the o struct & try if a not found page exists
optionsForNotFoundPages := *o
optionsForNotFoundPages.TryIndexPages = false
optionsForNotFoundPages.appendTrailingSlash = false
for _, notFoundPage := range upstreamNotFoundPages {
optionsForNotFoundPages.TargetPath = "/" + notFoundPage
if optionsForNotFoundPages.Upstream(ctx, giteaClient, redirectsCache) {
return true
}
}
log.Trace().Msg("not found page missing")
}
return false
}
// handle unexpected client errors
if err != nil || reader == nil || statusCode != http.StatusOK {
log.Debug().Msg("Handling error")
var msg string
if err != nil {
msg = "forge client: returned unexpected error"
log.Error().Err(err).Msg(msg)
msg = fmt.Sprintf("%s: '%v'", msg, err)
}
if reader == nil {
msg = "forge client: returned no reader"
log.Error().Msg(msg)
}
if statusCode != http.StatusOK {
msg = fmt.Sprintf("forge client: couldn't fetch contents: <code>%d - %s</code>", statusCode, http.StatusText(statusCode))
log.Error().Msg(msg)
}
html.ReturnErrorPage(ctx, msg, http.StatusInternalServerError)
return true
}
// Append trailing slash if missing (for index files), and redirect to fix filenames in general
// o.appendTrailingSlash is only true when looking for index pages
if o.appendTrailingSlash && !strings.HasSuffix(ctx.Path(), "/") {
log.Trace().Msg("append trailing slash and redirect")
ctx.Redirect(ctx.Path()+"/", http.StatusTemporaryRedirect)
return true
}
if strings.HasSuffix(ctx.Path(), "/index.html") && !o.ServeRaw {
log.Trace().Msg("remove index.html from path and redirect")
ctx.Redirect(strings.TrimSuffix(ctx.Path(), "index.html"), http.StatusTemporaryRedirect)
return true
}
if o.redirectIfExists != "" {
ctx.Redirect(o.redirectIfExists, http.StatusTemporaryRedirect)
return true
}
// Set ETag & MIME
o.setHeader(ctx, header)
log.Debug().Msg("Prepare response")
ctx.RespWriter.WriteHeader(ctx.StatusCode)
// Write the response body to the original request
if reader != nil {
_, err := io.Copy(ctx.RespWriter, reader)
if err != nil {
log.Error().Err(err).Msgf("Couldn't write body for %q", o.TargetPath)
html.ReturnErrorPage(ctx, "", http.StatusInternalServerError)
return true
}
}
log.Debug().Msg("Sending response")
return true
}

27
server/utils/utils.go Normal file
View file

@ -0,0 +1,27 @@
package utils
import (
"net/url"
"path"
"strings"
)
func TrimHostPort(host string) string {
i := strings.IndexByte(host, ':')
if i >= 0 {
return host[:i]
}
return host
}
func CleanPath(uriPath string) string {
unescapedPath, _ := url.PathUnescape(uriPath)
cleanedPath := path.Join("/", unescapedPath)
// If the path refers to a directory, add a trailing slash.
if !strings.HasSuffix(cleanedPath, "/") && (strings.HasSuffix(unescapedPath, "/") || strings.HasSuffix(unescapedPath, "/.") || strings.HasSuffix(unescapedPath, "/..")) {
cleanedPath += "/"
}
return cleanedPath
}

View file

@ -0,0 +1,69 @@
package utils
import (
"testing"
"github.com/stretchr/testify/assert"
)
func TestTrimHostPort(t *testing.T) {
assert.EqualValues(t, "aa", TrimHostPort("aa"))
assert.EqualValues(t, "", TrimHostPort(":"))
assert.EqualValues(t, "example.com", TrimHostPort("example.com:80"))
}
// TestCleanPath is mostly copied from fasthttp, to keep the behaviour we had before migrating away from it.
// Source (MIT licensed): https://github.com/valyala/fasthttp/blob/v1.48.0/uri_test.go#L154
// Copyright (c) 2015-present Aliaksandr Valialkin, VertaMedia, Kirill Danshin, Erik Dubbelboer, FastHTTP Authors
func TestCleanPath(t *testing.T) {
// double slash
testURIPathNormalize(t, "/aa//bb", "/aa/bb")
// triple slash
testURIPathNormalize(t, "/x///y/", "/x/y/")
// multi slashes
testURIPathNormalize(t, "/abc//de///fg////", "/abc/de/fg/")
// encoded slashes
testURIPathNormalize(t, "/xxxx%2fyyy%2f%2F%2F", "/xxxx/yyy/")
// dotdot
testURIPathNormalize(t, "/aaa/..", "/")
// dotdot with trailing slash
testURIPathNormalize(t, "/xxx/yyy/../", "/xxx/")
// multi dotdots
testURIPathNormalize(t, "/aaa/bbb/ccc/../../ddd", "/aaa/ddd")
// dotdots separated by other data
testURIPathNormalize(t, "/a/b/../c/d/../e/..", "/a/c/")
// too many dotdots
testURIPathNormalize(t, "/aaa/../../../../xxx", "/xxx")
testURIPathNormalize(t, "/../../../../../..", "/")
testURIPathNormalize(t, "/../../../../../../", "/")
// encoded dotdots
testURIPathNormalize(t, "/aaa%2Fbbb%2F%2E.%2Fxxx", "/aaa/xxx")
// double slash with dotdots
testURIPathNormalize(t, "/aaa////..//b", "/b")
// fake dotdot
testURIPathNormalize(t, "/aaa/..bbb/ccc/..", "/aaa/..bbb/")
// single dot
testURIPathNormalize(t, "/a/./b/././c/./d.html", "/a/b/c/d.html")
testURIPathNormalize(t, "./foo/", "/foo/")
testURIPathNormalize(t, "./../.././../../aaa/bbb/../../../././../", "/")
testURIPathNormalize(t, "./a/./.././../b/./foo.html", "/b/foo.html")
}
func testURIPathNormalize(t *testing.T, requestURI, expectedPath string) {
cleanedPath := CleanPath(requestURI)
if cleanedPath != expectedPath {
t.Fatalf("Unexpected path %q. Expected %q. requestURI=%q", cleanedPath, expectedPath, requestURI)
}
}

View file

@ -0,0 +1,3 @@
package version
var Version string = "dev"