fix: use lightweight bot/scraper html responses. Fixes #3253

This commit is contained in:
Gabe Kangas
2023-08-16 18:19:09 -07:00
parent 78ec6302b9
commit 1e57cff3e0
7 changed files with 262 additions and 0 deletions

View File

@@ -4,13 +4,18 @@ import (
"encoding/json"
"fmt"
"net/http"
"net/url"
"path/filepath"
"strings"
"github.com/owncast/owncast/config"
"github.com/owncast/owncast/core"
"github.com/owncast/owncast/core/data"
"github.com/owncast/owncast/models"
"github.com/owncast/owncast/router/middleware"
"github.com/owncast/owncast/static"
"github.com/owncast/owncast/utils"
log "github.com/sirupsen/logrus"
)
// IndexHandler handles the default index route.
@@ -24,6 +29,13 @@ func IndexHandler(w http.ResponseWriter, r *http.Request) {
return
}
// For search engine bots and social scrapers return a special
// server-rendered page.
if utils.IsUserAgentABot(r.UserAgent()) && isIndexRequest {
handleScraperMetadataPage(w, r)
return
}
// Set a cache control max-age header
middleware.SetCachingHeaders(w, r)
@@ -93,3 +105,79 @@ func renderIndexHtml(w http.ResponseWriter, nonce string) {
http.Error(w, err.Error(), http.StatusInternalServerError)
}
}
// MetadataPage represents a server-rendered web page for bots and web scrapers.
type MetadataPage struct {
RequestedURL string
Image string
Thumbnail string
TagsString string
Summary string
Name string
Tags []string
SocialHandles []models.SocialHandle
}
// Return a basic HTML page with server-rendered metadata from the config
// to give to Opengraph clients and web scrapers (bots, web crawlers, etc).
func handleScraperMetadataPage(w http.ResponseWriter, r *http.Request) {
tmpl, err := static.GetBotMetadataTemplate()
if err != nil {
log.Errorln(err)
w.WriteHeader(http.StatusInternalServerError)
return
}
scheme := "http"
if siteURL := data.GetServerURL(); siteURL != "" {
if parsed, err := url.Parse(siteURL); err == nil && parsed.Scheme != "" {
scheme = parsed.Scheme
}
}
fullURL, err := url.Parse(fmt.Sprintf("%s://%s%s", scheme, r.Host, r.URL.Path))
if err != nil {
log.Errorln(err)
}
imageURL, err := url.Parse(fmt.Sprintf("%s://%s%s", scheme, r.Host, "/logo/external"))
if err != nil {
log.Errorln(err)
}
status := core.GetStatus()
// If the thumbnail does not exist or we're offline then just use the logo image
var thumbnailURL string
if status.Online && utils.DoesFileExists(filepath.Join(config.DataDirectory, "tmp", "thumbnail.jpg")) {
thumbnail, err := url.Parse(fmt.Sprintf("%s://%s%s", scheme, r.Host, "/thumbnail.jpg"))
if err != nil {
log.Errorln(err)
thumbnailURL = imageURL.String()
} else {
thumbnailURL = thumbnail.String()
}
} else {
thumbnailURL = imageURL.String()
}
tagsString := strings.Join(data.GetServerMetadataTags(), ",")
metadata := MetadataPage{
Name: data.GetServerName(),
RequestedURL: fullURL.String(),
Image: imageURL.String(),
Summary: data.GetServerSummary(),
Thumbnail: thumbnailURL,
TagsString: tagsString,
Tags: data.GetServerMetadataTags(),
SocialHandles: data.GetSocialHandles(),
}
// Set a cache header
middleware.SetCachingHeaders(w, r)
w.Header().Set("Content-Type", "text/html")
if err := tmpl.Execute(w, metadata); err != nil {
log.Errorln(err)
}
}