fix: use lightweight bot/scraper html responses. Fixes #3253
This commit is contained in:
parent
78ec6302b9
commit
1e57cff3e0
@ -4,13 +4,18 @@ import (
|
|||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
"net/http"
|
"net/http"
|
||||||
|
"net/url"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
|
"github.com/owncast/owncast/config"
|
||||||
|
"github.com/owncast/owncast/core"
|
||||||
"github.com/owncast/owncast/core/data"
|
"github.com/owncast/owncast/core/data"
|
||||||
|
"github.com/owncast/owncast/models"
|
||||||
"github.com/owncast/owncast/router/middleware"
|
"github.com/owncast/owncast/router/middleware"
|
||||||
"github.com/owncast/owncast/static"
|
"github.com/owncast/owncast/static"
|
||||||
"github.com/owncast/owncast/utils"
|
"github.com/owncast/owncast/utils"
|
||||||
|
log "github.com/sirupsen/logrus"
|
||||||
)
|
)
|
||||||
|
|
||||||
// IndexHandler handles the default index route.
|
// IndexHandler handles the default index route.
|
||||||
@ -24,6 +29,13 @@ func IndexHandler(w http.ResponseWriter, r *http.Request) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// For search engine bots and social scrapers return a special
|
||||||
|
// server-rendered page.
|
||||||
|
if utils.IsUserAgentABot(r.UserAgent()) && isIndexRequest {
|
||||||
|
handleScraperMetadataPage(w, r)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
// Set a cache control max-age header
|
// Set a cache control max-age header
|
||||||
middleware.SetCachingHeaders(w, r)
|
middleware.SetCachingHeaders(w, r)
|
||||||
|
|
||||||
@ -93,3 +105,79 @@ func renderIndexHtml(w http.ResponseWriter, nonce string) {
|
|||||||
http.Error(w, err.Error(), http.StatusInternalServerError)
|
http.Error(w, err.Error(), http.StatusInternalServerError)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// MetadataPage represents a server-rendered web page for bots and web scrapers.
|
||||||
|
type MetadataPage struct {
|
||||||
|
RequestedURL string
|
||||||
|
Image string
|
||||||
|
Thumbnail string
|
||||||
|
TagsString string
|
||||||
|
Summary string
|
||||||
|
Name string
|
||||||
|
Tags []string
|
||||||
|
SocialHandles []models.SocialHandle
|
||||||
|
}
|
||||||
|
|
||||||
|
// Return a basic HTML page with server-rendered metadata from the config
|
||||||
|
// to give to Opengraph clients and web scrapers (bots, web crawlers, etc).
|
||||||
|
func handleScraperMetadataPage(w http.ResponseWriter, r *http.Request) {
|
||||||
|
tmpl, err := static.GetBotMetadataTemplate()
|
||||||
|
if err != nil {
|
||||||
|
log.Errorln(err)
|
||||||
|
w.WriteHeader(http.StatusInternalServerError)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
scheme := "http"
|
||||||
|
|
||||||
|
if siteURL := data.GetServerURL(); siteURL != "" {
|
||||||
|
if parsed, err := url.Parse(siteURL); err == nil && parsed.Scheme != "" {
|
||||||
|
scheme = parsed.Scheme
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fullURL, err := url.Parse(fmt.Sprintf("%s://%s%s", scheme, r.Host, r.URL.Path))
|
||||||
|
if err != nil {
|
||||||
|
log.Errorln(err)
|
||||||
|
}
|
||||||
|
imageURL, err := url.Parse(fmt.Sprintf("%s://%s%s", scheme, r.Host, "/logo/external"))
|
||||||
|
if err != nil {
|
||||||
|
log.Errorln(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
status := core.GetStatus()
|
||||||
|
|
||||||
|
// If the thumbnail does not exist or we're offline then just use the logo image
|
||||||
|
var thumbnailURL string
|
||||||
|
if status.Online && utils.DoesFileExists(filepath.Join(config.DataDirectory, "tmp", "thumbnail.jpg")) {
|
||||||
|
thumbnail, err := url.Parse(fmt.Sprintf("%s://%s%s", scheme, r.Host, "/thumbnail.jpg"))
|
||||||
|
if err != nil {
|
||||||
|
log.Errorln(err)
|
||||||
|
thumbnailURL = imageURL.String()
|
||||||
|
} else {
|
||||||
|
thumbnailURL = thumbnail.String()
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
thumbnailURL = imageURL.String()
|
||||||
|
}
|
||||||
|
|
||||||
|
tagsString := strings.Join(data.GetServerMetadataTags(), ",")
|
||||||
|
metadata := MetadataPage{
|
||||||
|
Name: data.GetServerName(),
|
||||||
|
RequestedURL: fullURL.String(),
|
||||||
|
Image: imageURL.String(),
|
||||||
|
Summary: data.GetServerSummary(),
|
||||||
|
Thumbnail: thumbnailURL,
|
||||||
|
TagsString: tagsString,
|
||||||
|
Tags: data.GetServerMetadataTags(),
|
||||||
|
SocialHandles: data.GetSocialHandles(),
|
||||||
|
}
|
||||||
|
|
||||||
|
// Set a cache header
|
||||||
|
middleware.SetCachingHeaders(w, r)
|
||||||
|
|
||||||
|
w.Header().Set("Content-Type", "text/html")
|
||||||
|
if err := tmpl.Execute(w, metadata); err != nil {
|
||||||
|
log.Errorln(err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
1
go.mod
1
go.mod
@ -67,6 +67,7 @@ require (
|
|||||||
github.com/golang-jwt/jwt v3.2.2+incompatible // indirect
|
github.com/golang-jwt/jwt v3.2.2+incompatible // indirect
|
||||||
github.com/gorilla/css v1.0.0 // indirect
|
github.com/gorilla/css v1.0.0 // indirect
|
||||||
github.com/jmespath/go-jmespath v0.4.0 // indirect
|
github.com/jmespath/go-jmespath v0.4.0 // indirect
|
||||||
|
github.com/mssola/user_agent v0.6.0 // indirect
|
||||||
github.com/oschwald/maxminddb-golang v1.11.0 // indirect
|
github.com/oschwald/maxminddb-golang v1.11.0 // indirect
|
||||||
github.com/shoenig/go-m1cpu v0.1.6 // indirect
|
github.com/shoenig/go-m1cpu v0.1.6 // indirect
|
||||||
)
|
)
|
||||||
|
2
go.sum
2
go.sum
@ -85,6 +85,8 @@ github.com/microcosm-cc/bluemonday v1.0.25 h1:4NEwSfiJ+Wva0VxN5B8OwMicaJvD8r9tlJ
|
|||||||
github.com/microcosm-cc/bluemonday v1.0.25/go.mod h1:ZIOjCQp1OrzBBPIJmfX4qDYFuhU02nx4bn030ixfHLE=
|
github.com/microcosm-cc/bluemonday v1.0.25/go.mod h1:ZIOjCQp1OrzBBPIJmfX4qDYFuhU02nx4bn030ixfHLE=
|
||||||
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
|
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
|
||||||
github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
|
github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
|
||||||
|
github.com/mssola/user_agent v0.6.0 h1:uwPR4rtWlCHRFyyP9u2KOV0u8iQXmS7Z7feTrstQwk4=
|
||||||
|
github.com/mssola/user_agent v0.6.0/go.mod h1:TTPno8LPY3wAIEKRpAtkdMT0f8SE24pLRGPahjCH4uw=
|
||||||
github.com/mvdan/xurls v1.1.0 h1:OpuDelGQ1R1ueQ6sSryzi6P+1RtBpfQHM8fJwlE45ww=
|
github.com/mvdan/xurls v1.1.0 h1:OpuDelGQ1R1ueQ6sSryzi6P+1RtBpfQHM8fJwlE45ww=
|
||||||
github.com/mvdan/xurls v1.1.0/go.mod h1:tQlNn3BED8bE/15hnSL2HLkDeLWpNPAwtw7wkEq44oU=
|
github.com/mvdan/xurls v1.1.0/go.mod h1:tQlNn3BED8bE/15hnSL2HLkDeLWpNPAwtw7wkEq44oU=
|
||||||
github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=
|
github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=
|
||||||
|
83
static/metadata.html.tmpl
vendored
Normal file
83
static/metadata.html.tmpl
vendored
Normal file
@ -0,0 +1,83 @@
|
|||||||
|
{{- /*
  Lightweight, server-rendered page served to search engine bots and social
  scrapers in place of the full web app. It is executed with a value that
  provides Name, Summary, RequestedURL, Image, Thumbnail, TagsString, Tags and
  SocialHandles (each social handle exposing URL and Platform).
*/ -}}
<!doctype html>

<html lang="en">
  <head>
    <meta charset="utf-8">

    <title>{{.Name}}</title>
    <meta name="description" content="{{.Summary}}">

    <meta property="og:title" content="{{.Name}}">
    <meta property="og:site_name" content="{{.Name}}">
    <meta property="og:url" content="{{.RequestedURL}}">
    <meta property="og:description" content="{{.Summary}}">
    <meta property="og:type" content="video.other">
    <meta property="video:tag" content="{{.TagsString}}">

    <meta property="og:image" content="{{.Thumbnail}}">
    <meta property="og:image:url" content="{{.Thumbnail}}">
    {{- /* og:image:alt is a textual description of the image, not a URL. */}}
    <meta property="og:image:alt" content="{{.Name}}">

    <meta property="og:video" content='{{.RequestedURL}}embed/video' />
    <meta property="og:video:secure_url" content='{{.RequestedURL}}embed/video' />
    <meta property="og:video:height" content="315" />
    <meta property="og:video:width" content="560" />
    <meta property="og:video:type" content="text/html" />
    <meta property="og:video:actor" content="{{.Name}}" />

    <meta property="twitter:title" content="{{.Name}}">
    <meta property="twitter:url" content="{{.RequestedURL}}">
    <meta property="twitter:description" content="{{.Summary}}">
    <meta property="twitter:image" content="{{.Image}}">
    <meta property="twitter:card" content="player" />
    <meta property="twitter:player" content='{{.RequestedURL}}embed/video' />
    <meta property="twitter:player:width" content="560" />
    <meta property="twitter:player:height" content="315" />

    <link rel="apple-touch-icon" sizes="57x57" href="/img/favicon/apple-icon-57x57.png">
    <link rel="apple-touch-icon" sizes="60x60" href="/img/favicon/apple-icon-60x60.png">
    <link rel="apple-touch-icon" sizes="72x72" href="/img/favicon/apple-icon-72x72.png">
    <link rel="apple-touch-icon" sizes="76x76" href="/img/favicon/apple-icon-76x76.png">
    <link rel="apple-touch-icon" sizes="114x114" href="/img/favicon/apple-icon-114x114.png">
    <link rel="apple-touch-icon" sizes="120x120" href="/img/favicon/apple-icon-120x120.png">
    <link rel="apple-touch-icon" sizes="144x144" href="/img/favicon/apple-icon-144x144.png">
    <link rel="apple-touch-icon" sizes="152x152" href="/img/favicon/apple-icon-152x152.png">
    <link rel="apple-touch-icon" sizes="180x180" href="/img/favicon/apple-icon-180x180.png">
    <link rel="icon" type="image/png" sizes="192x192" href="/img/favicon/android-icon-192x192.png">
    <link rel="icon" type="image/png" sizes="32x32" href="/img/favicon/favicon-32x32.png">
    <link rel="icon" type="image/png" sizes="96x96" href="/img/favicon/favicon-96x96.png">
    <link rel="icon" type="image/png" sizes="16x16" href="/img/favicon/favicon-16x16.png">
    <link rel="manifest" href="/manifest.json">

    <link rel="authorization_endpoint" href="/api/auth/provider/indieauth">

    <meta name="msapplication-TileColor" content="#ffffff">
    <meta name="msapplication-TileImage" content="/img/favicon/ms-icon-144x144.png">
    <meta name="theme-color" content="#ffffff">

  </head>

  <body>
    <h1>{{.Name}}</h1>

    <center>
      <img src="{{.Thumbnail}}" alt="{{.Name}}" width="10%" />
    </center>

    <h3>{{.Summary}}</h3>

    {{- /* List items must live inside a list element to be valid HTML. */}}
    {{if .Tags}}
    <ul>
      {{range .Tags}}
      <li>{{.}}</li>
      {{end}}
    </ul>
    {{end}}

    <br/>

    <h3>Links for {{.Name}}:</h3>

    {{if .SocialHandles}}
    <ul>
      {{range .SocialHandles}}
      <li><a href="{{.URL}}">{{.Platform}}</a></li>
      {{end}}
    </ul>
    {{end}}

  </body>
</html>
|
11
static/static.go
vendored
11
static/static.go
vendored
@ -76,3 +76,14 @@ func getFileSystemStaticFileOrDefault(path string, defaultData []byte) []byte {
|
|||||||
|
|
||||||
return data
|
return data
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//go:embed metadata.html.tmpl
|
||||||
|
var botMetadataTemplate embed.FS
|
||||||
|
|
||||||
|
// GetBotMetadataTemplate will return the bot/scraper metadata template.
|
||||||
|
func GetBotMetadataTemplate() (*template.Template, error) {
|
||||||
|
name := "metadata.html.tmpl"
|
||||||
|
t, err := template.ParseFS(botMetadataTemplate, name)
|
||||||
|
tmpl := template.Must(t, err)
|
||||||
|
return tmpl, err
|
||||||
|
}
|
||||||
|
48
test/automated/browser/bot-share-search-scrapers.test.js
Normal file
48
test/automated/browser/bot-share-search-scrapers.test.js
Normal file
@ -0,0 +1,48 @@
|
|||||||
|
const listenForErrors = require('./lib/errors.js').listenForErrors;
|
||||||
|
|
||||||
|
// Verifies that a client identifying itself as a bot/scraper receives the
// lightweight server-rendered metadata page instead of the full web app.
describe('Bot and scraper metadata page', () => {

  // Returns the `content` attribute of the <meta property="..."> tag in the
  // document head. Throws inside the page if no such tag exists.
  async function getMetaTagContent(property) {
    const selector = `meta[property="${property}"]`;

    const tag = await page.evaluate((selector) => {
      return document.head.querySelector(selector).getAttribute("content");
    }, selector);
    return tag;
  }

  beforeAll(async () => {
    await page.setViewport({ width: 1080, height: 720 });
    listenForErrors(browser, page);
    // setUserAgent returns a promise; wait for it so the bot user agent is
    // in effect before the navigation below starts.
    await page.setUserAgent(
      "Mastodon"
    );
    await page.goto('http://localhost:5309');
  });

  afterAll(async () => {
    await page.waitForTimeout(3000);
    await page.screenshot({ path: 'screenshots/screenshot_bots_share_search_scrapers.png', fullPage: true });
  });

  it('should have rendered the simple bot accessible html page', async () => {
    await page.waitForSelector('h1');
    await page.waitForSelector('h3');

    const ogVideo = await getMetaTagContent('og:video');
    expect(ogVideo).toBe('http://localhost:5309/embed/video');

    const ogVideoType = await getMetaTagContent('og:video:type');
    expect(ogVideoType).toBe('text/html');

    // When stream is live the thumbnail is provided as the image.
    const ogImage = await getMetaTagContent('og:image');
    expect(ogImage).toBe('http://localhost:5309/thumbnail.jpg');

    const twitterUrl = await getMetaTagContent('twitter:url');
    expect(twitterUrl).toBe('http://localhost:5309/');

    const twitterImage = await getMetaTagContent('twitter:image');
    expect(twitterImage).toBe('http://localhost:5309/logo/external');
  });
});
|
@ -16,6 +16,7 @@ import (
|
|||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/mssola/user_agent"
|
||||||
log "github.com/sirupsen/logrus"
|
log "github.com/sirupsen/logrus"
|
||||||
"github.com/yuin/goldmark"
|
"github.com/yuin/goldmark"
|
||||||
"github.com/yuin/goldmark/extension"
|
"github.com/yuin/goldmark/extension"
|
||||||
@ -120,6 +121,34 @@ func IsUserAgentAPlayer(userAgent string) bool {
|
|||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// IsUserAgentABot returns if a web client user-agent is seen as a bot.
|
||||||
|
func IsUserAgentABot(userAgent string) bool {
|
||||||
|
if userAgent == "" {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
botStrings := []string{
|
||||||
|
"mastodon",
|
||||||
|
"pleroma",
|
||||||
|
"applebot",
|
||||||
|
"whatsapp",
|
||||||
|
"matrix",
|
||||||
|
"synapse",
|
||||||
|
"element",
|
||||||
|
"rocket.chat",
|
||||||
|
"duckduckbot",
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, botString := range botStrings {
|
||||||
|
if strings.Contains(strings.ToLower(userAgent), botString) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
ua := user_agent.New(userAgent)
|
||||||
|
return ua.Bot()
|
||||||
|
}
|
||||||
|
|
||||||
// RenderSimpleMarkdown will return HTML without sanitization or specific formatting rules.
|
// RenderSimpleMarkdown will return HTML without sanitization or specific formatting rules.
|
||||||
func RenderSimpleMarkdown(raw string) string {
|
func RenderSimpleMarkdown(raw string) string {
|
||||||
markdown := goldmark.New(
|
markdown := goldmark.New(
|
||||||
|
Loading…
x
Reference in New Issue
Block a user