Added comprehensive sanitization and refactored to use typed API response models.

This commit is contained in:
2026-01-07 11:30:59 -05:00
parent 35086cb751
commit dc0df47257
6 changed files with 186 additions and 46 deletions

View File

@@ -11,7 +11,7 @@ from mautrix.types import TextMessageEventContent, MessageType
from .owncast_client import OwncastClient
from .database import StreamRepository, SubscriptionRepository
from .utils import domainify
from .utils import domainify, sanitize_for_markdown
class CommandHandler:
@@ -203,13 +203,15 @@ class CommandHandler:
# Determine stream name (use domain as fallback)
stream_name = stream_state.name if stream_state.name else domain
safe_stream_name = sanitize_for_markdown(stream_name)
# Start building this stream's entry with stream name as main bullet
body_text += f"- **{stream_name}** \n"
body_text += f"- **{safe_stream_name}** \n"
# Add title if stream is online (as a sub-bullet)
if stream_state.online and stream_state.title:
body_text += f" - Title: {stream_state.title} \n"
safe_title = sanitize_for_markdown(stream_state.title)
body_text += f" - Title: {safe_title} \n"
# Determine status and duration (as a sub-bullet)
if stream_state.online:
@@ -271,13 +273,15 @@ class CommandHandler:
for domain, stream_state in live_streams:
# Determine stream name (use domain as fallback)
stream_name = stream_state.name if stream_state.name else domain
safe_stream_name = sanitize_for_markdown(stream_name)
# Start building this stream's entry with stream name as main bullet
body_text += f"- **{stream_name}** \n"
body_text += f"- **{safe_stream_name}** \n"
# Add title (should be present for live streams)
if stream_state.title:
body_text += f" - Title: {stream_state.title} \n"
safe_title = sanitize_for_markdown(stream_state.title)
body_text += f" - Title: {safe_title} \n"
# Add status with duration
if stream_state.last_connect_time:

View File

@@ -24,6 +24,24 @@ class StreamState:
"""Returns True if the stream is currently online."""
return self.last_connect_time is not None
@classmethod
def from_api_response(cls, response: dict, domain: str) -> "StreamState":
    """
    Build a StreamState from a stream status API response.

    The stream title is capped at MAX_STREAM_TITLE_LENGTH via truncate();
    the connect/disconnect values are passed through exactly as received
    (None when the key is absent).

    :param response: API response as a dictionary (camelCase keys)
    :param domain: The stream domain
    :return: StreamState instance
    """
    # Imported locally, as in the original (presumably to avoid an import
    # cycle with .utils -- confirm before hoisting to module level).
    from .utils import truncate, MAX_STREAM_TITLE_LENGTH

    raw_title = response.get("streamTitle", "")
    capped_title = truncate(raw_title, MAX_STREAM_TITLE_LENGTH)

    return cls(
        domain=domain,
        title=capped_title,
        last_connect_time=response.get("lastConnectTime"),
        last_disconnect_time=response.get("lastDisconnectTime"),
    )
@classmethod
def from_db_row(cls, row: dict) -> "StreamState":
"""
@@ -62,4 +80,13 @@ class StreamConfig:
:param response: API response as a dictionary
:return: StreamConfig instance
"""
return cls(name=response.get("name", ""), tags=response.get("tags", []))
from .utils import truncate, MAX_INSTANCE_TITLE_LENGTH, MAX_TAG_LENGTH
# Truncate instance name to max length
name = truncate(response.get("name", ""), MAX_INSTANCE_TITLE_LENGTH)
# Truncate each tag to max length
raw_tags = response.get("tags", [])
tags = [truncate(tag, MAX_TAG_LENGTH) for tag in raw_tags]
return cls(name=name, tags=tags)

View File

@@ -11,7 +11,7 @@ from typing import List
from mautrix.types import TextMessageEventContent, MessageType
from .database import SubscriptionRepository
from .utils import SECONDS_BETWEEN_NOTIFICATIONS
from .utils import SECONDS_BETWEEN_NOTIFICATIONS, sanitize_for_markdown
class NotificationService:
@@ -130,24 +130,33 @@ class NotificationService:
"""
# Use name if available, fallback to domain
stream_name = name if name else domain
safe_stream_name = sanitize_for_markdown(stream_name)
# Choose message based on notification type
if title_change:
body_text = "📝 " + stream_name + " has changed its stream title!"
body_text = "📝 " + safe_stream_name + " has changed its stream title!"
else:
body_text = "🎥 " + stream_name + " is now live!"
body_text = "🎥 " + safe_stream_name + " is now live!"
# Add title if present
if title != "":
body_text += "\nStream Title: " + title
safe_title = sanitize_for_markdown(title)
body_text += "\nStream Title: " + safe_title
# Add stream URL
body_text += "\n\nTo tune in, visit: https://" + domain + "/"
# Add tags if present
if tags and len(tags) > 0:
body_text += "\n\n"
body_text += " ".join("#" + tag for tag in tags)
safe_tags = []
for tag in tags:
safe_tag = sanitize_for_markdown(tag)
if safe_tag and not safe_tag.startswith('.'):
safe_tags.append(safe_tag)
if safe_tags:
body_text += "\n\n"
body_text += " ".join("#" + tag for tag in safe_tags)
return body_text

View File

@@ -8,7 +8,7 @@ import aiohttp
import json
from typing import Optional
from .models import StreamConfig
from .models import StreamConfig, StreamState
from .utils import OWNCAST_STATUS_PATH, OWNCAST_CONFIG_PATH, USER_AGENT
@@ -35,13 +35,13 @@ class OwncastClient:
headers=headers, cookie_jar=cookie_jar, timeout=timeout, connector=connector
)
async def get_stream_state(self, domain: str) -> Optional[dict]:
async def get_stream_state(self, domain: str) -> Optional[StreamState]:
"""
Get the current stream state for a given domain.
HTTPS on port 443 is assumed, no other protocols or ports are supported.
:param domain: The domain (not URL) where the stream is hosted.
:return: A dictionary containing stream state if available, None if an error occurred.
:return: A StreamState with stream state if available, None if an error occurred.
"""
self.log.debug(f"[{domain}] Fetching current stream state...")
status_url = "https://" + domain + OWNCAST_STATUS_PATH
@@ -87,7 +87,7 @@ class OwncastClient:
)
return None
return new_state
return StreamState.from_api_response(new_state, domain)
async def get_stream_config(self, domain: str) -> Optional[StreamConfig]:
"""

View File

@@ -98,10 +98,10 @@ class StreamMonitor:
stream_config = None
# Fetch the latest stream state from the server
new_state_dict = await self.owncast_client.get_stream_state(domain)
new_state = await self.owncast_client.get_stream_state(domain)
# If the fetch failed, increment failure counter and skip the update
if new_state_dict is None:
if new_state is None:
await self.stream_repo.increment_failure_counter(domain)
self.log.warning(
f"[{domain}] Connection failure (counter={failure_counter + 1})"
@@ -130,7 +130,7 @@ class StreamMonitor:
# Does the latest stream state have a last connect time and the old state not have one?
if (
new_state_dict["lastConnectTime"] is not None
new_state.last_connect_time is not None
and old_state.last_connect_time is None
):
# Yes! This stream is now live.
@@ -153,12 +153,12 @@ class StreamMonitor:
# Yes. Has this stream been offline for a short amount of time?
if seconds_since_last_offline < TEMPORARY_OFFLINE_NOTIFICATION_COOLDOWN:
# Yes. Did the stream title change?
if old_state.title != new_state_dict["streamTitle"]:
if old_state.title != new_state.title:
# Yes. The stream was only down for a short time, send a special notification indicating the stream changed its name.
await self.notification_service.notify_stream_live(
domain,
stream_name,
new_state_dict["streamTitle"],
new_state.title,
stream_tags,
title_change=True,
)
@@ -172,7 +172,7 @@ class StreamMonitor:
await self.notification_service.notify_stream_live(
domain,
stream_name,
new_state_dict["streamTitle"],
new_state.title,
stream_tags,
title_change=False,
)
@@ -183,11 +183,11 @@ class StreamMonitor:
)
if (
new_state_dict["lastConnectTime"] is not None
new_state.last_connect_time is not None
and old_state.last_connect_time is not None
):
# Did the stream title change mid-session?
if old_state.title != new_state_dict["streamTitle"]:
if old_state.title != new_state.title:
self.log.info(f"[{domain}] Stream title was changed!")
update_database = True
stream_config = await self.owncast_client.get_stream_config(domain)
@@ -208,7 +208,7 @@ class StreamMonitor:
await self.notification_service.notify_stream_live(
domain,
stream_name,
new_state_dict["streamTitle"],
new_state.title,
stream_tags,
title_change=False,
)
@@ -217,14 +217,14 @@ class StreamMonitor:
await self.notification_service.notify_stream_live(
domain,
stream_name,
new_state_dict["streamTitle"],
new_state.title,
stream_tags,
title_change=True,
)
# Does the latest stream state no longer have a last connect time but the old state does?
elif (
new_state_dict["lastConnectTime"] is None
new_state.last_connect_time is None
and old_state.last_connect_time is not None
):
# Yep. This stream is now offline. Log it.
@@ -246,13 +246,13 @@ class StreamMonitor:
self.log.debug(f"[{domain}] Updating stream state in database...")
# Create updated state object
# Create updated state object (title already truncated in new_state)
updated_state = StreamState(
domain=domain,
name=stream_name,
title=new_state_dict["streamTitle"],
last_connect_time=new_state_dict["lastConnectTime"],
last_disconnect_time=new_state_dict["lastDisconnectTime"],
title=new_state.title,
last_connect_time=new_state.last_connect_time,
last_disconnect_time=new_state.last_disconnect_time,
)
await self.stream_repo.update(updated_state)

View File

@@ -4,6 +4,7 @@
#
# Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
import re
from urllib.parse import urlparse
# Path to the GetStatus API call on Owncast instances
@@ -32,6 +33,12 @@ TEMPORARY_OFFLINE_NOTIFICATION_COOLDOWN = 7 * 60 # 7 minutes in seconds
CLEANUP_WARNING_THRESHOLD = 83 * 24 * 60 # 119,520 cycles = 83 days
CLEANUP_DELETE_THRESHOLD = 90 * 24 * 60 # 129,600 cycles = 90 days
# Maximum field lengths based on Owncast's configuration.
# These caps are applied with truncate() when API responses are converted
# into model objects (stream title, instance name, and each tag individually).
# Source: https://github.com/owncast/owncast/blob/master/web/utils/config-constants.tsx
# NOTE(review): the "(line N)" markers below refer to that upstream file; the
# URL tracks the master branch, so the line numbers may no longer match.
MAX_INSTANCE_TITLE_LENGTH = 255 # Server Name (line 81)
MAX_STREAM_TITLE_LENGTH = 100 # Stream Title (line 91)
MAX_TAG_LENGTH = 24 # Per tag (line 208)
def should_query_stream(failure_counter: int) -> bool:
"""
@@ -60,23 +67,116 @@ def should_query_stream(failure_counter: int) -> bool:
def domainify(url: str) -> str:
"""
Take a given URL and convert it to just the domain.
Extract and sanitize a domain from user input.
:param url: URL or domain string
:return: Domain extracted from the URL
Handles URLs, bare domains, and email-style input (user@domain).
Only allows valid domain characters (alphanumeric, hyphens, periods).
:param url: URL, domain, or email-style string
:return: Sanitized domain
"""
# Take whatever input the user provided and try to turn it into just the domain.
# Examples:
# "stream.logal.dev" -> "stream.logal.dev"
# "https://stream.logal.dev" -> "stream.logal.dev"
# "stream.logal.dev/embed/chat/readwrite" -> "stream.logal.dev"
# "https://stream.logal.dev/abcdefghijklmno/123456789" -> "stream.logal.dev"
# "notify@stream.logal.dev" -> "stream.logal.dev"
# Handle email-style format first (e.g., "notify@stream.logal.dev")
if "@" in url:
url = url.split("@")[-1]
parsed_url = urlparse(url)
domain = (parsed_url.netloc or parsed_url.path).lower()
# Prepend // if no scheme so urlparse treats input as netloc
if not url.startswith(('http://', 'https://', '//')):
url = '//' + url
if "@" in domain:
return domain.split("@")[-1]
parsed = urlparse(url)
domain = (parsed.netloc or parsed.path).lower()
return domain
# Strip port and path
domain = domain.split(':')[0].split('/')[0]
# Allow only valid domain characters
return re.sub(r'[^a-z0-9.-]', '', domain).strip('.-')
def truncate(text: str, max_length: int) -> str:
    """
    Truncate text to a maximum length.

    Falsy input (an empty string or None) is returned unchanged. A zero or
    negative max_length yields an empty string; without the clamp a negative
    value would reach the slice below and strip characters from the *end*
    of the text instead of limiting its length.

    :param text: The text to truncate
    :param max_length: Maximum allowed length
    :return: Truncated text, or original if within limit
    """
    if not text:
        return text

    # A negative slice bound counts from the end of the string, so clamp it.
    limit = max(max_length, 0)
    if len(text) <= limit:
        return text
    return text[:limit]
# Translation table mapping each Markdown-significant character to its
# backslash-escaped form. Built once at import time. Covers: backslash itself,
# formatting (*_~`), links ([]()), headings (#), lists (-+), blockquotes (>),
# tables (|), and other special characters (={}.!<&).
_MARKDOWN_ESCAPE_TABLE = str.maketrans(
    {char: "\\" + char for char in "\\*_[]()~`#+-=|{}.!<>&"}
)


def escape_markdown(text: str) -> str:
    """
    Escape Markdown special characters to prevent injection attacks.

    This function sanitizes untrusted external input (like stream names and titles)
    before embedding them in Markdown-formatted messages. It prevents malicious
    actors from injecting arbitrary Markdown/HTML content.

    Every special character is prefixed with a single backslash. The text is
    processed in one pass, so backslashes already present in the input are
    escaped exactly once and never double-escaped.

    :param text: The text to escape
    :return: The escaped text safe for Markdown rendering; falsy input
        (empty string or None) is returned unchanged
    """
    if not text:
        return text
    return text.translate(_MARKDOWN_ESCAPE_TABLE)
def sanitize_for_markdown(text: str) -> str:
    """
    Sanitize text for safe Markdown rendering.

    Flattens the text onto a single line (newlines and carriage returns become
    spaces), collapses every run of whitespace into one space, trims the ends,
    and then escapes Markdown special characters.
    Use this for any untrusted external content before embedding in Markdown messages.

    Note: This function does not truncate. Size limits should be enforced at the
    model layer (e.g., in from_api_response methods).

    :param text: The text to sanitize
    :return: Sanitized and escaped text safe for Markdown rendering
    """
    if not text:
        return text

    # str.split() with no arguments splits on any whitespace run, including
    # '\n' and '\r', so re-joining with single spaces both removes line breaks
    # (preventing multi-line injection) and normalizes spacing.
    single_line = ' '.join(text.split())

    # Escape Markdown special characters
    return escape_markdown(single_line)