feat(backend): add TfL disruptions API with Gemini AI shortening

- Add TfL Unified API integration for real-time transport disruptions
- Implement batch AI shortening using Gemini 2.5 Flash-Lite
- Add in-memory caching with 1-hour TTL
- Support Tube, Overground, DLR, Elizabeth Line, and Tram
- Sort disruptions by severity and strip line-name prefixes with a regex

Co-authored-by: Ona <no-reply@ona.com>
2025-10-24 23:03:35 +00:00
parent bf75062760
commit 81660c2d7e
3 changed files with 456 additions and 0 deletions
--- a/apps/backend/src/tfl/cache.ts
+++ b/apps/backend/src/tfl/cache.ts
@@ -0,0 +1,57 @@
+/**
+ * Simple in-memory cache for shortened disruption descriptions
+ */
+
+/** A cached shortened description together with the time it was stored. */
+interface CacheEntry {
+	// Shortened text handed back to callers on a cache hit
+	shortened: string
+	// Date.now() value (epoch milliseconds) recorded when the entry was written
+	timestamp: number
+}
+
+// Module-level store, keyed by the reason string passed to setCachedShortened
+const cache = new Map<string, CacheEntry>()
+
+// Cache for 1 hour (value is in milliseconds, compared against Date.now() deltas)
+const CACHE_DURATION = 60 * 60 * 1000
+
+/**
+ * Look up the shortened text stored for a disruption reason.
+ *
+ * @param originalReason - Key the entry was stored under
+ * @returns The cached text, or null when nothing is stored or the entry is
+ *          older than CACHE_DURATION (an expired entry is evicted on the way out)
+ */
+export function getCachedShortened(originalReason: string): string | null {
+	const hit = cache.get(originalReason)
+	if (!hit) {
+		return null
+	}
+
+	const ageMs = Date.now() - hit.timestamp
+	const expired = ageMs > CACHE_DURATION
+	if (!expired) {
+		return hit.shortened
+	}
+
+	// Stale: remove it so the map does not keep dead entries around
+	cache.delete(originalReason)
+	return null
+}
+
+/**
+ * Store a shortened description, stamping it with the current time.
+ * An existing entry under the same key is overwritten.
+ *
+ * @param originalReason - Key to store the entry under
+ * @param shortened - Text to return on later lookups
+ */
+export function setCachedShortened(originalReason: string, shortened: string): void {
+	const storedAt = Date.now()
+	cache.set(originalReason, { shortened, timestamp: storedAt })
+}
+
+/**
+ * Sweep the whole cache and delete every entry older than CACHE_DURATION.
+ */
+export function clearExpiredCache(): void {
+	const sweepTime = Date.now()
+	// Deleting the entry currently being visited is safe during Map iteration
+	cache.forEach((entry, key) => {
+		const ageMs = sweepTime - entry.timestamp
+		if (ageMs > CACHE_DURATION) {
+			cache.delete(key)
+		}
+	})
+}
+
+// Clear expired entries every 10 minutes
+// NOTE(review): the timer starts as a side effect of importing this module and
+// its handle is discarded, so it cannot be cleared later - confirm that is
+// acceptable for tests and short-lived scripts that import the cache.
+setInterval(clearExpiredCache, 10 * 60 * 1000)
--- a/apps/backend/src/tfl/gemini.ts
+++ b/apps/backend/src/tfl/gemini.ts
@@ -0,0 +1,204 @@
+/**
+ * Gemini AI integration for shortening TfL disruption descriptions
+ */
+
+import { getCachedShortened, setCachedShortened } from "./cache"
+
+/** One disruption handed to shortenMultipleDisruptions. */
+interface DisruptionToShorten {
+	// Line name; keys the returned map and is stripped from the start of the reason
+	lineName: string
+	// Status text; only used when building the Gemini prompt
+	status: string
+	// Disruption text to be shortened
+	reason: string
+}
+
+/** Shape of each element expected in the JSON array parsed from Gemini's reply. */
+interface ShortenedResult {
+	// Line name echoed back by the model; matched against the requested line names
+	lineName: string
+	// Shortened message for that line
+	shortened: string
+}
+
+/**
+ * Remove a leading "<line name> Line:" / "<line name>:" prefix from a
+ * disruption description and trim the remainder.
+ *
+ * @param text - Description as received
+ * @param lineName - Line name to look for at the start of the text
+ * @returns The description without the prefix, trimmed
+ */
+function stripLineName(text: string, lineName: string): string {
+	// The line name is matched literally, so neutralise regex metacharacters
+	const safeName = lineName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
+
+	// Applied in this order: "Central Line:", "Central:", then "CENTRAL LINE:"
+	const withLineSuffix = new RegExp(`^${safeName}\\s*Line:\\s*`, "i")
+	const nameOnly = new RegExp(`^${safeName}:\\s*`, "i")
+	const shouted = new RegExp(`^${safeName.toUpperCase()}\\s*LINE:\\s*`)
+
+	return [withLineSuffix, nameOnly, shouted]
+		.reduce((remaining, prefix) => remaining.replace(prefix, ""), text)
+		.trim()
+}
+
+/** Minimal shape of the Gemini generateContent response fields read below. */
+interface GeminiGenerateContentResponse {
+	candidates?: { content?: { parts?: { text?: string }[] } }[]
+}
+
+/**
+ * Shorten multiple disruption reasons in a single Gemini API call.
+ *
+ * Each line's text comes from the first source that applies:
+ *   1. the cache,
+ *   2. the stripped text when that is already under 80 characters,
+ *   3. Gemini,
+ *   4. the stripped text as a fallback when the request fails, the reply
+ *      cannot be parsed, or the reply omits or mangles the line.
+ * Without GEMINI_API_KEY every line gets its stripped text and nothing is cached.
+ *
+ * Cache entries are keyed by the ORIGINAL (unstripped) reason, which is the
+ * key the lookup uses, so a Gemini result is found again on the next call.
+ *
+ * @param disruptions - Lines to shorten; lineName is the key of the result map
+ * @returns Map from line name to display text. Gemini failures are logged and
+ *          never propagate to the caller.
+ */
+export async function shortenMultipleDisruptions(
+	disruptions: DisruptionToShorten[]
+): Promise<Map<string, string>> {
+	const apiKey = process.env.GEMINI_API_KEY
+	const results = new Map<string, string>()
+
+	if (!apiKey) {
+		console.warn("GEMINI_API_KEY not set, returning stripped versions")
+		for (const disruption of disruptions) {
+			results.set(disruption.lineName, stripLineName(disruption.reason, disruption.lineName))
+		}
+		return results
+	}
+
+	// Lines that still need the model (carrying the stripped reason), plus the
+	// original reason of each so the result can be cached under the lookup key
+	const toShorten: DisruptionToShorten[] = []
+	const cacheKeyByLine = new Map<string, string>()
+
+	for (const disruption of disruptions) {
+		// Check cache first
+		const cached = getCachedShortened(disruption.reason)
+		if (cached !== null) {
+			results.set(disruption.lineName, cached)
+			continue
+		}
+
+		const stripped = stripLineName(disruption.reason, disruption.lineName)
+
+		// If already short after stripping, use that
+		if (stripped.length < 80) {
+			results.set(disruption.lineName, stripped)
+			setCachedShortened(disruption.reason, stripped)
+			continue
+		}
+
+		// Needs shortening
+		toShorten.push({ ...disruption, reason: stripped })
+		if (!cacheKeyByLine.has(disruption.lineName)) {
+			cacheKeyByLine.set(disruption.lineName, disruption.reason)
+		}
+	}
+
+	// If nothing needs shortening, return early
+	if (toShorten.length === 0) {
+		return results
+	}
+
+	const prompt = buildBatchShorteningPrompt(toShorten)
+
+	try {
+		const response = await fetch(
+			"https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash-lite:generateContent",
+			{
+				method: "POST",
+				headers: {
+					"Content-Type": "application/json",
+					// Send the key as a header so it does not end up in URLs that get logged
+					"x-goog-api-key": apiKey,
+				},
+				body: JSON.stringify({
+					contents: [
+						{
+							parts: [
+								{
+									text: prompt,
+								},
+							],
+						},
+					],
+					generationConfig: {
+						temperature: 0.3,
+						maxOutputTokens: 2000, // Higher limit to account for thinking tokens in Gemini 2.5 Flash
+						topP: 0.9,
+					},
+				}),
+			}
+		)
+
+		if (response.ok) {
+			const data = (await response.json()) as GeminiGenerateContentResponse
+			const responseText = data.candidates?.[0]?.content?.parts?.[0]?.text?.trim() ?? ""
+
+			try {
+				// The model may wrap its JSON in a markdown code fence, with or without a "json" tag
+				const fenced = responseText.match(/```(?:json)?\s*([\s\S]*?)\s*```/)
+				const parsed: unknown = JSON.parse(fenced?.[1] ?? responseText)
+				if (!Array.isArray(parsed)) {
+					throw new Error("expected a JSON array")
+				}
+
+				for (const item of parsed as unknown[]) {
+					if (typeof item !== "object" || item === null) continue
+					const candidate = item as { lineName?: unknown; shortened?: unknown }
+					if (typeof candidate.lineName !== "string" || typeof candidate.shortened !== "string") {
+						continue
+					}
+
+					// Skip lines we never asked about and empty answers; the
+					// fallback below fills in anything left unanswered
+					const cacheKey = cacheKeyByLine.get(candidate.lineName)
+					const text = candidate.shortened.trim()
+					if (cacheKey === undefined || text === "") continue
+
+					results.set(candidate.lineName, text)
+					setCachedShortened(cacheKey, text)
+				}
+			} catch (parseError) {
+				console.error("Failed to parse Gemini JSON response:", parseError)
+				console.error("Response was:", responseText)
+			}
+		} else {
+			console.error(`Gemini API error: ${response.status}`)
+		}
+	} catch (error) {
+		console.error("Failed to shorten disruptions:", error)
+	}
+
+	// Fallback to stripped versions for every line Gemini did not answer
+	for (const disruption of toShorten) {
+		if (!results.has(disruption.lineName)) {
+			results.set(disruption.lineName, disruption.reason)
+		}
+	}
+
+	return results
+}
+
+/**
+ * Compose the single prompt that asks Gemini to shorten every listed
+ * disruption and to answer with a JSON array of {lineName, shortened} objects.
+ *
+ * @param disruptions - Lines to include, numbered from 1 in the given order
+ * @returns The full prompt text
+ */
+function buildBatchShorteningPrompt(disruptions: DisruptionToShorten[]): string {
+	const numberedEntries: string[] = []
+	disruptions.forEach((item, index) => {
+		const position = index + 1
+		numberedEntries.push(
+			`${position}. Line: ${item.lineName}\n   Status: ${item.status}\n   Message: "${item.reason}"`
+		)
+	})
+	const numberedList = numberedEntries.join('\n\n')
+
+	return `Shorten these London transport disruption messages for a dashboard display. Return your response as a JSON array.
+
+Disruptions to shorten:
+${numberedList}
+
+Requirements:
+- Keep each shortened message under 80 characters
+- Be concise but keep essential information (reason, locations, alternatives, time info)
+- DO NOT include line names in the shortened text (they're displayed separately)
+- Use natural, clear language
+- NO emojis
+
+Return ONLY a JSON array in this exact format:
+[
+  {"lineName": "Piccadilly", "shortened": "Suspended Rayners Lane-Uxbridge until Fri due to Storm Benjamin. Use Metropolitan line."},
+  {"lineName": "Central", "shortened": "Minor delays due to train cancellations"},
+  ...
+]
+
+Good examples of shortened messages:
+- "Suspended Rayners Lane-Uxbridge until Fri due to Storm Benjamin. Use Metropolitan line."
+- "Minor delays due to train cancellations"
+- "Minor delays due to earlier incidents at Gospel Oak & Highbury"
+- "Severe delays - signal failure at King's Cross. Use buses/Elizabeth line."
+- "No service Earls Court-Wimbledon until Sun 27 Oct (engineering)"
+
+Generate JSON array:`
+}
+
+
--- a/apps/backend/src/tfl/index.ts
+++ b/apps/backend/src/tfl/index.ts
@@ -0,0 +1,195 @@
+import { Hono } from "hono"
+import { shortenMultipleDisruptions } from "./gemini"
+
+// Hono app instance that the TfL routes below are registered on; exported as the module default
+const tfl = new Hono()
+
+/**
+ * One entry of a line's lineStatuses array, as this file types the TfL
+ * line-status response. The /disruptions handler reads statusSeverity,
+ * statusSeverityDescription, reason and validityPeriods; the remaining fields
+ * are declared but not read in this file.
+ * NOTE(review): field list presumably mirrors the TfL payload - verify against
+ * the TfL API schema before relying on the unused fields.
+ */
+interface TflLineStatus {
+	$type: string
+	id: number
+	lineId?: string
+	// Numeric severity; the /disruptions handler treats 10 as Good Service
+	statusSeverity: number
+	// Text for the severity; copied into DisruptionSummary.status
+	statusSeverityDescription: string
+	// Free-text explanation; this is the text that gets shortened
+	reason?: string
+	created: string
+	// The entry flagged isNow supplies validFrom / validTo in the summary
+	validityPeriods: {
+		$type: string
+		fromDate: string
+		toDate: string
+		isNow: boolean
+	}[]
+	disruption?: {
+		$type: string
+		category: string
+		categoryDescription: string
+		description: string
+		affectedRoutes: unknown[]
+		affectedStops: unknown[]
+		closureText: string
+	}
+}
+
+/**
+ * One element of the array returned by the TfL line-status request, as typed
+ * by this file. The /disruptions handler reads id, name, modeName and
+ * lineStatuses; the other fields are declared but not read in this file.
+ */
+interface TflLine {
+	$type: string
+	// Copied into DisruptionSummary.lineId
+	id: string
+	// Display name; used for goodService entries and DisruptionSummary.lineName
+	name: string
+	// Copied into DisruptionSummary.mode
+	modeName: string
+	disruptions: unknown[]
+	created: string
+	modified: string
+	// Statuses reported for the line; the handler picks one of them to summarise
+	lineStatuses: TflLineStatus[]
+	routeSections: unknown[]
+	serviceTypes: {
+		$type: string
+		name: string
+		uri: string
+	}[]
+	crowding: {
+		$type: string
+	}
+}
+
+/** One disrupted line in the /disruptions response. */
+interface DisruptionSummary {
+	// TflLine.id
+	lineId: string
+	// TflLine.name
+	lineName: string
+	// TflLine.modeName
+	mode: string
+	// TflLineStatus.statusSeverityDescription
+	status: string
+	// TflLineStatus.statusSeverity; the response is sorted ascending on this
+	statusSeverity: number
+	// TfL reason, replaced by the shortened text when one is available
+	reason?: string
+	// fromDate / toDate of the validity period flagged isNow, when there is one
+	validFrom?: string
+	validTo?: string
+}
+
+/** JSON body returned by GET /disruptions. */
+interface DisruptionsResponse {
+	// ISO-8601 time at which this response was assembled
+	lastUpdated: string
+	// Disrupted lines, sorted by ascending statusSeverity
+	disruptions: DisruptionSummary[]
+	// Names of lines with severity 10, sorted alphabetically
+	goodService: string[]
+	// Number of lines in the TfL response
+	totalLines: number
+	// Length of the disruptions array
+	disruptedLines: number
+}
+
+// Get current disruptions across all London transport modes.
+//
+// Fetches the TfL status for tube, overground, DLR, Elizabeth line and tram,
+// summarises each line by its most severe status, shortens the reasons through
+// shortenMultipleDisruptions and returns a DisruptionsResponse.
+// When TfL answers with a non-2xx status, that status is passed through with a
+// JSON error body; any thrown error yields a 500.
+tfl.get("/disruptions", async (c) => {
+	try {
+		// Fetch status for all major transport modes
+		const modes = ["tube", "overground", "dlr", "elizabeth-line", "tram"]
+		const url = `https://api.tfl.gov.uk/Line/Mode/${modes.join(",")}/Status`
+
+		const response = await fetch(url)
+
+		if (!response.ok) {
+			return new Response(
+				JSON.stringify({
+					error: "Failed to fetch TfL data",
+					status: response.status,
+				}),
+				{
+					status: response.status,
+					headers: { "Content-Type": "application/json" },
+				},
+			)
+		}
+
+		// Fail with a clear message instead of a TypeError if TfL sends something else
+		const payload: unknown = await response.json()
+		if (!Array.isArray(payload)) {
+			throw new Error("Unexpected TfL response: expected an array of lines")
+		}
+		const data = payload as TflLine[]
+
+		const disruptions: DisruptionSummary[] = []
+		const goodService: string[] = []
+
+		for (const line of data) {
+			// Get the most severe status for this line: a line can carry several
+			// statuses, so pick the lowest statusSeverity rather than trusting
+			// the array order (ties keep the first one)
+			const statuses = line.lineStatuses ?? []
+			let status = statuses[0]
+			for (const candidate of statuses) {
+				if (status !== undefined && candidate.statusSeverity < status.statusSeverity) {
+					status = candidate
+				}
+			}
+
+			if (!status) continue
+
+			// statusSeverity: 10 = Good Service, anything else is reported as a disruption
+			if (status.statusSeverity === 10) {
+				goodService.push(line.name)
+			} else {
+				// validityPeriods is tolerated as missing; the dates are optional in the summary
+				const validPeriod = status.validityPeriods?.find((p) => p.isNow)
+
+				disruptions.push({
+					lineId: line.id,
+					lineName: line.name,
+					mode: line.modeName,
+					status: status.statusSeverityDescription,
+					statusSeverity: status.statusSeverity,
+					reason: status.reason,
+					validFrom: validPeriod?.fromDate,
+					validTo: validPeriod?.toDate,
+				})
+			}
+		}
+
+		// Sort disruptions by severity (lower number = more severe)
+		disruptions.sort((a, b) => a.statusSeverity - b.statusSeverity)
+
+		// Shorten all disruption reasons in a single Gemini API call;
+		// disruptions without a reason are left out of the request
+		const disruptionsToShorten = disruptions.flatMap((d) =>
+			d.reason ? [{ lineName: d.lineName, status: d.status, reason: d.reason }] : []
+		)
+
+		if (disruptionsToShorten.length > 0) {
+			const shortenedMap = await shortenMultipleDisruptions(disruptionsToShorten)
+
+			// Apply shortened reasons back to disruptions; an empty or missing
+			// result leaves the original TfL reason in place
+			for (const disruption of disruptions) {
+				const shortened = shortenedMap.get(disruption.lineName)
+				if (shortened) {
+					disruption.reason = shortened
+				}
+			}
+		}
+
+		const summary: DisruptionsResponse = {
+			lastUpdated: new Date().toISOString(),
+			disruptions,
+			goodService: goodService.sort(),
+			totalLines: data.length,
+			disruptedLines: disruptions.length,
+		}
+
+		return c.json(summary)
+	} catch (error) {
+		return c.json(
+			{ error: "Internal server error", message: String(error) },
+			500
+		)
+	}
+})
+
+// Get status for specific line(s).
+//
+// :lineIds is a comma-separated list of TfL line ids. The ids are validated
+// before being placed in the upstream URL, because the raw path parameter is
+// caller-controlled and characters such as "/", "..", "?" or "#" would change
+// which TfL endpoint is requested. Invalid input yields a 400; a non-2xx TfL
+// status is passed through with a JSON error body; thrown errors yield a 500.
+// On success the TfL JSON is returned unchanged.
+tfl.get("/line/:lineIds", async (c) => {
+	try {
+		const lineIds = c.req.param("lineIds")
+
+		// Accept only letters, digits, "-" and "_" per id; empty segments
+		// (e.g. a trailing comma) are dropped
+		const validLineId = /^[A-Za-z0-9_-]+$/
+		const ids = lineIds
+			.split(",")
+			.map((id) => id.trim())
+			.filter((id) => id !== "")
+
+		if (ids.length === 0 || !ids.every((id) => validLineId.test(id))) {
+			return c.json({ error: "Invalid line id" }, 400)
+		}
+
+		const url = `https://api.tfl.gov.uk/Line/${ids.join(",")}/Status`
+
+		const response = await fetch(url)
+
+		if (!response.ok) {
+			return new Response(
+				JSON.stringify({
+					error: "Failed to fetch TfL line data",
+					status: response.status,
+				}),
+				{
+					status: response.status,
+					headers: { "Content-Type": "application/json" },
+				},
+			)
+		}
+
+		const data = await response.json()
+		return c.json(data)
+	} catch (error) {
+		return c.json(
+			{ error: "Internal server error", message: String(error) },
+			500
+		)
+	}
+})
+
+// Default export: the Hono app with the /disruptions and /line/:lineIds routes registered
+export default tfl