diff --git a/.github/actions/setup-integration-test-env/action.yml b/.github/actions/setup-integration-test-env/action.yml index 87c59d2f..9c95f8bb 100644 --- a/.github/actions/setup-integration-test-env/action.yml +++ b/.github/actions/setup-integration-test-env/action.yml @@ -80,7 +80,7 @@ runs: env: TRUSTED_SERVER__PUBLISHER__ORIGIN_URL: http://127.0.0.1:${{ inputs.origin-port }} TRUSTED_SERVER__PUBLISHER__PROXY_SECRET: integration-test-proxy-secret - TRUSTED_SERVER__EDGE_COOKIE__SECRET_KEY: integration-test-secret-key + TRUSTED_SERVER__EC__PASSPHRASE: integration-test-ec-secret-padded-32 TRUSTED_SERVER__PROXY__CERTIFICATE_CHECK: "false" run: cargo build --package trusted-server-adapter-fastly --release --target wasm32-wasip1 diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 186569da..2da273aa 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -51,6 +51,14 @@ jobs: - name: Run tests run: cargo test --workspace + - name: Verify Fastly WASM release build + env: + TRUSTED_SERVER__PUBLISHER__ORIGIN_URL: http://127.0.0.1:8080 + TRUSTED_SERVER__PUBLISHER__PROXY_SECRET: integration-test-proxy-secret + TRUSTED_SERVER__EC__PASSPHRASE: integration-test-ec-secret-padded-32 + TRUSTED_SERVER__PROXY__CERTIFICATE_CHECK: "false" + run: cargo build --package trusted-server-adapter-fastly --release --target wasm32-wasip1 + test-typescript: name: vitest runs-on: ubuntu-latest diff --git a/CLAUDE.md b/CLAUDE.md index ec76ee46..b5e2b6f0 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -366,7 +366,7 @@ both runtime behavior and build/tooling changes. | `crates/trusted-server-core/src/tsjs.rs` | Script tag generation with module IDs | | `crates/trusted-server-core/src/html_processor.rs` | Injects `` - - The bundle guards anchor clicks by restoring the originally rewritten first‑party link at click time. - - Served through the unified endpoint described below. 
+ - Injected at the top of ``: `` + - The bundle guards anchor clicks by restoring the originally rewritten first‑party link at click time. + - Served through the unified endpoint described below. Helpers: @@ -41,18 +41,19 @@ Helpers: JS bundles (served by publisher module): - Dynamic endpoint: `/static/tsjs=tsjs-unified.min.js?v=` - - At build time, each integration is compiled as a separate IIFE (`tsjs-core.js`, `tsjs-prebid.js`, `tsjs-creative.js`, etc.) - - At runtime, the server concatenates `tsjs-core.js` + enabled integration modules based on `IntegrationRegistry` config - - The URL filename is fixed for backward compatibility; the `?v=` hash changes when modules change + - At build time, each integration is compiled as a separate IIFE (`tsjs-core.js`, `tsjs-prebid.js`, `tsjs-creative.js`, etc.) + - At runtime, the server concatenates `tsjs-core.js` + enabled integration modules based on `IntegrationRegistry` config + - The URL filename is fixed for backward compatibility; the `?v=` hash changes when modules change Behavior is covered by an extensive test suite in `crates/trusted-server-core/src/creative.rs`. ## Edge Cookie (EC) Identifier Propagation -- `edge_cookie.rs` generates an edge cookie identifier per user request and exposes helpers: - - `generate_ec_id` — creates a fresh HMAC-based ID using the client IP address and appends a short random suffix (format: `64hex.6alnum`). - - `get_ec_id` — extracts an existing ID from the `x-ts-ec` header or `ts-ec` cookie. - - `get_or_generate_ec_id` — reuses the existing ID when present, otherwise creates one. -- `publisher.rs::handle_publisher_request` stamps proxied origin responses with `x-ts-ec`, and (when absent) issues the `ts-ec` cookie so the browser keeps the identifier on subsequent requests. +- The `ec/` module owns the EC identity subsystem: + - `ec/generation.rs` — creates HMAC-based IDs using the client IP and publisher passphrase (format: `64hex.6alnum`). 
+ - `ec/mod.rs` — `EcContext` struct with two-phase lifecycle (`read_from_request` + `generate_if_needed`), `get_ec_id` helper. + - `ec/consent.rs` — EC-specific consent gating wrapper. + - `ec/cookies.rs` — `Set-Cookie` header creation and expiration helpers. +- `publisher.rs::handle_publisher_request` issues the `ts-ec` cookie when absent so the browser keeps the identifier on subsequent requests. - `proxy.rs::handle_first_party_proxy` replays the identifier to third-party creative origins by appending `ts-ec=` to the reconstructed target URL, follows redirects (301/302/303/307/308) up to four hops, and keeps downstream fetches linked to the same user scope. - `proxy.rs::handle_first_party_click` adds `ts-ec=` to outbound click redirect URLs so analytics endpoints can associate clicks with impressions without third-party cookies. diff --git a/crates/trusted-server-core/src/auction/README.md b/crates/trusted-server-core/src/auction/README.md index 7685e828..e92c4dbb 100644 --- a/crates/trusted-server-core/src/auction/README.md +++ b/crates/trusted-server-core/src/auction/README.md @@ -257,18 +257,18 @@ The trusted-server handles several types of routes defined in `crates/trusted-se | Route | Method | Handler | Purpose | Line | |---------------------------|--------|--------------------------------|--------------------------------------------------|------| -| `/auction` | POST | `handle_auction()` | Main auction endpoint (Prebid.js/tsjs format) | 162 | -| `/first-party/proxy` | GET | `handle_first_party_proxy()` | Proxy creatives through first-party domain | 167 | -| `/first-party/click` | GET | `handle_first_party_click()` | Track clicks on ads | 170 | -| `/first-party/sign` | GET/POST | `handle_first_party_proxy_sign()` | Generate signed URLs for creatives | 173 | -| `/first-party/proxy-rebuild` | POST | `handle_first_party_proxy_rebuild()` | Rebuild creative HTML with new settings | 176 | -| `/static/tsjs=*` | GET | `handle_tsjs_dynamic()` | Serve tsjs library 
(Prebid.js alternative) | 145 | -| `/.well-known/trusted-server.json` | GET | `handle_trusted_server_discovery()` | Public key distribution for request signing | 149 | -| `/verify-signature` | POST | `handle_verify_signature()` | Verify signed requests | 154 | -| `/admin/keys/rotate` | POST | `handle_rotate_key()` | Rotate signing keys (admin only) | 158 | -| `/admin/keys/deactivate` | POST | `handle_deactivate_key()` | Deactivate signing keys (admin only) | 159 | -| `/integrations/*` | * | Integration Registry | Provider-specific endpoints (Prebid, etc.) | 179 | -| `*` (fallback) | * | `handle_publisher_request()` | Proxy to publisher origin | 195 | +| `/auction` | POST | `handle_auction()` | Main auction endpoint (Prebid.js/tsjs format) | 84 | +| `/first-party/proxy` | GET | `handle_first_party_proxy()` | Proxy creatives through first-party domain | 84 | +| `/first-party/click` | GET | `handle_first_party_click()` | Track clicks on ads | 85 | +| `/first-party/sign` | GET/POST | `handle_first_party_proxy_sign()` | Generate signed URLs for creatives | 86 | +| `/first-party/proxy-rebuild` | POST | `handle_first_party_proxy_rebuild()` | Rebuild creative HTML with new settings | 89 | +| `/static/tsjs=*` | GET | `handle_tsjs_dynamic()` | Serve tsjs library (Prebid.js alternative) | 66 | +| `/.well-known/ts.jwks.json` | GET | `handle_jwks_endpoint()` | Public key distribution for request signing | 71 | +| `/verify-signature` | POST | `handle_verify_signature()` | Verify signed requests | 74 | +| `/_ts/admin/keys/rotate` | POST | `handle_rotate_key()` | Rotate signing keys (admin only) | 77 | +| `/_ts/admin/keys/deactivate` | POST | `handle_deactivate_key()` | Deactivate signing keys (admin only) | 78 | +| `/integrations/*` | * | Integration Registry | Provider-specific endpoints (Prebid, etc.) 
| 92 | +| `*` (fallback) | * | `handle_publisher_request()` | Proxy to publisher origin | 108 | ### How Routing Works @@ -277,50 +277,22 @@ The Fastly Compute entrypoint uses pattern matching on `(Method, path)` tuples: ```rust let result = match (method, path.as_str()) { - (Method::GET, path) if path.starts_with("/static/tsjs=") => { - handle_tsjs_dynamic(&req, integration_registry) - } - (Method::GET, "/.well-known/trusted-server.json") => { - handle_trusted_server_discovery(settings, runtime_services, req) - } - (Method::POST, "/verify-signature") => handle_verify_signature(settings, req), - (Method::POST, "/admin/keys/rotate") => handle_rotate_key(settings, req), - (Method::POST, "/admin/keys/deactivate") => handle_deactivate_key(settings, req), + // Auction endpoint (Method::POST, "/auction") => { - match runtime_services_for_consent_route(settings, runtime_services) { - Ok(auction_services) => { - handle_auction(settings, orchestrator, &auction_services, req).await - } - Err(e) => Err(e), - } - } - (Method::GET, "/first-party/proxy") => { - handle_first_party_proxy(settings, runtime_services, req).await - } - (Method::GET, "/first-party/click") => { - handle_first_party_click(settings, runtime_services, req).await - } - (Method::GET, "/first-party/sign") | (Method::POST, "/first-party/sign") => { - handle_first_party_proxy_sign(settings, runtime_services, req).await - } - (Method::POST, "/first-party/proxy-rebuild") => { - handle_first_party_proxy_rebuild(settings, runtime_services, req).await - } - (m, path) if integration_registry.has_route(&m, path) => integration_registry - .handle_proxy(&m, path, settings, runtime_services, req) - .await - .unwrap_or_else(|| { - Err(Report::new(TrustedServerError::BadRequest { - message: format!("Unknown integration route: {path}"), - })) - }), - _ => match runtime_services_for_consent_route(settings, runtime_services) { - Ok(publisher_services) => { - handle_publisher_request(settings, integration_registry, 
&publisher_services, req) - } - Err(e) => Err(e), + handle_auction(&settings, &orchestrator, &runtime_services, req).await }, -}; + + // First-party endpoints + (Method::GET, "/first-party/proxy") => handle_first_party_proxy(&settings, req).await, + + // Integration registry (dynamic routes) + (m, path) if integration_registry.has_route(&m, path) => { + integration_registry.handle_proxy(&m, path, &settings, req).await + }, + + // Fallback to publisher origin + _ => handle_publisher_request(&settings, &integration_registry, &runtime_services, req), +} ``` #### 2. Integration Registry (Dynamic Routes) @@ -346,7 +318,7 @@ The integration registry checks if a route matches any registered integration ro #### 3. Route Priority Routes are matched in this order: 1. **Exact top-level routes** (`/auction`, `/first-party/proxy`, etc.) -2. **Admin routes** (`/admin/*`) +2. **Admin routes** (`/_ts/admin/*`) 3. **Integration routes** (`/integrations/*`) 4. **Fallback to publisher origin** (all other paths) diff --git a/crates/trusted-server-core/src/auction/endpoints.rs b/crates/trusted-server-core/src/auction/endpoints.rs index 0430f08b..64832eef 100644 --- a/crates/trusted-server-core/src/auction/endpoints.rs +++ b/crates/trusted-server-core/src/auction/endpoints.rs @@ -2,13 +2,20 @@ use error_stack::{Report, ResultExt}; use fastly::{Request, Response}; +use serde_json::Value as JsonValue; use crate::auction::formats::AdRequest; -use crate::compat; -use crate::consent; -use crate::cookies::handle_request_cookies; -use crate::edge_cookie::get_or_generate_ec_id_from_http_request; +use crate::consent::gate_eids_by_consent; +use crate::constants::COOKIE_TS_EIDS; +use crate::ec::eids::{resolve_partner_ids, to_eids}; +use crate::ec::kv::KvIdentityGraph; +use crate::ec::kv_types::MAX_UID_LENGTH; +use crate::ec::log_id; +use crate::ec::prebid_eids::parse_prebid_eids_cookie; +use crate::ec::registry::PartnerRegistry; +use crate::ec::EcContext; use crate::error::TrustedServerError; +use 
crate::openrtb::{Eid, Uid}; use crate::platform::RuntimeServices; use crate::settings::Settings; @@ -16,6 +23,10 @@ use super::formats::{convert_to_openrtb_response, convert_tsjs_to_auction_reques use super::types::AuctionContext; use super::AuctionOrchestrator; +const MAX_CLIENT_EID_SOURCES: usize = 64; +const MAX_CLIENT_UIDS_PER_SOURCE: usize = 32; +const MAX_CLIENT_EID_SOURCE_BYTES: usize = 255; + /// Handle auction request from /auction endpoint. /// /// This is the main entry point for running header bidding auctions. @@ -32,6 +43,9 @@ use super::AuctionOrchestrator; pub async fn handle_auction( settings: &Settings, orchestrator: &AuctionOrchestrator, + kv: Option<&KvIdentityGraph>, + registry: Option<&PartnerRegistry>, + ec_context: &EcContext, services: &RuntimeServices, mut req: Request, ) -> Result<Response, Report<TrustedServerError>> { @@ -47,53 +61,54 @@ pub async fn handle_auction( body.ad_units.len() ); - let http_req = compat::from_fastly_headers_ref(&req); + // Story 5 middleware contract: auction is a read-only EC route. + // It must not generate EC IDs; it only consumes pre-routed context. + // Only forward the EC ID to auction partners when consent allows it. + // A returning user may still have a ts-ec cookie but have since + // withdrawn consent — forwarding that revoked ID to bidders would + // defeat the consent gating. + let ec_id = if ec_context.ec_allowed() { + ec_context.ec_value() + } else { + // Intentionally omit persistent identity when EC is disallowed. + // This keeps the no-consent / GPC path conservative rather than + // introducing a secondary session-scoped identifier surface here. + None + }; + let consent_context = ec_context.consent().clone(); - // Generate EC ID early so the consent pipeline can use it for - // KV Store fallback/write operations.
- let ec_id = get_or_generate_ec_id_from_http_request(settings, services, &http_req) - .change_context(TrustedServerError::Auction { - message: "Failed to generate EC ID".to_string(), - })?; + // Parse client-provided EIDs from the current request body. When the + // current request does not include them, fall back to the persisted + // `ts-eids` cookie so later requests can still forward the browser's + // full OpenRTB-style EID structure. + let client_eids = resolve_client_auction_eids( + body.eids.as_ref(), + extract_cookie_value(&req, COOKIE_TS_EIDS).as_deref(), + ); - // Extract consent from request cookies, headers, and geo. - let cookie_jar = handle_request_cookies(&http_req)?; - let geo = services - .geo() - .lookup(services.client_info.client_ip) - .unwrap_or_else(|e| { - log::warn!("geo lookup failed: {e}"); - None - }); - let consent_context = consent::build_consent_context(&consent::ConsentPipelineInput { - jar: cookie_jar.as_ref(), - req: &http_req, - config: &settings.consent, - geo: geo.as_ref(), - ec_id: Some(ec_id.as_str()), - kv_store: settings - .consent - .consent_store - .as_deref() - .map(|_| services.kv_store()), - }); + // Resolve partner EIDs from the KV identity graph when the user has + // a valid EC and both KV and partner stores are available. + let eids = resolve_auction_eids(kv, registry, ec_context); // Convert tsjs request format to auction request - let auction_request = convert_tsjs_to_auction_request( - &body, - settings, - services, - &req, - consent_context, - &ec_id, - geo, - )?; + let mut auction_request = + convert_tsjs_to_auction_request(&body, settings, &req, consent_context, ec_id)?; + + // Merge current-request client EIDs with KV-resolved EIDs, then apply + // consent gating before attaching them to the auction request. + // `gate_eids_by_consent` checks TCF Purpose 1 + 4. 
+ let merged_eids = merge_auction_eids(client_eids, eids); + let had_eids = merged_eids.as_ref().is_some_and(|v| !v.is_empty()); + auction_request.user.eids = + gate_eids_by_consent(merged_eids, auction_request.user.consent.as_ref()); + if had_eids && auction_request.user.eids.is_none() { + log::warn!("Auction EIDs stripped by TCF consent gating"); + } // Create auction context let context = AuctionContext { settings, request: &req, - client_info: &services.client_info, timeout_ms: settings.auction.timeout_ms, provider_responses: None, services, @@ -101,7 +116,7 @@ pub async fn handle_auction( // Run the auction let result = orchestrator - .run_auction(&auction_request, &context, services) + .run_auction(&auction_request, &context) .await .change_context(TrustedServerError::Auction { message: "Auction orchestration failed".to_string(), @@ -115,5 +130,583 @@ pub async fn handle_auction( ); // Convert to OpenRTB response format with inline creative HTML - convert_to_openrtb_response(&result, settings, &auction_request) + convert_to_openrtb_response(&result, settings, &auction_request, ec_context.ec_allowed()) +} + +/// Resolves partner EIDs from the KV identity graph for bidstream decoration. +/// +/// Returns `None` when any prerequisite is missing (no KV store, no partner +/// store, no EC, consent denied). On KV or partner-resolution errors, logs a +/// warning and returns empty EIDs so the auction can proceed in degraded mode. 
+fn resolve_auction_eids( + kv: Option<&KvIdentityGraph>, + registry: Option<&PartnerRegistry>, + ec_context: &EcContext, +) -> Option<Vec<Eid>> { + let kv = kv?; + let registry = registry?; + + if !ec_context.ec_allowed() { + return None; + } + + let ec_id = ec_context.ec_value()?; + + let entry = match kv.get(ec_id) { + Ok(Some((entry, _generation))) => entry, + Ok(None) => return Some(Vec::new()), + Err(err) => { + log::warn!( + "Auction KV read failed for EC ID '{}': {err:?}", + log_id(ec_id) + ); + return Some(Vec::new()); + } + }; + + let resolved = resolve_partner_ids(registry, &entry); + Some(to_eids(&resolved)) +} + +fn extract_cookie_value(req: &Request, name: &str) -> Option<String> { + let cookie_header = req.get_header_str("cookie")?; + for pair in cookie_header.split(';') { + let pair = pair.trim(); + if let Some((key, value)) = pair.split_once('=') { + if key.trim() == name { + return Some(value.trim().to_owned()); + } + } + } + None +} + +fn resolve_client_auction_eids( + raw: Option<&JsonValue>, + cookie_value: Option<&str>, +) -> Option<Vec<Eid>> { + parse_client_auction_eids(raw).or_else(|| parse_cookie_auction_eids(cookie_value)) +} + +fn parse_cookie_auction_eids(cookie_value: Option<&str>) -> Option<Vec<Eid>> { + let cookie_value = cookie_value?; + match parse_prebid_eids_cookie(cookie_value) { + Ok(eids) if eids.is_empty() => None, + Ok(eids) => Some(eids), + Err(_) => { + log::trace!("Auction EIDs: failed to parse ts-eids cookie; dropping"); + None + } + } +} + +fn parse_client_auction_eids(raw: Option<&JsonValue>) -> Option<Vec<Eid>> { + let Some(JsonValue::Array(entries)) = raw else { + return None; + }; + + let mut eids = Vec::new(); + + for entry in entries { + if eids.len() >= MAX_CLIENT_EID_SOURCES { + log::debug!( + "Auction EIDs: reached max client EID source count ({MAX_CLIENT_EID_SOURCES})" + ); + break; + } + let JsonValue::Object(entry) = entry else { + log::debug!("Auction EIDs: dropping malformed client EID entry"); + continue; + }; + + let Some(source) = entry +
.get("source") + .and_then(JsonValue::as_str) + .filter(|source| !source.trim().is_empty()) + .filter(|source| source.len() <= MAX_CLIENT_EID_SOURCE_BYTES) + .map(str::to_owned) + else { + continue; + }; + + let Some(JsonValue::Array(raw_uids)) = entry.get("uids") else { + continue; + }; + + let uids: Vec<_> = raw_uids + .iter() + .filter_map(parse_client_auction_uid) + .take(MAX_CLIENT_UIDS_PER_SOURCE) + .collect(); + if uids.is_empty() { + continue; + } + + eids.push(Eid { source, uids }); + } + + if eids.is_empty() { + None + } else { + Some(eids) + } +} + +fn parse_client_auction_uid(raw: &JsonValue) -> Option<Uid> { + let JsonValue::Object(uid) = raw else { + return None; + }; + + let id = uid + .get("id") + .and_then(JsonValue::as_str) + .filter(|id| !id.trim().is_empty()) + .filter(|id| id.len() <= MAX_UID_LENGTH)? + .to_owned(); + + let atype = uid + .get("atype") + .and_then(JsonValue::as_u64) + .and_then(|atype| u8::try_from(atype).ok()); + + let ext = match uid.get("ext") { + Some(JsonValue::Object(_)) => uid.get("ext").cloned(), + _ => None, + }; + + Some(Uid { id, atype, ext }) +} + +fn merge_auction_eids( + client_eids: Option<Vec<Eid>>, + resolved_eids: Option<Vec<Eid>>, +) -> Option<Vec<Eid>> { + let mut merged = Vec::new(); + + for eid in resolved_eids + .into_iter() + .flatten() + .chain(client_eids.into_iter().flatten()) + { + if eid.source.is_empty() { + continue; + } + + let source_index = match merged + .iter() + .position(|existing: &Eid| existing.source == eid.source) + { + Some(index) => index, + None => { + merged.push(Eid { + source: eid.source.clone(), + uids: Vec::new(), + }); + merged.len() - 1 + } + }; + + for uid in eid.uids { + if uid.id.trim().is_empty() || uid.id.len() > MAX_UID_LENGTH { + continue; + } + + if let Some(existing_uid) = merged[source_index] + .uids + .iter_mut() + .find(|existing| existing.id == uid.id) + { + if existing_uid.atype.is_none() { + existing_uid.atype = uid.atype; + } + if existing_uid.ext.is_none() { + existing_uid.ext = uid.ext; + }
+ } else { + merged[source_index].uids.push(uid); + } + } + } + + merged.retain(|eid| !eid.uids.is_empty()); + + if merged.is_empty() { + None + } else { + Some(merged) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::consent::jurisdiction::Jurisdiction; + use crate::consent::types::ConsentContext; + use crate::openrtb::Uid; + use base64::engine::general_purpose::STANDARD as BASE64; + use base64::Engine as _; + use serde_json::json; + + fn make_ec_context(jurisdiction: Jurisdiction, ec_value: Option<&str>) -> EcContext { + EcContext::new_for_test( + ec_value.map(str::to_owned), + ConsentContext { + jurisdiction, + ..ConsentContext::default() + }, + ) + } + + #[test] + fn resolve_auction_eids_returns_none_without_kv() { + let registry = PartnerRegistry::empty(); + let ec_id = format!("{}.ABC123", "a".repeat(64)); + let ec_context = make_ec_context(Jurisdiction::NonRegulated, Some(&ec_id)); + + let result = resolve_auction_eids(None, Some(&registry), &ec_context); + assert!(result.is_none(), "should return None when KV is missing"); + } + + #[test] + fn resolve_auction_eids_returns_none_without_registry() { + let kv = KvIdentityGraph::new("test_store"); + let ec_id = format!("{}.ABC123", "a".repeat(64)); + let ec_context = make_ec_context(Jurisdiction::NonRegulated, Some(&ec_id)); + + let result = resolve_auction_eids(Some(&kv), None, &ec_context); + assert!( + result.is_none(), + "should return None when registry is missing" + ); + } + + #[test] + fn resolve_auction_eids_returns_none_when_consent_denied() { + let kv = KvIdentityGraph::new("test_store"); + let registry = PartnerRegistry::empty(); + let ec_id = format!("{}.ABC123", "a".repeat(64)); + let ec_context = make_ec_context(Jurisdiction::Unknown, Some(&ec_id)); + + let result = resolve_auction_eids(Some(&kv), Some(&registry), &ec_context); + assert!( + result.is_none(), + "should return None when consent is denied" + ); + } + + #[test] + fn resolve_auction_eids_returns_none_when_no_ec() { + let kv =
KvIdentityGraph::new("test_store"); + let registry = PartnerRegistry::empty(); + let ec_context = make_ec_context(Jurisdiction::NonRegulated, None); + + let result = resolve_auction_eids(Some(&kv), Some(&registry), &ec_context); + assert!( + result.is_none(), + "should return None when no EC value is present" + ); + } + + #[test] + fn resolve_auction_eids_returns_empty_on_kv_miss() { + let kv = KvIdentityGraph::new("nonexistent_store"); + let registry = PartnerRegistry::empty(); + let ec_id = format!("{}.ABC123", "a".repeat(64)); + let ec_context = make_ec_context(Jurisdiction::NonRegulated, Some(&ec_id)); + + // KV store doesn't exist, so the get() call will error — should return + // empty Vec (degraded mode), not None. + let result = resolve_auction_eids(Some(&kv), Some(&registry), &ec_context); + let eids = result.expect("should return Some on KV error (degraded mode)"); + assert!( + eids.is_empty(), + "should return empty vec on KV error (degraded mode)" + ); + } + + #[test] + fn resolve_client_auction_eids_falls_back_to_ts_eids_cookie() { + let cookie_payload = json!([ + { + "source": "sharedid.org", + "uids": [ + { "id": "shared_cookie", "atype": 3 }, + { "id": "shared_cookie_2", "ext": { "provider": "example" } } + ] + } + ]); + let encoded = BASE64 + .encode(serde_json::to_vec(&cookie_payload).expect("should serialize cookie payload")); + + let resolved = resolve_client_auction_eids(None, Some(&encoded)) + .expect("should fall back to structured ts-eids cookie"); + + assert_eq!(resolved.len(), 1, "should preserve cookie source entry"); + assert_eq!(resolved[0].source, "sharedid.org"); + assert_eq!( + resolved[0].uids.len(), + 2, + "should preserve multiple cookie UIDs" + ); + assert_eq!(resolved[0].uids[0].id, "shared_cookie"); + assert_eq!( + resolved[0].uids[1].ext, + Some(json!({ "provider": "example" })), + "should preserve UID ext from cookie fallback" + ); + } + + #[test] + fn resolve_client_auction_eids_prefers_request_body_over_cookie() { + let raw =
json!([ + { + "source": "id5-sync.com", + "uids": [{ "id": "body_uid", "atype": 1 }] + } + ]); + let cookie_payload = json!([ + { + "source": "sharedid.org", + "uids": [{ "id": "cookie_uid", "atype": 3 }] + } + ]); + let encoded = BASE64 + .encode(serde_json::to_vec(&cookie_payload).expect("should serialize cookie payload")); + + let resolved = resolve_client_auction_eids(Some(&raw), Some(&encoded)) + .expect("should prefer request body EIDs"); + + assert_eq!(resolved.len(), 1, "should use request body when present"); + assert_eq!(resolved[0].source, "id5-sync.com"); + assert_eq!(resolved[0].uids[0].id, "body_uid"); + } + + #[test] + fn parse_client_auction_eids_ignores_malformed_entries() { + let raw = json!([ + { + "source": "id5-sync.com", + "uids": [{ "id": "ID5_abc", "atype": 1 }] + }, + { + "source": "broken.example", + "uids": "not-an-array" + }, + { + "source": "sharedid.org", + "uids": [{ "id": "shared_123" }, { "id": "" }] + } + ]); + + let parsed = parse_client_auction_eids(Some(&raw)).expect("should parse valid EIDs"); + + assert_eq!(parsed.len(), 2, "should keep only valid EID entries"); + assert_eq!(parsed[0].source, "id5-sync.com"); + assert_eq!(parsed[0].uids.len(), 1, "should keep valid UID"); + assert_eq!(parsed[1].source, "sharedid.org"); + assert_eq!(parsed[1].uids.len(), 1, "should drop empty UID values"); + } + + #[test] + fn parse_client_auction_eids_caps_sources_and_uids() { + let entries: Vec<_> = (0..(MAX_CLIENT_EID_SOURCES + 5)) + .map(|source_index| { + let uids: Vec<_> = (0..(MAX_CLIENT_UIDS_PER_SOURCE + 5)) + .map(|uid_index| json!({ "id": format!("uid-{source_index}-{uid_index}") })) + .collect(); + json!({ + "source": format!("source-{source_index}.example.com"), + "uids": uids, + }) + }) + .collect(); + let raw = JsonValue::Array(entries); + + let parsed = parse_client_auction_eids(Some(&raw)).expect("should parse capped EIDs"); + + assert_eq!( + parsed.len(), + MAX_CLIENT_EID_SOURCES, + "should cap client EID sources" + ); + 
assert!( + parsed + .iter() + .all(|eid| eid.uids.len() == MAX_CLIENT_UIDS_PER_SOURCE), + "should cap UIDs per source" + ); + } + + #[test] + fn parse_client_auction_eids_drops_whitespace_and_oversized_uids() { + let raw = json!([ + { + "source": "id5-sync.com", + "uids": [ + { "id": " " }, + { "id": "x".repeat(MAX_UID_LENGTH + 1) }, + { "id": "valid" } + ] + } + ]); + + let parsed = parse_client_auction_eids(Some(&raw)).expect("should parse valid UID"); + + assert_eq!(parsed.len(), 1, "should retain source with valid UID"); + assert_eq!(parsed[0].uids.len(), 1, "should drop invalid UIDs"); + assert_eq!(parsed[0].uids[0].id, "valid", "should keep valid UID"); + } + + #[test] + fn parse_client_auction_eids_preserves_uid_ext_and_sanitizes_invalid_atype() { + let raw = json!([ + { + "source": "adserver.org", + "uids": [ + { + "id": "uid-with-ext", + "atype": 1, + "ext": { "provider": "liveintent.com", "rtiPartner": "TDID" } + }, + { + "id": "uid-bad-atype", + "atype": 999, + "ext": { "keep": true } + }, + { + "id": "uid-float-atype", + "atype": 1.5 + } + ] + } + ]); + + let parsed = parse_client_auction_eids(Some(&raw)).expect("should parse valid EIDs"); + + assert_eq!(parsed.len(), 1, "should keep valid source"); + assert_eq!(parsed[0].uids.len(), 3, "should keep valid UIDs"); + assert_eq!( + parsed[0].uids[0].atype, + Some(1), + "should preserve valid atype" + ); + assert_eq!( + parsed[0].uids[0].ext, + Some(json!({ "provider": "liveintent.com", "rtiPartner": "TDID" })), + "should preserve uid ext" + ); + assert_eq!( + parsed[0].uids[1].atype, None, + "should drop out-of-range atype without dropping uid" + ); + assert_eq!( + parsed[0].uids[1].ext, + Some(json!({ "keep": true })), + "should preserve ext when atype is invalid" + ); + assert_eq!( + parsed[0].uids[2].atype, None, + "should drop non-integer atype without dropping uid" + ); + } + + #[test] + fn merge_auction_eids_deduplicates_client_and_resolved_ids() { + let client_eids = Some(vec![Eid { + source: 
"id5-sync.com".to_string(), + uids: vec![Uid { + id: "ID5_abc".to_string(), + atype: Some(1), + ext: None, + }], + }]); + let resolved_eids = Some(vec![ + Eid { + source: "id5-sync.com".to_string(), + uids: vec![Uid { + id: "ID5_abc".to_string(), + atype: Some(1), + ext: None, + }], + }, + Eid { + source: "liveramp.com".to_string(), + uids: vec![Uid { + id: "LR_xyz".to_string(), + atype: Some(3), + ext: None, + }], + }, + ]); + + let merged = merge_auction_eids(client_eids, resolved_eids).expect("should merge EIDs"); + + assert_eq!(merged.len(), 2, "should retain distinct EID sources"); + assert_eq!(merged[0].source, "id5-sync.com"); + assert_eq!(merged[0].uids.len(), 1, "should deduplicate matching UIDs"); + assert_eq!(merged[1].source, "liveramp.com"); + assert_eq!(merged[1].uids[0].id, "LR_xyz"); + } + + #[test] + fn merge_auction_eids_preserves_multiple_uids_per_source() { + let client_eids = Some(vec![Eid { + source: "sharedid.org".to_string(), + uids: vec![Uid { + id: "shared_client".to_string(), + atype: None, + ext: None, + }], + }]); + let resolved_eids = Some(vec![Eid { + source: "sharedid.org".to_string(), + uids: vec![Uid { + id: "shared_server".to_string(), + atype: Some(3), + ext: None, + }], + }]); + + let merged = merge_auction_eids(client_eids, resolved_eids).expect("should merge EIDs"); + + assert_eq!(merged.len(), 1, "should merge same-source entries"); + assert_eq!(merged[0].uids.len(), 2, "should preserve distinct UIDs"); + assert_eq!(merged[0].uids[0].id, "shared_server"); + assert_eq!(merged[0].uids[1].id, "shared_client"); + } + + #[test] + fn merge_auction_eids_prefers_server_resolved_metadata_on_conflict() { + let client_eids = Some(vec![Eid { + source: "adserver.org".to_string(), + uids: vec![Uid { + id: "shared_uid".to_string(), + atype: Some(1), + ext: Some(json!({ "provider": "client" })), + }], + }]); + let resolved_eids = Some(vec![Eid { + source: "adserver.org".to_string(), + uids: vec![Uid { + id: "shared_uid".to_string(), + atype: 
Some(3), + ext: Some(json!({ "provider": "server" })), + }], + }]); + + let merged = merge_auction_eids(client_eids, resolved_eids).expect("should merge EIDs"); + + assert_eq!(merged.len(), 1, "should merge duplicate source"); + assert_eq!(merged[0].uids.len(), 1, "should deduplicate duplicate uid"); + assert_eq!( + merged[0].uids[0].atype, + Some(3), + "should prefer resolved atype" + ); + assert_eq!( + merged[0].uids[0].ext, + Some(json!({ "provider": "server" })), + "should prefer resolved ext" + ); + } } diff --git a/crates/trusted-server-core/src/auction/formats.rs b/crates/trusted-server-core/src/auction/formats.rs index 5237921a..08d6a6cd 100644 --- a/crates/trusted-server-core/src/auction/formats.rs +++ b/crates/trusted-server-core/src/auction/formats.rs @@ -14,12 +14,12 @@ use uuid::Uuid; use crate::auction::context::ContextValue; use crate::consent::ConsentContext; -use crate::constants::{HEADER_X_TS_EC, HEADER_X_TS_EC_FRESH}; +use crate::constants::{HEADER_X_TS_EC_CONSENT, HEADER_X_TS_EIDS, HEADER_X_TS_EIDS_TRUNCATED}; use crate::creative; -use crate::edge_cookie::generate_ec_id; +use crate::ec::eids::encode_eids_header; use crate::error::TrustedServerError; +use crate::geo::GeoInfo; use crate::openrtb::{to_openrtb_i32, OpenRtbBid, OpenRtbResponse, ResponseExt, SeatBid, ToExt}; -use crate::platform::{GeoInfo, RuntimeServices}; use crate::settings::Settings; use super::orchestrator::OrchestrationResult; @@ -34,6 +34,7 @@ use super::types::{ pub struct AdRequest { pub ad_units: Vec, pub config: Option, + pub eids: Option, } #[derive(Debug, Deserialize)] @@ -83,17 +84,11 @@ pub struct BannerUnit { pub fn convert_tsjs_to_auction_request( body: &AdRequest, settings: &Settings, - services: &RuntimeServices, req: &Request, consent: ConsentContext, - ec_id: &str, - geo: Option, + ec_id: Option<&str>, ) -> Result> { - let ec_id = ec_id.to_owned(); - let fresh_id = - generate_ec_id(settings, services).change_context(TrustedServerError::Auction { - message: "Failed 
to generate fresh EC ID".to_string(), - })?; + let ec_id = ec_id.map(str::to_owned); // Convert ad units to slots let mut slots = Vec::new(); @@ -140,8 +135,9 @@ pub fn convert_tsjs_to_auction_request( user_agent: req .get_header_str("user-agent") .map(std::string::ToString::to_string), - ip: services.client_info.client_ip.map(|ip| ip.to_string()), - geo, + ip: req.get_client_ip_addr().map(|ip| ip.to_string()), + #[allow(deprecated)] + geo: GeoInfo::from_request(req), }); // Forward allowed config entries from the JS request into the context map. @@ -187,8 +183,8 @@ pub fn convert_tsjs_to_auction_request( }, user: UserInfo { id: ec_id, - fresh_id, consent: Some(consent), + eids: None, }, device, site: Some(SiteInfo { @@ -212,6 +208,7 @@ pub fn convert_to_openrtb_response( result: &OrchestrationResult, settings: &Settings, auction_request: &AuctionRequest, + ec_allowed: bool, ) -> Result> { // Build OpenRTB-style seatbid array let mut seatbids = Vec::with_capacity(result.winning_bids.len()); @@ -312,9 +309,177 @@ pub fn convert_to_openrtb_response( message: "Failed to serialize auction response".to_string(), })?; - Ok(Response::from_status(StatusCode::OK) + let mut response = Response::from_status(StatusCode::OK) .with_header(header::CONTENT_TYPE, "application/json") - .with_header(HEADER_X_TS_EC, &auction_request.user.id) - .with_header(HEADER_X_TS_EC_FRESH, &auction_request.user.fresh_id) - .with_body(body_bytes)) + .with_body(body_bytes); + + // Signal consent status independently of whether EIDs were resolved. + // A user may have granted consent but have no partner syncs yet; + // downstream clients rely on this header to know consent was verified. + if ec_allowed { + response.set_header(HEADER_X_TS_EC_CONSENT, "ok"); + } + + // Attach EID response headers when consent-gated EIDs are available. 
+ // `Some(empty)` means "we looked and found no synced partners" — the + // header is still set (with an encoded empty array) so clients can + // distinguish this from `None` (EIDs not checked / consent denied). + if let Some(ref eids) = auction_request.user.eids { + let (encoded, truncated) = encode_eids_header(eids)?; + response.set_header(HEADER_X_TS_EIDS, encoded); + if truncated { + response.set_header(HEADER_X_TS_EIDS_TRUNCATED, "true"); + } + } + + Ok(response) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::auction::orchestrator::OrchestrationResult; + use crate::auction::types::{AdFormat, AdSlot, MediaType}; + use crate::constants::{HEADER_X_TS_EC_CONSENT, HEADER_X_TS_EIDS, HEADER_X_TS_EIDS_TRUNCATED}; + use crate::openrtb::{Eid, Uid}; + + fn make_minimal_auction_request() -> AuctionRequest { + AuctionRequest { + id: "test-auction".to_owned(), + slots: vec![AdSlot { + id: "slot-1".to_owned(), + formats: vec![AdFormat { + media_type: MediaType::Banner, + width: 300, + height: 250, + }], + floor_price: None, + targeting: HashMap::new(), + bidders: HashMap::new(), + }], + publisher: PublisherInfo { + domain: "test.com".to_owned(), + page_url: None, + }, + user: UserInfo { + id: Some("test-ec-id".to_owned()), + consent: None, + eids: None, + }, + device: None, + site: None, + context: HashMap::new(), + } + } + + fn make_empty_result() -> OrchestrationResult { + OrchestrationResult { + winning_bids: HashMap::new(), + provider_responses: Vec::new(), + mediator_response: None, + total_time_ms: 10, + metadata: HashMap::new(), + } + } + + fn make_settings() -> Settings { + crate::test_support::tests::create_test_settings() + } + + #[test] + fn response_includes_eid_headers_when_eids_present() { + let mut request = make_minimal_auction_request(); + request.user.eids = Some(vec![Eid { + source: "ssp.com".to_owned(), + uids: vec![Uid { + id: "uid-1".to_owned(), + atype: Some(3), + ext: None, + }], + }]); + + let settings = make_settings(); + let result = 
make_empty_result(); + + let response = convert_to_openrtb_response(&result, &settings, &request, true) + .expect("should build response"); + + assert!( + response.get_header(HEADER_X_TS_EIDS).is_some(), + "should include x-ts-eids header when EIDs are present" + ); + assert_eq!( + response + .get_header(HEADER_X_TS_EC_CONSENT) + .and_then(|v| v.to_str().ok()), + Some("ok"), + "should include x-ts-ec-consent: ok when ec_allowed is true" + ); + assert!( + response.get_header(HEADER_X_TS_EIDS_TRUNCATED).is_none(), + "should not include truncated header for small payload" + ); + } + + #[test] + fn response_sets_consent_header_even_without_eids() { + let request = make_minimal_auction_request(); + let settings = make_settings(); + let result = make_empty_result(); + + let response = convert_to_openrtb_response(&result, &settings, &request, true) + .expect("should build response"); + + assert_eq!( + response + .get_header(HEADER_X_TS_EC_CONSENT) + .and_then(|v| v.to_str().ok()), + Some("ok"), + "should set x-ts-ec-consent: ok based on consent, not EID presence" + ); + assert!( + response.get_header(HEADER_X_TS_EIDS).is_none(), + "should omit x-ts-eids when no EIDs available" + ); + } + + #[test] + fn response_omits_consent_header_when_not_allowed() { + let request = make_minimal_auction_request(); + let settings = make_settings(); + let result = make_empty_result(); + + let response = convert_to_openrtb_response(&result, &settings, &request, false) + .expect("should build response"); + + assert!( + response.get_header(HEADER_X_TS_EC_CONSENT).is_none(), + "should omit x-ts-ec-consent when ec_allowed is false" + ); + assert!( + response.get_header(HEADER_X_TS_EIDS).is_none(), + "should omit x-ts-eids when no EIDs available" + ); + assert!( + response.get_header("x-ts-ec").is_none(), + "should not emit x-ts-ec when a valid EC is present" + ); + } + + #[test] + fn response_omits_ec_header_when_ec_id_is_none() { + let mut request = make_minimal_auction_request(); + 
request.user.id = None; + + let settings = make_settings(); + let result = make_empty_result(); + + let response = convert_to_openrtb_response(&result, &settings, &request, false) + .expect("should build response"); + + assert!( + response.get_header("x-ts-ec").is_none(), + "should omit x-ts-ec when no EC ID is available" + ); + } } diff --git a/crates/trusted-server-core/src/auction/orchestrator.rs b/crates/trusted-server-core/src/auction/orchestrator.rs index 9cbcd2b9..15ad3221 100644 --- a/crates/trusted-server-core/src/auction/orchestrator.rs +++ b/crates/trusted-server-core/src/auction/orchestrator.rs @@ -1,13 +1,12 @@ //! Auction orchestrator for managing multi-provider auctions. use error_stack::{Report, ResultExt}; +use fastly::http::request::{select, PendingRequest}; use std::collections::HashMap; use std::sync::Arc; use std::time::{Duration, Instant}; use crate::error::TrustedServerError; -use crate::platform::{PlatformPendingRequest, RuntimeServices}; -use crate::proxy::platform_response_to_fastly; use super::config::AuctionConfig; use super::provider::AuctionProvider; @@ -66,7 +65,6 @@ impl AuctionOrchestrator { &self, request: &AuctionRequest, context: &AuctionContext<'_>, - services: &RuntimeServices, ) -> Result> { let start_time = Instant::now(); @@ -74,13 +72,12 @@ impl AuctionOrchestrator { let (strategy_name, result) = if self.config.has_mediator() { ( "parallel_mediation", - self.run_parallel_mediation(request, context, services) - .await?, + self.run_parallel_mediation(request, context).await?, ) } else { ( "parallel_only", - self.run_parallel_only(request, context, services).await?, + self.run_parallel_only(request, context).await?, ) }; @@ -105,12 +102,9 @@ impl AuctionOrchestrator { &self, request: &AuctionRequest, context: &AuctionContext<'_>, - services: &RuntimeServices, ) -> Result> { let mediation_start = Instant::now(); - let provider_responses = self - .run_providers_parallel(request, context, services) - .await?; + let 
provider_responses = self.run_providers_parallel(request, context).await?; let floor_prices = self.floor_prices_by_slot(request); let (mediator_response, winning_bids) = if let Some(mediator_name) = &self.config.mediator { @@ -128,6 +122,8 @@ impl AuctionOrchestrator { let remaining_ms = remaining_budget_ms(mediation_start, context.timeout_ms); if remaining_ms == 0 { + // lgtm[rust/cleartext-logging] + // This warning reports timeout budget metadata only; no secret settings are logged. log::warn!( "Auction timeout ({}ms) exhausted during bidding phase — skipping mediator", context.timeout_ms @@ -145,7 +141,6 @@ impl AuctionOrchestrator { let mediator_context = AuctionContext { settings: context.settings, request: context.request, - client_info: context.client_info, timeout_ms: remaining_ms, provider_responses: Some(&provider_responses), services: context.services, @@ -158,25 +153,13 @@ impl AuctionOrchestrator { message: format!("Mediator {} failed to launch", mediator.provider_name()), })?; - let platform_resp = services - .http_client() - .wait(PlatformPendingRequest::new(pending)) - .await - .change_context(TrustedServerError::Auction { - message: format!("Mediator {} request failed", mediator.provider_name()), - })?; - let backend_response = platform_response_to_fastly(platform_resp).change_context( - TrustedServerError::Auction { - message: format!( - "Mediator {} returned an unsupported response body", - mediator.provider_name() - ), - }, - )?; + let backend_response = pending.wait().change_context(TrustedServerError::Auction { + message: format!("Mediator {} request failed", mediator.provider_name()), + })?; let response_time_ms = start_time.elapsed().as_millis() as u64; let mediator_resp = mediator - .parse_response(backend_response, response_time_ms) + .parse_response_with_context(backend_response, response_time_ms, &mediator_context) .change_context(TrustedServerError::Auction { message: format!("Mediator {} parse failed", mediator.provider_name()), })?; 
@@ -225,11 +208,8 @@ impl AuctionOrchestrator { &self, request: &AuctionRequest, context: &AuctionContext<'_>, - services: &RuntimeServices, ) -> Result> { - let provider_responses = self - .run_providers_parallel(request, context, services) - .await?; + let provider_responses = self.run_providers_parallel(request, context).await?; let floor_prices = self.floor_prices_by_slot(request); let winning_bids = self.select_winning_bids(&provider_responses, &floor_prices); @@ -244,14 +224,12 @@ impl AuctionOrchestrator { /// Run all providers in parallel and collect responses. /// - /// Uses [`RuntimeServices::http_client`] and - /// [`crate::platform::PlatformHttpClient::select`] to process responses as - /// they become ready, rather than waiting for each response sequentially. + /// Uses `fastly::http::request::select()` to process responses as they + /// become ready, rather than waiting for each response sequentially. async fn run_providers_parallel( &self, request: &AuctionRequest, context: &AuctionContext<'_>, - services: &RuntimeServices, ) -> Result, Report> { let provider_names = self.config.provider_names(); @@ -273,7 +251,7 @@ impl AuctionOrchestrator { // Maps backend_name -> (provider_name, start_time, provider) let mut backend_to_provider: HashMap = HashMap::new(); - let mut pending_requests: Vec = Vec::new(); + let mut pending_requests: Vec = Vec::new(); for provider_name in provider_names { let provider = match self.providers.get(provider_name) { @@ -300,6 +278,8 @@ impl AuctionOrchestrator { let effective_timeout = remaining_ms.min(provider.timeout_ms()); if effective_timeout == 0 { + // lgtm[rust/cleartext-logging] + // This warning reports timeout budget metadata only; no secret settings are logged. 
log::warn!( "Auction timeout ({}ms) exhausted before launching '{}' — skipping", context.timeout_ms, @@ -325,7 +305,6 @@ impl AuctionOrchestrator { let provider_context = AuctionContext { settings: context.settings, request: context.request, - client_info: context.client_info, timeout_ms: effective_timeout, provider_responses: context.provider_responses, services: context.services, @@ -342,11 +321,10 @@ impl AuctionOrchestrator { match provider.request_bids(request, &provider_context) { Ok(pending) => { backend_to_provider.insert( - backend_name.clone(), + backend_name, (provider.provider_name(), start_time, provider.as_ref()), ); - pending_requests - .push(PlatformPendingRequest::new(pending).with_backend_name(backend_name)); + pending_requests.push(pending); log::debug!( "Request to '{}' launched successfully", provider.provider_name() @@ -363,6 +341,8 @@ impl AuctionOrchestrator { } let deadline = Duration::from_millis(u64::from(context.timeout_ms)); + // lgtm[rust/cleartext-logging] + // This info log reports request counts and timeout budget only; no secret settings are logged. 
log::info!( "Launched {} concurrent requests, waiting for responses (timeout: {}ms)...", pending_requests.len(), @@ -381,54 +361,39 @@ impl AuctionOrchestrator { let mut remaining = pending_requests; while !remaining.is_empty() { - let select_result = services - .http_client() - .select(remaining) - .await - .change_context(TrustedServerError::Auction { - message: "HTTP select failed".to_string(), - })?; - remaining = select_result.remaining; + let (result, rest) = select(remaining); + remaining = rest; - match select_result.ready { - Ok(platform_response) => { + match result { + Ok(response) => { // Identify the provider from the backend name - let backend_name = platform_response.backend_name.clone().unwrap_or_default(); + let backend_name = response.get_backend_name().unwrap_or_default().to_string(); if let Some((provider_name, start_time, provider)) = backend_to_provider.remove(&backend_name) { let response_time_ms = start_time.elapsed().as_millis() as u64; - match platform_response_to_fastly(platform_response) { - Ok(response) => { - match provider.parse_response(response, response_time_ms) { - Ok(auction_response) => { - log::info!( - "Provider '{}' returned {} bids (status: {:?}, time: {}ms)", - auction_response.provider, - auction_response.bids.len(), - auction_response.status, - auction_response.response_time_ms - ); - responses.push(auction_response); - } - Err(e) => { - log::warn!( - "Provider '{}' failed to parse response: {:?}", - provider_name, - e - ); - responses.push(AuctionResponse::error( - provider_name, - response_time_ms, - )); - } - } + match provider.parse_response_with_context( + response, + response_time_ms, + context, + ) { + Ok(auction_response) => { + log::info!( + "Provider '{}' returned {} bids (status: {:?}, time: {}ms)", + auction_response.provider, + auction_response.bids.len(), + auction_response.status, + auction_response.response_time_ms + ); + responses.push(auction_response); } Err(e) => { + // lgtm[rust/cleartext-logging] + 
// This warning reports provider parse failures only; no secret values are logged. log::warn!( - "Provider '{}' returned an unsupported response body: {:?}", + "Provider '{}' failed to parse response: {:?}", provider_name, e ); @@ -454,6 +419,8 @@ impl AuctionOrchestrator { // Remaining PendingRequests are dropped, which abandons the // in-flight HTTP calls on the Fastly host. if auction_start.elapsed() >= deadline && !remaining.is_empty() { + // lgtm[rust/cleartext-logging] + // This warning reports timeout budget metadata only; no secret settings are logged. log::warn!( "Auction timeout ({}ms) reached, dropping {} remaining request(s)", context.timeout_ms, @@ -637,16 +604,6 @@ mod tests { use crate::auction::types::{ AdFormat, AdSlot, AuctionRequest, Bid, MediaType, PublisherInfo, UserInfo, }; - - // All-None ClientInfo used across tests that don't need real IP/TLS data. - // Defined as a const so &EMPTY_CLIENT_INFO has 'static lifetime, avoiding - // the temporary-lifetime issue that arises with &ClientInfo::default(). 
- const EMPTY_CLIENT_INFO: crate::platform::ClientInfo = crate::platform::ClientInfo { - client_ip: None, - tls_protocol: None, - tls_cipher: None, - }; - use crate::platform::test_support::noop_services; use crate::test_support::tests::crate_test_settings_str; use fastly::Request; use std::collections::{HashMap, HashSet}; @@ -685,9 +642,9 @@ mod tests { page_url: Some("https://test.com/article".to_string()), }, user: UserInfo { - id: "user-123".to_string(), - fresh_id: "fresh-456".to_string(), + id: Some("user-123".to_string()), consent: None, + eids: None, }, device: None, site: None, @@ -787,11 +744,9 @@ mod tests { let request = create_test_auction_request(); let settings = create_test_settings(); let req = Request::get("https://test.com/test"); - let context = create_test_auction_context(&settings, &req, &EMPTY_CLIENT_INFO, 2000); + let context = create_test_auction_context(&settings, &req, 2000); - let result = orchestrator - .run_auction(&request, &context, &noop_services()) - .await; + let result = orchestrator.run_auction(&request, &context).await; assert!(result.is_err()); let err = result.unwrap_err(); diff --git a/crates/trusted-server-core/src/auction/provider.rs b/crates/trusted-server-core/src/auction/provider.rs index cd3fcfc3..7e509043 100644 --- a/crates/trusted-server-core/src/auction/provider.rs +++ b/crates/trusted-server-core/src/auction/provider.rs @@ -44,6 +44,25 @@ pub trait AuctionProvider: Send + Sync { response_time_ms: u64, ) -> Result>; + /// Parse the response with access to the original auction context. + /// + /// Providers that need request-local metadata while transforming responses + /// can override this method. The default preserves the existing + /// response-only provider contract. + /// + /// # Errors + /// + /// Returns an error if the response cannot be parsed into a valid [`AuctionResponse`]. 
+ fn parse_response_with_context( + &self, + response: fastly::Response, + response_time_ms: u64, + context: &AuctionContext<'_>, + ) -> Result> { + let _ = context; + self.parse_response(response, response_time_ms) + } + /// Check if this provider supports a specific media type. fn supports_media_type(&self, media_type: &super::types::MediaType) -> bool { // By default, support banner ads diff --git a/crates/trusted-server-core/src/auction/test_support.rs b/crates/trusted-server-core/src/auction/test_support.rs index 2c5b5438..bfcdd10d 100644 --- a/crates/trusted-server-core/src/auction/test_support.rs +++ b/crates/trusted-server-core/src/auction/test_support.rs @@ -3,7 +3,7 @@ use std::sync::LazyLock; use fastly::Request; use super::AuctionContext; -use crate::platform::{test_support::noop_services, ClientInfo, RuntimeServices}; +use crate::platform::{test_support::noop_services, RuntimeServices}; use crate::settings::Settings; static TEST_SERVICES: LazyLock = LazyLock::new(noop_services); @@ -11,14 +11,12 @@ static TEST_SERVICES: LazyLock = LazyLock::new(noop_services); pub(crate) fn create_test_auction_context<'a>( settings: &'a Settings, request: &'a Request, - client_info: &'a ClientInfo, timeout_ms: u32, ) -> AuctionContext<'a> { let services: &'static RuntimeServices = &TEST_SERVICES; AuctionContext { settings, request, - client_info, timeout_ms, provider_responses: None, services, diff --git a/crates/trusted-server-core/src/auction/types.rs b/crates/trusted-server-core/src/auction/types.rs index 17db5990..9b74d89e 100644 --- a/crates/trusted-server-core/src/auction/types.rs +++ b/crates/trusted-server-core/src/auction/types.rs @@ -6,7 +6,7 @@ use std::collections::HashMap; use crate::auction::context::ContextValue; use crate::geo::GeoInfo; -use crate::platform::{ClientInfo, RuntimeServices}; +use crate::platform::RuntimeServices; use crate::settings::Settings; /// Represents a unified auction request across all providers. 
@@ -70,10 +70,10 @@ pub struct PublisherInfo { /// Privacy-preserving user information. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct UserInfo { - /// Stable EC ID (from cookie or freshly generated) - pub id: String, - /// Fresh ID for this session - pub fresh_id: String, + /// Stable EC ID (from cookie or freshly generated). + /// `None` when EC is unavailable or consent denies it. + #[serde(skip_serializing_if = "Option::is_none")] + pub id: Option, /// Decoded consent context for this request. /// /// Carries both raw consent strings (for `OpenRTB` forwarding) and decoded @@ -82,6 +82,13 @@ pub struct UserInfo { /// cookies/headers, not from stored data. #[serde(skip)] pub consent: Option, + /// Consent-gated Extended User IDs resolved from the KV identity graph. + /// + /// Populated by the auction handler from partner data when the user has + /// a valid EC and consent permits EID transmission. `None` when no EIDs + /// are available (no EC, consent denied, or KV read failure). + #[serde(skip)] + pub eids: Option>, } /// Device information from request. @@ -103,7 +110,6 @@ pub struct SiteInfo { pub struct AuctionContext<'a> { pub settings: &'a Settings, pub request: &'a Request, - pub client_info: &'a ClientInfo, pub timeout_ms: u32, /// Provider responses from the bidding phase, used by mediators. /// This is `None` for regular bidders and `Some` when calling a mediator. 
diff --git a/crates/trusted-server-core/src/auth.rs b/crates/trusted-server-core/src/auth.rs index fa882044..088d27e8 100644 --- a/crates/trusted-server-core/src/auth.rs +++ b/crates/trusted-server-core/src/auth.rs @@ -230,7 +230,7 @@ mod tests { #[test] fn allow_admin_path_with_valid_credentials() { let settings = create_test_settings(); - let mut req = build_request(Method::POST, "https://example.com/admin/keys/rotate"); + let mut req = build_request(Method::POST, "https://example.com/_ts/admin/keys/rotate"); let token = STANDARD.encode("admin:admin-pass"); set_authorization(&mut req, &format!("Basic {token}")); @@ -245,7 +245,7 @@ mod tests { #[test] fn challenge_admin_path_with_wrong_credentials() { let settings = create_test_settings(); - let mut req = build_request(Method::POST, "https://example.com/admin/keys/rotate"); + let mut req = build_request(Method::POST, "https://example.com/_ts/admin/keys/rotate"); let token = STANDARD.encode("admin:wrong"); set_authorization(&mut req, &format!("Basic {token}")); @@ -258,7 +258,7 @@ mod tests { #[test] fn challenge_admin_path_with_missing_credentials() { let settings = create_test_settings(); - let req = build_request(Method::POST, "https://example.com/admin/keys/rotate"); + let req = build_request(Method::POST, "https://example.com/_ts/admin/keys/rotate"); let response = enforce_basic_auth(&settings, &req) .expect("should evaluate auth") diff --git a/crates/trusted-server-core/src/compat.rs b/crates/trusted-server-core/src/compat.rs index 54bdd5de..738d0e9f 100644 --- a/crates/trusted-server-core/src/compat.rs +++ b/crates/trusted-server-core/src/compat.rs @@ -142,9 +142,7 @@ pub fn set_fastly_ec_cookie( response: &mut fastly::Response, ec_id: &str, ) { - if let Some(cookie) = crate::cookies::try_build_ec_cookie_value(settings, ec_id) { - response.append_header(header::SET_COOKIE, cookie); - } + crate::ec::cookies::set_ec_cookie(settings, response, ec_id); } /// Expire the EC ID cookie on a `fastly::Response`. 
@@ -154,14 +152,7 @@ pub fn expire_fastly_ec_cookie( settings: &crate::settings::Settings, response: &mut fastly::Response, ) { - response.append_header( - header::SET_COOKIE, - format!( - "{}=; {}", - crate::constants::COOKIE_TS_EC, - crate::cookies::ec_cookie_attributes(settings, 0), - ), - ); + crate::ec::cookies::expire_ec_cookie(settings, response); } #[cfg(test)] @@ -347,7 +338,8 @@ mod tests { let settings = crate::test_support::tests::create_test_settings(); let mut response = fastly::Response::new(); - set_fastly_ec_cookie(&settings, &mut response, "abc123.XyZ789"); + let ec_id = format!("{}.Ab12z9", "a".repeat(64)); + set_fastly_ec_cookie(&settings, &mut response, &ec_id); let cookie = response .get_header(header::SET_COOKIE) @@ -356,8 +348,8 @@ mod tests { assert_eq!( cookie, Some(format!( - "ts-ec=abc123.XyZ789; Domain={}; Path=/; Secure; HttpOnly; SameSite=Lax; Max-Age=31536000", - settings.publisher.cookie_domain + "ts-ec={ec_id}; Domain=.{}; Path=/; Secure; SameSite=Lax; Max-Age=31536000; HttpOnly", + settings.publisher.domain )), "should set expected EC cookie" ); @@ -377,8 +369,8 @@ mod tests { assert_eq!( cookie, Some(format!( - "ts-ec=; Domain={}; Path=/; Secure; HttpOnly; SameSite=Lax; Max-Age=0", - settings.publisher.cookie_domain + "ts-ec=; Domain=.{}; Path=/; Secure; SameSite=Lax; Max-Age=0; HttpOnly", + settings.publisher.domain )), "should set expected expiry cookie" ); diff --git a/crates/trusted-server-core/src/consent/gpp.rs b/crates/trusted-server-core/src/consent/gpp.rs index 9d0e5c81..ffb770c2 100644 --- a/crates/trusted-server-core/src/consent/gpp.rs +++ b/crates/trusted-server-core/src/consent/gpp.rs @@ -71,11 +71,14 @@ pub fn decode_gpp_string(gpp_string: &str) -> Result Option { } } +/// GPP section IDs that represent US state/national privacy sections. 
+/// +/// Range 7–23 per the GPP v1 specification: +/// 7=UsNat, 8=UsCa, 9=UsVa, 10=UsCo, 11=UsUt, 12=UsCt, 13=UsFl, +/// 14=UsMt, 15=UsOr, 16=UsTx, 17=UsDe, 18=UsIa, 19=UsNe, 20=UsNh, +/// 21=UsNj, 22=UsTn, 23=UsMn. +const US_SECTION_ID_RANGE: std::ops::RangeInclusive = 7..=23; + +/// Extracts the `sale_opt_out` signal across all US sections in a parsed GPP +/// string. +/// +/// Iterates through section IDs looking for any in the US range (7–23), +/// decodes each US section, and aggregates the result conservatively: +/// +/// - `Some(true)` if any decodable US section says the user opted out of sale +/// - `Some(false)` if at least one decodable US section says they did not opt +/// out and none say they opted out +/// - `None` if no US section is present or no decodable US section yields a +/// usable `sale_opt_out` signal +fn decode_us_sale_opt_out(parsed: &iab_gpp::v1::GPPString) -> Option { + let mut result = None; + + for us_section_id in parsed + .section_ids() + .filter(|id| US_SECTION_ID_RANGE.contains(&(**id as u16))) + { + match parsed.decode_section(*us_section_id) { + Ok(section) => match us_sale_opt_out_from_section(§ion) { + Some(true) => return Some(true), + Some(false) => result = Some(false), + None => {} + }, + Err(e) => { + log::warn!("Failed to decode US GPP section {us_section_id}: {e}"); + } + } + } + + result +} + +fn us_sale_opt_out_from_section(section: &iab_gpp::sections::Section) -> Option { + use iab_gpp::sections::us_common::OptOut; + use iab_gpp::sections::Section; + + // Keep this match in sync with new US-state variants added by `iab_gpp`. 
+ let sale_opt_out = match section { + Section::UsNat(s) => match &s.core { + iab_gpp::sections::usnat::Core::V1(c) => &c.sale_opt_out, + iab_gpp::sections::usnat::Core::V2(c) => &c.sale_opt_out, + _ => return None, + }, + Section::UsCa(s) => &s.core.sale_opt_out, + Section::UsVa(s) => &s.core.sale_opt_out, + Section::UsCo(s) => &s.core.sale_opt_out, + Section::UsUt(s) => &s.core.sale_opt_out, + Section::UsCt(s) => &s.core.sale_opt_out, + Section::UsFl(s) => &s.core.sale_opt_out, + Section::UsMt(s) => &s.core.sale_opt_out, + Section::UsOr(s) => &s.core.sale_opt_out, + Section::UsTx(s) => &s.core.sale_opt_out, + Section::UsDe(s) => &s.core.sale_opt_out, + Section::UsIa(s) => &s.core.sale_opt_out, + Section::UsNe(s) => &s.core.sale_opt_out, + Section::UsNh(s) => &s.core.sale_opt_out, + Section::UsNj(s) => &s.core.sale_opt_out, + Section::UsTn(s) => &s.core.sale_opt_out, + Section::UsMn(s) => &s.core.sale_opt_out, + _ => return None, + }; + + Some(*sale_opt_out == OptOut::OptedOut) +} + /// Parses a `__gpp_sid` cookie value into a vector of section IDs. /// /// The cookie is a comma-separated list of integer section IDs, e.g. `"2,6"`. 
@@ -239,4 +316,154 @@ mod tests { "all-invalid should be None" ); } + + #[test] + fn decodes_us_sale_opt_out_not_opted_out() { + let result = decode_gpp_string("DBABLA~BVQqAAAAAgA.QA"); + match &result { + Ok(gpp) => { + assert_eq!( + gpp.us_sale_opt_out, + Some(false), + "should extract sale_opt_out=false from UsNat section" + ); + } + Err(e) => { + panic!("GPP decode failed: {e}"); + } + } + } + + fn encode_fibonacci_integer(mut value: u16) -> String { + let mut fibs = vec![1_u16]; + let mut next = 2_u16; + while next <= value { + fibs.push(next); + next = if fibs.len() == 1 { + 2 + } else { + fibs[fibs.len() - 1] + fibs[fibs.len() - 2] + }; + } + + let mut bits = vec![false; fibs.len()]; + for (idx, fib) in fibs.iter().enumerate().rev() { + if *fib <= value { + value -= *fib; + bits[idx] = true; + } + } + bits.push(true); + + bits.into_iter() + .map(|bit| if bit { '1' } else { '0' }) + .collect() + } + + fn encode_header(section_ids: &[u16]) -> String { + const BASE64_URL: &[u8; 64] = + b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"; + + let mut bits = String::from("000011000001"); + bits.push_str(&format!("{:012b}", section_ids.len())); + + let mut previous = 0_u16; + for §ion_id in section_ids { + bits.push('0'); + bits.push_str(&encode_fibonacci_integer(section_id - previous)); + previous = section_id; + } + + while bits.len() % 6 != 0 { + bits.push('0'); + } + + bits.as_bytes() + .chunks(6) + .map(|chunk| { + let value = u8::from_str_radix( + core::str::from_utf8(chunk).expect("should encode header bits as utf8"), + 2, + ) + .expect("should parse 6-bit chunk"); + char::from(BASE64_URL[value as usize]) + }) + .collect() + } + + fn gpp_with_sections(sections: &[(u16, &str)]) -> String { + let ids = sections.iter().map(|(id, _)| *id).collect::>(); + let header = encode_header(&ids); + let section_payloads = sections.iter().map(|(_, raw)| *raw).collect::>(); + format!("{header}~{}", section_payloads.join("~")) + } + + #[test] + fn 
no_us_section_returns_none() { + let result = decode_gpp_string(GPP_TCF_AND_USP).expect("should decode GPP"); + assert_eq!( + result.us_sale_opt_out, None, + "should return None when no US section (7-23) is present" + ); + } + + #[test] + fn later_us_section_opt_out_overrides_earlier_non_opt_out() { + let gpp = gpp_with_sections(&[(7, "BVQqAAAAAgA.QA"), (9, "BVVVVVVVVWA.AA")]); + + let result = decode_gpp_string(&gpp).expect("should decode multi-section US GPP"); + + assert_eq!( + result.us_sale_opt_out, + Some(true), + "should treat any later decodable opt-out as authoritative" + ); + } + + #[test] + fn multiple_us_sections_without_opt_out_return_false() { + let gpp = gpp_with_sections(&[(7, "BVQqAAAAAgA.QA"), (9, "BVgVVVVVVWA.AA")]); + + let result = decode_gpp_string(&gpp).expect("should decode multi-section US GPP"); + + assert_eq!( + result.us_sale_opt_out, + Some(false), + "should return false when decodable US sections consistently do not opt out" + ); + } + + #[test] + fn valid_opt_out_wins_even_if_another_us_section_is_undecodable() { + let gpp = gpp_with_sections(&[(7, "BVQqAAAAAgA.QA"), (9, "not-a-valid-usva-section")]); + + let result = decode_gpp_string(&gpp).expect("should decode GPP header with raw sections"); + + assert_eq!( + result.us_sale_opt_out, + Some(false), + "should keep a valid non-opt-out signal even when another US section fails to decode" + ); + + let gpp = gpp_with_sections(&[(7, "not-a-valid-usnat-section"), (9, "BVVVVVVVVWA.AA")]); + let result = decode_gpp_string(&gpp).expect("should decode GPP header with raw sections"); + + assert_eq!( + result.us_sale_opt_out, + Some(true), + "should let a valid opt-out win even when another US section fails to decode" + ); + } + + #[test] + fn only_undecodable_us_sections_return_none() { + let gpp = gpp_with_sections(&[(7, "not-a-valid-usnat-section"), (9, "also-invalid")]); + + let result = decode_gpp_string(&gpp).expect("should decode GPP header with raw sections"); + + assert_eq!( + 
result.us_sale_opt_out, None, + "should return None when no decodable US section yields sale_opt_out" + ); + } } diff --git a/crates/trusted-server-core/src/consent/jurisdiction.rs b/crates/trusted-server-core/src/consent/jurisdiction.rs index df0c5b59..bcc825c5 100644 --- a/crates/trusted-server-core/src/consent/jurisdiction.rs +++ b/crates/trusted-server-core/src/consent/jurisdiction.rs @@ -100,6 +100,7 @@ mod tests { longitude: 0.0, metro_code: 0, region: region.map(str::to_owned), + asn: None, } } diff --git a/crates/trusted-server-core/src/consent/mod.rs b/crates/trusted-server-core/src/consent/mod.rs index 36e7e628..d5aa5fbe 100644 --- a/crates/trusted-server-core/src/consent/mod.rs +++ b/crates/trusted-server-core/src/consent/mod.rs @@ -4,8 +4,13 @@ //! //! 1. **Extract** raw consent strings from cookies and HTTP headers. //! 2. **Decode** each signal into structured data (TCF v2, GPP, US Privacy). -//! 3. **Build** a normalized [`ConsentContext`] that flows through the auction -//! pipeline and populates `OpenRTB` bid requests. +//! 3. **Build** a request-local [`ConsentContext`] that flows through the +//! auction pipeline and populates `OpenRTB` bid requests. +//! +//! Consent is interpreted from request cookies, headers, geolocation, and +//! publisher policy defaults. The consent pipeline does not read from or write +//! to KV storage; EC identity lifecycle state is managed separately by the EC +//! identity graph. //! //! # Supported signals //! @@ -22,8 +27,6 @@ //! req: &req, //! config: &settings.consent, //! geo: geo.as_ref(), -//! ec_id: Some("ec_abc123"), -//! kv_store: Some(runtime_services.kv_store()), //! }); //! 
``` @@ -34,7 +37,6 @@ pub mod tcf; pub mod types; pub mod us_privacy; -pub use crate::storage::kv_store as kv; pub use extraction::extract_consent_signals; pub use types::{ ConsentContext, ConsentSource, PrivacyFlag, RawConsentSignals, TcfConsent, UsPrivacy, @@ -68,17 +70,6 @@ pub struct ConsentPipelineInput<'a> { pub config: &'a ConsentConfig, /// Geolocation data from the request (for jurisdiction detection). pub geo: Option<&'a GeoInfo>, - /// EC ID for KV Store consent persistence. - /// - /// When set along with `kv_store`, enables: - /// - **Read fallback**: loads consent from KV when cookies are absent. - /// - **Write-on-change**: persists cookie-sourced consent to KV. - pub ec_id: Option<&'a str>, - /// KV store for consent persistence. - /// - /// `None` when consent persistence is not configured for this request, or - /// when the caller intentionally skips consent KV access. - pub kv_store: Option<&'a dyn crate::platform::PlatformKvStore>, } /// Extracts, decodes, and normalizes consent signals from a request. @@ -93,6 +84,10 @@ pub struct ConsentPipelineInput<'a> { /// 6. Builds a [`ConsentContext`] with both raw and decoded data. /// 7. Logs a summary for observability. /// +/// The returned context reflects request-local consent signals plus policy +/// defaults only. This function does not load persisted consent from KV and +/// does not persist consent to KV. +/// /// Decoding failures are logged and the corresponding decoded field is set to /// `None` — the raw string is still preserved for proxy-mode forwarding. pub fn build_consent_context(input: &ConsentPipelineInput<'_>) -> ConsentContext { @@ -126,24 +121,12 @@ pub fn build_consent_context(input: &ConsentPipelineInput<'_>) -> ConsentContext }; } - // KV Store fallback: if no cookie-based signals exist, try loading - // persisted consent from the KV Store. 
- if should_try_kv_fallback(&signals) { - if let Some(ctx) = try_kv_fallback(input) { - log_consent_context(&ctx); - return ctx; - } - } - let mut ctx = build_context_from_signals(&signals); ctx.jurisdiction = jurisdiction::detect_jurisdiction(input.geo, input.config); apply_tcf_conflict_resolution(&mut ctx, input.config); apply_expiration_check(&mut ctx, input.config); apply_gpc_us_privacy(&mut ctx, input.config); - // KV Store write: persist cookie-sourced consent for future requests. - try_kv_write(input, &ctx); - log_consent_context(&ctx); ctx } @@ -171,6 +154,8 @@ fn apply_expiration_check(ctx: &mut ConsentContext, config: &ConsentConfig) { return; } + // lgtm[rust/cleartext-logging] + // This warning logs consent age metadata only; no raw consent string is emitted. log::warn!( "TCF consent expired (age: {age_days}d, max: {}d)", config.max_consent_age_days @@ -436,9 +421,8 @@ pub fn build_us_privacy_from_gpc(config: &ConsentConfig) -> Option( eids: Option>, @@ -482,8 +466,12 @@ pub fn gate_eids_by_consent( /// information on a device) must be explicitly consented. If no TCF data is /// available under GDPR, consent is assumed absent and EC is blocked. /// - **US state privacy**: opt-out model — EC is allowed unless the user has -/// explicitly opted out via the US Privacy string or Global Privacy Control. -/// - **Non-regulated / Unknown**: EC is allowed (no consent requirement). +/// explicitly opted out via the US Privacy string **or** Global Privacy +/// Control. GPC is checked independently — it always blocks EC creation +/// regardless of what the US Privacy string says. +/// - **Non-regulated**: EC is allowed (no consent requirement). +/// - **Unknown**: fail-closed — jurisdiction cannot be determined so EC is +/// blocked as a precaution. 
#[must_use] pub fn allows_ec_creation(ctx: &ConsentContext) -> bool { match &ctx.jurisdiction { @@ -495,64 +483,65 @@ pub fn allows_ec_creation(ctx: &ConsentContext) -> bool { } } jurisdiction::Jurisdiction::UsState(_) => { - // US: opt-out model — allow unless user explicitly opted out. + // GPC is an independent opt-out signal — it always blocks EC + // creation regardless of what the US Privacy string says. + if ctx.gpc { + return false; + } + // When a CMP uses TCF in the US (e.g. Didomi), respect the + // TCF Purpose 1 decision — this is an explicit opt-in signal. + // The Sourcepoint GPP design documents this precedence decision. + if let Some(tcf) = effective_tcf(ctx) { + return tcf.has_storage_consent(); + } + // Check GPP US section for sale opt-out. + if let Some(gpp) = &ctx.gpp { + if let Some(opted_out) = gpp.us_sale_opt_out { + return !opted_out; + } + } + // Check US Privacy string for explicit opt-out. if let Some(usp) = &ctx.us_privacy { - usp.opt_out_sale != PrivacyFlag::Yes - } else { - // No US Privacy string — fall back to GPC signal. - !ctx.gpc + return usp.opt_out_sale != PrivacyFlag::Yes; } + // Spec §6.1.1: "In regulated jurisdictions (GDPR, US state), + // consent cookies/headers must be present for + // allows_ec_creation() to return true." No signals = block. + false } - jurisdiction::Jurisdiction::NonRegulated | jurisdiction::Jurisdiction::Unknown => true, + jurisdiction::Jurisdiction::NonRegulated => true, + // No geolocation data — cannot determine jurisdiction. + // Fail-closed: block EC creation as a precaution. + jurisdiction::Jurisdiction::Unknown => false, } } -// --------------------------------------------------------------------------- -// KV Store integration helpers -// --------------------------------------------------------------------------- - -/// Returns whether KV fallback should be attempted for this request. +/// Returns `true` only when the request contains an explicit EC opt-out signal. 
/// -/// KV fallback is used only when cookie-based consent signals are absent. -/// A standalone `Sec-GPC` header should not suppress fallback reads. +/// This is intentionally narrower than [`allows_ec_creation`]. Some requests +/// fail closed because consent cannot be verified yet (for example, missing geo +/// or missing/undecodable consent signals in a regulated jurisdiction). Those +/// cases must block *new* EC creation, but they must not be treated as an +/// authoritative withdrawal of an already-issued EC. #[must_use] -fn should_try_kv_fallback(signals: &RawConsentSignals) -> bool { - !signals.has_cookie_signals() -} - -/// Attempts to load consent from the KV Store when cookie signals are empty. -/// -/// Returns `Some(ConsentContext)` if a valid entry was found and decoded, -/// `None` otherwise. Requires both `kv_store` and `ec_id` to be present. -fn try_kv_fallback(input: &ConsentPipelineInput<'_>) -> Option { - let kv_store = input.kv_store?; - let ec_id = input.ec_id?; - - log::debug!("No cookie consent signals, trying KV fallback for '{ec_id}'"); - let mut ctx = kv::load_consent_from_kv(kv_store, ec_id)?; - - // Re-detect jurisdiction from current geo (may differ from stored value). - ctx.jurisdiction = jurisdiction::detect_jurisdiction(input.geo, input.config); - apply_tcf_conflict_resolution(&mut ctx, input.config); - apply_expiration_check(&mut ctx, input.config); - apply_gpc_us_privacy(&mut ctx, input.config); - - Some(ctx) -} - -/// Persists cookie-sourced consent to the KV Store when configured. -/// -/// Only writes when consent signals are non-empty and have changed since -/// the last write (fingerprint comparison). 
-fn try_kv_write(input: &ConsentPipelineInput<'_>, ctx: &ConsentContext) { - let Some(kv_store) = input.kv_store else { - return; - }; - let Some(ec_id) = input.ec_id else { - return; - }; - - kv::save_consent_to_kv(kv_store, ec_id, ctx, input.config.max_consent_age_days); +pub fn has_explicit_ec_withdrawal(ctx: &ConsentContext) -> bool { + match &ctx.jurisdiction { + jurisdiction::Jurisdiction::Gdpr => { + effective_tcf(ctx).is_some_and(|tcf| !tcf.has_storage_consent()) + } + jurisdiction::Jurisdiction::UsState(_) => { + if ctx.gpc { + return true; + } + if let Some(tcf) = effective_tcf(ctx) { + return !tcf.has_storage_consent(); + } + ctx.us_privacy + .as_ref() + .is_some_and(|usp| usp.opt_out_sale == PrivacyFlag::Yes) + } + jurisdiction::Jurisdiction::NonRegulated | jurisdiction::Jurisdiction::Unknown => false, + } } // --------------------------------------------------------------------------- @@ -618,7 +607,7 @@ mod tests { use super::{ allows_ec_creation, apply_expiration_check, apply_tcf_conflict_resolution, - build_consent_context, build_context_from_signals, should_try_kv_fallback, + build_consent_context, build_context_from_signals, has_explicit_ec_withdrawal, ConsentPipelineInput, }; use crate::consent::jurisdiction::Jurisdiction; @@ -714,38 +703,12 @@ mod tests { version: 1, section_ids: vec![2], eu_tcf: Some(make_tcf(gpp_last_updated_ds, gpp_allows_eids)), + us_sale_opt_out: None, }), ..ConsentContext::default() } } - #[test] - fn kv_fallback_allowed_when_only_gpc_present() { - let signals = RawConsentSignals { - gpc: true, - ..RawConsentSignals::default() - }; - - assert!( - should_try_kv_fallback(&signals), - "should allow KV fallback when only Sec-GPC is present" - ); - } - - #[test] - fn kv_fallback_skipped_when_cookie_signal_present() { - let signals = RawConsentSignals { - raw_tc_string: Some("CPXxGfAPXxGfA".to_owned()), - gpc: true, - ..RawConsentSignals::default() - }; - - assert!( - !should_try_kv_fallback(&signals), - "should skip KV fallback 
when cookie signals are present" - ); - } - #[test] fn proxy_mode_marks_gdpr_when_raw_tc_exists() { let jar = parse_cookies_to_jar("euconsent-v2=CPXxGfAPXxGfA"); @@ -760,8 +723,6 @@ mod tests { req: &req, config: &config, geo: None, - ec_id: None, - kv_store: None, }); assert!( @@ -790,8 +751,6 @@ mod tests { req: &req, config: &config, geo: None, - ec_id: None, - kv_store: None, }); assert!( @@ -877,6 +836,7 @@ mod tests { version: 1, section_ids: vec![2], eu_tcf: Some(make_tcf(0, true)), + us_sale_opt_out: None, }), ..ConsentContext::default() }; @@ -959,6 +919,7 @@ mod tests { version: 1, section_ids: vec![2], eu_tcf: Some(make_tcf_with_storage(true)), + us_sale_opt_out: None, }), gdpr_applies: true, ..ConsentContext::default() @@ -1020,7 +981,7 @@ mod tests { } #[test] - fn ec_allowed_us_state_no_signals() { + fn ec_blocked_us_state_no_signals() { let ctx = ConsentContext { jurisdiction: Jurisdiction::UsState("CA".to_owned()), us_privacy: None, @@ -1028,8 +989,8 @@ mod tests { ..ConsentContext::default() }; assert!( - allows_ec_creation(&ctx), - "US state + no opt-out signals should allow EC (opt-out model)" + !allows_ec_creation(&ctx), + "US state + no consent signals should block EC (spec §6.1.1: fail-closed)" ); } @@ -1046,14 +1007,41 @@ mod tests { } #[test] - fn ec_allowed_unknown_jurisdiction() { + fn ec_blocked_unknown_jurisdiction() { let ctx = ConsentContext { jurisdiction: Jurisdiction::Unknown, ..ConsentContext::default() }; assert!( - allows_ec_creation(&ctx), - "unknown jurisdiction should allow EC (no geo data available)" + !allows_ec_creation(&ctx), + "unknown jurisdiction should block EC (fail-closed when geo unavailable)" + ); + assert!( + !has_explicit_ec_withdrawal(&ctx), + "unknown jurisdiction should not be treated as an explicit withdrawal" + ); + } + + #[test] + fn ec_blocked_us_state_gpc_overrides_us_privacy() { + let ctx = ConsentContext { + jurisdiction: Jurisdiction::UsState("CA".to_owned()), + us_privacy: Some(UsPrivacy { + version: 
1, + notice_given: PrivacyFlag::Yes, + opt_out_sale: PrivacyFlag::No, + lspa_covered: PrivacyFlag::NotApplicable, + }), + gpc: true, + ..ConsentContext::default() + }; + assert!( + !allows_ec_creation(&ctx), + "GPC=true should block EC even when US Privacy says no opt-out" + ); + assert!( + has_explicit_ec_withdrawal(&ctx), + "GPC=true should be treated as an explicit withdrawal signal" ); } @@ -1074,4 +1062,185 @@ mod tests { "US Privacy with opt_out=N/A should allow EC" ); } + + #[test] + fn ec_allowed_us_state_tcf_with_storage_consent() { + let ctx = ConsentContext { + jurisdiction: Jurisdiction::UsState("TN".to_owned()), + tcf: Some(make_tcf_with_storage(true)), + ..ConsentContext::default() + }; + assert!( + allows_ec_creation(&ctx), + "US state + TCF Purpose 1 consented should allow EC (Didomi-style CMP)" + ); + } + + #[test] + fn ec_blocked_us_state_tcf_without_storage_consent() { + let ctx = ConsentContext { + jurisdiction: Jurisdiction::UsState("TN".to_owned()), + tcf: Some(make_tcf_with_storage(false)), + ..ConsentContext::default() + }; + assert!( + !allows_ec_creation(&ctx), + "US state + TCF Purpose 1 denied should block EC" + ); + } + + #[test] + fn ec_blocked_us_state_gpc_overrides_tcf() { + let ctx = ConsentContext { + jurisdiction: Jurisdiction::UsState("TN".to_owned()), + tcf: Some(make_tcf_with_storage(true)), + gpc: true, + ..ConsentContext::default() + }; + assert!( + !allows_ec_creation(&ctx), + "GPC should block EC even when TCF grants storage consent in US state" + ); + } + + #[test] + fn ec_allowed_us_state_tcf_takes_priority_over_us_privacy() { + let ctx = ConsentContext { + jurisdiction: Jurisdiction::UsState("CA".to_owned()), + tcf: Some(make_tcf_with_storage(true)), + us_privacy: Some(UsPrivacy { + version: 1, + notice_given: PrivacyFlag::Yes, + opt_out_sale: PrivacyFlag::Yes, + lspa_covered: PrivacyFlag::NotApplicable, + }), + ..ConsentContext::default() + }; + assert!( + allows_ec_creation(&ctx), + "TCF consent should take priority 
over US Privacy opt-out when both present" + ); + } + + #[test] + fn ec_allowed_us_state_gpp_no_sale_opt_out() { + let ctx = ConsentContext { + jurisdiction: Jurisdiction::UsState("TN".to_owned()), + gpp: Some(GppConsent { + version: 1, + section_ids: vec![7], + eu_tcf: None, + us_sale_opt_out: Some(false), + }), + ..ConsentContext::default() + }; + assert!( + allows_ec_creation(&ctx), + "US state + GPP US sale_opt_out=false should allow EC" + ); + } + + #[test] + fn ec_blocked_us_state_gpp_sale_opted_out() { + let ctx = ConsentContext { + jurisdiction: Jurisdiction::UsState("TN".to_owned()), + gpp: Some(GppConsent { + version: 1, + section_ids: vec![7], + eu_tcf: None, + us_sale_opt_out: Some(true), + }), + ..ConsentContext::default() + }; + assert!( + !allows_ec_creation(&ctx), + "US state + GPP US sale_opt_out=true should block EC" + ); + } + + #[test] + fn ec_blocked_us_state_gpc_overrides_gpp_us() { + let ctx = ConsentContext { + jurisdiction: Jurisdiction::UsState("TN".to_owned()), + gpc: true, + gpp: Some(GppConsent { + version: 1, + section_ids: vec![7], + eu_tcf: None, + us_sale_opt_out: Some(false), + }), + ..ConsentContext::default() + }; + assert!( + !allows_ec_creation(&ctx), + "GPC should block EC even when GPP US says no opt-out" + ); + } + + #[test] + fn ec_us_state_tcf_takes_priority_over_gpp_us() { + let ctx = ConsentContext { + jurisdiction: Jurisdiction::UsState("TN".to_owned()), + tcf: Some(make_tcf_with_storage(true)), + gpp: Some(GppConsent { + version: 1, + section_ids: vec![7], + eu_tcf: None, + us_sale_opt_out: Some(true), + }), + ..ConsentContext::default() + }; + assert!( + allows_ec_creation(&ctx), + "TCF consent should take priority over GPP US opt-out" + ); + } + + #[test] + fn ec_us_state_gpp_us_takes_priority_over_us_privacy() { + let ctx = ConsentContext { + jurisdiction: Jurisdiction::UsState("TN".to_owned()), + gpp: Some(GppConsent { + version: 1, + section_ids: vec![7], + eu_tcf: None, + us_sale_opt_out: Some(false), + }), + 
us_privacy: Some(UsPrivacy { + version: 1, + notice_given: PrivacyFlag::Yes, + opt_out_sale: PrivacyFlag::Yes, + lspa_covered: PrivacyFlag::NotApplicable, + }), + ..ConsentContext::default() + }; + assert!( + allows_ec_creation(&ctx), + "GPP US should take priority over us_privacy opt-out" + ); + } + + #[test] + fn ec_us_state_gpp_no_us_section_falls_through_to_us_privacy() { + let ctx = ConsentContext { + jurisdiction: Jurisdiction::UsState("CA".to_owned()), + gpp: Some(GppConsent { + version: 1, + section_ids: vec![2], + eu_tcf: None, + us_sale_opt_out: None, + }), + us_privacy: Some(UsPrivacy { + version: 1, + notice_given: PrivacyFlag::Yes, + opt_out_sale: PrivacyFlag::No, + lspa_covered: PrivacyFlag::NotApplicable, + }), + ..ConsentContext::default() + }; + assert!( + allows_ec_creation(&ctx), + "GPP without US section should fall through to us_privacy" + ); + } } diff --git a/crates/trusted-server-core/src/consent/types.rs b/crates/trusted-server-core/src/consent/types.rs index a68eda9a..44f1a3df 100644 --- a/crates/trusted-server-core/src/consent/types.rs +++ b/crates/trusted-server-core/src/consent/types.rs @@ -302,6 +302,13 @@ pub struct GppConsent { pub section_ids: Vec, /// Decoded EU TCF v2.2 section (if present in GPP, section ID 2). pub eu_tcf: Option, + /// Whether the user opted out of sale of personal information via a US GPP + /// section (IDs 7–23). 
+ /// + /// - `Some(true)` — a US section is present and `sale_opt_out == OptedOut` + /// - `Some(false)` — a US section is present and user did not opt out + /// - `None` — no US section exists in the GPP string + pub us_sale_opt_out: Option, } // --------------------------------------------------------------------------- diff --git a/crates/trusted-server-core/src/consent_config.rs b/crates/trusted-server-core/src/consent_config.rs index e2074c48..e5fed1a9 100644 --- a/crates/trusted-server-core/src/consent_config.rs +++ b/crates/trusted-server-core/src/consent_config.rs @@ -72,14 +72,6 @@ pub struct ConsentConfig { /// but disagree on consent status. #[serde(default)] pub conflict_resolution: ConflictResolutionConfig, - - /// Name of the KV Store used for consent persistence. - /// - /// When set, consent data is persisted per Edge Cookie (EC) ID so that - /// returning users without consent cookies can still have their - /// consent preferences applied. Set to `None` to disable. - #[serde(default, skip_serializing_if = "Option::is_none")] - pub consent_store: Option, } impl Default for ConsentConfig { @@ -92,7 +84,6 @@ impl Default for ConsentConfig { us_states: UsStatesConfig::default(), us_privacy_defaults: UsPrivacyDefaultsConfig::default(), conflict_resolution: ConflictResolutionConfig::default(), - consent_store: None, } } } diff --git a/crates/trusted-server-core/src/constants.rs b/crates/trusted-server-core/src/constants.rs index 0ee9fc76..0dec1dae 100644 --- a/crates/trusted-server-core/src/constants.rs +++ b/crates/trusted-server-core/src/constants.rs @@ -1,10 +1,14 @@ use http::header::HeaderName; pub const COOKIE_TS_EC: &str = "ts-ec"; +pub const COOKIE_TS_EIDS: &str = "ts-eids"; +pub const COOKIE_SHAREDID: &str = "sharedId"; pub const HEADER_X_PUB_USER_ID: HeaderName = HeaderName::from_static("x-pub-user-id"); pub const HEADER_X_TS_EC: HeaderName = HeaderName::from_static("x-ts-ec"); -pub const HEADER_X_TS_EC_FRESH: HeaderName = 
HeaderName::from_static("x-ts-ec-fresh"); +pub const HEADER_X_TS_EIDS: HeaderName = HeaderName::from_static("x-ts-eids"); +pub const HEADER_X_TS_EC_CONSENT: HeaderName = HeaderName::from_static("x-ts-ec-consent"); +pub const HEADER_X_TS_EIDS_TRUNCATED: HeaderName = HeaderName::from_static("x-ts-eids-truncated"); pub const HEADER_X_CONSENT_ADVERTISING: HeaderName = HeaderName::from_static("x-consent-advertising"); pub const HEADER_X_FORWARDED_FOR: HeaderName = HeaderName::from_static("x-forwarded-for"); @@ -45,7 +49,9 @@ pub const HEADER_REFERER: HeaderName = HeaderName::from_static("referer"); /// in `const` context. pub const INTERNAL_HEADERS: &[&str] = &[ "x-ts-ec", - "x-ts-ec-fresh", + "x-ts-eids", + "x-ts-ec-consent", + "x-ts-eids-truncated", "x-pub-user-id", "x-subject-id", "x-consent-advertising", diff --git a/crates/trusted-server-core/src/cookies.rs b/crates/trusted-server-core/src/cookies.rs index 67f4a4c7..e56c6834 100644 --- a/crates/trusted-server-core/src/cookies.rs +++ b/crates/trusted-server-core/src/cookies.rs @@ -1,22 +1,16 @@ //! Cookie handling utilities. //! -//! This module provides functionality for parsing and creating cookies +//! This module provides functionality for parsing, stripping, and forwarding cookies //! used in the trusted server system. -use std::borrow::Cow; - use cookie::{Cookie, CookieJar}; use edgezero_core::body::Body as EdgeBody; use error_stack::{Report, ResultExt}; use http::header; use http::Request; -use http::Response; -use crate::constants::{ - COOKIE_EUCONSENT_V2, COOKIE_GPP, COOKIE_GPP_SID, COOKIE_TS_EC, COOKIE_US_PRIVACY, -}; +use crate::constants::{COOKIE_EUCONSENT_V2, COOKIE_GPP, COOKIE_GPP_SID, COOKIE_US_PRIVACY}; use crate::error::TrustedServerError; -use crate::settings::Settings; /// Cookie names carrying privacy consent signals. 
/// @@ -30,50 +24,6 @@ pub const CONSENT_COOKIE_NAMES: &[&str] = &[ COOKIE_US_PRIVACY, ]; -const COOKIE_MAX_AGE: i32 = 365 * 24 * 60 * 60; // 1 year - -fn is_allowed_ec_id_char(c: char) -> bool { - c.is_ascii_alphanumeric() || matches!(c, '.' | '-' | '_') -} - -// Outbound allowlist for cookie sanitization: permits [a-zA-Z0-9._-] as a -// defense-in-depth backstop when setting the Set-Cookie header. This is -// intentionally broader than the inbound format validator -// (`synthetic::is_valid_synthetic_id`), which enforces the exact -// `<64-hex>.<6-alphanumeric>` structure and is used to reject untrusted -// request values before they enter the system. -#[must_use] -pub(crate) fn ec_id_has_only_allowed_chars(ec_id: &str) -> bool { - ec_id.chars().all(is_allowed_ec_id_char) -} - -fn sanitize_ec_id_for_cookie(ec_id: &str) -> Cow<'_, str> { - if ec_id_has_only_allowed_chars(ec_id) { - return Cow::Borrowed(ec_id); - } - - let safe_id = ec_id - .chars() - .filter(|c| is_allowed_ec_id_char(*c)) - .collect::(); - - log::warn!( - "Stripped disallowed characters from EC ID before setting cookie (len {} -> {}); \ - callers should reject invalid request IDs before cookie creation", - ec_id.len(), - safe_id.len(), - ); - - Cow::Owned(safe_id) -} - -pub(crate) fn ec_cookie_attributes(settings: &Settings, max_age: i32) -> String { - format!( - "Domain={}; Path=/; Secure; HttpOnly; SameSite=Lax; Max-Age={max_age}", - settings.publisher.cookie_domain, - ) -} - /// Parses a cookie string into a [`CookieJar`]. /// /// Returns an empty jar if the cookie string is unparseable. @@ -172,7 +122,7 @@ pub fn forward_cookie_header( } } Err(_) => { - // Non-UTF-8 Cookie header — forward as-is + // Non-UTF-8 Cookie header — forward as-is. to.headers_mut() .append(header::COOKIE, cookie_value.clone()); } @@ -180,152 +130,14 @@ pub fn forward_cookie_header( } } -/// Returns `true` if every byte in `value` is a valid RFC 6265 `cookie-octet`. -/// An empty string is always rejected. 
-/// -/// RFC 6265 restricts cookie values to printable US-ASCII excluding whitespace, -/// double-quote, comma, semicolon, and backslash. Rejecting these characters -/// prevents header-injection attacks where a crafted value could append -/// spurious cookie attributes (e.g. `evil; Domain=.attacker.com`). -/// -/// Non-ASCII characters (multi-byte UTF-8) are always rejected because their -/// byte values exceed `0x7E`. -#[must_use] -pub(crate) fn ec_cookie_value_is_safe(value: &str) -> bool { - // RFC 6265 §4.1.1 cookie-octet: - // 0x21 — '!' - // 0x23–0x2B — '#' through '+' (excludes 0x22 DQUOTE) - // 0x2D–0x3A — '-' through ':' (excludes 0x2C comma) - // 0x3C–0x5B — '<' through '[' (excludes 0x3B semicolon) - // 0x5D–0x7E — ']' through '~' (excludes 0x5C backslash, 0x7F DEL) - // All control characters (0x00–0x20) and non-ASCII (0x80+) are also excluded. - !value.is_empty() - && value - .bytes() - .all(|b| matches!(b, 0x21 | 0x23..=0x2B | 0x2D..=0x3A | 0x3C..=0x5B | 0x5D..=0x7E)) -} - -/// Generates a `Set-Cookie` header value with the following security attributes: -/// - `Secure`: transmitted over HTTPS only. -/// - `HttpOnly`: inaccessible to JavaScript (`document.cookie`), blocking XSS exfiltration. -/// Safe to set because integrations receive the EC ID via the `x-ts-ec` -/// response header instead of reading it from the cookie directly. -/// - `SameSite=Lax`: sent on same-site requests and top-level cross-site navigations. -/// `Strict` is intentionally avoided — it would suppress the cookie on the first -/// request when a user arrives from an external page, breaking first-visit attribution. -/// - `Max-Age`: 1 year retention. -/// -/// The `ec_id` is sanitized via an allowlist before embedding in the cookie value. -/// Only ASCII alphanumeric characters and `.`, `-`, `_` are permitted — matching the -/// known EC ID format (`{64-char-hex}.{6-char-alphanumeric}`). 
Request-sourced IDs -/// with disallowed characters are rejected earlier in [`crate::edge_cookie::get_ec_id`]; -/// this sanitization remains as a defense-in-depth backstop for unexpected callers. -/// -/// The `cookie_domain` is validated at config load time via [`validator::Validate`] on -/// [`crate::settings::Publisher`]; bad config fails at startup, not per-request. -/// -/// # Examples -/// -/// ```no_run -/// # use trusted_server_core::cookies::create_ec_cookie; -/// # use trusted_server_core::settings::Settings; -/// // `settings` is loaded at startup via `Settings::from_toml_and_env`. -/// # fn example(settings: &Settings) { -/// let cookie = create_ec_cookie(settings, "abc123.xk92ab"); -/// assert!(cookie.contains("HttpOnly")); -/// assert!(cookie.contains("Secure")); -/// # } -/// ``` -#[must_use] -pub fn create_ec_cookie(settings: &Settings, ec_id: &str) -> String { - let safe_id = sanitize_ec_id_for_cookie(ec_id); - - format!( - "{}={}; {}", - COOKIE_TS_EC, - safe_id, - ec_cookie_attributes(settings, COOKIE_MAX_AGE), - ) -} - -#[must_use] -pub(crate) fn try_build_ec_cookie_value(settings: &Settings, ec_id: &str) -> Option { - if !ec_cookie_value_is_safe(ec_id) { - log::warn!( - "Rejecting EC ID for Set-Cookie: value of {} bytes contains characters illegal in a cookie value", - ec_id.len() - ); - return None; - } - - Some(create_ec_cookie(settings, ec_id)) -} - -/// Sets the EC ID cookie on the given response. -/// -/// Validates `ec_id` against RFC 6265 `cookie-octet` rules before -/// interpolation. If the value contains unsafe characters (e.g. semicolons), -/// the cookie is not set and a warning is logged. This prevents an attacker -/// from injecting spurious cookie attributes via a controlled ID value. -/// -/// `cookie_domain` comes from operator configuration and is considered trusted. 
-/// -/// # Panics -/// -/// Does not panic in practice — the cookie value is validated by -/// [`ec_cookie_value_is_safe`] (early return if invalid) before -/// [`http::HeaderValue::from_str`] is called, so the expect is unreachable. -/// Listed here only because clippy cannot prove it statically. -pub fn set_ec_cookie(settings: &Settings, response: &mut Response, ec_id: &str) { - let Some(cookie) = try_build_ec_cookie_value(settings, ec_id) else { - return; - }; - - response.headers_mut().append( - header::SET_COOKIE, - http::HeaderValue::from_str(&cookie).expect("should build Set-Cookie header value"), - ); -} - -/// Expires the EC cookie by setting `Max-Age=0`. -/// -/// Used when a user revokes consent — the browser will delete the cookie -/// on receipt of this header. -/// -/// # Panics -/// -/// Does not panic in practice — the formatted value contains only ASCII -/// printable characters (constant name, validated domain, static attributes), -/// so [`http::HeaderValue::from_str`] always succeeds. Listed here only -/// because clippy cannot prove it statically. 
-pub fn expire_ec_cookie(settings: &Settings, response: &mut Response) { - response.headers_mut().append( - header::SET_COOKIE, - http::HeaderValue::from_str(&format!( - "{}=; {}", - COOKIE_TS_EC, - ec_cookie_attributes(settings, 0), - )) - .expect("should build expiry Set-Cookie header value"), - ); -} - #[cfg(test)] mod tests { use http::HeaderValue; use crate::error::TrustedServerError; - use crate::test_support::tests::create_test_settings; use super::*; - fn build_response() -> Response { - Response::builder() - .status(200) - .body(EdgeBody::empty()) - .expect("should build test response") - } - fn build_request(cookie_header: Option<&str>) -> Request { let mut builder = Request::builder().method("GET").uri("http://example.com"); if let Some(cookie_header) = cookie_header { @@ -433,169 +245,6 @@ mod tests { ); } - #[test] - fn test_set_ec_cookie() { - let settings = create_test_settings(); - let mut response = build_response(); - set_ec_cookie(&settings, &mut response, "abc123.XyZ789"); - - let cookie_str = response - .headers() - .get(header::SET_COOKIE) - .expect("Set-Cookie header should be present") - .to_str() - .expect("header should be valid UTF-8"); - - assert_eq!( - cookie_str, - format!( - "{}=abc123.XyZ789; Domain={}; Path=/; Secure; HttpOnly; SameSite=Lax; Max-Age={}", - COOKIE_TS_EC, settings.publisher.cookie_domain, COOKIE_MAX_AGE, - ), - "Set-Cookie header should match expected format" - ); - } - - #[test] - fn test_create_ec_cookie_sanitizes_disallowed_chars_in_id() { - let settings = create_test_settings(); - // Allowlist permits only ASCII alphanumeric, '.', '-', '_'. - // ';', '=', '\r', '\n', spaces, NUL bytes, and other control chars are all stripped. - let result = create_ec_cookie(&settings, "evil;injected\r\nfoo=bar\0baz"); - // Extract the value portion anchored to the cookie name constant to - // avoid false positives from disallowed chars in cookie attributes. 
- let value = result - .strip_prefix(&format!("{}=", COOKIE_TS_EC)) - .and_then(|s| s.split_once(';').map(|(v, _)| v)) - .expect("should have cookie value portion"); - assert_eq!( - value, "evilinjectedfoobarbaz", - "should strip disallowed characters and preserve safe chars" - ); - } - - #[test] - fn test_create_ec_cookie_preserves_well_formed_id() { - let settings = create_test_settings(); - // A well-formed ID should pass through the allowlist unmodified. - let id = "abc123def0123456789abcdef0123456789abcdef0123456789abcdef01234567.xk92ab"; - let result = create_ec_cookie(&settings, id); - let value = result - .strip_prefix(&format!("{}=", COOKIE_TS_EC)) - .and_then(|s| s.split_once(';').map(|(v, _)| v)) - .expect("should have cookie value portion"); - assert_eq!(value, id, "should not modify a well-formed EC ID"); - } - - #[test] - fn test_set_ec_cookie_rejects_semicolon() { - let settings = create_test_settings(); - let mut response = build_response(); - set_ec_cookie(&settings, &mut response, "evil; Domain=.attacker.com"); - - assert!( - response.headers().get(header::SET_COOKIE).is_none(), - "Set-Cookie should not be set when value contains a semicolon" - ); - } - - #[test] - fn test_set_ec_cookie_rejects_crlf() { - let settings = create_test_settings(); - let mut response = build_response(); - set_ec_cookie(&settings, &mut response, "evil\r\nX-Injected: header"); - - assert!( - response.headers().get(header::SET_COOKIE).is_none(), - "Set-Cookie should not be set when value contains CRLF" - ); - } - - #[test] - fn test_set_ec_cookie_rejects_space() { - let settings = create_test_settings(); - let mut response = build_response(); - set_ec_cookie(&settings, &mut response, "bad value"); - - assert!( - response.headers().get(header::SET_COOKIE).is_none(), - "Set-Cookie should not be set when value contains whitespace" - ); - } - - #[test] - fn test_is_safe_cookie_value_rejects_empty_string() { - assert!(!ec_cookie_value_is_safe(""), "should reject empty 
string"); - } - - #[test] - fn test_is_safe_cookie_value_accepts_valid_ec_id_characters() { - // Hex digits, dot separator, alphanumeric suffix — the full EC ID character set - assert!( - ec_cookie_value_is_safe("abcdef0123456789.ABCDEFabcdef"), - "should accept hex digits, dots, and alphanumeric characters" - ); - } - - #[test] - fn test_is_safe_cookie_value_rejects_non_ascii() { - assert!( - !ec_cookie_value_is_safe("valüe"), - "should reject non-ASCII UTF-8 characters" - ); - } - - #[test] - fn test_is_safe_cookie_value_rejects_illegal_characters() { - assert!( - !ec_cookie_value_is_safe("val;ue"), - "should reject semicolon" - ); - assert!(!ec_cookie_value_is_safe("val,ue"), "should reject comma"); - assert!( - !ec_cookie_value_is_safe("val\"ue"), - "should reject double-quote" - ); - assert!( - !ec_cookie_value_is_safe("val\\ue"), - "should reject backslash" - ); - assert!(!ec_cookie_value_is_safe("val ue"), "should reject space"); - assert!( - !ec_cookie_value_is_safe("val\x00ue"), - "should reject null byte" - ); - assert!( - !ec_cookie_value_is_safe("val\x7fue"), - "should reject DEL character" - ); - } - - #[test] - fn test_expire_ec_cookie_matches_security_attributes() { - let settings = create_test_settings(); - let mut response = build_response(); - - expire_ec_cookie(&settings, &mut response); - - let cookie_header = response - .headers() - .get(header::SET_COOKIE) - .expect("Set-Cookie header should be present"); - let cookie_str = cookie_header - .to_str() - .expect("header should be valid UTF-8"); - - assert_eq!( - cookie_str, - format!( - "{}=; Domain={}; Path=/; Secure; HttpOnly; SameSite=Lax; Max-Age=0", - COOKIE_TS_EC, settings.publisher.cookie_domain, - ), - "expiry cookie should retain the same security attributes as the live cookie" - ); - } - // --------------------------------------------------------------- // forward_cookie_header tests // --------------------------------------------------------------- @@ -754,7 +403,7 @@ mod tests { 
#[test] fn test_strip_cookies_with_complex_values() { - // Cookie values can contain '=' characters + // Cookie values can contain '=' characters. let header = "euconsent-v2=BOE=xyz; session=abc=123=def"; let stripped = strip_cookies(header, CONSENT_COOKIE_NAMES); assert_eq!(stripped, "session=abc=123=def"); diff --git a/crates/trusted-server-core/src/ec/auth.rs b/crates/trusted-server-core/src/ec/auth.rs new file mode 100644 index 00000000..f41c8a2b --- /dev/null +++ b/crates/trusted-server-core/src/ec/auth.rs @@ -0,0 +1,110 @@ +//! Shared Bearer-token authentication helpers for EC partner endpoints. +//! +//! Used by both `/_ts/api/v1/identify` and `/_ts/api/v1/batch-sync` so +//! authentication hardening stays consistent across endpoints. + +use fastly::Request; + +use super::partner::hash_api_key; +use super::registry::{PartnerConfig, PartnerRegistry}; + +/// Authenticates a request via Bearer token, returning the matching partner. +pub(super) fn authenticate_bearer<'r>( + registry: &'r PartnerRegistry, + req: &Request, +) -> Option<&'r PartnerConfig> { + let header_value = req.get_header_str("authorization")?; + let token = parse_bearer_token(header_value)?; + let key_hash = hash_api_key(token); + registry.find_by_api_key_hash(&key_hash) +} + +fn parse_bearer_token(header_value: &str) -> Option<&str> { + let mut parts = header_value.split_whitespace(); + let scheme = parts.next()?; + let token = parts.next()?; + + if !scheme.eq_ignore_ascii_case("bearer") || token.is_empty() { + return None; + } + if parts.next().is_some() { + return None; + } + + Some(token) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::redacted::Redacted; + use crate::settings::EcPartner; + + const VALID_API_TOKEN: &str = "auth-test-token-32-bytes-minimum"; + + fn make_test_partner(id: &str, api_token: &str) -> EcPartner { + EcPartner { + id: id.to_owned(), + name: format!("Partner {id}"), + source_domain: format!("{id}.example.com"), + openrtb_atype: 
EcPartner::default_openrtb_atype(), + bidstream_enabled: true, + api_token: Redacted::new(api_token.to_owned()), + batch_rate_limit: EcPartner::default_batch_rate_limit(), + pull_sync_enabled: false, + pull_sync_url: None, + pull_sync_allowed_domains: vec![], + pull_sync_ttl_sec: EcPartner::default_pull_sync_ttl_sec(), + pull_sync_rate_limit: EcPartner::default_pull_sync_rate_limit(), + ts_pull_token: None, + } + } + + #[test] + fn parse_bearer_token_accepts_case_insensitive_scheme() { + assert_eq!(parse_bearer_token("Bearer tok"), Some("tok")); + assert_eq!(parse_bearer_token("bearer tok"), Some("tok")); + assert_eq!(parse_bearer_token("BEARER tok"), Some("tok")); + } + + #[test] + fn parse_bearer_token_rejects_invalid_shapes() { + assert_eq!(parse_bearer_token("Bearer"), None); + assert_eq!(parse_bearer_token("Bearer "), None); + assert_eq!(parse_bearer_token("Basic abc"), None); + assert_eq!(parse_bearer_token("Bearer a b"), None); + } + + #[test] + fn authenticate_bearer_returns_none_for_missing_header() { + let registry = PartnerRegistry::empty(); + let req = Request::new("GET", "https://edge.example.com/_ts/api/v1/identify"); + + let result = authenticate_bearer(®istry, &req); + assert!(result.is_none(), "should return None without auth header"); + } + + #[test] + fn authenticate_bearer_returns_none_for_malformed_header() { + let registry = PartnerRegistry::empty(); + let mut req = Request::new("GET", "https://edge.example.com/_ts/api/v1/identify"); + req.set_header("authorization", "Basic dXNlcjpwYXNz"); + + let result = authenticate_bearer(®istry, &req); + assert!( + result.is_none(), + "should return None for non-Bearer auth scheme" + ); + } + + #[test] + fn authenticate_bearer_returns_matching_partner_for_valid_token() { + let partners = vec![make_test_partner("ssp_x", VALID_API_TOKEN)]; + let registry = PartnerRegistry::from_config(&partners).expect("should build registry"); + let mut req = Request::new("GET", 
"https://edge.example.com/_ts/api/v1/identify"); + req.set_header("authorization", format!("Bearer {VALID_API_TOKEN}")); + + let result = authenticate_bearer(®istry, &req).expect("should authenticate partner"); + assert_eq!(result.id, "ssp_x", "should return the matching partner"); + } +} diff --git a/crates/trusted-server-core/src/ec/batch_sync.rs b/crates/trusted-server-core/src/ec/batch_sync.rs new file mode 100644 index 00000000..24c7d65d --- /dev/null +++ b/crates/trusted-server-core/src/ec/batch_sync.rs @@ -0,0 +1,596 @@ +//! Server-to-server batch sync endpoint (`POST /_ts/api/v1/batch-sync`). +//! +//! Partners send authenticated batch ID sync requests via Bearer token. +//! Each mapping associates an `ec_id` (`{64hex}.{6alnum}`) +//! with the partner's user ID. Mappings are individually validated and +//! written to the KV identity graph, with per-mapping rejection reasons +//! reported in the response. +//! +//! Mapping timestamps are retained in the request schema for client +//! compatibility, but the EC identity graph no longer stores per-partner sync +//! timestamps. Valid mappings therefore use idempotent last-write-wins +//! semantics: unchanged UIDs are accepted without a write; different UIDs +//! replace the stored value regardless of timestamp. 
+ +use error_stack::{Report, ResultExt}; +use fastly::http::StatusCode; +use fastly::{Request, Response}; +use serde::{Deserialize, Serialize}; + +use crate::error::TrustedServerError; + +use super::auth::authenticate_bearer; +use super::generation::{is_valid_ec_id, normalize_ec_id_for_kv}; +use super::kv::{KvIdentityGraph, UpsertResult}; +use super::log_id; +use super::rate_limiter::RateLimiter; +use super::registry::PartnerRegistry; + +const REASON_INVALID_EC_ID: &str = "invalid_ec_id"; +const REASON_INVALID_PARTNER_UID: &str = "invalid_partner_uid"; +const REASON_INELIGIBLE: &str = "ineligible"; +const REASON_KV_UNAVAILABLE: &str = "kv_unavailable"; + +/// Maximum number of mappings allowed in a single batch request. +const MAX_BATCH_SIZE: usize = 1000; + +use super::kv_types::MAX_UID_LENGTH; + +trait BatchSyncWriter { + fn upsert_partner_id_if_exists( + &self, + ec_id: &str, + partner_id: &str, + uid: &str, + ) -> Result>; +} + +impl BatchSyncWriter for KvIdentityGraph { + fn upsert_partner_id_if_exists( + &self, + ec_id: &str, + partner_id: &str, + uid: &str, + ) -> Result> { + KvIdentityGraph::upsert_partner_id_if_exists(self, ec_id, partner_id, uid) + } +} + +// --------------------------------------------------------------------------- +// Request / response types +// --------------------------------------------------------------------------- + +#[derive(Debug, Deserialize)] +struct BatchSyncRequest { + mappings: Vec, +} + +#[derive(Debug, Deserialize)] +struct SyncMapping { + ec_id: String, + partner_uid: String, + // Retained for API compatibility. The EC KV body no longer stores + // per-partner timestamps, so this does not order writes. 
+ #[allow(dead_code)] + timestamp: u64, +} + +#[derive(Debug, Serialize)] +struct BatchSyncResponse { + accepted: usize, + rejected: usize, + errors: Vec, +} + +#[derive(Debug, Serialize)] +struct MappingError { + index: usize, + reason: &'static str, +} + +// --------------------------------------------------------------------------- +// Handler +// --------------------------------------------------------------------------- + +/// Handles `POST /_ts/api/v1/batch-sync`. +/// +/// # Errors +/// +/// Returns [`TrustedServerError`] if the rate-limit check fails, the request body is not valid JSON, or the response cannot be serialized. +pub fn handle_batch_sync( + kv: &KvIdentityGraph, + registry: &PartnerRegistry, + rate_limiter: &dyn RateLimiter, + mut req: Request, +) -> Result> { + handle_batch_sync_with_writer(kv, registry, rate_limiter, &mut req) +} + +fn handle_batch_sync_with_writer( + writer: &dyn BatchSyncWriter, + registry: &PartnerRegistry, + rate_limiter: &dyn RateLimiter, + req: &mut Request, +) -> Result> { + // 1. Authenticate + let Some(partner) = authenticate_bearer(registry, req) else { + return Ok(error_response(StatusCode::UNAUTHORIZED, "invalid_token")); + }; + + // 2. Rate limit (per-partner, per-minute via batch_rate_limit) + let rate_key = format!("batch:{}", partner.id); + if rate_limiter.exceeded_per_minute(&rate_key, partner.batch_rate_limit)? { + return Ok(error_response( + StatusCode::TOO_MANY_REQUESTS, + "rate_limit_exceeded", + )); + } + + // 3.
Parse body (with size limit to prevent OOM before validation) + const MAX_BODY_SIZE: usize = 2 * 1024 * 1024; // 2 MB + if content_length_exceeds_limit(req, MAX_BODY_SIZE) { + return Ok(error_response( + StatusCode::PAYLOAD_TOO_LARGE, + "body_too_large", + )); + } + + let body_bytes = req.take_body_bytes(); + if body_bytes.len() > MAX_BODY_SIZE { + return Ok(error_response( + StatusCode::PAYLOAD_TOO_LARGE, + "body_too_large", + )); + } + let body: BatchSyncRequest = serde_json::from_slice(&body_bytes).map_err(|e| { + Report::new(TrustedServerError::BadRequest { + message: format!("Invalid request body: {e}"), + }) + })?; + + if body.mappings.len() > MAX_BATCH_SIZE { + return Ok(error_response(StatusCode::BAD_REQUEST, "batch_too_large")); + } + + // 4. Process mappings with per-item validation and rejection reasons. + let (accepted, errors) = process_mappings(writer, &partner.id, &body.mappings); + + let rejected = errors.len(); + let status = if rejected > 0 { + StatusCode::MULTI_STATUS + } else { + StatusCode::OK + }; + + let response_body = BatchSyncResponse { + accepted, + rejected, + errors, + }; + + json_response(status, &response_body) +} + +fn content_length_exceeds_limit(req: &Request, max_body_size: usize) -> bool { + req.get_header_str("content-length") + .and_then(|value| value.parse::().ok()) + .is_some_and(|content_length| content_length > max_body_size) +} + +fn process_mappings( + writer: &dyn BatchSyncWriter, + partner_id: &str, + mappings: &[SyncMapping], +) -> (usize, Vec) { + let mut accepted: usize = 0; + let mut errors = Vec::new(); + + for (idx, mapping) in mappings.iter().enumerate() { + let ec_id = normalize_ec_id_for_kv(&mapping.ec_id); + if !is_valid_ec_id(&ec_id) { + errors.push(MappingError { + index: idx, + reason: REASON_INVALID_EC_ID, + }); + continue; + } + + if mapping.partner_uid.trim().is_empty() || mapping.partner_uid.len() > MAX_UID_LENGTH { + errors.push(MappingError { + index: idx, + reason: REASON_INVALID_PARTNER_UID, + }); + 
continue; + } + match writer.upsert_partner_id_if_exists(&ec_id, partner_id, &mapping.partner_uid) { + Ok(UpsertResult::Written | UpsertResult::Unchanged) => { + accepted += 1; + } + Ok(UpsertResult::NotFound | UpsertResult::ConsentWithdrawn) => { + errors.push(MappingError { + index: idx, + reason: REASON_INELIGIBLE, + }); + } + Err(err) => { + log::warn!( + "Batch sync KV write failed for index {idx} (ec_id '{}'): {err:?}", + log_id(&mapping.ec_id), + ); + errors.push(MappingError { + index: idx, + reason: REASON_KV_UNAVAILABLE, + }); + // Abort remaining mappings on infrastructure failure. + for remaining_idx in (idx + 1)..mappings.len() { + errors.push(MappingError { + index: remaining_idx, + reason: REASON_KV_UNAVAILABLE, + }); + } + break; + } + } + } + + (accepted, errors) +} + +fn json_response( + status: StatusCode, + body: &T, +) -> Result> { + let body = serde_json::to_string(body).change_context(TrustedServerError::EdgeCookie { + message: "Failed to serialize batch sync response".to_owned(), + })?; + + Ok(Response::from_status(status) + .with_content_type(fastly::mime::APPLICATION_JSON) + .with_body(body)) +} + +fn error_response(status: StatusCode, reason: &str) -> Response { + let body = serde_json::json!({ "error": reason }); + Response::from_status(status) + .with_content_type(fastly::mime::APPLICATION_JSON) + .with_body(body.to_string()) +} + +#[cfg(test)] +mod tests { + use super::*; + use std::collections::VecDeque; + + use crate::error::TrustedServerError; + use crate::redacted::Redacted; + use crate::settings::EcPartner; + + // EC ID validation tests are in generation.rs (is_valid_ec_id). + // Verify the import works here with a basic smoke test. 
+ #[test] + fn is_valid_ec_id_smoke_test() { + let valid = format!("{}.ABC123", "a".repeat(64)); + assert!(is_valid_ec_id(&valid)); + assert!(!is_valid_ec_id(&"a".repeat(64))); + } + + struct MockRateLimiter { + should_exceed: bool, + } + + impl RateLimiter for MockRateLimiter { + fn exceeded( + &self, + _key: &str, + _hourly_limit: u32, + ) -> Result> { + Ok(self.should_exceed) + } + + fn exceeded_per_minute( + &self, + _key: &str, + _per_minute_limit: u32, + ) -> Result> { + Ok(self.should_exceed) + } + } + + struct MockWriter { + results: std::cell::RefCell>>>, + } + + impl MockWriter { + fn new(results: Vec>>) -> Self { + Self { + results: std::cell::RefCell::new(results.into()), + } + } + } + + impl BatchSyncWriter for MockWriter { + fn upsert_partner_id_if_exists( + &self, + _ec_id: &str, + _partner_id: &str, + _uid: &str, + ) -> Result> { + self.results + .borrow_mut() + .pop_front() + .expect("should provide mock result for each mapping") + } + } + + fn mapping(ec_id: &str, partner_uid: &str, timestamp: u64) -> SyncMapping { + SyncMapping { + ec_id: ec_id.to_owned(), + partner_uid: partner_uid.to_owned(), + timestamp, + } + } + + fn make_test_partner(id: &str, api_token: &str) -> EcPartner { + EcPartner { + id: id.to_owned(), + name: format!("Partner {id}"), + source_domain: format!("{id}.example.com"), + openrtb_atype: EcPartner::default_openrtb_atype(), + bidstream_enabled: true, + api_token: Redacted::new(api_token.to_owned()), + batch_rate_limit: EcPartner::default_batch_rate_limit(), + pull_sync_enabled: false, + pull_sync_url: None, + pull_sync_allowed_domains: vec![], + pull_sync_ttl_sec: EcPartner::default_pull_sync_ttl_sec(), + pull_sync_rate_limit: EcPartner::default_pull_sync_rate_limit(), + ts_pull_token: None, + } + } + + fn authorized_batch_request(body: &str) -> Request { + let mut req = Request::new("POST", "https://edge.example.com/_ts/api/v1/batch-sync"); + req.set_header("authorization", "Bearer test-token-32-bytes-minimum-value"); + 
req.set_body(body.to_owned()); + req + } + + fn test_registry() -> PartnerRegistry { + let partners = vec![make_test_partner( + "ssp_x", + "test-token-32-bytes-minimum-value", + )]; + PartnerRegistry::from_config(&partners).expect("should build registry") + } + + #[test] + fn content_length_exceeds_limit_detects_oversized_header() { + let mut req = authorized_batch_request("{}"); + req.set_header("content-length", "2097153"); + + assert!( + content_length_exceeds_limit(&req, 2 * 1024 * 1024), + "should reject oversized content-length before reading body" + ); + } + + #[test] + fn content_length_exceeds_limit_ignores_missing_or_malformed_header() { + let missing = authorized_batch_request("{}"); + let mut malformed = authorized_batch_request("{}"); + malformed.set_header("content-length", "not-a-number"); + + assert!( + !content_length_exceeds_limit(&missing, 2 * 1024 * 1024), + "missing content-length should fall back to post-read size check" + ); + assert!( + !content_length_exceeds_limit(&malformed, 2 * 1024 * 1024), + "malformed content-length should fall back to post-read size check" + ); + } + + #[test] + fn handle_batch_sync_rejects_oversized_content_length_before_body_parse() { + let writer = MockWriter::new(vec![]); + let registry = test_registry(); + let limiter = MockRateLimiter { + should_exceed: false, + }; + let mut req = authorized_batch_request("not-json"); + req.set_header("content-length", "2097153"); + + let response = handle_batch_sync_with_writer(&writer, ®istry, &limiter, &mut req) + .expect("should return oversized response"); + + assert_eq!( + response.get_status(), + StatusCode::PAYLOAD_TOO_LARGE, + "should reject from content-length before parsing body" + ); + } + + #[test] + fn handle_batch_sync_uses_post_read_limit_for_malformed_content_length() { + let writer = MockWriter::new(vec![]); + let registry = test_registry(); + let limiter = MockRateLimiter { + should_exceed: false, + }; + let oversized_body = "{".repeat((2 * 1024 * 1024) + 1); 
+ let mut req = authorized_batch_request(&oversized_body); + req.set_header("content-length", "not-a-number"); + + let response = handle_batch_sync_with_writer(&writer, ®istry, &limiter, &mut req) + .expect("should return oversized response"); + + assert_eq!( + response.get_status(), + StatusCode::PAYLOAD_TOO_LARGE, + "should reject oversized body even when content-length is malformed" + ); + } + + #[test] + fn process_mappings_returns_multistatus_errors_per_mapping() { + let writer = MockWriter::new(vec![Ok(UpsertResult::Written)]); + let mappings = vec![ + mapping("x", "u1", 1), + mapping(&format!("{}.ABC123", "a".repeat(64)), "", 1), + mapping(&format!("{}.ABC123", "a".repeat(64)), "u3", 1), + ]; + + let (accepted, errors) = process_mappings(&writer, "partner", &mappings); + + assert_eq!(accepted, 1, "should count successful writes as accepted"); + assert_eq!(errors.len(), 2, "should reject invalid mappings only"); + assert_eq!(errors[0].index, 0); + assert_eq!(errors[0].reason, REASON_INVALID_EC_ID); + assert_eq!(errors[1].index, 1); + assert_eq!(errors[1].reason, REASON_INVALID_PARTNER_UID); + } + + #[test] + fn process_mappings_aborts_on_kv_unavailable() { + let writer = MockWriter::new(vec![ + Ok(UpsertResult::Written), + Err(Report::new(TrustedServerError::KvStore { + store_name: "ec_store".to_owned(), + message: "down".to_owned(), + })), + Ok(UpsertResult::Written), + ]); + + let mappings = vec![ + mapping(&format!("{}.ABC123", "a".repeat(64)), "u1", 1), + mapping(&format!("{}.ABC123", "b".repeat(64)), "u2", 1), + mapping(&format!("{}.ABC123", "c".repeat(64)), "u3", 1), + ]; + + let (accepted, errors) = process_mappings(&writer, "partner", &mappings); + + assert_eq!(accepted, 1, "should keep accepted count before failure"); + assert_eq!( + errors.len(), + 2, + "should mark current and remaining as unavailable" + ); + assert_eq!(errors[0].index, 1); + assert_eq!(errors[0].reason, REASON_KV_UNAVAILABLE); + assert_eq!(errors[1].index, 2); + 
assert_eq!(errors[1].reason, REASON_KV_UNAVAILABLE); + } + + #[test] + fn handle_batch_sync_rejects_missing_auth() { + let kv = KvIdentityGraph::new("test_store"); + let registry = PartnerRegistry::empty(); + let limiter = MockRateLimiter { + should_exceed: false, + }; + let req = Request::new("POST", "https://edge.example.com/_ts/api/v1/batch-sync"); + + let response = + handle_batch_sync(&kv, ®istry, &limiter, req).expect("should return response"); + assert_eq!( + response.get_status(), + StatusCode::UNAUTHORIZED, + "should return 401 for missing auth" + ); + } + + #[test] + fn batch_sync_request_deserializes_correctly() { + let json = r#"{"mappings": [{"ec_id": "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.ABC123", "partner_uid": "u1", "timestamp": 100}]}"#; + let parsed: BatchSyncRequest = + serde_json::from_str(json).expect("should deserialize batch sync request"); + assert_eq!(parsed.mappings.len(), 1); + assert_eq!( + parsed.mappings[0].ec_id, + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.ABC123" + ); + assert_eq!(parsed.mappings[0].partner_uid, "u1"); + assert_eq!(parsed.mappings[0].timestamp, 100); + } + + #[test] + fn batch_sync_request_rejects_missing_timestamp() { + let json = r#"{"mappings": [{"ec_id": "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb.ABC123", "partner_uid": "u2"}]}"#; + let result = serde_json::from_str::(json); + assert!( + result.is_err(), + "should reject mapping without required timestamp" + ); + } + + #[test] + fn batch_sync_response_serializes_correctly() { + let response = BatchSyncResponse { + accepted: 5, + rejected: 1, + errors: vec![MappingError { + index: 3, + reason: REASON_INELIGIBLE, + }], + }; + + let json: serde_json::Value = + serde_json::to_value(&response).expect("should serialize batch sync response"); + assert_eq!(json["accepted"], 5); + assert_eq!(json["rejected"], 1); + assert_eq!(json["errors"][0]["index"], 3); + assert_eq!(json["errors"][0]["reason"], 
REASON_INELIGIBLE); + } + + #[test] + fn process_mappings_collapses_missing_and_withdrawn_to_ineligible() { + let writer = MockWriter::new(vec![ + Ok(UpsertResult::NotFound), + Ok(UpsertResult::ConsentWithdrawn), + ]); + let ec_id = format!("{}.ABC123", "a".repeat(64)); + let mappings = vec![mapping(&ec_id, "uid-1", 100), mapping(&ec_id, "uid-2", 101)]; + + let (accepted, errors) = process_mappings(&writer, "partner", &mappings); + + assert_eq!(accepted, 0, "should not accept ineligible mappings"); + assert_eq!(errors.len(), 2, "should report both errors"); + assert_eq!(errors[0].index, 0); + assert_eq!(errors[0].reason, REASON_INELIGIBLE); + assert_eq!(errors[1].index, 1); + assert_eq!(errors[1].reason, REASON_INELIGIBLE); + } + + #[test] + fn process_mappings_counts_unchanged_as_accepted() { + let writer = MockWriter::new(vec![Ok(UpsertResult::Unchanged)]); + let ec_id = format!("{}.ABC123", "a".repeat(64)); + let mappings = vec![mapping(&ec_id, "uid-1", 100)]; + + let (accepted, errors) = process_mappings(&writer, "partner", &mappings); + + assert_eq!(accepted, 1, "should count unchanged mappings as accepted"); + assert!( + errors.is_empty(), + "should report no errors for unchanged mappings" + ); + } + + #[test] + fn process_mappings_does_not_order_by_timestamp() { + let writer = MockWriter::new(vec![Ok(UpsertResult::Written), Ok(UpsertResult::Written)]); + let ec_id = format!("{}.ABC123", "a".repeat(64)); + let mappings = vec![ + mapping(&ec_id, "uid-new", 200), + mapping(&ec_id, "uid-old", 100), + ]; + + let (accepted, errors) = process_mappings(&writer, "partner", &mappings); + + assert_eq!( + accepted, 2, + "timestamps are compatibility fields and should not reject older mappings" + ); + assert!(errors.is_empty(), "should accept valid mappings"); + } +} diff --git a/crates/trusted-server-core/src/ec/consent.rs b/crates/trusted-server-core/src/ec/consent.rs new file mode 100644 index 00000000..ad9f5dd2 --- /dev/null +++ 
b/crates/trusted-server-core/src/ec/consent.rs @@ -0,0 +1,77 @@ +//! EC-specific consent gating. +//! +//! This module provides the public consent-check API for the EC subsystem. +//! The underlying logic lives in [`crate::consent::allows_ec_creation`]; this +//! wrapper exists so that EC callers can import from `ec::consent` and the +//! eventual migration path (renaming, adding EC-specific conditions) is +//! contained here. + +use crate::consent::ConsentContext; + +/// Determines whether Edge Cookie creation is permitted based on the +/// user's consent and detected jurisdiction. +/// +/// This is the canonical entry point for EC consent checks. It delegates +/// to [`crate::consent::allows_ec_creation`] today but may diverge as +/// EC-specific consent rules evolve. +/// +/// See [`crate::consent::allows_ec_creation`] for the full decision matrix. +#[must_use] +pub fn ec_consent_granted(consent_context: &ConsentContext) -> bool { + crate::consent::allows_ec_creation(consent_context) +} + +/// Returns `true` when the request carries an explicit EC withdrawal signal. +/// +/// This is intentionally stricter than [`ec_consent_granted`]. A fail-closed +/// result such as unknown jurisdiction or missing consent data must not be +/// treated as an authoritative withdrawal of an already-issued EC. 
+#[must_use] +pub fn ec_consent_withdrawn(consent_context: &ConsentContext) -> bool { + crate::consent::has_explicit_ec_withdrawal(consent_context) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::consent::jurisdiction::Jurisdiction; + + #[test] + fn ec_consent_granted_allows_non_regulated_requests() { + let ctx = ConsentContext { + jurisdiction: Jurisdiction::NonRegulated, + ..ConsentContext::default() + }; + + assert!( + ec_consent_granted(&ctx), + "non-regulated requests should be allowed" + ); + } + + #[test] + fn ec_consent_granted_blocks_unknown_jurisdiction() { + let ctx = ConsentContext { + jurisdiction: Jurisdiction::Unknown, + ..ConsentContext::default() + }; + + assert!( + !ec_consent_granted(&ctx), + "unknown jurisdiction should fail closed" + ); + } + + #[test] + fn ec_consent_withdrawn_does_not_treat_unknown_jurisdiction_as_revocation() { + let ctx = ConsentContext { + jurisdiction: Jurisdiction::Unknown, + ..ConsentContext::default() + }; + + assert!( + !ec_consent_withdrawn(&ctx), + "unknown jurisdiction should block creation without revoking existing EC" + ); + } +} diff --git a/crates/trusted-server-core/src/ec/cookies.rs b/crates/trusted-server-core/src/ec/cookies.rs new file mode 100644 index 00000000..46304852 --- /dev/null +++ b/crates/trusted-server-core/src/ec/cookies.rs @@ -0,0 +1,349 @@ +//! EC cookie creation and expiration helpers. +//! +//! These functions handle the `Set-Cookie` header for the `ts-ec` cookie. +//! Cookie attributes follow current best practices: +//! +//! - `Domain` is computed as `.{publisher.domain}` for subdomain coverage +//! - `Path=/` makes the cookie available on all paths +//! - `Secure` restricts to HTTPS +//! - `SameSite=Lax` provides CSRF protection while allowing top-level navigations +//! - `Max-Age` of 1 year (or 0 to expire) +//! - `HttpOnly` prevents client-side JS from reading the cookie via +//! `document.cookie`, providing XSS defense-in-depth. The identify +//! 
endpoint (`/_ts/api/v1/identify`) exposes the EC ID in its response +//! body for legitimate JS use cases. + +use std::borrow::Cow; + +use fastly::http::header; + +use crate::constants::COOKIE_TS_EC; +use crate::settings::Settings; + +/// Maximum age for the EC cookie (1 year in seconds). +const COOKIE_MAX_AGE: i32 = 365 * 24 * 60 * 60; + +fn is_allowed_ec_id_char(c: char) -> bool { + c.is_ascii_alphanumeric() || matches!(c, '.' | '-' | '_') +} + +// Outbound allowlist for cookie sanitization: permits [a-zA-Z0-9._-] as a +// defense-in-depth backstop when setting the Set-Cookie header. This is +// intentionally broader than the inbound format validator +// (`generation::is_valid_ec_id`), which enforces the exact +// `<64-hex>.<6-alphanumeric>` structure and is used to reject untrusted +// request values before they enter the system. +#[must_use] +pub(crate) fn ec_id_has_only_allowed_chars(ec_id: &str) -> bool { + ec_id.chars().all(is_allowed_ec_id_char) +} + +fn sanitize_ec_id_for_cookie(ec_id: &str) -> Cow<'_, str> { + if ec_id_has_only_allowed_chars(ec_id) { + return Cow::Borrowed(ec_id); + } + + let safe_id = ec_id + .chars() + .filter(|c| is_allowed_ec_id_char(*c)) + .collect::(); + + log::warn!( + "Stripped disallowed characters from EC ID before setting cookie (len {} -> {}); \ + callers should reject invalid request IDs before cookie creation", + ec_id.len(), + safe_id.len(), + ); + + Cow::Owned(safe_id) +} + +/// Returns `true` if every byte in `value` is a valid RFC 6265 `cookie-octet`. +/// An empty string is always rejected. +/// +/// RFC 6265 restricts cookie values to printable US-ASCII excluding whitespace, +/// double-quote, comma, semicolon, and backslash. Rejecting these characters +/// prevents header-injection attacks where a crafted value could append +/// spurious cookie attributes (e.g. `evil; Domain=.attacker.com`). +/// +/// Non-ASCII characters (multi-byte UTF-8) are always rejected because their +/// byte values exceed `0x7E`. 
+#[must_use] +fn is_safe_cookie_value(value: &str) -> bool { + // RFC 6265 §4.1.1 cookie-octet: + // 0x21 — '!' + // 0x23–0x2B — '#' through '+' (excludes 0x22 DQUOTE) + // 0x2D–0x3A — '-' through ':' (excludes 0x2C comma) + // 0x3C–0x5B — '<' through '[' (excludes 0x3B semicolon) + // 0x5D–0x7E — ']' through '~' (excludes 0x5C backslash, 0x7F DEL) + // All control characters (0x00–0x20) and non-ASCII (0x80+) are also excluded. + !value.is_empty() + && value + .bytes() + .all(|b| matches!(b, 0x21 | 0x23..=0x2B | 0x2D..=0x3A | 0x3C..=0x5B | 0x5D..=0x7E)) +} + +/// Formats a `Set-Cookie` header value for the EC cookie. +/// +/// Centralises the cookie attribute string so that changes to security +/// attributes (e.g. adding `Partitioned`) only need updating in one place. +fn format_set_cookie(domain: &str, value: &str, max_age: i32) -> String { + format!( + "{}={}; Domain={}; Path=/; Secure; SameSite=Lax; Max-Age={}; HttpOnly", + COOKIE_TS_EC, value, domain, max_age, + ) +} + +/// Creates an EC cookie `Set-Cookie` header value. +/// +/// Per spec §5.2, the EC cookie domain is computed from +/// `settings.publisher.domain` (not `cookie_domain`) to ensure the EC +/// cookie is always scoped to the publisher's apex domain. The EC ID is +/// sanitized through a narrow outbound allowlist as a defense-in-depth +/// backstop against header injection. +#[must_use] +pub(crate) fn create_ec_cookie(settings: &Settings, ec_id: &str) -> String { + let safe_id = sanitize_ec_id_for_cookie(ec_id); + + format_set_cookie( + &settings.publisher.ec_cookie_domain(), + safe_id.as_ref(), + COOKIE_MAX_AGE, + ) +} + +/// Sets the EC ID cookie on the given response. +/// +/// Validates `ec_id` against RFC 6265 `cookie-octet` rules before +/// interpolation. If the value contains unsafe characters (e.g. semicolons), +/// the cookie is not set and a warning is logged. This prevents an attacker +/// from injecting spurious cookie attributes via a controlled ID value. 
+/// +/// `cookie_domain` comes from operator configuration and is considered trusted. +/// +/// # Panics (debug only) +/// +/// Debug-asserts that `ec_id` passes [`super::generation::is_valid_ec_id`] +/// as a defense-in-depth check against cookie injection. +pub fn set_ec_cookie(settings: &Settings, response: &mut fastly::Response, ec_id: &str) { + if !is_safe_cookie_value(ec_id) { + log::warn!( + "Rejecting EC ID for Set-Cookie: value of {} bytes contains characters illegal in a cookie value", + ec_id.len() + ); + return; + } + + debug_assert!( + super::generation::is_valid_ec_id(ec_id), + "EC ID must be validated before cookie creation: got '{ec_id}'" + ); + + response.append_header(header::SET_COOKIE, create_ec_cookie(settings, ec_id)); +} + +/// Expires the EC cookie by setting `Max-Age=0`. +/// +/// Used when a user revokes consent — the browser will delete the cookie +/// on receipt of this header. +pub fn expire_ec_cookie(settings: &Settings, response: &mut fastly::Response) { + response.append_header( + header::SET_COOKIE, + format_set_cookie(&settings.publisher.ec_cookie_domain(), "", 0), + ); +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::test_support::tests::create_test_settings; + use fastly::http::header; + + /// A valid EC ID for use in cookie tests. 
#[cfg(test)]
mod tests {
    use super::*;
    use crate::test_support::tests::create_test_settings;
    use fastly::http::header;

    /// A valid EC ID for use in cookie tests.
    const TEST_EC_ID: &str =
        "aaaaaaaabbbbbbbbccccccccddddddddeeeeeeeeffffffff0000000011111111.abcXYZ";

    /// Reads the `Set-Cookie` header from `response` as an owned string.
    fn set_cookie_header(response: &fastly::Response) -> String {
        response
            .get_header(header::SET_COOKIE)
            .expect("should have Set-Cookie header")
            .to_str()
            .expect("should be valid UTF-8")
            .to_owned()
    }

    /// Extracts the cookie value (text between `name=` and the first `;`).
    fn cookie_value(set_cookie: &str) -> &str {
        set_cookie
            .strip_prefix(&format!("{}=", COOKIE_TS_EC))
            .and_then(|rest| rest.split_once(';').map(|(value, _)| value))
            .expect("should have cookie value portion")
    }

    /// Runs `set_ec_cookie` on a fresh response and returns that response.
    fn response_after_set(ec_id: &str) -> fastly::Response {
        let settings = create_test_settings();
        let mut response = fastly::Response::new();
        set_ec_cookie(&settings, &mut response, ec_id);
        response
    }

    /// Runs `expire_ec_cookie` on a fresh response and returns the header.
    fn expiry_header(settings: &Settings) -> String {
        let mut response = fastly::Response::new();
        expire_ec_cookie(settings, &mut response);
        set_cookie_header(&response)
    }

    #[test]
    fn create_ec_cookie_uses_computed_domain() {
        let settings = create_test_settings();
        let expected = format!(
            "{}={}; Domain=.{}; Path=/; Secure; SameSite=Lax; Max-Age={}; HttpOnly",
            COOKIE_TS_EC, TEST_EC_ID, settings.publisher.domain, COOKIE_MAX_AGE,
        );

        assert_eq!(
            create_ec_cookie(&settings, TEST_EC_ID),
            expected,
            "should use computed cookie domain (.{{domain}})"
        );
    }

    #[test]
    fn set_ec_cookie_appends_header() {
        let settings = create_test_settings();
        let response = response_after_set(TEST_EC_ID);

        assert_eq!(
            set_cookie_header(&response),
            create_ec_cookie(&settings, TEST_EC_ID),
            "should match create_ec_cookie output"
        );
    }

    #[test]
    fn create_ec_cookie_sanitizes_disallowed_chars_in_id() {
        let settings = create_test_settings();
        let cookie = create_ec_cookie(&settings, "evil;injected\r\nfoo=bar\0baz");

        assert_eq!(
            cookie_value(&cookie),
            "evilinjectedfoobarbaz",
            "should strip disallowed characters and preserve safe chars"
        );
    }

    #[test]
    fn create_ec_cookie_preserves_well_formed_id() {
        let settings = create_test_settings();
        let id = "abc123def0123456789abcdef0123456789abcdef0123456789abcdef01234567.xk92ab";
        let cookie = create_ec_cookie(&settings, id);

        assert_eq!(
            cookie_value(&cookie),
            id,
            "should not modify a well-formed EC ID"
        );
    }

    #[test]
    fn set_ec_cookie_rejects_semicolon() {
        let response = response_after_set("evil; Domain=.attacker.com");

        assert!(
            response.get_header(header::SET_COOKIE).is_none(),
            "should not set Set-Cookie when value contains a semicolon"
        );
    }

    #[test]
    fn set_ec_cookie_rejects_crlf() {
        let response = response_after_set("evil\r\nX-Injected: header");

        assert!(
            response.get_header(header::SET_COOKIE).is_none(),
            "should not set Set-Cookie when value contains CRLF"
        );
    }

    #[test]
    fn set_ec_cookie_rejects_space() {
        let response = response_after_set("bad value");

        assert!(
            response.get_header(header::SET_COOKIE).is_none(),
            "should not set Set-Cookie when value contains whitespace"
        );
    }

    #[test]
    fn is_safe_cookie_value_rejects_empty_string() {
        assert!(!is_safe_cookie_value(""), "should reject empty string");
    }

    #[test]
    fn is_safe_cookie_value_accepts_valid_ec_id_characters() {
        assert!(
            is_safe_cookie_value("abcdef0123456789.ABCDEFabcdef"),
            "should accept hex digits, dots, and alphanumeric characters"
        );
    }

    #[test]
    fn is_safe_cookie_value_rejects_non_ascii() {
        assert!(
            !is_safe_cookie_value("valüe"),
            "should reject non-ASCII UTF-8 characters"
        );
    }

    #[test]
    fn is_safe_cookie_value_rejects_illegal_characters() {
        let rejected = [
            ("val;ue", "should reject semicolon"),
            ("val,ue", "should reject comma"),
            ("val\"ue", "should reject double-quote"),
            ("val\\ue", "should reject backslash"),
            ("val ue", "should reject space"),
            ("val\x00ue", "should reject null byte"),
            ("val\x7fue", "should reject DEL character"),
        ];

        for (candidate, reason) in rejected {
            assert!(!is_safe_cookie_value(candidate), "{reason}");
        }
    }

    #[test]
    fn expire_ec_cookie_sets_max_age_zero() {
        let settings = create_test_settings();
        let cookie = expiry_header(&settings);

        assert!(
            cookie.contains("Max-Age=0"),
            "should set Max-Age=0 to expire cookie"
        );
        assert!(
            cookie.starts_with(&format!("{}=;", COOKIE_TS_EC)),
            "should clear cookie value"
        );
        assert!(
            cookie.contains(&format!("Domain=.{}", settings.publisher.domain)),
            "should use computed cookie domain"
        );
    }

    #[test]
    fn expire_ec_cookie_matches_security_attributes() {
        let settings = create_test_settings();
        let expected = format!(
            "{}=; Domain=.{}; Path=/; Secure; SameSite=Lax; Max-Age=0; HttpOnly",
            COOKIE_TS_EC, settings.publisher.domain,
        );

        assert_eq!(
            expiry_header(&settings),
            expected,
            "expiry cookie should retain the same security attributes as the live cookie"
        );
    }
}
(`get_tls_ja4()`, `get_client_h2_fingerprint()`, UA header) and passes +//! them here for classification. +//! +//! # Signals +//! +//! - **`is_mobile`** — `0` desktop, `1` mobile, `2` unknown (rare; bots or +//! hardened clients) +//! - **`ja4_class`** — JA4 Section 1 only (browser family identifier) +//! - **`platform_class`** — coarse OS family from UA +//! - **`h2_fp_hash`** — SHA256 prefix (12 hex chars) of raw H2 SETTINGS +//! - **`known_browser`** — `true` if `ja4_class` + `h2_fp_hash` match a known +//! browser pattern; `false` for known bots; `None` for unknown + +use sha2::{Digest as _, Sha256}; + +use super::kv_types::KvDevice; + +/// Device signals derived from a single request. +/// +/// Computed in the Fastly adapter from raw TLS/H2/UA data, then passed to +/// core for storage and gating decisions. This type lives in core so it +/// can be used in [`KvDevice`] construction and tested without Fastly. +#[derive(Debug, Clone, PartialEq)] +pub struct DeviceSignals { + /// `0` = desktop, `1` = mobile, `2` = unknown. + pub is_mobile: u8, + /// JA4 Section 1 (e.g. `"t13d1516h2"`). + pub ja4_class: Option, + /// Coarse OS family: `"mac"`, `"windows"`, `"ios"`, `"android"`, + /// `"linux"`. + pub platform_class: Option, + /// SHA256 prefix (12 hex chars) of raw H2 SETTINGS fingerprint. + pub h2_fp_hash: Option, + /// `true` = known browser, `false` = known bot, `None` = unknown. + pub known_browser: Option, +} + +impl DeviceSignals { + /// Derives all device signals from raw request data. + /// + /// `ua` is the `User-Agent` header value. `ja4` is the full JA4 hash + /// from `req.get_tls_ja4()`. `h2_fp` is the raw H2 SETTINGS string + /// from `req.get_client_h2_fingerprint()`. 
+ #[must_use] + pub fn derive(ua: &str, ja4: Option<&str>, h2_fp: Option<&str>) -> Self { + let is_mobile = parse_is_mobile(ua); + let ja4_class = ja4.and_then(extract_ja4_section1); + let platform_class = parse_platform_class(ua); + let h2_fp_hash = h2_fp.map(compute_h2_fp_hash); + let known_browser = evaluate_known_browser(ja4_class.as_deref(), h2_fp_hash.as_deref()); + + Self { + is_mobile, + ja4_class, + platform_class, + h2_fp_hash, + known_browser, + } + } + + /// Returns `true` when the request looks like a real browser. + /// + /// Checks for the presence of recognizable signals rather than matching + /// against a hardcoded fingerprint allowlist. Real browsers always + /// produce a valid TLS fingerprint (`ja4_class`) and a recognizable UA + /// platform string (`platform_class`). Raw HTTP clients (curl, Python + /// requests, Go net/http, headless scrapers) typically lack one or both. + /// + /// # Threat model + /// + /// This heuristic is intentionally aimed at filtering obvious + /// missing-signal traffic, not at resisting deliberate spoofing. A bot + /// that forges plausible JA4 and UA inputs may still pass; deeper + /// consistency checks can be added later if product requirements demand + /// stronger spoof resistance. + /// + /// `known_browser` is still computed and stored on [`KvDevice`] for + /// analytics but does not gate identity operations. + #[must_use] + pub fn looks_like_browser(&self) -> bool { + self.ja4_class.is_some() && self.platform_class.is_some() + } + + /// Converts these signals into a [`KvDevice`] for KV storage. + #[must_use] + pub fn to_kv_device(&self) -> KvDevice { + KvDevice { + is_mobile: self.is_mobile, + ja4_class: self.ja4_class.clone(), + platform_class: self.platform_class.clone(), + h2_fp_hash: self.h2_fp_hash.clone(), + known_browser: self.known_browser, + } + } +} + +/// Device is a desktop (confirmed via UA platform token). +const MOBILE_DESKTOP: u8 = 0; +/// Device is a mobile (confirmed via UA mobile token). 
+const MOBILE_MOBILE: u8 = 1; +/// Device type is genuinely unknown (typically bots or hardened clients). +const MOBILE_UNKNOWN: u8 = 2; + +/// Derives mobile signal from the User-Agent string. +/// +/// Returns [`MOBILE_DESKTOP`] for confirmed desktop, +/// [`MOBILE_MOBILE`] for confirmed mobile, +/// [`MOBILE_UNKNOWN`] for genuinely unknown (typically bots or hardened clients). +#[must_use] +fn parse_is_mobile(ua: &str) -> u8 { + // Mobile patterns checked first — more specific. + if ua.contains("iPhone") || ua.contains("iPad") || ua.contains("Android") { + return MOBILE_MOBILE; + } + if ua.contains("Macintosh") || ua.contains("Windows") || ua.contains("Linux") { + return MOBILE_DESKTOP; + } + MOBILE_UNKNOWN +} + +/// Parses coarse OS family from the User-Agent string. +/// +/// Returns `None` when no recognized platform pattern is found. +#[must_use] +fn parse_platform_class(ua: &str) -> Option { + // Order matters: check mobile-specific patterns before generic ones. + if ua.contains("iPhone") || ua.contains("iPad") { + return Some("ios".to_owned()); + } + if ua.contains("Android") { + return Some("android".to_owned()); + } + if ua.contains("Macintosh") { + return Some("mac".to_owned()); + } + if ua.contains("Windows NT") { + return Some("windows".to_owned()); + } + if ua.contains("Linux") { + return Some("linux".to_owned()); + } + None +} + +/// Extracts Section 1 from a full JA4 fingerprint. +/// +/// JA4 format: `section1_section2_section3` separated by underscores. +/// Section 1 identifies browser family (cipher count, extension count, +/// ALPN) without uniquely fingerprinting a device. +/// +/// Returns `None` if the input is empty or has no underscore-delimited +/// section. 
+#[must_use] +fn extract_ja4_section1(full_ja4: &str) -> Option { + let section1 = full_ja4.split('_').next()?; + if section1.is_empty() { + return None; + } + Some(section1.to_owned()) +} + +/// Computes a 12-hex-char prefix of the SHA256 hash of the raw H2 +/// SETTINGS fingerprint string. +/// +/// The raw string looks like `"1:65536;2:0;4:6291456;6:262144"`. +#[must_use] +fn compute_h2_fp_hash(raw_h2_fp: &str) -> String { + let mut hasher = Sha256::new(); + hasher.update(raw_h2_fp.as_bytes()); + let digest = hasher.finalize(); + hex::encode(&digest[..6]) +} + +/// Known browser fingerprint allowlist. +/// +/// Each entry is `(ja4_class, h2_fp_prefix, known_browser)`. +/// `h2_fp_prefix` is the raw H2 SETTINGS string (not the hash) — we +/// compare against the hash computed from it. +/// +/// Empirically derived from Fastly Compute production responses (2026-04-03). +const KNOWN_BROWSERS: &[(&str, &str, bool)] = &[ + // Chrome/Mac v146 + ("t13d1516h2", "1:65536;2:0;4:6291456;6:262144", true), + // Safari/Mac v26 and Safari/iOS v26 + ("t13d2013h2", "2:0;3:100;4:2097152", true), + // Firefox/Mac v149 + ("t13d1717h2", "1:65536;2:0;4:131072;5:16384", true), +]; + +/// Returns H2 fingerprint hashes for the known browser allowlist. +/// +/// Computed once on first call and cached via `OnceLock`. +fn known_browser_h2_hashes() -> &'static Vec<(&'static str, String, bool)> { + static CACHE: std::sync::OnceLock> = std::sync::OnceLock::new(); + CACHE.get_or_init(|| { + KNOWN_BROWSERS + .iter() + .map(|(ja4, h2_raw, known)| (*ja4, compute_h2_fp_hash(h2_raw), *known)) + .collect() + }) +} + +/// Evaluates whether a request comes from a known browser. +/// +/// Returns `Some(true)` if `ja4_class` + `h2_fp_hash` match a known +/// legitimate browser pattern. Returns `Some(false)` for known +/// bot/scraper patterns. Returns `None` for unrecognized combinations. +/// +/// Both signals must be present for a match — if either is `None`, +/// returns `None`. 
+#[must_use] +fn evaluate_known_browser(ja4_class: Option<&str>, h2_fp_hash: Option<&str>) -> Option { + let ja4 = ja4_class?; + let h2_hash = h2_fp_hash?; + + for (known_ja4, known_h2_hash, is_browser) in known_browser_h2_hashes() { + if ja4 == *known_ja4 && h2_hash == *known_h2_hash { + return Some(*is_browser); + } + } + + // No match — unknown client. + None +} + +#[cfg(test)] +mod tests { + use super::*; + + // Chrome Mac UA + const CHROME_MAC_UA: &str = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) \ + AppleWebKit/537.36 (KHTML, like Gecko) Chrome/146.0.0.0 Safari/537.36"; + + // Safari iOS UA + const SAFARI_IOS_UA: &str = "Mozilla/5.0 (iPhone; CPU iPhone OS 26_0 like Mac OS X) \ + AppleWebKit/605.1.15 (KHTML, like Gecko) Version/26.0 Mobile/15E148 Safari/604.1"; + + // Safari Mac UA + const SAFARI_MAC_UA: &str = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) \ + AppleWebKit/605.1.15 (KHTML, like Gecko) Version/26.0 Safari/605.1.15"; + + // Firefox Mac UA + const FIREFOX_MAC_UA: &str = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; \ + rv:149.0) Gecko/20100101 Firefox/149.0"; + + // Android Chrome UA + const CHROME_ANDROID_UA: &str = "Mozilla/5.0 (Linux; Android 14; Pixel 8) \ + AppleWebKit/537.36 (KHTML, like Gecko) Chrome/146.0.0.0 Mobile Safari/537.36"; + + // Windows Chrome UA + const CHROME_WINDOWS_UA: &str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) \ + AppleWebKit/537.36 (KHTML, like Gecko) Chrome/146.0.0.0 Safari/537.36"; + + // Bot/empty UA + const BOT_UA: &str = "Googlebot/2.1 (+http://www.google.com/bot.html)"; + + #[test] + fn is_mobile_desktop_browsers() { + assert_eq!(parse_is_mobile(CHROME_MAC_UA), 0, "Chrome/Mac = desktop"); + assert_eq!(parse_is_mobile(SAFARI_MAC_UA), 0, "Safari/Mac = desktop"); + assert_eq!(parse_is_mobile(FIREFOX_MAC_UA), 0, "Firefox/Mac = desktop"); + assert_eq!( + parse_is_mobile(CHROME_WINDOWS_UA), + 0, + "Chrome/Windows = desktop" + ); + } + + #[test] + fn is_mobile_mobile_browsers() { + 
#[cfg(test)]
mod tests {
    use super::*;

    // Chrome Mac UA
    const CHROME_MAC_UA: &str = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) \
        AppleWebKit/537.36 (KHTML, like Gecko) Chrome/146.0.0.0 Safari/537.36";

    // Safari iOS UA
    const SAFARI_IOS_UA: &str = "Mozilla/5.0 (iPhone; CPU iPhone OS 26_0 like Mac OS X) \
        AppleWebKit/605.1.15 (KHTML, like Gecko) Version/26.0 Mobile/15E148 Safari/604.1";

    // Safari Mac UA
    const SAFARI_MAC_UA: &str = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) \
        AppleWebKit/605.1.15 (KHTML, like Gecko) Version/26.0 Safari/605.1.15";

    // Firefox Mac UA
    const FIREFOX_MAC_UA: &str = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; \
        rv:149.0) Gecko/20100101 Firefox/149.0";

    // Android Chrome UA
    const CHROME_ANDROID_UA: &str = "Mozilla/5.0 (Linux; Android 14; Pixel 8) \
        AppleWebKit/537.36 (KHTML, like Gecko) Chrome/146.0.0.0 Mobile Safari/537.36";

    // Windows Chrome UA
    const CHROME_WINDOWS_UA: &str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) \
        AppleWebKit/537.36 (KHTML, like Gecko) Chrome/146.0.0.0 Safari/537.36";

    // Bot/empty UA
    const BOT_UA: &str = "Googlebot/2.1 (+http://www.google.com/bot.html)";

    // Raw fingerprints shared by several tests.
    const CHROME_JA4_FULL: &str = "t13d1516h2_8daaf6152771_e5627efa2ab1";
    const CHROME_H2: &str = "1:65536;2:0;4:6291456;6:262144";
    const SAFARI_H2: &str = "2:0;3:100;4:2097152";
    const FIREFOX_H2: &str = "1:65536;2:0;4:131072;5:16384";

    /// Signals for a Chrome/Mac request with its real JA4 and H2 fingerprints.
    fn chrome_mac_signals() -> DeviceSignals {
        DeviceSignals::derive(CHROME_MAC_UA, Some(CHROME_JA4_FULL), Some(CHROME_H2))
    }

    /// Evaluates the allowlist for a JA4 class and a raw H2 SETTINGS string.
    fn lookup(ja4_class: &str, raw_h2: &str) -> Option<bool> {
        let h2_hash = compute_h2_fp_hash(raw_h2);
        evaluate_known_browser(Some(ja4_class), Some(&h2_hash))
    }

    #[test]
    fn is_mobile_desktop_browsers() {
        let desktops = [
            (CHROME_MAC_UA, "Chrome/Mac = desktop"),
            (SAFARI_MAC_UA, "Safari/Mac = desktop"),
            (FIREFOX_MAC_UA, "Firefox/Mac = desktop"),
            (CHROME_WINDOWS_UA, "Chrome/Windows = desktop"),
        ];

        for (ua, label) in desktops {
            assert_eq!(parse_is_mobile(ua), 0, "{label}");
        }
    }

    #[test]
    fn is_mobile_mobile_browsers() {
        let mobiles = [
            (SAFARI_IOS_UA, "Safari/iOS = mobile"),
            (CHROME_ANDROID_UA, "Chrome/Android = mobile"),
        ];

        for (ua, label) in mobiles {
            assert_eq!(parse_is_mobile(ua), 1, "{label}");
        }
    }

    #[test]
    fn is_mobile_unknown() {
        assert_eq!(parse_is_mobile(BOT_UA), 2, "Googlebot = unknown");
        assert_eq!(parse_is_mobile(""), 2, "empty UA = unknown");
    }

    #[test]
    fn platform_class_desktop() {
        let expectations = [
            (CHROME_MAC_UA, "mac"),
            (CHROME_WINDOWS_UA, "windows"),
            (FIREFOX_MAC_UA, "mac"),
        ];

        for (ua, platform) in expectations {
            assert_eq!(parse_platform_class(ua).as_deref(), Some(platform));
        }
    }

    #[test]
    fn platform_class_mobile() {
        let expectations = [(SAFARI_IOS_UA, "ios"), (CHROME_ANDROID_UA, "android")];

        for (ua, platform) in expectations {
            assert_eq!(parse_platform_class(ua).as_deref(), Some(platform));
        }
    }

    #[test]
    fn platform_class_linux() {
        let linux_ua = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36";
        assert_eq!(parse_platform_class(linux_ua).as_deref(), Some("linux"));
    }

    #[test]
    fn platform_class_unknown() {
        for ua in [BOT_UA, ""] {
            assert_eq!(parse_platform_class(ua), None);
        }
    }

    #[test]
    fn ja4_section1_extraction() {
        let section1 = extract_ja4_section1(CHROME_JA4_FULL);
        assert_eq!(
            section1.as_deref(),
            Some("t13d1516h2"),
            "should extract section 1 from full JA4"
        );
    }

    #[test]
    fn ja4_section1_no_underscore() {
        // Some implementations may return just section 1
        let section1 = extract_ja4_section1("t13d1516h2");
        assert_eq!(
            section1.as_deref(),
            Some("t13d1516h2"),
            "should handle JA4 with no underscore"
        );
    }

    #[test]
    fn ja4_section1_empty() {
        assert_eq!(extract_ja4_section1(""), None);
    }

    #[test]
    fn h2_fp_hash_deterministic() {
        let first = compute_h2_fp_hash(CHROME_H2);
        let second = compute_h2_fp_hash(CHROME_H2);
        assert_eq!(first, second, "should be deterministic");
        assert_eq!(first.len(), 12, "should be 12 hex chars");
    }

    #[test]
    fn h2_fp_hash_different_inputs() {
        assert_ne!(
            compute_h2_fp_hash(CHROME_H2),
            compute_h2_fp_hash(SAFARI_H2),
            "different inputs should produce different hashes"
        );
    }

    #[test]
    fn known_browser_chrome_match() {
        assert_eq!(
            lookup("t13d1516h2", CHROME_H2),
            Some(true),
            "Chrome fingerprint should be recognized"
        );
    }

    #[test]
    fn known_browser_safari_match() {
        assert_eq!(
            lookup("t13d2013h2", SAFARI_H2),
            Some(true),
            "Safari fingerprint should be recognized"
        );
    }

    #[test]
    fn known_browser_firefox_match() {
        assert_eq!(
            lookup("t13d1717h2", FIREFOX_H2),
            Some(true),
            "Firefox fingerprint should be recognized"
        );
    }

    #[test]
    fn known_browser_unknown_combination() {
        assert_eq!(
            lookup("t13d9999h2", "1:1;2:2;3:3"),
            None,
            "unknown combination should return None"
        );
    }

    #[test]
    fn known_browser_mismatched_ja4_h2() {
        // Chrome JA4 but Safari H2
        assert_eq!(
            lookup("t13d1516h2", SAFARI_H2),
            None,
            "mismatched JA4/H2 should return None"
        );
    }

    #[test]
    fn known_browser_missing_signals() {
        let cases = [
            (None, Some("abcdef123456"), "missing JA4 should return None"),
            (
                Some("t13d1516h2"),
                None,
                "missing H2 hash should return None",
            ),
            (None, None, "both missing should return None"),
        ];

        for (ja4_class, h2_hash, label) in cases {
            assert_eq!(evaluate_known_browser(ja4_class, h2_hash), None, "{label}");
        }
    }

    #[test]
    fn derive_chrome_mac() {
        let signals = chrome_mac_signals();

        assert_eq!(signals.is_mobile, 0);
        assert_eq!(signals.ja4_class.as_deref(), Some("t13d1516h2"));
        assert_eq!(signals.platform_class.as_deref(), Some("mac"));
        assert!(signals.h2_fp_hash.is_some());
        assert_eq!(signals.known_browser, Some(true));
    }

    #[test]
    fn derive_safari_ios() {
        let signals = DeviceSignals::derive(
            SAFARI_IOS_UA,
            Some("t13d2013h2_abcdef123456_fedcba654321"),
            Some(SAFARI_H2),
        );

        assert_eq!(signals.is_mobile, 1);
        assert_eq!(signals.ja4_class.as_deref(), Some("t13d2013h2"));
        assert_eq!(signals.platform_class.as_deref(), Some("ios"));
        assert_eq!(signals.known_browser, Some(true));
    }

    #[test]
    fn derive_bot() {
        let signals = DeviceSignals::derive(BOT_UA, None, None);

        assert_eq!(signals.is_mobile, 2);
        assert!(signals.ja4_class.is_none());
        assert!(signals.platform_class.is_none());
        assert!(signals.h2_fp_hash.is_none());
        assert_eq!(signals.known_browser, None);
    }

    #[test]
    fn to_kv_device_conversion() {
        let signals = chrome_mac_signals();
        let device = signals.to_kv_device();

        assert_eq!(device.is_mobile, signals.is_mobile);
        assert_eq!(device.ja4_class, signals.ja4_class);
        assert_eq!(device.platform_class, signals.platform_class);
        assert_eq!(device.h2_fp_hash, signals.h2_fp_hash);
        assert_eq!(device.known_browser, signals.known_browser);
    }

    #[test]
    fn android_is_linux_but_platform_class_android() {
        // Android UA contains "Linux" — platform_class should be "android"
        // not "linux" because we check Android before Linux.
        assert_eq!(
            parse_platform_class(CHROME_ANDROID_UA).as_deref(),
            Some("android"),
            "Android should take precedence over Linux"
        );
        // Likewise is_mobile is 1 because "Android" is matched before "Linux".
        assert_eq!(parse_is_mobile(CHROME_ANDROID_UA), 1);
    }

    #[test]
    fn ipad_is_mobile() {
        let ipad_ua = "Mozilla/5.0 (iPad; CPU OS 26_0 like Mac OS X) \
            AppleWebKit/605.1.15 (KHTML, like Gecko) Version/26.0 Safari/604.1";

        assert_eq!(parse_is_mobile(ipad_ua), 1, "iPad should be mobile");
        assert_eq!(
            parse_platform_class(ipad_ua).as_deref(),
            Some("ios"),
            "iPad should be ios"
        );
    }

    #[test]
    fn looks_like_browser_with_both_signals() {
        assert!(
            chrome_mac_signals().looks_like_browser(),
            "Chrome/Mac should look like a browser"
        );
    }

    #[test]
    fn looks_like_browser_unknown_fingerprint_still_passes() {
        // Chrome/Windows with unknown JA4/H2 — still has ja4_class and platform_class
        let signals = DeviceSignals::derive(
            CHROME_WINDOWS_UA,
            Some("t13d9999h2_unknown_unknown"),
            Some("99:99;88:88"),
        );

        assert!(
            signals.looks_like_browser(),
            "unknown fingerprint with valid JA4 + platform should pass"
        );
        assert_eq!(signals.known_browser, None, "should not match allowlist");
    }

    #[test]
    fn looks_like_browser_rejects_bot() {
        let signals = DeviceSignals::derive(BOT_UA, None, None);

        assert!(
            !signals.looks_like_browser(),
            "bot with no JA4 and no platform should be rejected"
        );
    }

    #[test]
    fn looks_like_browser_rejects_missing_ja4() {
        // Real UA but no TLS fingerprint (e.g. HTTP/1.1 or missing SDK support)
        let signals = DeviceSignals::derive(CHROME_MAC_UA, None, Some("1:65536"));

        assert!(
            !signals.looks_like_browser(),
            "missing JA4 should be rejected even with valid UA"
        );
    }

    #[test]
    fn looks_like_browser_rejects_missing_platform() {
        // Has JA4 but unrecognizable UA
        let signals = DeviceSignals::derive(BOT_UA, Some("t13d1516h2_abc_def"), None);

        assert!(
            !signals.looks_like_browser(),
            "unrecognizable UA should be rejected even with JA4"
        );
    }
}
HTTP/1.1 or missing SDK support) + let signals = DeviceSignals::derive(CHROME_MAC_UA, None, Some("1:65536")); + assert!( + !signals.looks_like_browser(), + "missing JA4 should be rejected even with valid UA" + ); + } + + #[test] + fn looks_like_browser_rejects_missing_platform() { + // Has JA4 but unrecognizable UA + let signals = DeviceSignals::derive(BOT_UA, Some("t13d1516h2_abc_def"), None); + assert!( + !signals.looks_like_browser(), + "unrecognizable UA should be rejected even with JA4" + ); + } +} diff --git a/crates/trusted-server-core/src/ec/eids.rs b/crates/trusted-server-core/src/ec/eids.rs new file mode 100644 index 00000000..f0debad6 --- /dev/null +++ b/crates/trusted-server-core/src/ec/eids.rs @@ -0,0 +1,273 @@ +//! Shared EID resolution and formatting helpers. +//! +//! Used by both `/_ts/api/v1/identify` and `/auction` to resolve partner IDs from KV +//! entries, convert them to `OpenRTB` EID structures, and build base64-encoded +//! response headers. + +use base64::{engine::general_purpose::STANDARD as BASE64, Engine as _}; +use error_stack::{Report, ResultExt}; + +use crate::error::TrustedServerError; +use crate::openrtb::{Eid, Uid}; + +use super::kv_types::KvEntry; +use super::registry::PartnerRegistry; + +/// Maximum size (in bytes) for the base64-encoded `x-ts-eids` header value. +pub const MAX_EIDS_HEADER_BYTES: usize = 4096; + +/// A partner ID resolved from a KV entry against the partner registry. +/// +/// Only includes partners with `bidstream_enabled = true` and a non-empty UID. +pub struct ResolvedPartnerId { + /// Partner namespace key (e.g. `"liveramp"`). + pub partner_id: String, + /// The synced user ID value. + pub uid: String, + /// The partner's identity source domain (e.g. `"liveramp.com"`). + pub source_domain: String, + /// `OpenRTB` agent type for this partner's identifiers. + pub openrtb_atype: u8, +} + +/// Resolves partner IDs from a KV entry against the partner registry. 
+/// +/// Filters to partners with `bidstream_enabled = true` and non-empty UIDs, +/// sorted deterministically by partner ID. +#[must_use] +pub fn resolve_partner_ids(registry: &PartnerRegistry, entry: &KvEntry) -> Vec { + let mut resolved = Vec::new(); + + for (partner_id, partner_uid) in &entry.ids { + if partner_uid.uid.is_empty() { + continue; + } + + let Some(partner) = registry.get(partner_id) else { + continue; + }; + if !partner.bidstream_enabled { + continue; + } + + resolved.push(ResolvedPartnerId { + partner_id: partner_id.clone(), + uid: partner_uid.uid.clone(), + source_domain: partner.source_domain.clone(), + openrtb_atype: partner.openrtb_atype, + }); + } + + resolved.sort_by(|a, b| a.partner_id.cmp(&b.partner_id)); + resolved +} + +/// Converts resolved partner IDs to `OpenRTB` `Eid` entries. +#[must_use] +pub fn to_eids(resolved: &[ResolvedPartnerId]) -> Vec { + resolved + .iter() + .map(|item| Eid { + source: item.source_domain.clone(), + uids: vec![Uid { + id: item.uid.clone(), + atype: Some(item.openrtb_atype), + ext: None, + }], + }) + .collect() +} + +/// Builds a base64-encoded EID header value, truncating if needed. +/// +/// Returns `(encoded_value, was_truncated)`. If the full set of EIDs exceeds +/// [`MAX_EIDS_HEADER_BYTES`] after base64 encoding, partners are removed +/// from the end of the deterministic partner ordering until it fits. +/// +/// # Errors +/// +/// Returns an error if JSON serialization fails. +pub fn build_eids_header( + resolved: &[ResolvedPartnerId], +) -> Result<(String, bool), Report> { + let eids = to_eids(resolved); + encode_eids_header(&eids) +} + +/// Encodes a pre-built EID slice into a base64 header value with truncation. +/// +/// Like [`build_eids_header`] but operates on already-constructed `Eid` values +/// (e.g., from `UserInfo.eids` in the auction response path). +/// +/// Returns `(encoded_value, was_truncated)`. +/// +/// # Errors +/// +/// Returns an error if JSON serialization fails. 
+pub fn encode_eids_header(eids: &[Eid]) -> Result<(String, bool), Report> { + let try_encode = |size: usize| -> Result> { + let json = serde_json::to_vec(&eids[..size]).change_context( + TrustedServerError::Configuration { + message: "Failed to serialize eids header payload".to_owned(), + }, + )?; + Ok(BASE64.encode(json)) + }; + + // Fast path: try the full slice first (common case — no truncation). + let encoded = try_encode(eids.len())?; + if encoded.len() <= MAX_EIDS_HEADER_BYTES { + return Ok((encoded, false)); + } + + // Binary search for the largest count that fits within the limit. + // Invariant: lo always fits, hi never fits. + let mut lo: usize = 0; + let mut hi: usize = eids.len(); + + while lo + 1 < hi { + let mid = lo + (hi - lo) / 2; + let encoded = try_encode(mid)?; + if encoded.len() <= MAX_EIDS_HEADER_BYTES { + lo = mid; + } else { + hi = mid; + } + } + + // `lo` is the largest size that fits. Re-encode it for the final value. + if lo == 0 && !eids.is_empty() { + log::warn!( + "encode_eids_header: no EIDs fit within {MAX_EIDS_HEADER_BYTES}B; emitting empty truncated header" + ); + } + let encoded = try_encode(lo)?; + Ok((encoded, true)) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::redacted::Redacted; + use crate::settings::EcPartner; + + fn make_test_partner(id: &str, source_domain: &str) -> EcPartner { + EcPartner { + id: id.to_owned(), + name: format!("Partner {id}"), + source_domain: source_domain.to_owned(), + openrtb_atype: EcPartner::default_openrtb_atype(), + bidstream_enabled: true, + api_token: Redacted::new(format!("token-{id}-32-bytes-minimum-value")), + batch_rate_limit: EcPartner::default_batch_rate_limit(), + pull_sync_enabled: false, + pull_sync_url: None, + pull_sync_allowed_domains: vec![], + pull_sync_ttl_sec: EcPartner::default_pull_sync_ttl_sec(), + pull_sync_rate_limit: EcPartner::default_pull_sync_rate_limit(), + ts_pull_token: None, + } + } + + #[test] + fn resolve_partner_ids_sorts_by_partner_id() { + let 
// Verifies that each resolved partner becomes one `Eid` carrying its source
// domain, UID and `atype`, in input order.
#[test]
fn to_eids_maps_resolved_ids_correctly() {
    let liveramp = ResolvedPartnerId {
        partner_id: "liveramp".to_owned(),
        uid: "LR_xyz".to_owned(),
        source_domain: "liveramp.com".to_owned(),
        openrtb_atype: 3,
    };
    let id5 = ResolvedPartnerId {
        partner_id: "id5".to_owned(),
        uid: "ID5_abc".to_owned(),
        source_domain: "id5-sync.com".to_owned(),
        openrtb_atype: 1,
    };
    let resolved = vec![liveramp, id5];

    let eids = to_eids(&resolved);

    assert_eq!(eids.len(), 2, "should produce one EID per resolved partner");

    let first = &eids[0];
    assert_eq!(first.source, "liveramp.com");
    assert_eq!(first.uids[0].id, "LR_xyz");
    assert_eq!(first.uids[0].atype, Some(3));

    let second = &eids[1];
    assert_eq!(second.source, "id5-sync.com");
    assert_eq!(second.uids[0].id, "ID5_abc");
    assert_eq!(second.uids[0].atype, Some(1));
}
header"); + + assert!(truncated, "should report truncation for large payload"); + assert!( + encoded.len() <= MAX_EIDS_HEADER_BYTES, + "should cap encoded header bytes" + ); + } + + #[test] + fn build_eids_header_fits_without_truncation() { + let resolved = vec![ResolvedPartnerId { + partner_id: "ssp".to_owned(), + uid: "u1".to_owned(), + source_domain: "ssp.com".to_owned(), + openrtb_atype: 3, + }]; + + let (encoded, truncated) = + build_eids_header(&resolved).expect("should build header without truncation"); + + assert!(!truncated, "should not truncate small payload"); + assert!(!encoded.is_empty(), "should produce non-empty value"); + } +} diff --git a/crates/trusted-server-core/src/ec/finalize.rs b/crates/trusted-server-core/src/ec/finalize.rs new file mode 100644 index 00000000..9b5c62dc --- /dev/null +++ b/crates/trusted-server-core/src/ec/finalize.rs @@ -0,0 +1,603 @@ +//! EC response finalization. +//! +//! Centralizes post-routing EC behavior so all handlers get consistent cookie +//! and KV semantics. + +use std::collections::HashSet; + +use fastly::Response; + +use super::consent::{ec_consent_granted, ec_consent_withdrawn}; +use crate::settings::Settings; + +use super::cookies::{expire_ec_cookie, set_ec_cookie}; +use super::generation::is_valid_ec_id; +use super::kv::KvIdentityGraph; +use super::log_id; +use super::prebid_eids::{ingest_prebid_eids, ingest_sharedid_cookie}; +use super::registry::PartnerRegistry; +use super::EcContext; + +/// TS-managed response headers tied to EC identity output. +const EC_RESPONSE_HEADERS: &[&str] = &[ + "x-ts-ec", + "x-ts-eids", + "x-ts-ec-consent", + "x-ts-eids-truncated", +]; + +/// Finalizes EC response behavior for all routes. +/// +/// Applies withdrawal handling, last-seen updates, cookie reconciliation, +/// Prebid EID ingestion, and cookie writes for new EC generation. 
+/// +/// On consent withdrawal, the browser response clears the EC cookie +/// immediately and the EC identity-graph KV tombstone is the authoritative +/// revocation marker. There is no separate consent KV store to clean up. +/// +/// `eids_cookie` should be the raw value of the `ts-eids` cookie extracted +/// from the request *before* routing consumes it. +pub fn ec_finalize_response( + settings: &Settings, + ec_context: &EcContext, + kv: Option<&KvIdentityGraph>, + registry: &PartnerRegistry, + eids_cookie: Option<&str>, + sharedid_cookie: Option<&str>, + response: &mut Response, +) { + let consent_allows_ec = ec_consent_granted(ec_context.consent()); + let consent_withdrawn = ec_consent_withdrawn(ec_context.consent()); + + if !consent_allows_ec { + // Always strip EC-specific response headers when consent is not + // currently usable for this request. This covers both explicit + // revocation and fail-closed cases such as missing geo or undecodable + // consent input. + clear_ec_headers_on_response(response, Some(registry)); + + // Only expire the browser cookie and tombstone the identity-graph row + // when the request carries an explicit withdrawal signal. + if consent_withdrawn && ec_context.cookie_was_present() { + expire_ec_cookie(settings, response); + + // Compute once for the authoritative identity-graph tombstones. + let ids_to_withdraw = withdrawal_ec_ids(ec_context); + + // The identity-graph tombstone is the authoritative withdrawal marker + // for subsequent EC behavior. + if let Some(graph) = kv { + apply_withdrawal_tombstones(&ids_to_withdraw, |ec_id| { + if let Err(err) = graph.write_withdrawal_tombstone(ec_id) { + log::error!( + "Failed to write withdrawal tombstone for EC ID '{}': {err:?}", + log_id(ec_id), + ); + } + }); + } + } + + return; + } + + // Returning user: consent is granted and EC came from request. 
+ if ec_context.ec_was_present() && !ec_context.ec_generated() && consent_allows_ec { + if let (Some(graph), Some(ec_id)) = (kv, ec_context.ec_value()) { + // Ingest Prebid EIDs from cookie if present. + if let Some(cookie) = eids_cookie { + ingest_prebid_eids(cookie, ec_id, graph, registry); + } + if let Some(cookie) = sharedid_cookie { + ingest_sharedid_cookie(cookie, ec_id, graph, registry); + } + } + + // Ordinary returning-user page views no longer refresh the browser + // cookie, emit the EC header, or update KV TTL. + return; + } + + // Newly generated EC in this request. Do not emit a generated EC when + // there is no KV graph: that would mint a browser cookie with no backing + // identity-graph row, producing a phantom ID on later requests. + if ec_context.ec_generated() { + let (Some(graph), Some(ec_id)) = (kv, ec_context.ec_value()) else { + log::info!("Skipping generated EC response write because KV graph is unavailable"); + return; + }; + + if let Some(cookie) = eids_cookie { + ingest_prebid_eids(cookie, ec_id, graph, registry); + } + if let Some(cookie) = sharedid_cookie { + ingest_sharedid_cookie(cookie, ec_id, graph, registry); + } + set_ec_cookie_on_response(settings, ec_context, response); + } +} + +/// Sets the EC cookie on response when an EC ID is available. +pub fn set_ec_cookie_on_response( + settings: &Settings, + ec_context: &EcContext, + response: &mut Response, +) { + if let Some(ec_id) = ec_context.ec_value() { + set_ec_cookie(settings, response, ec_id); + } +} + +/// Removes EC-specific response headers. +/// +/// In addition to the fixed [`EC_RESPONSE_HEADERS`], this also strips dynamic +/// `X-ts-` headers for registered partners. Other `x-ts-*` headers +/// are intentionally preserved because they may be set by non-EC middleware. 
+fn clear_ec_headers_on_response(response: &mut Response, registry: Option<&PartnerRegistry>) { + for header in EC_RESPONSE_HEADERS { + response.remove_header(*header); + } + + if let Some(registry) = registry { + for partner in registry.all() { + response.remove_header(partner_response_header(&partner.id).as_str()); + } + } +} + +fn partner_response_header(partner_id: &str) -> String { + format!("x-ts-{partner_id}") +} + +/// Clears EC cookie and removes EC-specific response headers. +/// +/// Used when the request carries an explicit withdrawal signal. +pub fn clear_ec_on_response(settings: &Settings, response: &mut Response) { + expire_ec_cookie(settings, response); + clear_ec_headers_on_response(response, None); +} + +fn withdrawal_ec_ids(ec_context: &EcContext) -> HashSet { + let mut hashes = HashSet::new(); + + if let Some(cookie_ec_id) = ec_context.existing_cookie_ec_id() { + if is_valid_ec_id(cookie_ec_id) { + hashes.insert(cookie_ec_id.to_owned()); + } + } + + if let Some(active_ec_id) = ec_context.ec_value() { + if is_valid_ec_id(active_ec_id) { + hashes.insert(active_ec_id.to_owned()); + } + } + + hashes +} + +fn apply_withdrawal_tombstones(ec_ids: &HashSet, mut write_tombstone: F) +where + F: FnMut(&str), +{ + for ec_id in ec_ids { + write_tombstone(ec_id); + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::consent::jurisdiction::Jurisdiction; + use crate::consent::types::{ConsentContext, ConsentSource}; + use crate::redacted::Redacted; + use crate::settings::EcPartner; + use crate::test_support::tests::create_test_settings; + + fn make_context( + ec_value: Option<&str>, + cookie_ec_value: Option<&str>, + ec_was_present: bool, + ec_generated: bool, + jurisdiction: Jurisdiction, + ) -> EcContext { + let consent = ConsentContext { + jurisdiction, + source: ConsentSource::Cookie, + ..Default::default() + }; + + make_context_with_consent( + ec_value, + cookie_ec_value, + ec_was_present, + ec_generated, + consent, + ) + } + + fn 
make_context_with_consent( + ec_value: Option<&str>, + cookie_ec_value: Option<&str>, + ec_was_present: bool, + ec_generated: bool, + consent: ConsentContext, + ) -> EcContext { + EcContext::new_for_test_with_cookie( + ec_value.map(str::to_owned), + cookie_ec_value.map(str::to_owned), + ec_was_present, + ec_generated, + consent, + ) + } + + fn sample_ec_id(suffix: &str) -> String { + format!("{}.{suffix}", "a".repeat(64)) + } + + fn make_partner(id: &str) -> EcPartner { + EcPartner { + id: id.to_owned(), + name: format!("Partner {id}"), + source_domain: format!("{id}.example.com"), + openrtb_atype: EcPartner::default_openrtb_atype(), + bidstream_enabled: true, + api_token: Redacted::new(format!("token-{id}-32-bytes-minimum-value")), + batch_rate_limit: EcPartner::default_batch_rate_limit(), + pull_sync_enabled: false, + pull_sync_url: None, + pull_sync_allowed_domains: vec![], + pull_sync_ttl_sec: EcPartner::default_pull_sync_ttl_sec(), + pull_sync_rate_limit: EcPartner::default_pull_sync_rate_limit(), + ts_pull_token: None, + } + } + + #[test] + fn withdrawal_ec_ids_returns_cookie_ec_only_when_active_missing() { + let cookie_ec = sample_ec_id("cook1e"); + let ec_context = make_context(None, Some(&cookie_ec), true, false, Jurisdiction::Unknown); + + let ids = withdrawal_ec_ids(&ec_context); + + assert_eq!(ids.len(), 1, "should include exactly one EC ID"); + assert!( + ids.contains(&cookie_ec), + "should include the cookie EC value" + ); + } + + #[test] + fn withdrawal_ec_ids_deduplicates_matching_cookie_and_active_ec() { + let ec_id = sample_ec_id("same01"); + let ec_context = make_context( + Some(&ec_id), + Some(&ec_id), + true, + false, + Jurisdiction::Unknown, + ); + + let ids = withdrawal_ec_ids(&ec_context); + + assert_eq!(ids.len(), 1, "should deduplicate identical EC IDs"); + assert!(ids.contains(&ec_id), "should retain the shared EC ID"); + } + + #[test] + fn withdrawal_ec_ids_includes_both_cookie_and_active_when_different() { + let active_ec = 
// A malformed cookie value must not be tombstoned; the valid active ID is kept.
#[test]
fn withdrawal_ec_ids_filters_invalid_values() {
    let valid_ec = sample_ec_id("valid1");
    let active = Some(valid_ec.as_str());
    let malformed_cookie = Some("not-an-ec-id");
    let ec_context = make_context(active, malformed_cookie, true, false, Jurisdiction::Unknown);

    let ids = withdrawal_ec_ids(&ec_context);

    assert_eq!(ids.len(), 1, "should ignore malformed EC values");
    assert!(ids.contains(&valid_ec), "should keep the valid EC ID");
}

// The injected writer must be called exactly once per EC ID in the set.
#[test]
fn apply_withdrawal_tombstones_invokes_writer_for_each_ec_id() {
    let first = sample_ec_id("first1");
    let second = sample_ec_id("second");
    let ids: HashSet<String> = [first.clone(), second.clone()].into_iter().collect();

    let mut written = Vec::new();
    apply_withdrawal_tombstones(&ids, |ec_id| written.push(ec_id.to_owned()));
    // Set iteration order is unspecified, so compare sorted sequences.
    written.sort();

    let mut expected = vec![first, second];
    expected.sort();
    assert_eq!(written, expected, "should write a tombstone for each EC ID");
}
x-ts headers without a partner registry" + ); + + let set_cookie = response + .get_header("set-cookie") + .expect("should append Set-Cookie for expiry") + .to_str() + .expect("should render set-cookie as utf-8"); + + assert!( + set_cookie.contains("Max-Age=0"), + "should expire the EC cookie" + ); + } + + #[test] + fn finalize_withdrawal_clears_cookie_and_headers() { + let settings = create_test_settings(); + let ec_id = sample_ec_id("aBc123"); + let consent = ConsentContext { + jurisdiction: Jurisdiction::UsState("CA".to_owned()), + gpc: true, + source: ConsentSource::Cookie, + ..Default::default() + }; + let ec_context = + make_context_with_consent(Some(&ec_id), Some(&ec_id), true, false, consent); + let mut response = Response::new(); + response.set_header("x-ts-ec", "stale"); + response.set_header("x-ts-eids", "[]"); + response.set_header("x-ts-ssp_x", "partner-uid-123"); + response.set_header("x-ts-unrelated", "keep-me"); + + let partners = vec![make_partner("ssp_x")]; + let test_registry = PartnerRegistry::from_config(&partners).expect("should build registry"); + ec_finalize_response( + &settings, + &ec_context, + None, + &test_registry, + None, + None, + &mut response, + ); + + assert!( + response.get_header("x-ts-ec").is_none(), + "withdrawal should clear x-ts-ec header" + ); + assert!( + response.get_header("x-ts-eids").is_none(), + "withdrawal should clear x-ts-eids header" + ); + assert!( + response.get_header("x-ts-ssp_x").is_none(), + "withdrawal should clear registered partner header" + ); + assert_eq!( + response.get_header_str("x-ts-unrelated"), + Some("keep-me"), + "withdrawal should preserve unrelated x-ts header" + ); + let set_cookie = response + .get_header("set-cookie") + .expect("withdrawal should expire cookie") + .to_str() + .expect("set-cookie should be utf-8"); + assert!( + set_cookie.contains("Max-Age=0"), + "withdrawal should set Max-Age=0" + ); + } + + #[test] + fn finalize_returning_user_with_cookie_mismatch_sets_no_header_or_cookie() 
{ + let settings = create_test_settings(); + let active_ec = sample_ec_id("activ1"); + let cookie_ec = sample_ec_id("cook1e"); + let ec_context = make_context( + Some(&active_ec), + Some(&cookie_ec), + true, + false, + Jurisdiction::NonRegulated, + ); + let mut response = Response::new(); + + let test_registry = PartnerRegistry::empty(); + ec_finalize_response( + &settings, + &ec_context, + None, + &test_registry, + None, + None, + &mut response, + ); + + assert!( + response.get_header("x-ts-ec").is_none(), + "returning user should not set x-ts-ec" + ); + assert!( + response.get_header("set-cookie").is_none(), + "returning user should not refresh or repair cookie" + ); + } + + #[test] + fn finalize_returning_user_sets_no_header_or_cookie() { + let settings = create_test_settings(); + let ec_id = sample_ec_id("mtch01"); + let ec_context = make_context( + Some(&ec_id), + Some(&ec_id), + true, + false, + Jurisdiction::NonRegulated, + ); + let mut response = Response::new(); + + let test_registry = PartnerRegistry::empty(); + ec_finalize_response( + &settings, + &ec_context, + None, + &test_registry, + None, + None, + &mut response, + ); + + assert!( + response.get_header("x-ts-ec").is_none(), + "returning user should not set x-ts-ec" + ); + assert!( + response.get_header("set-cookie").is_none(), + "returning user should not refresh cookie" + ); + } + + #[test] + fn finalize_generated_ec_without_kv_skips_cookie_and_header() { + let settings = create_test_settings(); + let generated_ec = sample_ec_id("gen123"); + let ec_context = make_context( + Some(&generated_ec), + None, + false, + true, + Jurisdiction::NonRegulated, + ); + let mut response = Response::new(); + + let test_registry = PartnerRegistry::empty(); + ec_finalize_response( + &settings, + &ec_context, + None, + &test_registry, + None, + None, + &mut response, + ); + + assert!( + response.get_header("x-ts-ec").is_none(), + "generated EC without KV should not set response header" + ); + assert!( + 
response.get_header("set-cookie").is_none(), + "generated EC without KV should not set cookie" + ); + } + + #[test] + fn finalize_denied_without_cookie_is_noop() { + let settings = create_test_settings(); + let ec_context = make_context(None, None, false, false, Jurisdiction::Unknown); + let mut response = Response::new(); + + let test_registry = PartnerRegistry::empty(); + ec_finalize_response( + &settings, + &ec_context, + None, + &test_registry, + None, + None, + &mut response, + ); + + assert!( + response.get_header("x-ts-ec").is_none(), + "should not set EC header" + ); + assert!( + response.get_header("set-cookie").is_none(), + "should not mutate cookie when there is nothing to revoke" + ); + } + + #[test] + fn finalize_unknown_jurisdiction_strips_headers_without_expiring_cookie() { + let settings = create_test_settings(); + let ec_id = sample_ec_id("unk001"); + let ec_context = make_context( + Some(&ec_id), + Some(&ec_id), + true, + false, + Jurisdiction::Unknown, + ); + let mut response = Response::new(); + response.set_header("x-ts-ec", &ec_id); + response.set_header("x-ts-eids", "[]"); + + let test_registry = PartnerRegistry::empty(); + ec_finalize_response( + &settings, + &ec_context, + None, + &test_registry, + None, + None, + &mut response, + ); + + assert!( + response.get_header("x-ts-ec").is_none(), + "should strip EC header when consent cannot be verified" + ); + assert!( + response.get_header("x-ts-eids").is_none(), + "should strip EID header when consent cannot be verified" + ); + assert!( + response.get_header("set-cookie").is_none(), + "should not expire the cookie without an explicit withdrawal signal" + ); + } +} diff --git a/crates/trusted-server-core/src/ec/generation.rs b/crates/trusted-server-core/src/ec/generation.rs new file mode 100644 index 00000000..48068292 --- /dev/null +++ b/crates/trusted-server-core/src/ec/generation.rs @@ -0,0 +1,341 @@ +//! Edge Cookie (EC) ID generation using HMAC. +//! +//! 
/// Produces the canonical textual form of a client IP used as HMAC input.
///
/// IPv6 addresses are reduced to their /64 prefix so that Privacy Extensions
/// (rotation of the lower 64 interface-identifier bits) do not change the
/// result. IPv4 addresses pass through unchanged.
///
/// # Stability
///
/// The output format is a stable contract — EC hashes stored in KV depend
/// on it. Changing the format would invalidate all existing EC identities.
/// - **IPv4:** decimal-dotted notation (e.g. `"192.168.1.1"`)
/// - **IPv6:** first 4 segments as zero-padded lowercase hex without
///   separators (e.g. `"20010db885a30000"`)
fn normalize_ip(ip: IpAddr) -> String {
    match ip {
        IpAddr::V4(v4) => v4.to_string(),
        // Only the first four 16-bit segments (the /64 prefix) are kept, each
        // rendered as exactly four lowercase hex digits and concatenated.
        IpAddr::V6(v6) => v6.segments()[..4]
            .iter()
            .map(|segment| format!("{segment:04x}"))
            .collect(),
    }
}
+fn generate_random_suffix(length: usize) -> String { + let mut rng = rand::thread_rng(); + (0..length) + .map(|_| { + let idx = rng.gen_range(0..ALPHANUMERIC_CHARSET.len()); + ALPHANUMERIC_CHARSET[idx] as char + }) + .collect() +} + +/// Generates a fresh EC ID from a pre-captured client IP string. +/// +/// Uses only the client IP (not user-agent or other headers) intentionally: +/// EC IDs are meant to be simple, privacy-preserving identifiers — not +/// high-entropy fingerprints. The random suffix provides per-cookie +/// uniqueness for users behind the same NAT/proxy. +/// +/// Creates an HMAC-SHA256-based ID using the configured secret key and +/// the client IP address, then appends a random suffix for additional +/// uniqueness. The resulting format is `{64hex}.{6alnum}`. +/// +/// **Important:** `client_ip` must be pre-normalized via [`extract_client_ip`]. +/// Raw IPv6 addresses produce different hashes than their normalized /64 +/// form, which would create duplicate identity graph entries. +/// +/// # Errors +/// +/// - [`TrustedServerError::EdgeCookie`] if HMAC generation fails +pub fn generate_ec_id( + settings: &Settings, + client_ip: &str, +) -> Result> { + let mut mac = HmacSha256::new_from_slice(settings.ec.passphrase.expose().as_bytes()) + .change_context(TrustedServerError::EdgeCookie { + message: "Failed to create HMAC instance".to_string(), + })?; + mac.update(client_ip.as_bytes()); + let hmac_hash = hex::encode(mac.finalize().into_bytes()); + + // Append random 6-character alphanumeric suffix for additional uniqueness. + let random_suffix = generate_random_suffix(6); + let ec_id = format!("{hmac_hash}.{random_suffix}"); + + log::trace!("Generated fresh EC ID: {}", super::log_id(&ec_id)); + + Ok(ec_id) +} + +/// Extracts and normalizes the client IP from a request. +/// +/// Returns the normalized IP as a string suitable for HMAC input. 
/// Returns the part of an EC ID that precedes the first `.`.
///
/// For an ID in `{64hex}.{6alnum}` form this is the `{64hex}` hash prefix.
/// An input without any dot is returned unchanged.
#[must_use]
pub fn ec_hash(ec_id: &str) -> &str {
    ec_id.split_once('.').map_or(ec_id, |(hash, _suffix)| hash)
}

/// Normalizes an EC ID for use as a KV key by lowercasing the hash prefix.
///
/// `hex::encode` (used in [`generate_ec_id`]) always produces lowercase hex,
/// so internally generated EC IDs are already lowercase. This is a
/// defense-in-depth measure for EC IDs submitted by external partners
/// (via batch sync) that may use uppercase hex.
///
/// Only the text before the first `.` is lowercased; everything after it is
/// kept verbatim. The output always contains a `.`: an input without one
/// yields the lowercased input followed by a trailing dot.
#[must_use]
pub fn normalize_ec_id_for_kv(ec_id: &str) -> String {
    let (hash, suffix) = match ec_id.split_once('.') {
        Some(parts) => parts,
        None => (ec_id, ""),
    };
    let lowered = hash.to_ascii_lowercase();
    format!("{lowered}.{suffix}")
}
#[must_use]
pub fn is_valid_ec_hash(value: &str) -> bool {
    // Exactly 64 bytes, each an ASCII hex digit of either case.
    let bytes = value.as_bytes();
    bytes.len() == 64 && bytes.iter().all(u8::is_ascii_hexdigit)
}

/// Checks whether a string matches the expected EC ID format.
///
/// The format is `{64hex}.{6alnum}`: a 64-character **lowercase** hex string,
/// a single dot, and a 6-character ASCII alphanumeric suffix. Uppercase hex
/// is rejected, so callers must normalize before validation to prevent
/// duplicate KV keys from case-variant EC IDs. The suffix may be mixed-case.
#[must_use]
pub fn is_valid_ec_id(value: &str) -> bool {
    // There must be exactly one dot, giving exactly two segments.
    let Some((hmac_part, suffix_part)) = value.split_once('.') else {
        return false;
    };
    if suffix_part.contains('.') {
        return false;
    }

    if hmac_part.len() != 64 || suffix_part.len() != 6 {
        return false;
    }

    let hmac_is_lower_hex = hmac_part
        .bytes()
        .all(|b| matches!(b, b'0'..=b'9' | b'a'..=b'f'));
    let suffix_is_alnum = suffix_part.bytes().all(|b| b.is_ascii_alphanumeric());

    hmac_is_lower_hex && suffix_is_alnum
}
+ let ipv6_a = IpAddr::V6(Ipv6Addr::new( + 0x2001, 0x0db8, 0xabcd, 0x0001, 0x1111, 0x2222, 0x3333, 0x4444, + )); + let ipv6_b = IpAddr::V6(Ipv6Addr::new( + 0x2001, 0x0db8, 0xabcd, 0x0001, 0xaaaa, 0xbbbb, 0xcccc, 0xdddd, + )); + assert_eq!( + normalize_ip(ipv6_a), + normalize_ip(ipv6_b), + "should normalize to the same /64 prefix" + ); + assert_eq!(normalize_ip(ipv6_a), "20010db8abcd0001"); + } + + #[test] + fn generate_produces_valid_format() { + let settings = create_test_settings(); + let ec_id = generate_ec_id(&settings, "192.168.1.1").expect("should generate EC ID"); + assert!( + is_valid_ec_id(&ec_id), + "should match EC ID format: {{64hex}}.{{6alnum}}, got: {ec_id}" + ); + } + + #[test] + fn generate_same_ip_produces_consistent_hash_prefix() { + let settings = create_test_settings(); + let first = generate_ec_id(&settings, "192.168.1.1").expect("should generate first EC ID"); + let second = + generate_ec_id(&settings, "192.168.1.1").expect("should generate second EC ID"); + + assert_eq!( + ec_hash(&first), + ec_hash(&second), + "same IP and passphrase should produce the same HMAC prefix" + ); + assert_ne!( + first, second, + "random suffix should differ between generated EC IDs" + ); + } + + #[test] + fn ec_hash_extracts_prefix() { + let id = format!("{}.Ab12z9", "a".repeat(64)); + assert_eq!(ec_hash(&id), "a".repeat(64)); + } + + #[test] + fn ec_hash_returns_full_string_without_dot() { + assert_eq!(ec_hash("nodot"), "nodot"); + } + + #[test] + fn is_valid_ec_hash_accepts_64_hex() { + assert!(is_valid_ec_hash(&"a".repeat(64))); + assert!(is_valid_ec_hash(&"0123456789abcdef".repeat(4))); + } + + #[test] + fn is_valid_ec_hash_accepts_uppercase_hex() { + assert!( + is_valid_ec_hash(&"A".repeat(64)), + "should accept uppercase hex (callers normalize before KV lookup)" + ); + } + + #[test] + fn is_valid_ec_hash_rejects_wrong_length() { + assert!(!is_valid_ec_hash(&"a".repeat(63))); + assert!(!is_valid_ec_hash(&"a".repeat(65))); + assert!(!is_valid_ec_hash("")); + 
} + + #[test] + fn is_valid_ec_hash_rejects_non_hex() { + let mut hash = "a".repeat(64); + hash.replace_range(0..1, "g"); + assert!(!is_valid_ec_hash(&hash)); + } + + #[test] + fn is_valid_ec_id_accepts_valid() { + let value = format!("{}.Ab12z9", "a".repeat(64)); + assert!(is_valid_ec_id(&value), "should accept a valid EC ID format"); + } + + #[test] + fn is_valid_ec_id_rejects_missing_suffix() { + let missing_suffix = "a".repeat(64); + assert!( + !is_valid_ec_id(&missing_suffix), + "should reject missing suffix" + ); + } + + #[test] + fn is_valid_ec_id_rejects_invalid_hex() { + let invalid_hex = format!("{}.Ab12z9", "a".repeat(63) + "g"); + assert!( + !is_valid_ec_id(&invalid_hex), + "should reject non-hex HMAC content" + ); + } + + #[test] + fn is_valid_ec_id_rejects_invalid_suffix() { + let invalid_suffix = format!("{}.ab-129", "a".repeat(64)); + assert!( + !is_valid_ec_id(&invalid_suffix), + "should reject non-alphanumeric suffix" + ); + } + + #[test] + fn is_valid_ec_id_rejects_extra_segments() { + let extra_segment = format!("{}.Ab12z9.zz", "a".repeat(64)); + assert!( + !is_valid_ec_id(&extra_segment), + "should reject extra segments" + ); + } +} diff --git a/crates/trusted-server-core/src/ec/identify.rs b/crates/trusted-server-core/src/ec/identify.rs new file mode 100644 index 00000000..b0fc8a49 --- /dev/null +++ b/crates/trusted-server-core/src/ec/identify.rs @@ -0,0 +1,612 @@ +//! Identity lookup endpoint (`GET /_ts/api/v1/identify`). +//! +//! Partners authenticate with a Bearer token and receive only their own +//! synced UID for the active EC ID. 
+ +use error_stack::{Report, ResultExt}; +use fastly::http::{header, StatusCode}; +use fastly::{Request, Response}; +use url::Url; + +use super::auth::authenticate_bearer; +use super::consent::ec_consent_granted; +use crate::error::TrustedServerError; +use crate::openrtb::{Eid, Uid}; +use crate::settings::Settings; + +use super::kv::KvIdentityGraph; +use super::log_id; +use super::registry::PartnerRegistry; +use super::EcContext; + +/// Handles `GET /_ts/api/v1/identify`. +/// +/// Requires Bearer token authentication. Returns only the requesting +/// partner's UID for the active EC ID. +/// +/// # Errors +/// +/// Returns [`TrustedServerError`] for response serialization issues. +pub fn handle_identify( + settings: &Settings, + kv: &KvIdentityGraph, + registry: &PartnerRegistry, + req: &Request, + ec_context: &EcContext, +) -> Result> { + let allowed_origin = match classify_origin(req, settings) { + CorsDecision::Denied => { + return Ok(apply_identify_cache_headers(Response::from_status( + StatusCode::FORBIDDEN, + ))); + } + CorsDecision::NoOrigin => None, + CorsDecision::Allowed(origin) => Some(origin), + }; + + // Authenticate via Bearer token. 
+ let Some(partner) = authenticate_bearer(registry, req) else { + return json_response_with_origin( + StatusCode::UNAUTHORIZED, + &serde_json::json!({ "error": "invalid_token" }), + allowed_origin.as_deref(), + ); + }; + + if !ec_consent_granted(ec_context.consent()) { + return json_response_with_origin( + StatusCode::FORBIDDEN, + &serde_json::json!({ "consent": "denied" }), + allowed_origin.as_deref(), + ); + } + + let Some(ec_id) = ec_context.ec_value() else { + let response = apply_identify_cache_headers(Response::from_status(StatusCode::NO_CONTENT)); + return Ok(apply_cors_headers_if_allowed( + response, + allowed_origin.as_deref(), + )); + }; + + let mut degraded = false; + let mut uid: Option = None; + let mut cluster_size: Option = None; + + match kv.get(ec_id) { + Ok(Some((entry, generation))) => { + // Extract only this partner's UID. + if let Some(partner_uid) = entry.ids.get(&partner.id) { + if !partner_uid.uid.is_empty() { + uid = Some(partner_uid.uid.clone()); + } + } + + // Evaluate cluster size lazily for identify responses. Existing + // stored cluster_size values are reused without a prefix-list call. + match kv.evaluate_cluster(ec_id, &entry, generation) { + Ok(size) => { + cluster_size = size; + } + Err(err) => { + log::warn!("Cluster evaluation failed for '{}': {err:?}", log_id(ec_id)); + } + } + } + Ok(None) => {} + Err(err) => { + log::warn!( + "Identify KV read failed for EC ID '{}': {err:?}", + log_id(ec_id) + ); + degraded = true; + } + } + + let eid = uid.as_ref().map(|u| Eid { + source: partner.source_domain.clone(), + uids: vec![Uid { + id: u.clone(), + atype: Some(partner.openrtb_atype), + ext: None, + }], + }); + + let body = IdentifyResponse { + ec: ec_id.to_owned(), + consent: "ok".to_owned(), + degraded, + partner_id: partner.id.clone(), + uid, + eid, + cluster_size, + }; + + json_response_with_origin(StatusCode::OK, &body, allowed_origin.as_deref()) +} + +/// Handles `OPTIONS /_ts/api/v1/identify` CORS preflight. 
+/// +/// # Errors +/// +/// Returns [`TrustedServerError`] when response construction fails. +pub fn cors_preflight_identify( + settings: &Settings, + req: &Request, +) -> Result> { + let mut response = match classify_origin(req, settings) { + CorsDecision::Denied => Response::from_status(StatusCode::FORBIDDEN), + CorsDecision::NoOrigin => Response::from_status(StatusCode::OK), + CorsDecision::Allowed(origin) => { + let mut response = Response::from_status(StatusCode::OK); + apply_cors_headers(&mut response, &origin); + response + } + }; + + response.set_body(Vec::new()); + Ok(apply_identify_cache_headers(response)) +} + +#[derive(serde::Serialize)] +struct IdentifyResponse { + ec: String, + consent: String, + degraded: bool, + partner_id: String, + #[serde(skip_serializing_if = "Option::is_none")] + uid: Option, + #[serde(skip_serializing_if = "Option::is_none")] + eid: Option, + #[serde(skip_serializing_if = "Option::is_none")] + cluster_size: Option, +} + +fn json_response_with_origin( + status: StatusCode, + body: &T, + allowed_origin: Option<&str>, +) -> Result> { + let body = serde_json::to_string(body).change_context(TrustedServerError::EdgeCookie { + message: "Failed to serialize identify response".to_owned(), + })?; + + let response = Response::from_status(status) + .with_content_type(fastly::mime::APPLICATION_JSON) + .with_body(body); + let response = apply_identify_cache_headers(response); + + Ok(apply_cors_headers_if_allowed(response, allowed_origin)) +} + +enum CorsDecision { + NoOrigin, + Allowed(String), + Denied, +} + +fn classify_origin(req: &Request, settings: &Settings) -> CorsDecision { + let Some(origin) = req.get_header(header::ORIGIN).and_then(|v| v.to_str().ok()) else { + return CorsDecision::NoOrigin; + }; + + let Ok(origin_url) = Url::parse(origin) else { + return CorsDecision::Denied; + }; + + if origin_url.scheme() != "https" { + return CorsDecision::Denied; + } + + let Some(host) = origin_url.host_str() else { + return 
CorsDecision::Denied; + }; + + let publisher_host = settings + .publisher + .domain + .trim_end_matches('.') + .to_ascii_lowercase(); + + if origin_authority_contains_uppercase_host(origin) { + return CorsDecision::Denied; + } + + let host = host.to_ascii_lowercase(); + if host == publisher_host || host.ends_with(&format!(".{publisher_host}")) { + return CorsDecision::Allowed(origin.to_owned()); + } + + CorsDecision::Denied +} + +fn origin_authority_contains_uppercase_host(origin: &str) -> bool { + let Some(after_scheme) = origin.strip_prefix("https://") else { + return false; + }; + let authority = after_scheme + .split(['/', '?', '#']) + .next() + .unwrap_or(after_scheme); + let host_port = authority + .rsplit_once('@') + .map_or(authority, |(_, host_port)| host_port); + let host = host_port + .split_once(':') + .map_or(host_port, |(host, _)| host); + + host.bytes().any(|byte| byte.is_ascii_uppercase()) +} + +fn apply_identify_cache_headers(mut response: Response) -> Response { + response.set_header(header::CACHE_CONTROL, "no-store"); + response.set_header(header::PRAGMA, "no-cache"); + response.set_header(header::VARY, "Origin, Authorization"); + response +} + +fn apply_cors_headers_if_allowed(mut response: Response, allowed_origin: Option<&str>) -> Response { + if let Some(origin) = allowed_origin { + apply_cors_headers(&mut response, origin); + } + response +} + +fn apply_cors_headers(response: &mut Response, origin: &str) { + response.set_header(header::ACCESS_CONTROL_ALLOW_ORIGIN, origin); + response.set_header(header::ACCESS_CONTROL_ALLOW_CREDENTIALS, "true"); + response.set_header(header::ACCESS_CONTROL_ALLOW_METHODS, "GET, OPTIONS"); + response.set_header(header::ACCESS_CONTROL_ALLOW_HEADERS, "Authorization"); + response.set_header(header::ACCESS_CONTROL_MAX_AGE, "600"); + response.set_header(header::VARY, "Origin, Authorization"); +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::consent::jurisdiction::Jurisdiction; + use 
crate::consent::types::{ConsentContext, ConsentSource}; + use crate::ec::registry::PartnerRegistry; + use crate::redacted::Redacted; + use crate::settings::EcPartner; + use crate::test_support::tests::create_test_settings; + + const VALID_API_TOKEN: &str = "identify-test-token-32-bytes-min"; + + fn assert_no_store(response: &Response) { + assert_eq!( + response.get_header_str(header::CACHE_CONTROL), + Some("no-store"), + "identify responses should not be cached" + ); + } + + fn make_ec_context(jurisdiction: Jurisdiction, ec_value: Option<&str>) -> EcContext { + let consent = ConsentContext { + jurisdiction, + source: ConsentSource::Cookie, + ..ConsentContext::default() + }; + EcContext::new_for_test(ec_value.map(str::to_owned), consent) + } + + fn make_test_partner(id: &str, api_token: &str) -> EcPartner { + EcPartner { + id: id.to_owned(), + name: format!("Partner {id}"), + source_domain: format!("{id}.example.com"), + openrtb_atype: EcPartner::default_openrtb_atype(), + bidstream_enabled: true, + api_token: Redacted::new(api_token.to_owned()), + batch_rate_limit: EcPartner::default_batch_rate_limit(), + pull_sync_enabled: false, + pull_sync_url: None, + pull_sync_allowed_domains: vec![], + pull_sync_ttl_sec: EcPartner::default_pull_sync_ttl_sec(), + pull_sync_rate_limit: EcPartner::default_pull_sync_rate_limit(), + ts_pull_token: None, + } + } + + #[test] + fn classify_origin_accepts_publisher_subdomain() { + let settings = create_test_settings(); + let mut req = Request::new("GET", "https://edge.test-publisher.com/identify"); + req.set_header("origin", "https://www.test-publisher.com"); + + let decision = classify_origin(&req, &settings); + assert!( + matches!(decision, CorsDecision::Allowed(_)), + "should allow publisher subdomain origin" + ); + } + + #[test] + fn classify_origin_rejects_mismatch() { + let settings = create_test_settings(); + let mut req = Request::new("GET", "https://edge.test-publisher.com/identify"); + req.set_header("origin", 
"https://evil.com"); + + let decision = classify_origin(&req, &settings); + assert!( + matches!(decision, CorsDecision::Denied), + "should deny mismatched origin" + ); + } + + #[test] + fn classify_origin_rejects_mixed_case_publisher_host() { + let settings = create_test_settings(); + let mut req = Request::new("GET", "https://edge.test-publisher.com/identify"); + req.set_header("origin", "https://Foo.test-publisher.com"); + + let decision = classify_origin(&req, &settings); + assert!( + matches!(decision, CorsDecision::Denied), + "should deny mixed-case origin hosts instead of reflecting a value browsers reject" + ); + } + + #[test] + fn classify_origin_rejects_http_scheme() { + let settings = create_test_settings(); + let mut req = Request::new("GET", "https://edge.test-publisher.com/identify"); + req.set_header("origin", "http://www.test-publisher.com"); + + let decision = classify_origin(&req, &settings); + assert!( + matches!(decision, CorsDecision::Denied), + "should deny non-https publisher origin" + ); + } + + #[test] + fn classify_origin_allows_absent_origin_header() { + let settings = create_test_settings(); + let req = Request::new("GET", "https://edge.test-publisher.com/identify"); + + let decision = classify_origin(&req, &settings); + assert!( + matches!(decision, CorsDecision::NoOrigin), + "should allow no-origin requests" + ); + } + + #[test] + fn handle_identify_rejects_missing_bearer_token() { + let settings = create_test_settings(); + let kv = KvIdentityGraph::new("missing_store"); + let registry = PartnerRegistry::empty(); + let req = Request::new("GET", "https://edge.test-publisher.com/identify"); + let ec_context = make_ec_context(Jurisdiction::NonRegulated, None); + + let mut response = handle_identify(&settings, &kv, ®istry, &req, &ec_context) + .expect("should construct unauthorized response"); + + assert_eq!( + response.get_header_str(header::ACCESS_CONTROL_ALLOW_ORIGIN), + None, + "should omit CORS headers when Origin is absent" + ); + + 
assert_eq!( + response.get_status(), + StatusCode::UNAUTHORIZED, + "should return 401 without Bearer token" + ); + assert_no_store(&response); + let body = serde_json::from_slice::(&response.take_body_bytes()) + .expect("should decode JSON body"); + assert_eq!( + body["error"], "invalid_token", + "should return invalid_token error" + ); + } + + #[test] + fn handle_identify_rejects_invalid_bearer_token() { + let settings = create_test_settings(); + let kv = KvIdentityGraph::new("missing_store"); + let partners = vec![make_test_partner("ssp_x", VALID_API_TOKEN)]; + let registry = PartnerRegistry::from_config(&partners).expect("should build registry"); + let mut req = Request::new("GET", "https://edge.test-publisher.com/identify"); + req.set_header("authorization", "Bearer wrong-token"); + let ec_context = make_ec_context(Jurisdiction::NonRegulated, None); + + let response = handle_identify(&settings, &kv, ®istry, &req, &ec_context) + .expect("should construct unauthorized response"); + + assert_eq!( + response.get_status(), + StatusCode::UNAUTHORIZED, + "should return 401 for invalid Bearer token" + ); + assert_no_store(&response); + } + + #[test] + fn handle_identify_denied_consent_returns_403() { + let settings = create_test_settings(); + let kv = KvIdentityGraph::new("missing_store"); + let partners = vec![make_test_partner("ssp_x", VALID_API_TOKEN)]; + let registry = PartnerRegistry::from_config(&partners).expect("should build registry"); + let mut req = Request::new("GET", "https://edge.test-publisher.com/identify"); + req.set_header("authorization", format!("Bearer {VALID_API_TOKEN}")); + let ec_context = make_ec_context(Jurisdiction::Unknown, None); + + let mut response = handle_identify(&settings, &kv, ®istry, &req, &ec_context) + .expect("should construct denied response"); + + assert_eq!( + response.get_status(), + StatusCode::FORBIDDEN, + "should return 403 when consent denies EC" + ); + assert_no_store(&response); + let body = 
serde_json::from_slice::(&response.take_body_bytes()) + .expect("should decode JSON body"); + assert_eq!( + body, + serde_json::json!({ "consent": "denied" }), + "should return denied consent payload" + ); + } + + #[test] + fn handle_identify_without_ec_returns_204() { + let settings = create_test_settings(); + let kv = KvIdentityGraph::new("missing_store"); + let partners = vec![make_test_partner("ssp_x", VALID_API_TOKEN)]; + let registry = PartnerRegistry::from_config(&partners).expect("should build registry"); + let mut req = Request::new("GET", "https://edge.test-publisher.com/identify"); + req.set_header("authorization", format!("Bearer {VALID_API_TOKEN}")); + let ec_context = make_ec_context(Jurisdiction::NonRegulated, None); + + let response = handle_identify(&settings, &kv, ®istry, &req, &ec_context) + .expect("should construct no-content response"); + + assert_eq!( + response.get_status(), + StatusCode::NO_CONTENT, + "should return 204 when EC is unavailable" + ); + assert_no_store(&response); + } + + #[test] + fn handle_identify_kv_failure_sets_degraded_true() { + let settings = create_test_settings(); + let kv = KvIdentityGraph::new("missing_store"); + let partners = vec![make_test_partner("ssp_x", VALID_API_TOKEN)]; + let registry = PartnerRegistry::from_config(&partners).expect("should build registry"); + let mut req = Request::new("GET", "https://edge.test-publisher.com/identify"); + req.set_header("authorization", format!("Bearer {VALID_API_TOKEN}")); + let ec_id = format!("{}.ABC123", "a".repeat(64)); + let ec_context = make_ec_context(Jurisdiction::NonRegulated, Some(&ec_id)); + + let mut response = handle_identify(&settings, &kv, ®istry, &req, &ec_context) + .expect("should construct degraded identify response"); + + assert_eq!( + response.get_status(), + StatusCode::OK, + "should return 200 on degraded KV read" + ); + assert_no_store(&response); + let body = serde_json::from_slice::(&response.take_body_bytes()) + .expect("should decode identify 
response JSON"); + + assert_eq!(body["ec"], ec_id, "should echo EC in body"); + assert!( + response.get_header("x-ts-ec").is_none(), + "should not emit x-ts-ec header" + ); + assert_eq!(body["partner_id"], "ssp_x", "should echo partner ID"); + assert_eq!( + body["degraded"], + serde_json::Value::Bool(true), + "should mark response as degraded when KV read fails" + ); + assert!( + body.get("uid").is_none(), + "uid should be omitted when KV read fails" + ); + assert!( + body.get("eid").is_none(), + "eid should be omitted when KV read fails" + ); + } + + #[test] + fn handle_identify_denies_mismatched_browser_origin() { + let settings = create_test_settings(); + let kv = KvIdentityGraph::new("missing_store"); + let partners = vec![make_test_partner("ssp_x", VALID_API_TOKEN)]; + let registry = PartnerRegistry::from_config(&partners).expect("should build registry"); + let mut req = Request::new("GET", "https://edge.test-publisher.com/identify"); + req.set_header("authorization", format!("Bearer {VALID_API_TOKEN}")); + req.set_header("origin", "https://evil.example"); + let ec_context = make_ec_context(Jurisdiction::NonRegulated, None); + + let response = handle_identify(&settings, &kv, ®istry, &req, &ec_context) + .expect("should construct forbidden response"); + + assert_eq!( + response.get_status(), + StatusCode::FORBIDDEN, + "should reject GET from non-publisher origin" + ); + assert_no_store(&response); + } + + #[test] + fn handle_identify_allows_browser_origin_and_reflects_cors_headers() { + let settings = create_test_settings(); + let kv = KvIdentityGraph::new("missing_store"); + let partners = vec![make_test_partner("ssp_x", VALID_API_TOKEN)]; + let registry = PartnerRegistry::from_config(&partners).expect("should build registry"); + let mut req = Request::new("GET", "https://edge.test-publisher.com/identify"); + req.set_header("authorization", format!("Bearer {VALID_API_TOKEN}")); + req.set_header("origin", "https://www.test-publisher.com"); + let ec_context = 
make_ec_context(Jurisdiction::NonRegulated, None); + + let response = handle_identify(&settings, &kv, ®istry, &req, &ec_context) + .expect("should construct no-content response with CORS headers"); + + assert_eq!( + response.get_status(), + StatusCode::NO_CONTENT, + "should preserve identify response status for allowed browser origin" + ); + assert_no_store(&response); + assert_eq!( + response.get_header_str(header::ACCESS_CONTROL_ALLOW_ORIGIN), + Some("https://www.test-publisher.com"), + "should reflect allowed browser origin on GET responses" + ); + assert_eq!( + response.get_header_str(header::VARY), + Some("Origin, Authorization"), + "should vary on identity request inputs for browser-direct identify responses" + ); + } + + #[test] + fn identify_preflight_denies_mismatched_origin() { + let settings = create_test_settings(); + let mut req = Request::new("OPTIONS", "https://edge.test-publisher.com/identify"); + req.set_header("origin", "https://evil.example"); + + let response = + cors_preflight_identify(&settings, &req).expect("should construct preflight response"); + + assert_eq!( + response.get_status(), + StatusCode::FORBIDDEN, + "should reject preflight from non-publisher origin" + ); + assert_no_store(&response); + } + + #[test] + fn identify_preflight_allows_publisher_origin() { + let settings = create_test_settings(); + let mut req = Request::new("OPTIONS", "https://edge.test-publisher.com/identify"); + req.set_header("origin", "https://www.test-publisher.com"); + + let response = + cors_preflight_identify(&settings, &req).expect("should construct preflight response"); + + assert_eq!( + response.get_status(), + StatusCode::OK, + "should allow preflight from publisher origin" + ); + assert_no_store(&response); + assert_eq!( + response.get_header_str(header::VARY), + Some("Origin, Authorization"), + "should vary on identity request inputs for preflight" + ); + } +} diff --git a/crates/trusted-server-core/src/ec/kv.rs 
b/crates/trusted-server-core/src/ec/kv.rs new file mode 100644 index 00000000..a431892e --- /dev/null +++ b/crates/trusted-server-core/src/ec/kv.rs @@ -0,0 +1,832 @@ +//! KV identity graph operations. +//! +//! This module provides [`KvIdentityGraph`] which wraps a Fastly KV Store +//! and implements the read-modify-write operations for the EC identity graph. +//! +//! All methods return `Result` — callers decide whether to swallow errors +//! (organic request paths) or propagate them (sync endpoints). See the +//! per-operation error handling policy in the spec §7.5. + +use std::time::Duration; + +use error_stack::{Report, ResultExt}; +use fastly::kv_store::{InsertMode, KVStore}; + +use crate::error::TrustedServerError; + +use super::current_timestamp; +use super::generation::ec_hash; +use super::kv_types::{KvEntry, KvMetadata, KvNetwork}; + +/// Maximum number of CAS retry attempts before giving up. +const MAX_CAS_RETRIES: u32 = 5; + +/// Maximum number of keys to request when counting hash-prefix matches +/// for cluster size evaluation. Anything above this is clearly a large +/// shared network; the exact count doesn't matter. +const CLUSTER_LIST_LIMIT: u32 = 100; + +/// TTL for live entries (1 year), matching the EC cookie `Max-Age`. +const ENTRY_TTL: Duration = Duration::from_secs(365 * 24 * 60 * 60); + +/// TTL for withdrawal tombstones (24 hours). +const TOMBSTONE_TTL: Duration = Duration::from_secs(24 * 60 * 60); + +/// Outcome of an [`KvIdentityGraph::upsert_partner_id_if_exists`] call. +/// +/// Like [`KvIdentityGraph::upsert_partner_id`], this method fails closed when +/// the root entry is missing. This enum encodes the per-mapping rejection +/// reasons needed by the S2S batch sync endpoint. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum UpsertResult { + /// The partner ID was successfully written. + Written, + /// The KV key does not exist — S2S must not create new entries. 
+ NotFound, + /// The entry's `consent.ok` is `false` (withdrawal tombstone). + ConsentWithdrawn, + /// The partner ID already had the requested UID, so no write was needed. + Unchanged, +} + +use super::log_id; + +/// Wraps a Fastly KV Store for EC identity graph operations. +/// +/// Each EC ID (`{64hex}.{6alnum}`) maps to a JSON-encoded [`KvEntry`] +/// containing consent state, geo location, and accumulated partner IDs. +/// +/// Methods use optimistic concurrency (generation markers) for safe +/// read-modify-write operations on concurrent requests. +#[derive(Debug)] +pub struct KvIdentityGraph { + store_name: String, +} + +impl KvIdentityGraph { + /// Creates a new identity graph backed by the named KV store. + #[must_use] + pub fn new(store_name: impl Into) -> Self { + Self { + store_name: store_name.into(), + } + } + + /// Returns the configured store name. + #[must_use] + pub fn store_name(&self) -> &str { + &self.store_name + } + + /// Opens the underlying Fastly KV store. + fn open_store(&self) -> Result> { + KVStore::open(&self.store_name) + .change_context(TrustedServerError::KvStore { + store_name: self.store_name.clone(), + message: "Failed to open KV store".to_owned(), + })? + .ok_or_else(|| { + Report::new(TrustedServerError::KvStore { + store_name: self.store_name.clone(), + message: "KV store not found".to_owned(), + }) + }) + } + + /// Serializes an entry body and metadata for insertion. 
+ fn serialize_entry( + entry: &KvEntry, + store_name: &str, + ) -> Result<(String, String), Report> { + entry.validate().map_err(|message| { + Report::new(TrustedServerError::KvStore { + store_name: store_name.to_owned(), + message: format!("Refusing to serialize invalid KV entry: {message}"), + }) + })?; + + let body = serde_json::to_string(entry).change_context(TrustedServerError::KvStore { + store_name: store_name.to_owned(), + message: "Failed to serialize KV entry body".to_owned(), + })?; + let meta = KvMetadata::from_entry(entry); + let meta_str = + serde_json::to_string(&meta).change_context(TrustedServerError::KvStore { + store_name: store_name.to_owned(), + message: "Failed to serialize KV entry metadata".to_owned(), + })?; + Ok((body, meta_str)) + } + + /// Reads the full entry and its generation marker for CAS writes. + /// + /// Returns `Ok(None)` when the key does not exist. + /// + /// # Errors + /// + /// Returns [`TrustedServerError::KvStore`] on store open or read failure. 
+ pub fn get(&self, ec_id: &str) -> Result, Report> { + let store = self.open_store()?; + let mut response = match store.lookup(ec_id) { + Ok(resp) => resp, + Err(fastly::kv_store::KVStoreError::ItemNotFound) => return Ok(None), + Err(err) => { + return Err( + Report::new(err).change_context(TrustedServerError::KvStore { + store_name: self.store_name.clone(), + message: format!("Failed to read key '{ec_id}'"), + }), + ); + } + }; + + let generation = response.current_generation(); + let body_bytes = response.take_body_bytes(); + let entry = Self::deserialize_entry(&self.store_name, ec_id, &body_bytes)?; + + Ok(Some((entry, generation))) + } + + fn deserialize_entry( + store_name: &str, + ec_id: &str, + body_bytes: &[u8], + ) -> Result> { + let entry: KvEntry = + serde_json::from_slice(body_bytes).change_context(TrustedServerError::KvStore { + store_name: store_name.to_owned(), + message: format!("Failed to deserialize entry for key '{ec_id}'"), + })?; + + entry.validate().map_err(|message| { + Report::new(TrustedServerError::KvStore { + store_name: store_name.to_owned(), + message: format!("Loaded invalid entry for key '{ec_id}': {message}"), + }) + })?; + + Ok(entry) + } + + /// Reads only the metadata for an EC ID key (no body streaming). + /// + /// Returns `Ok(None)` when the key does not exist or has no metadata. + /// + /// # Errors + /// + /// Returns [`TrustedServerError::KvStore`] on store open or read failure. 
+ pub fn get_metadata( + &self, + ec_id: &str, + ) -> Result, Report> { + let store = self.open_store()?; + let response = match store.lookup(ec_id) { + Ok(resp) => resp, + Err(fastly::kv_store::KVStoreError::ItemNotFound) => return Ok(None), + Err(err) => { + return Err( + Report::new(err).change_context(TrustedServerError::KvStore { + store_name: self.store_name.clone(), + message: format!("Failed to read metadata for key '{ec_id}'"), + }), + ); + } + }; + + let meta_bytes = match response.metadata() { + Some(bytes) => bytes, + None => return Ok(None), + }; + + let meta: KvMetadata = + serde_json::from_slice(&meta_bytes).change_context(TrustedServerError::KvStore { + store_name: self.store_name.clone(), + message: format!("Failed to deserialize metadata for key '{ec_id}'"), + })?; + + Ok(Some(meta)) + } + + /// Creates a new entry. Fails if the key already exists. + /// + /// Uses `InsertMode::Add` so concurrent creates for the same EC ID + /// are safely rejected (only one wins). + /// + /// # Errors + /// + /// Returns [`TrustedServerError::KvStore`] on store error or if the + /// key already exists (`ItemPreconditionFailed`). + pub fn create(&self, ec_id: &str, entry: &KvEntry) -> Result<(), Report> { + let store = self.open_store()?; + let (body, meta_str) = Self::serialize_entry(entry, &self.store_name)?; + let created = Self::try_insert_add(&store, ec_id, &body, &meta_str, &self.store_name)?; + if created { + Ok(()) + } else { + Err(Report::new(TrustedServerError::KvStore { + store_name: self.store_name.clone(), + message: format!("Key '{ec_id}' already exists"), + })) + } + } + + /// Low-level create using a pre-opened store and pre-serialized data. + /// + /// Returns `true` if the entry was created, `false` if the key already + /// exists (`ItemPreconditionFailed`). Other errors are propagated. 
+ fn try_insert_add( + store: &KVStore, + ec_id: &str, + body: &str, + meta_str: &str, + store_name: &str, + ) -> Result> { + match store + .build_insert() + .mode(InsertMode::Add) + .metadata(meta_str) + .time_to_live(ENTRY_TTL) + .execute(ec_id, body) + { + Ok(()) => Ok(true), + Err(fastly::kv_store::KVStoreError::ItemPreconditionFailed) => Ok(false), + Err(err) => Err( + Report::new(err).change_context(TrustedServerError::KvStore { + store_name: store_name.to_owned(), + message: format!("Failed to create entry for key '{ec_id}'"), + }), + ), + } + } + + /// Creates a new entry, or overwrites an existing tombstone on re-consent. + /// + /// Three-way behavior: + /// - **No existing key** — creates the entry (same as [`create`](Self::create)). + /// - **Existing live entry** (`consent.ok = true`) — no-op, returns `Ok(())`. + /// - **Existing tombstone** (`consent.ok = false`) — CAS overwrite with + /// the new entry. Retries up to [`MAX_CAS_RETRIES`] on conflict. + /// + /// Called by `generate_if_needed()` instead of `create()` so that a + /// user who re-consents within the 24-hour tombstone window recovers + /// immediately. + /// + /// # Errors + /// + /// Returns [`TrustedServerError::KvStore`] on store error or CAS + /// exhaustion. + pub fn create_or_revive( + &self, + ec_id: &str, + entry: &KvEntry, + ) -> Result<(), Report> { + // Serialize once and reuse across the fast path and CAS loop. + let store = self.open_store()?; + let (body, meta_str) = Self::serialize_entry(entry, &self.store_name)?; + + // Try create first — fast path for new entries. + if Self::try_insert_add(&store, ec_id, &body, &meta_str, &self.store_name)? { + return Ok(()); + } + + // Key exists — read it to determine if it's live or a tombstone. + let (existing, generation) = match self.get(ec_id)? { + Some(pair) => pair, + // Raced with a delete — try create again. + None => return self.create(ec_id, entry), + }; + + // Live entry — nothing to do. 
+ if existing.consent.ok { + log::debug!( + "create_or_revive: live entry exists for '{}', no-op", + log_id(ec_id) + ); + return Ok(()); + } + + // Tombstone — CAS overwrite to revive. + log::info!( + "create_or_revive: reviving tombstone for '{}'", + log_id(ec_id) + ); + + let mut current_gen = generation; + for attempt in 0..MAX_CAS_RETRIES { + match store + .build_insert() + .if_generation_match(current_gen) + .metadata(&meta_str) + .time_to_live(ENTRY_TTL) + .execute(ec_id, body.as_str()) + { + Ok(()) => return Ok(()), + Err(fastly::kv_store::KVStoreError::ItemPreconditionFailed) => { + log::debug!( + "create_or_revive: CAS conflict on attempt {}/{MAX_CAS_RETRIES} for '{}'", + attempt + 1, + log_id(ec_id), + ); + // Re-read immediately to get a fresh generation. Sleeping in + // the CAS loop would block the Fastly Compute request worker. + match self.get(ec_id)? { + Some((refreshed, gen)) => { + if refreshed.consent.ok { + // Someone else revived it — done. + return Ok(()); + } + current_gen = gen; + } + None => return self.create(ec_id, entry), + } + } + Err(err) => { + return Err( + Report::new(err).change_context(TrustedServerError::KvStore { + store_name: self.store_name.clone(), + message: format!( + "Failed to revive tombstone for key '{ec_id}' on attempt {}", + attempt + 1, + ), + }), + ); + } + } + } + + Err(Report::new(TrustedServerError::KvStore { + store_name: self.store_name.clone(), + message: format!( + "CAS conflict after {MAX_CAS_RETRIES} retries reviving tombstone for '{ec_id}'" + ), + })) + } + + /// Atomically merges a partner ID into the existing entry. + /// + /// Uses CAS (generation markers) to avoid clobbering concurrent writes + /// from other partners. Retries up to [`MAX_CAS_RETRIES`] on conflict. + /// + /// Fails closed: if the root entry does not exist (e.g. the initial + /// `create_or_revive` failed) or is a withdrawal tombstone, the upsert is + /// rejected with an error and no entry is created or modified. If the + /// partner already has the requested UID, returns `Ok(())` without writing.
+ /// + /// # Errors + /// + /// Returns [`TrustedServerError::KvStore`] on store error or CAS + /// exhaustion after [`MAX_CAS_RETRIES`] attempts. + pub fn upsert_partner_id( + &self, + ec_id: &str, + partner_id: &str, + uid: &str, + ) -> Result<(), Report> { + // Open store once for write operations. Note: `self.get()` opens + // its own handle internally — this is intentional since `KVStore::open` + // is a cheap name lookup, and keeping the read/write APIs independent + // simplifies the method signatures. + let store = self.open_store()?; + + for attempt in 0..MAX_CAS_RETRIES { + let (mut entry, generation) = match self.get(ec_id)? { + Some(pair) => pair, + None => { + log::info!( + "upsert_partner_id: no entry for '{}', rejecting partner upsert", + log_id(ec_id) + ); + return Err(Report::new(TrustedServerError::KvStore { + store_name: self.store_name.clone(), + message: format!( + "Cannot upsert partner '{partner_id}' for missing key '{ec_id}'" + ), + })); + } + }; + + // Reject upserts on withdrawn entries — a late sync must not + // repopulate partner IDs after consent withdrawal. + if !entry.consent.ok { + log::info!( + "upsert_partner_id: entry for '{}' is a tombstone, rejecting upsert", + log_id(ec_id), + ); + return Err(Report::new(TrustedServerError::KvStore { + store_name: self.store_name.clone(), + message: format!( + "Cannot upsert partner '{partner_id}' for withdrawn key '{ec_id}'" + ), + })); + } + + if entry + .ids + .get(partner_id) + .is_some_and(|existing| existing.uid == uid) + { + return Ok(()); + } + + // Merge the partner ID. 
+ entry.ids.insert( + partner_id.to_owned(), + super::kv_types::KvPartnerId { + uid: uid.to_owned(), + }, + ); + + let (body, meta_str) = Self::serialize_entry(&entry, &self.store_name)?; + + match store + .build_insert() + .if_generation_match(generation) + .metadata(&meta_str) + .time_to_live(ENTRY_TTL) + .execute(ec_id, body.as_str()) + { + Ok(()) => return Ok(()), + Err(fastly::kv_store::KVStoreError::ItemPreconditionFailed) => { + log::debug!( + "upsert_partner_id: CAS conflict on attempt {}/{MAX_CAS_RETRIES} for '{}'", + attempt + 1, + log_id(ec_id), + ); + // Loop will re-read on next iteration. Do not sleep here: + // blocking sleeps burn edge compute while holding the request worker. + } + Err(err) => { + return Err( + Report::new(err).change_context(TrustedServerError::KvStore { + store_name: self.store_name.clone(), + message: format!( + "Failed to upsert partner '{partner_id}' for key '{ec_id}'" + ), + }), + ); + } + } + } + + Err(Report::new(TrustedServerError::KvStore { + store_name: self.store_name.clone(), + message: format!( + "CAS conflict after {MAX_CAS_RETRIES} retries upserting partner '{partner_id}' for '{ec_id}'" + ), + })) + } + + /// Upserts a partner ID only if the KV entry already exists. + /// + /// Like [`Self::upsert_partner_id`], this method never creates entries for + /// missing keys. Unlike it, a missing key or a withdrawal tombstone is not + /// an error: those cases are reported as [`UpsertResult::NotFound`] and + /// [`UpsertResult::ConsentWithdrawn`] so the caller can reject individual + /// mappings. Used by the S2S batch sync endpoint where + /// the KV entry must have been created by the organic EC flow. + /// + /// Returns [`UpsertResult::Unchanged`] when the existing UID already + /// matches the incoming UID, skipping the write. + /// + /// # Errors + /// + /// Returns [`TrustedServerError::KvStore`] on store I/O or CAS + /// exhaustion errors. + pub fn upsert_partner_id_if_exists( + &self, + ec_id: &str, + partner_id: &str, + uid: &str, + ) -> Result> { + let store = self.open_store()?; + + for attempt in 0..MAX_CAS_RETRIES { + let (mut entry, generation) = match self.get(ec_id)?
{ + Some(pair) => pair, + None => return Ok(UpsertResult::NotFound), + }; + + if !entry.consent.ok { + return Ok(UpsertResult::ConsentWithdrawn); + } + + if entry + .ids + .get(partner_id) + .is_some_and(|existing| existing.uid == uid) + { + return Ok(UpsertResult::Unchanged); + } + + entry.ids.insert( + partner_id.to_owned(), + super::kv_types::KvPartnerId { + uid: uid.to_owned(), + }, + ); + + let (body, meta_str) = Self::serialize_entry(&entry, &self.store_name)?; + + match store + .build_insert() + .if_generation_match(generation) + .metadata(&meta_str) + .time_to_live(ENTRY_TTL) + .execute(ec_id, body.as_str()) + { + Ok(()) => return Ok(UpsertResult::Written), + Err(fastly::kv_store::KVStoreError::ItemPreconditionFailed) => { + log::debug!( + "upsert_partner_id_if_exists: CAS conflict on attempt {}/{MAX_CAS_RETRIES} for '{}'", + attempt + 1, + log_id(ec_id), + ); + // Retry immediately; sleeping here blocks the edge worker. + } + Err(err) => { + return Err( + Report::new(err).change_context(TrustedServerError::KvStore { + store_name: self.store_name.clone(), + message: format!( + "Failed to upsert partner '{partner_id}' for key '{ec_id}'" + ), + }), + ); + } + } + } + + Err(Report::new(TrustedServerError::KvStore { + store_name: self.store_name.clone(), + message: format!( + "CAS conflict after {MAX_CAS_RETRIES} retries upserting partner '{partner_id}' for '{ec_id}'" + ), + })) + } + + /// Writes a withdrawal tombstone for consent enforcement. + /// + /// Overwrites the entry with `consent.ok = false`, empty partner IDs, + /// and a 24-hour TTL. Uses unconditional overwrite (no CAS) since the + /// entry is being withdrawn regardless of concurrent state. + /// + /// The tombstone preserves consent enforcement for batch sync clients + /// (`POST /_ts/api/v1/batch-sync`) during the 24-hour revocation window. + /// + /// # Errors + /// + /// Returns [`TrustedServerError::KvStore`] on store error. 
Callers on + /// the browser path should log at `error` level and continue — cookie + /// deletion is the primary enforcement mechanism. + pub fn write_withdrawal_tombstone( + &self, + ec_id: &str, + ) -> Result<(), Report> { + let store = self.open_store()?; + let entry = KvEntry::tombstone(current_timestamp()); + let (body, meta_str) = Self::serialize_entry(&entry, &self.store_name)?; + + store + .build_insert() + .metadata(&meta_str) + .time_to_live(TOMBSTONE_TTL) + .execute(ec_id, body) + .change_context(TrustedServerError::KvStore { + store_name: self.store_name.clone(), + message: format!("Failed to write tombstone for key '{ec_id}'"), + }) + } + + /// Counts the number of keys sharing the same EC hash prefix. + /// + /// Uses the Fastly KV list API with a prefix filter, limited to + /// [`CLUSTER_LIST_LIMIT`] keys. If the limit is reached, the count + /// is capped — the exact number beyond the limit is not meaningful + /// for disambiguation. + /// + /// # Errors + /// + /// Returns [`TrustedServerError::KvStore`] on store error. + pub fn count_hash_prefix_keys( + &self, + hash_prefix: &str, + ) -> Result> { + let store = self.open_store()?; + + // Request a single page of up to CLUSTER_LIST_LIMIT keys. + // The prefix ensures we only match EC IDs derived from the same + // IP+passphrase (i.e. same 64-hex hash). + let page = store + .build_list() + .prefix(hash_prefix) + .limit(CLUSTER_LIST_LIMIT) + .execute() + .change_context(TrustedServerError::KvStore { + store_name: self.store_name.clone(), + message: format!( + "Failed to list keys with prefix '{}'", + &hash_prefix[..hash_prefix.len().min(8)], + ), + })?; + + #[allow(clippy::cast_possible_truncation)] + let count = page.keys().len() as u32; + Ok(count) + } + + /// Evaluates the cluster size for an EC entry. + /// + /// Returns the stored `cluster_size` when it has already been evaluated. 
+ /// Otherwise, counts the number of keys sharing the same hash prefix via + /// [`count_hash_prefix_keys`](Self::count_hash_prefix_keys) and writes the + /// result back to the entry. The CAS write is best-effort — on conflict, + /// the computed value is still returned. + /// + /// # Errors + /// + /// Returns [`TrustedServerError::KvStore`] on store or list failure. + pub fn evaluate_cluster( + &self, + ec_id: &str, + entry: &KvEntry, + generation: u64, + ) -> Result, Report> { + if let Some(cluster_size) = entry + .network + .as_ref() + .and_then(|network| network.cluster_size) + { + log::trace!("evaluate_cluster: using stored cluster_size"); + return Ok(Some(cluster_size)); + } + + // Compute cluster size via prefix list. + let hash_prefix = ec_hash(ec_id); + let cluster_size = self.count_hash_prefix_keys(hash_prefix)?; + + log::debug!( + "evaluate_cluster: computed cluster_size={cluster_size} for '{}'", + log_id(ec_id) + ); + + // Best-effort CAS write-back — update only the cluster size so any + // future `network` fields are preserved across this lazy write. + let mut updated_entry = entry.clone(); + let mut network = updated_entry + .network + .unwrap_or(KvNetwork { cluster_size: None }); + network.cluster_size = Some(cluster_size); + updated_entry.network = Some(network); + + let store = self.open_store()?; + let (body, meta_str) = Self::serialize_entry(&updated_entry, &self.store_name)?; + + match store + .build_insert() + .if_generation_match(generation) + .metadata(&meta_str) + .time_to_live(ENTRY_TTL) + .execute(ec_id, body.as_str()) + { + Ok(()) => {} + Err(fastly::kv_store::KVStoreError::ItemPreconditionFailed) => { + log::debug!( + "evaluate_cluster: CAS conflict writing cluster_size for '{}', \ + returning computed value anyway", + log_id(ec_id), + ); + } + Err(err) => { + // Log but don't fail — the computed value is still valid. 
+ log::warn!( + "evaluate_cluster: failed to write cluster_size for '{}': {err}", + log_id(ec_id) + ); + } + } + + Ok(Some(cluster_size)) + } + + /// Hard-deletes the entry. + /// + /// Reserved for the IAB data deletion framework (deferred). For consent + /// withdrawal, use [`write_withdrawal_tombstone`](Self::write_withdrawal_tombstone). + /// + /// # Errors + /// + /// Returns [`TrustedServerError::KvStore`] on store error. + pub fn delete(&self, ec_id: &str) -> Result<(), Report> { + let store = self.open_store()?; + store + .delete(ec_id) + .change_context(TrustedServerError::KvStore { + store_name: self.store_name.clone(), + message: format!("Failed to delete key '{ec_id}'"), + }) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn constants_have_expected_values() { + assert_eq!(MAX_CAS_RETRIES, 5); + assert_eq!(ENTRY_TTL, Duration::from_secs(31_536_000)); + assert_eq!(TOMBSTONE_TTL, Duration::from_secs(86_400)); + assert_eq!(CLUSTER_LIST_LIMIT, 100); + } + + #[test] + fn current_timestamp_is_nonzero() { + let ts = current_timestamp(); + assert!(ts > 0, "should return a nonzero timestamp"); + } + + #[test] + fn serialize_entry_produces_valid_json() { + let entry = KvEntry::tombstone(1000); + let (body, meta) = + KvIdentityGraph::serialize_entry(&entry, "test-store").expect("should serialize entry"); + + // Verify body is valid JSON. + let _: KvEntry = + serde_json::from_str(&body).expect("should deserialize body back to KvEntry"); + + // Verify metadata is valid JSON. 
+ let _: KvMetadata = + serde_json::from_str(&meta).expect("should deserialize metadata back to KvMetadata"); + } + + #[test] + fn deserialize_entry_rejects_invalid_legacy_values() { + let mut entry = KvEntry::tombstone(1000); + entry.ids.insert( + "ssp_x".to_owned(), + crate::ec::kv_types::KvPartnerId { + uid: "x".repeat(crate::ec::kv_types::MAX_UID_LENGTH + 1), + }, + ); + let body = serde_json::to_vec(&entry).expect("should serialize invalid entry payload"); + + let err = KvIdentityGraph::deserialize_entry("test-store", "ec-id", &body) + .expect_err("should reject invalid legacy entry values"); + let err_text = format!("{err}"); + assert!( + err_text.contains("Loaded invalid entry"), + "should report validation failure for loaded entries" + ); + } + + #[test] + fn deserialize_entry_rejects_unsupported_schema_version() { + let mut entry = KvEntry::tombstone(1000); + entry.v = crate::ec::kv_types::SCHEMA_VERSION + 1; + let body = serde_json::to_vec(&entry).expect("should serialize future-version entry"); + + let err = KvIdentityGraph::deserialize_entry("test-store", "ec-id", &body) + .expect_err("should reject unsupported schema versions"); + let err_text = format!("{err}"); + assert!( + err_text.contains("unsupported KV entry schema version"), + "should surface schema version validation failures on load" + ); + } + + #[test] + fn serialize_entry_rejects_invalid_values() { + let mut entry = KvEntry::tombstone(1000); + entry.ids.insert( + "ssp_x".to_owned(), + crate::ec::kv_types::KvPartnerId { + uid: "x".repeat(crate::ec::kv_types::MAX_UID_LENGTH + 1), + }, + ); + + let err = KvIdentityGraph::serialize_entry(&entry, "test-store") + .expect_err("should reject invalid entries before writing"); + let err_text = format!("{err}"); + assert!( + err_text.contains("Refusing to serialize invalid KV entry"), + "should fail closed before serializing invalid KV writes" + ); + } + + #[test] + fn evaluate_cluster_returns_stored_value_without_store_io() { + let kv = 
KvIdentityGraph::new("nonexistent_store_for_cluster_cache_test"); + let ec_id = format!("{}.ABC123", "a".repeat(64)); + let mut entry = KvEntry::tombstone(1000); + entry.network = Some(KvNetwork { + cluster_size: Some(5), + }); + + let cluster_size = kv + .evaluate_cluster(&ec_id, &entry, 0) + .expect("should not touch store when cluster_size is already known"); + + assert_eq!( + cluster_size, + Some(5), + "should return stored cluster_size without re-listing keys" + ); + } +} diff --git a/crates/trusted-server-core/src/ec/kv_types.rs b/crates/trusted-server-core/src/ec/kv_types.rs new file mode 100644 index 00000000..65bf747c --- /dev/null +++ b/crates/trusted-server-core/src/ec/kv_types.rs @@ -0,0 +1,1214 @@ +//! KV identity graph schema types. +//! +//! These types define the JSON schema stored in the Fastly KV Store for the +//! EC identity graph. Each EC ID (`{64hex}.{6alnum}`) maps to a [`KvEntry`] +//! containing consent state, geo location, and accumulated partner IDs. +//! +//! The schema is versioned (`v: 1`) to allow future migrations. + +use std::collections::{BTreeMap, BTreeSet}; + +use serde::{Deserialize, Serialize}; + +use crate::consent::ConsentContext; +use crate::geo::GeoInfo; + +/// Current schema version for KV entries. +pub const SCHEMA_VERSION: u8 = 1; + +// Unsupported schema versions fail closed on read. Future schema bumps must +// add an explicit lazy migration or backfill path before changing this value. + +/// Maximum number of publisher-domain entries accepted in +/// [`KvPubProperties::seen_domains`]. +/// +/// New entries seed this set with the creation domain only. Runtime organic +/// requests no longer append domains. +pub const MAX_SEEN_DOMAINS: usize = 50; + +/// Maximum allowed hostname length for publisher domains stored in KV. +pub const MAX_STORED_DOMAIN_LENGTH: usize = 255; + +/// Maximum allowed length (in bytes) for a partner UID across all sync +/// mechanisms (pixel, batch, pull). 
Defined centrally to ensure consistent +/// validation. +pub const MAX_UID_LENGTH: usize = 512; + +/// Full KV entry stored as the body of an EC identity graph record. +/// +/// **KV key:** Full EC ID (`{64hex}.{6alnum}`). +/// **KV value:** JSON-serialized `KvEntry` (max ~5KB). +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct KvEntry { + /// Schema version — always [`SCHEMA_VERSION`]. + pub v: u8, + /// Unix timestamp (seconds) of initial entry creation. + pub created: u64, + /// Consent state sub-object. + pub consent: KvConsent, + /// Geo location sub-object. + pub geo: KvGeo, + /// Creation-time publisher property metadata. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub pub_properties: Option, + /// Device class signals (TLS fingerprint, UA platform). + /// Written once on creation — never updated after. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub device: Option, + /// Network cluster disambiguation data. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub network: Option, + /// Map of partner ID namespace → UID record. + /// Populated by pixel sync, batch sync, and pull sync operations. + #[serde(default, skip_serializing_if = "BTreeMap::is_empty")] + pub ids: BTreeMap, +} + +/// Consent state within a KV entry. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct KvConsent { + /// Raw TCF v2 consent string. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub tcf: Option, + /// Raw GPP consent string. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub gpp: Option, + /// `true` for a live entry, `false` for a withdrawal tombstone. + pub ok: bool, + /// Unix timestamp (seconds) of last consent state change. + pub updated: u64, +} + +/// Geo location within a KV entry. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct KvGeo { + /// ISO 3166-1 alpha-2 country code (e.g. `"US"`). 
+ pub country: String, + /// ISO 3166-2 region code (e.g. `"CA"` for California). + #[serde(default, skip_serializing_if = "Option::is_none")] + pub region: Option, + /// Autonomous System Number (e.g. `7922` = Comcast). + /// Primary signal for distinguishing home ISP vs. corporate VPN. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub asn: Option, + /// DMA/metro code (e.g. `807` = San Francisco). + /// Market-level targeting signal; not personal data. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub dma: Option, +} + +/// A partner user ID within a KV entry. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct KvPartnerId { + /// The partner's user identifier. + pub uid: String, +} + +/// Publisher property metadata captured when an EC entry is created. +/// +/// Earlier schema versions treated `seen_domains` as mutable domain history. +/// To avoid recurring organic-request KV writes, new entries now seed only the +/// creation domain and runtime requests do not append domains. Legacy map-shaped +/// records are accepted on read and reserialized as a domain list on future writes. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct KvPubProperties { + /// Apex domain where this EC entry was first created. + pub origin_domain: String, + /// Bounded set of publisher apex domains seen for this EC entry. + /// + /// New entries include the creation domain only; runtime requests do not + /// update this set. + #[serde( + default, + deserialize_with = "deserialize_seen_domains", + skip_serializing_if = "BTreeSet::is_empty" + )] + pub seen_domains: BTreeSet, +} + +fn deserialize_seen_domains<'de, D>(deserializer: D) -> Result, D::Error> +where + D: serde::Deserializer<'de>, +{ + #[derive(Deserialize)] + #[serde(untagged)] + enum SeenDomains { + List(Vec), + LegacyMap(BTreeMap), + } + + match SeenDomains::deserialize(deserializer)? 
{ + SeenDomains::List(domains) => Ok(domains.into_iter().collect()), + SeenDomains::LegacyMap(domains) => Ok(domains.into_keys().collect()), + } +} + +/// Coarse, non-PII device signals derived from TLS handshake and UA. +/// +/// Used by the `/_ts/api/v1/identify` endpoint for cross-suffix propagation decisions +/// and buyer-facing device quality scoring. Written once on +/// [`KvEntry`] creation — never updated after. +/// +/// **Privacy:** `ja4_class` (Section 1 only) and `platform_class` are +/// category signals, not unique device identifiers. The full JA4 +/// fingerprint (Sections 2–3) is never stored. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct KvDevice { + /// Mobile signal: `0` = confirmed desktop, `1` = confirmed mobile, + /// `2` = genuinely unknown (non-standard client). + /// Derived from UA platform string — no Client Hints required. + pub is_mobile: u8, + /// JA4 Section 1 only — browser family class identifier. + /// e.g. `"t13d1516h2"` = Chrome, `"t13d2013h2"` = Safari. + /// Never stores the full JA4 fingerprint. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub ja4_class: Option, + /// Coarse OS family from UA: `"mac"`, `"windows"`, `"ios"`, + /// `"android"`, `"linux"`. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub platform_class: Option, + /// SHA256 prefix (12 hex chars) of the HTTP/2 SETTINGS fingerprint. + /// Used alongside `ja4_class` for browser confirmation and bot detection. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub h2_fp_hash: Option, + /// `true` = known legitimate browser; `false` = known bot/scraper; + /// `None` = unknown. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub known_browser: Option, +} + +/// Network cluster disambiguation data. +/// +/// Tracks how many distinct EC entries share the same hash prefix. 
A high +/// count indicates a shared network (corporate VPN, campus); a low count +/// indicates an individual or household. +/// +/// Written only by the `/_ts/api/v1/identify` endpoint when `cluster_size` is +/// missing. Once stored, the value is reused because the EC entry no longer +/// stores a cluster-check timestamp. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct KvNetwork { + /// Number of distinct EC suffixes matching this hash prefix. + /// `None` = not yet evaluated. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub cluster_size: Option, +} + +/// Compact metadata stored alongside the KV entry body. +/// +/// Fastly KV metadata is limited to 2048 bytes and can be read without +/// streaming the full body. Used by batch sync for fast consent checks. +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct KvMetadata { + /// Mirrors [`KvConsent::ok`] — `false` means tombstone. + pub ok: bool, + /// Mirrors [`KvGeo::country`]. + pub country: String, + /// Mirrors [`KvEntry::v`]. + pub v: u8, + /// Mirrors [`KvNetwork::cluster_size`]. `None` = not yet evaluated. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub cluster_size: Option, + /// Mirrors [`KvDevice::is_mobile`]. Enables propagation gating without + /// body read. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub is_mobile: Option, + /// Mirrors [`KvDevice::known_browser`]. Buyer-facing quality signal. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub known_browser: Option, +} + +/// Validates a publisher domain before storing it in KV. +/// +/// Applies a lightweight hostname-shape check and lowercases ASCII labels so +/// stored values remain bounded and consistent. 
+#[must_use] +pub(crate) fn validated_stored_domain(domain: &str) -> Option { + if domain.is_empty() || domain.len() > MAX_STORED_DOMAIN_LENGTH { + return None; + } + if !domain.is_ascii() { + return None; + } + + let normalized = domain.trim_end_matches('.').to_ascii_lowercase(); + if normalized.is_empty() || normalized.len() > MAX_STORED_DOMAIN_LENGTH { + return None; + } + + let mut labels = normalized.split('.'); + let mut saw_label = false; + for label in &mut labels { + saw_label = true; + if label.is_empty() || label.len() > 63 { + return None; + } + + let bytes = label.as_bytes(); + let first = bytes.first().copied().expect("should have non-empty label"); + let last = bytes.last().copied().expect("should have non-empty label"); + if !first.is_ascii_alphanumeric() || !last.is_ascii_alphanumeric() { + return None; + } + if !bytes + .iter() + .copied() + .all(|byte| byte.is_ascii_alphanumeric() || byte == b'-') + { + return None; + } + } + + saw_label.then_some(normalized) +} + +impl KvEntry { + /// Creates a new live entry from the current request context. + /// + /// `domain` is the publisher's apex domain (e.g. `"autoblog.com"`), + /// used to initialize the [`KvPubProperties`] origin and first visit. + #[must_use] + pub fn new(consent: &ConsentContext, geo: Option<&GeoInfo>, now: u64, domain: &str) -> Self { + let pub_properties = validated_stored_domain(domain).map(|validated_domain| { + let mut seen_domains = BTreeSet::new(); + seen_domains.insert(validated_domain.clone()); + + KvPubProperties { + origin_domain: validated_domain, + seen_domains, + } + }); + + Self { + v: SCHEMA_VERSION, + created: now, + consent: KvConsent { + tcf: consent.raw_tc_string.clone(), + gpp: consent.raw_gpp_string.clone(), + ok: true, + updated: now, + }, + geo: KvGeo::from_geo_info(geo), + pub_properties, + device: None, + network: None, + ids: BTreeMap::new(), + } + } + + /// Creates a minimal live entry for the recovery path. 
+ /// + /// Used by [`super::kv::KvIdentityGraph::upsert_partner_id`] when the + /// root KV entry is missing (e.g. the initial best-effort + /// `create_or_revive` failed on EC generation). + #[must_use] + pub fn minimal(partner_id: &str, uid: &str, now: u64) -> Self { + let mut ids = BTreeMap::new(); + ids.insert( + partner_id.to_owned(), + KvPartnerId { + uid: uid.to_owned(), + }, + ); + Self { + v: SCHEMA_VERSION, + created: now, + consent: KvConsent { + tcf: None, + gpp: None, + ok: true, + updated: now, + }, + geo: KvGeo { + country: "ZZ".to_owned(), + region: None, + asn: None, + dma: None, + }, + pub_properties: None, + device: None, + network: None, + ids, + } + } + + /// Creates a withdrawal tombstone entry. + /// + /// Sets `consent.ok = false`, clears all partner IDs, and uses a + /// placeholder geo. The caller should apply a 24-hour TTL when writing. + /// + /// **Note:** The original `created` timestamp is intentionally not + /// preserved — reading the existing entry first would add latency on + /// the consent-withdrawal hot path, and the tombstone expires in 24h. + #[must_use] + pub fn tombstone(now: u64) -> Self { + Self { + v: SCHEMA_VERSION, + created: now, + consent: KvConsent { + tcf: None, + gpp: None, + ok: false, + updated: now, + }, + geo: KvGeo { + country: "ZZ".to_owned(), + region: None, + asn: None, + dma: None, + }, + pub_properties: None, + device: None, + network: None, + ids: BTreeMap::new(), + } + } + + /// Validates a deserialized entry loaded from KV. + /// + /// Rejects legacy or corrupt records that exceed the current bounded + /// storage contract rather than re-serializing them at unbounded size. + /// + /// # Errors + /// + /// Returns an error string describing the first bounds or shape violation + /// found in the deserialized record. 
+ pub fn validate(&self) -> Result<(), String> { + if self.v != SCHEMA_VERSION { + return Err(format!( + "unsupported KV entry schema version {} (expected {})", + self.v, SCHEMA_VERSION + )); + } + + for (partner_id, partner_uid) in &self.ids { + if partner_uid.uid.len() > MAX_UID_LENGTH { + return Err(format!( + "partner ID '{partner_id}' exceeds MAX_UID_LENGTH ({})", + partner_uid.uid.len() + )); + } + } + + if let Some(pub_properties) = &self.pub_properties { + if validated_stored_domain(&pub_properties.origin_domain).as_deref() + != Some(pub_properties.origin_domain.as_str()) + { + return Err(format!( + "origin_domain '{}' is invalid for stored KV data", + pub_properties.origin_domain + )); + } + + if pub_properties.seen_domains.len() > MAX_SEEN_DOMAINS { + return Err(format!( + "seen_domains exceeds MAX_SEEN_DOMAINS ({})", + pub_properties.seen_domains.len() + )); + } + + for domain in &pub_properties.seen_domains { + if validated_stored_domain(domain).as_deref() != Some(domain.as_str()) { + return Err(format!( + "seen_domains contains invalid stored domain '{domain}'" + )); + } + } + } + + Ok(()) + } +} + +impl KvMetadata { + /// Extracts metadata from a full entry. + #[must_use] + pub fn from_entry(entry: &KvEntry) -> Self { + Self { + ok: entry.consent.ok, + country: entry.geo.country.clone(), + v: entry.v, + cluster_size: entry.network.as_ref().and_then(|n| n.cluster_size), + is_mobile: entry.device.as_ref().map(|d| d.is_mobile), + known_browser: entry.device.as_ref().and_then(|d| d.known_browser), + } + } +} + +impl KvGeo { + /// Creates a `KvGeo` from an optional [`GeoInfo`]. + /// + /// Returns `country: "ZZ"` (unknown) when geo data is unavailable. 
+ #[must_use] + pub fn from_geo_info(geo: Option<&GeoInfo>) -> Self { + match geo { + Some(info) => { + let dma = if info.metro_code > 0 { + Some(info.metro_code) + } else { + None + }; + Self { + country: info.country.clone(), + region: info.region.clone(), + asn: info.asn, + dma, + } + } + None => Self { + country: "ZZ".to_owned(), + region: None, + asn: None, + dma: None, + }, + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn sample_consent_context() -> ConsentContext { + ConsentContext { + raw_tc_string: Some("CP_test_tc_string".to_owned()), + raw_gpp_string: Some("DBA_test_gpp".to_owned()), + ..ConsentContext::default() + } + } + + fn sample_geo_info() -> GeoInfo { + GeoInfo { + city: "San Francisco".to_owned(), + country: "US".to_owned(), + continent: "NorthAmerica".to_owned(), + latitude: 37.7749, + longitude: -122.4194, + metro_code: 807, + region: Some("CA".to_owned()), + asn: Some(7922), + } + } + + #[test] + fn entry_serialization_roundtrip() { + let geo = sample_geo_info(); + let consent = sample_consent_context(); + let mut entry = KvEntry::new(&consent, Some(&geo), 1741824000, "example.com"); + entry.ids.insert( + "liveramp".to_owned(), + KvPartnerId { + uid: "LR_xyz".to_owned(), + }, + ); + + let json = serde_json::to_string(&entry).expect("should serialize KvEntry"); + let deserialized: KvEntry = + serde_json::from_str(&json).expect("should deserialize KvEntry"); + + assert_eq!(deserialized.v, SCHEMA_VERSION); + assert_eq!(deserialized.created, 1741824000); + assert_eq!( + deserialized.consent.tcf.as_deref(), + Some("CP_test_tc_string") + ); + assert_eq!(deserialized.consent.gpp.as_deref(), Some("DBA_test_gpp")); + assert!(deserialized.consent.ok, "should be a live entry"); + assert_eq!(deserialized.geo.country, "US"); + assert_eq!(deserialized.geo.region.as_deref(), Some("CA")); + assert_eq!(deserialized.geo.asn, Some(7922)); + assert_eq!(deserialized.geo.dma, Some(807)); + assert_eq!( + deserialized.ids.get("liveramp").map(|p| 
p.uid.as_str()), + Some("LR_xyz"), + ); + assert!( + !json.contains("last_seen") + && !json.contains("synced") + && !json.contains("first") + && !json.contains("last") + && !json.contains("cluster_checked") + && !json.contains("visits"), + "serialized entry should omit removed timestamp and visit fields: {json}" + ); + } + + #[test] + fn legacy_timestamp_fields_deserialize_and_are_omitted_on_reserialize() { + let json = r#"{ + "v": 1, + "created": 1000, + "last_seen": 1200, + "consent": { "ok": true, "updated": 1000 }, + "geo": { "country": "US" }, + "ids": { + "liveramp": { "uid": "abc", "synced": 1100 } + }, + "pub_properties": { + "origin_domain": "example.com", + "seen_domains": { + "example.com": { "first": 1000, "last": 1200, "visits": 3 } + } + }, + "network": { + "cluster_size": 5, + "cluster_checked": 1200 + } + }"#; + + let entry: KvEntry = serde_json::from_str(json) + .expect("should deserialize legacy entry with removed timestamp fields"); + + assert_eq!(entry.created, 1000); + assert_eq!(entry.consent.updated, 1000); + assert_eq!( + entry + .ids + .get("liveramp") + .map(|partner| partner.uid.as_str()), + Some("abc") + ); + assert!( + entry + .pub_properties + .as_ref() + .is_some_and(|props| props.seen_domains.contains("example.com")), + "should preserve legacy seen domain key" + ); + assert_eq!( + entry + .network + .as_ref() + .and_then(|network| network.cluster_size), + Some(5) + ); + + let reserialized = serde_json::to_string(&entry).expect("should reserialize entry"); + assert!( + !reserialized.contains("last_seen") + && !reserialized.contains("synced") + && !reserialized.contains("first") + && !reserialized.contains("last") + && !reserialized.contains("cluster_checked") + && !reserialized.contains("visits"), + "reserialized entry should omit removed timestamp and visit fields: {reserialized}" + ); + } + + #[test] + fn metadata_serialization_roundtrip() { + let meta = KvMetadata { + ok: true, + country: "US".to_owned(), + v: 1, + cluster_size: 
None, + is_mobile: None, + known_browser: None, + }; + + let json = serde_json::to_string(&meta).expect("should serialize KvMetadata"); + let deserialized: KvMetadata = + serde_json::from_str(&json).expect("should deserialize KvMetadata"); + + assert!(deserialized.ok, "should be ok=true"); + assert_eq!(deserialized.country, "US"); + assert_eq!(deserialized.v, 1); + assert!(deserialized.cluster_size.is_none()); + } + + #[test] + fn metadata_with_cluster_size_roundtrip() { + let meta = KvMetadata { + ok: true, + country: "US".to_owned(), + v: 1, + cluster_size: Some(3), + is_mobile: None, + known_browser: None, + }; + + let json = serde_json::to_string(&meta).expect("should serialize KvMetadata"); + let deserialized: KvMetadata = + serde_json::from_str(&json).expect("should deserialize KvMetadata"); + + assert_eq!(deserialized.cluster_size, Some(3)); + } + + #[test] + fn metadata_without_cluster_size_deserializes() { + // Simulates metadata stored before cluster_size was added. + let json = r#"{"ok":true,"country":"US","v":1}"#; + let meta: KvMetadata = serde_json::from_str(json).expect("should deserialize old metadata"); + + assert!(meta.cluster_size.is_none(), "should default to None"); + } + + #[test] + fn metadata_fits_in_2048_bytes() { + // Worst case: all fields populated. 
+ let meta = KvMetadata { + ok: false, + country: "XX".to_owned(), + v: SCHEMA_VERSION, + cluster_size: Some(u32::MAX), + is_mobile: Some(2), + known_browser: Some(true), + }; + let json = serde_json::to_string(&meta).expect("should serialize KvMetadata"); + assert!( + json.len() <= 2048, + "metadata must fit in Fastly's 2048-byte limit, got {} bytes", + json.len() + ); + } + + #[test] + fn new_entry_has_correct_initial_state() { + let consent = sample_consent_context(); + let geo = sample_geo_info(); + let entry = KvEntry::new(&consent, Some(&geo), 1000, "example.com"); + + assert_eq!(entry.v, SCHEMA_VERSION); + assert_eq!(entry.created, 1000); + assert!(entry.consent.ok, "should be a live entry"); + assert_eq!(entry.consent.updated, 1000); + assert_eq!(entry.geo.country, "US"); + assert!(entry.ids.is_empty(), "should have no partner IDs initially"); + + let props = entry + .pub_properties + .as_ref() + .expect("should have pub_properties"); + assert_eq!(props.origin_domain, "example.com"); + assert_eq!(props.seen_domains.len(), 1); + assert!( + props.seen_domains.contains("example.com"), + "should have origin domain" + ); + } + + #[test] + fn new_entry_without_geo_uses_zz() { + let consent = ConsentContext::default(); + let entry = KvEntry::new(&consent, None, 1000, "example.com"); + assert_eq!( + entry.geo.country, "ZZ", + "should use ZZ when geo is unavailable" + ); + assert!(entry.geo.region.is_none()); + assert!(entry.geo.asn.is_none()); + assert!(entry.geo.dma.is_none()); + } + + #[test] + fn validated_stored_domain_accepts_and_normalizes_ascii_hostnames() { + assert_eq!( + validated_stored_domain("Example.COM."), + Some("example.com".to_owned()), + "should lowercase and trim a trailing dot" + ); + } + + #[test] + fn validated_stored_domain_rejects_invalid_shapes() { + assert!( + validated_stored_domain("bad_domain").is_none(), + "underscores should be rejected" + ); + assert!( + validated_stored_domain("-example.com").is_none(), + "labels should not start 
with hyphens" + ); + assert!( + validated_stored_domain(&"a".repeat(MAX_STORED_DOMAIN_LENGTH + 1)).is_none(), + "overlong hostnames should be rejected" + ); + } + + #[test] + fn new_entry_skips_pub_properties_for_invalid_domain() { + let consent = sample_consent_context(); + let geo = sample_geo_info(); + let entry = KvEntry::new(&consent, Some(&geo), 1000, "bad_domain"); + + assert!( + entry.pub_properties.is_none(), + "invalid stored domains should not be persisted into pub_properties" + ); + } + + #[test] + fn validate_rejects_oversized_partner_uid() { + let mut entry = KvEntry::tombstone(1000); + entry.ids.insert( + "ssp_x".to_owned(), + KvPartnerId { + uid: "x".repeat(MAX_UID_LENGTH + 1), + }, + ); + + let err = entry + .validate() + .expect_err("should reject oversized partner UIDs"); + assert!( + err.contains("MAX_UID_LENGTH"), + "should describe the UID length validation failure" + ); + } + + #[test] + fn validate_rejects_unexpected_schema_version() { + let mut entry = KvEntry::tombstone(1000); + entry.v = SCHEMA_VERSION + 1; + + let err = entry + .validate() + .expect_err("should reject unsupported schema versions"); + assert!( + err.contains("unsupported KV entry schema version"), + "should describe the schema version validation failure" + ); + } + + #[test] + fn validate_rejects_seen_domains_over_cap() { + let consent = sample_consent_context(); + let geo = sample_geo_info(); + let mut entry = KvEntry::new(&consent, Some(&geo), 1000, "example.com"); + let pub_properties = entry + .pub_properties + .as_mut() + .expect("should initialize pub_properties"); + + for idx in 0..MAX_SEEN_DOMAINS { + pub_properties + .seen_domains + .insert(format!("extra-{idx}.example.com")); + } + + let err = entry + .validate() + .expect_err("should reject entries with too many seen domains"); + assert!( + err.contains("MAX_SEEN_DOMAINS"), + "should describe the seen_domains bound violation" + ); + } + + #[test] + fn seen_domains_serialize_in_deterministic_key_order() { + let 
consent = sample_consent_context(); + let geo = sample_geo_info(); + let mut entry = KvEntry::new(&consent, Some(&geo), 1000, "example.com"); + let pub_properties = entry + .pub_properties + .as_mut() + .expect("should initialize pub_properties"); + + pub_properties + .seen_domains + .insert("z.example.com".to_owned()); + pub_properties + .seen_domains + .insert("a.example.com".to_owned()); + + let json = serde_json::to_string(&entry).expect("should serialize KV entry"); + let a_index = json + .find("a.example.com") + .expect("should contain a.example.com"); + let z_index = json + .find("z.example.com") + .expect("should contain z.example.com"); + assert!( + a_index < z_index, + "should serialize seen_domains in lexical key order" + ); + } + + #[test] + fn validate_rejects_invalid_stored_domain_shapes() { + let consent = sample_consent_context(); + let geo = sample_geo_info(); + let mut entry = KvEntry::new(&consent, Some(&geo), 1000, "example.com"); + let pub_properties = entry + .pub_properties + .as_mut() + .expect("should initialize pub_properties"); + pub_properties.origin_domain = "bad_domain".to_owned(); + + let err = entry + .validate() + .expect_err("should reject invalid stored origin domains"); + assert!( + err.contains("origin_domain"), + "should report the invalid stored origin_domain" + ); + } + + #[test] + fn minimal_entry_has_partner_id_and_placeholder_geo() { + let entry = KvEntry::minimal("ssp_x", "abc123", 1741824000); + + assert_eq!(entry.v, SCHEMA_VERSION); + assert!(entry.consent.ok, "should be a live entry"); + assert_eq!(entry.geo.country, "ZZ"); + assert!( + entry.pub_properties.is_none(), + "minimal entry should have no pub_properties" + ); + assert_eq!(entry.ids.len(), 1); + let partner = entry.ids.get("ssp_x").expect("should have ssp_x entry"); + assert_eq!(partner.uid, "abc123"); + } + + #[test] + fn tombstone_entry_has_correct_shape() { + let entry = KvEntry::tombstone(1741910400); + + assert_eq!(entry.v, SCHEMA_VERSION); + 
assert!(!entry.consent.ok, "should be a tombstone"); + assert!(entry.ids.is_empty(), "tombstone should have no partner IDs"); + assert_eq!(entry.geo.country, "ZZ"); + assert_eq!(entry.consent.updated, 1741910400); + assert!( + entry.pub_properties.is_none(), + "tombstone should have no pub_properties" + ); + } + + #[test] + fn metadata_from_entry_mirrors_fields() { + let consent = sample_consent_context(); + let geo = sample_geo_info(); + let entry = KvEntry::new(&consent, Some(&geo), 1000, "example.com"); + let meta = KvMetadata::from_entry(&entry); + + assert_eq!(meta.ok, entry.consent.ok); + assert_eq!(meta.country, entry.geo.country); + assert_eq!(meta.v, entry.v); + } + + #[test] + fn tombstone_metadata_has_ok_false() { + let entry = KvEntry::tombstone(1000); + let meta = KvMetadata::from_entry(&entry); + + assert!(!meta.ok, "tombstone metadata should have ok=false"); + } + + #[test] + fn empty_ids_omitted_from_json() { + let entry = KvEntry::tombstone(1000); + let json = serde_json::to_string(&entry).expect("should serialize"); + assert!( + !json.contains("\"ids\""), + "empty ids should be omitted from JSON, got: {json}" + ); + } + + #[test] + fn none_consent_fields_omitted_from_json() { + let entry = KvEntry::tombstone(1000); + let json = serde_json::to_string(&entry).expect("should serialize"); + assert!( + !json.contains("\"tcf\""), + "None tcf should be omitted from JSON" + ); + assert!( + !json.contains("\"gpp\""), + "None gpp should be omitted from JSON" + ); + } + + #[test] + fn none_geo_fields_omitted_from_json() { + let entry = KvEntry::tombstone(1000); + let json = serde_json::to_string(&entry).expect("should serialize"); + assert!( + !json.contains("\"asn\""), + "None asn should be omitted from JSON" + ); + assert!( + !json.contains("\"dma\""), + "None dma should be omitted from JSON" + ); + } + + #[test] + fn geo_with_asn_and_dma_roundtrips() { + let geo = KvGeo { + country: "US".to_owned(), + region: Some("CA".to_owned()), + asn: Some(7922), + 
dma: Some(807), + }; + let json = serde_json::to_string(&geo).expect("should serialize KvGeo"); + let deserialized: KvGeo = serde_json::from_str(&json).expect("should deserialize KvGeo"); + + assert_eq!(deserialized.asn, Some(7922)); + assert_eq!(deserialized.dma, Some(807)); + } + + #[test] + fn geo_without_asn_deserializes_from_v1_json() { + // Simulates a KvGeo stored before asn/dma fields were added. + let v1_json = r#"{"country":"US","region":"CA"}"#; + let geo: KvGeo = serde_json::from_str(v1_json).expect("should deserialize v1 KvGeo"); + + assert_eq!(geo.country, "US"); + assert_eq!(geo.region.as_deref(), Some("CA")); + assert!(geo.asn.is_none(), "asn should default to None"); + assert!(geo.dma.is_none(), "dma should default to None"); + } + + #[test] + fn pub_properties_roundtrip() { + let consent = sample_consent_context(); + let geo = sample_geo_info(); + let entry = KvEntry::new(&consent, Some(&geo), 1000, "autoblog.com"); + + let json = serde_json::to_string(&entry).expect("should serialize"); + let deserialized: KvEntry = serde_json::from_str(&json).expect("should deserialize"); + + let props = deserialized + .pub_properties + .expect("should have pub_properties"); + assert_eq!(props.origin_domain, "autoblog.com"); + assert_eq!(props.seen_domains.len(), 1); + assert!( + props.seen_domains.contains("autoblog.com"), + "should have origin domain" + ); + } + + #[test] + fn none_pub_properties_omitted_from_json() { + let entry = KvEntry::tombstone(1000); + let json = serde_json::to_string(&entry).expect("should serialize"); + assert!( + !json.contains("\"pub_properties\""), + "None pub_properties should be omitted from JSON, got: {json}" + ); + } + + #[test] + fn entry_without_pub_properties_deserializes() { + // Simulates an entry stored before pub_properties was added. 
+ let json = r#"{ + "v": 1, + "created": 1000, + "last_seen": 1000, + "consent": { "ok": true, "updated": 1000 }, + "geo": { "country": "US" } + }"#; + let entry: KvEntry = + serde_json::from_str(json).expect("should deserialize entry without pub_properties"); + + assert!( + entry.pub_properties.is_none(), + "missing pub_properties should deserialize as None" + ); + } + + #[test] + fn pub_properties_deserializes_new_seen_domains_list_shape() { + let json = r#"{ + "origin_domain": "autoblog.com", + "seen_domains": ["autoblog.com"] + }"#; + + let props: KvPubProperties = + serde_json::from_str(json).expect("should deserialize new seen_domains list shape"); + + assert_eq!(props.origin_domain, "autoblog.com"); + assert_eq!(props.seen_domains.len(), 1); + assert!( + props.seen_domains.contains("autoblog.com"), + "should include listed domain" + ); + } + + #[test] + fn network_roundtrip() { + let network = KvNetwork { + cluster_size: Some(3), + }; + let json = serde_json::to_string(&network).expect("should serialize KvNetwork"); + let deserialized: KvNetwork = + serde_json::from_str(&json).expect("should deserialize KvNetwork"); + + assert_eq!(deserialized.cluster_size, Some(3)); + } + + #[test] + fn network_none_fields_omitted_from_json() { + let network = KvNetwork { cluster_size: None }; + let json = serde_json::to_string(&network).expect("should serialize"); + assert!( + !json.contains("\"cluster_size\""), + "None cluster_size should be omitted, got: {json}" + ); + } + + #[test] + fn none_network_omitted_from_entry_json() { + let entry = KvEntry::tombstone(1000); + let json = serde_json::to_string(&entry).expect("should serialize"); + assert!( + !json.contains("\"network\""), + "None network should be omitted from JSON, got: {json}" + ); + } + + #[test] + fn entry_without_network_deserializes() { + // Simulates an entry stored before network was added. 
+ let json = r#"{ + "v": 1, + "created": 1000, + "last_seen": 1000, + "consent": { "ok": true, "updated": 1000 }, + "geo": { "country": "US" } + }"#; + let entry: KvEntry = + serde_json::from_str(json).expect("should deserialize entry without network"); + + assert!( + entry.network.is_none(), + "missing network should deserialize as None" + ); + } + + #[test] + fn metadata_from_entry_mirrors_cluster_size() { + let consent = sample_consent_context(); + let geo = sample_geo_info(); + let mut entry = KvEntry::new(&consent, Some(&geo), 1000, "example.com"); + entry.network = Some(KvNetwork { + cluster_size: Some(5), + }); + + let meta = KvMetadata::from_entry(&entry); + assert_eq!( + meta.cluster_size, + Some(5), + "metadata should mirror entry network cluster_size" + ); + } + + #[test] + fn metadata_from_entry_without_network_has_none_cluster_size() { + let entry = KvEntry::tombstone(1000); + let meta = KvMetadata::from_entry(&entry); + assert!( + meta.cluster_size.is_none(), + "metadata should have None cluster_size when entry has no network" + ); + } + + #[test] + fn device_roundtrip() { + let device = KvDevice { + is_mobile: 0, + ja4_class: Some("t13d1516h2".to_owned()), + platform_class: Some("mac".to_owned()), + h2_fp_hash: Some("a3f9d21c8b04".to_owned()), + known_browser: Some(true), + }; + let json = serde_json::to_string(&device).expect("should serialize KvDevice"); + let deserialized: KvDevice = + serde_json::from_str(&json).expect("should deserialize KvDevice"); + + assert_eq!(deserialized.is_mobile, 0); + assert_eq!(deserialized.ja4_class.as_deref(), Some("t13d1516h2")); + assert_eq!(deserialized.platform_class.as_deref(), Some("mac")); + assert_eq!(deserialized.h2_fp_hash.as_deref(), Some("a3f9d21c8b04")); + assert_eq!(deserialized.known_browser, Some(true)); + } + + #[test] + fn device_none_fields_omitted_from_json() { + let device = KvDevice { + is_mobile: 2, + ja4_class: None, + platform_class: None, + h2_fp_hash: None, + known_browser: None, + }; + let 
json = serde_json::to_string(&device).expect("should serialize"); + assert!( + !json.contains("\"ja4_class\""), + "None ja4_class should be omitted, got: {json}" + ); + assert!( + !json.contains("\"known_browser\""), + "None known_browser should be omitted, got: {json}" + ); + } + + #[test] + fn none_device_omitted_from_entry_json() { + let entry = KvEntry::tombstone(1000); + let json = serde_json::to_string(&entry).expect("should serialize"); + assert!( + !json.contains("\"device\""), + "None device should be omitted from JSON, got: {json}" + ); + } + + #[test] + fn entry_without_device_deserializes() { + let json = r#"{ + "v": 1, + "created": 1000, + "last_seen": 1000, + "consent": { "ok": true, "updated": 1000 }, + "geo": { "country": "US" } + }"#; + let entry: KvEntry = + serde_json::from_str(json).expect("should deserialize entry without device"); + + assert!( + entry.device.is_none(), + "missing device should deserialize as None" + ); + } + + #[test] + fn metadata_from_entry_mirrors_device_fields() { + let consent = sample_consent_context(); + let geo = sample_geo_info(); + let mut entry = KvEntry::new(&consent, Some(&geo), 1000, "example.com"); + entry.device = Some(KvDevice { + is_mobile: 1, + ja4_class: Some("t13d2013h2".to_owned()), + platform_class: Some("ios".to_owned()), + h2_fp_hash: None, + known_browser: Some(true), + }); + + let meta = KvMetadata::from_entry(&entry); + assert_eq!( + meta.is_mobile, + Some(1), + "metadata should mirror device is_mobile" + ); + assert_eq!( + meta.known_browser, + Some(true), + "metadata should mirror device known_browser" + ); + } + + #[test] + fn metadata_without_device_fields_deserializes() { + let json = r#"{"ok":true,"country":"US","v":1}"#; + let meta: KvMetadata = serde_json::from_str(json).expect("should deserialize old metadata"); + + assert!(meta.is_mobile.is_none(), "is_mobile should default to None"); + assert!( + meta.known_browser.is_none(), + "known_browser should default to None" + ); + } +} diff --git 
a/crates/trusted-server-core/src/ec/mod.rs b/crates/trusted-server-core/src/ec/mod.rs new file mode 100644 index 00000000..f20586c1 --- /dev/null +++ b/crates/trusted-server-core/src/ec/mod.rs @@ -0,0 +1,658 @@ +//! Edge Cookie (EC) identity subsystem. +//! +//! This module owns the EC lifecycle: +//! +//! 1. **Read** — [`EcContext::read_from_request`] extracts any existing EC ID +//! from cookies, captures the client IP, and builds the consent +//! context. This is called pre-routing on every request. +//! +//! 2. **Generate** — [`EcContext::generate_if_needed`] creates a new EC ID +//! when none exists and consent allows it. This is called only in organic +//! handlers (publisher proxy, integration proxy) — never in read-only +//! endpoints like `/_ts/api/v1/identify`. +//! +//! # Module structure +//! +//! - auth (private) — shared Bearer-token authentication helpers +//! - [`generation`] — HMAC-based ID generation, IP normalization, format helpers +//! - [`consent`] — EC-specific consent gating wrapper +//! - [`cookies`] — `Set-Cookie` header creation and expiration helpers +//! - [`kv`] — KV Store identity graph operations (CAS, tombstones, debounce) +//! - [`kv_types`] — Schema types for KV identity graph entries +//! - [`device`] — Device signal derivation (UA, JA4, H2 fingerprinting) +//! - [`partner`] — Partner validation helpers (ID format, pull sync config) +//! - [`registry`] — In-memory partner registry built from config +//! - [`rate_limiter`] — Rate limiting abstraction (Fastly Edge Rate Limiting) +//! - [`identify`] — Identity read endpoint (`GET /_ts/api/v1/identify`) +//! - [`eids`] — Shared EID resolution and formatting helpers +//! - [`batch_sync`] — S2S batch sync endpoint (`POST /_ts/api/v1/batch-sync`) +//! 
- [`pull_sync`] — Background pull-sync dispatcher for organic routes + +mod auth; + +pub mod batch_sync; +pub mod consent; +pub mod cookies; +pub mod device; +pub mod eids; +pub mod finalize; +pub mod generation; +pub mod identify; +pub mod kv; +pub mod kv_types; +pub mod partner; +pub mod prebid_eids; +pub mod pull_sync; +pub mod rate_limiter; +pub mod registry; + +/// Truncates an EC ID for safe inclusion in log messages. +/// +/// Returns the first 8 characters followed by `…` to aid debugging without +/// writing the full user identifier to logs (satisfies the `CodeQL` +/// "cleartext logging of sensitive information" rule). +#[must_use] +pub fn log_id(ec_id: &str) -> String { + let prefix = ec_id.get(..8).unwrap_or(ec_id); + format!("{prefix}…") +} + +use cookie::CookieJar; +use error_stack::Report; +use fastly::Request; + +use crate::compat; +use crate::consent::{self as consent_mod, ConsentContext, ConsentPipelineInput}; +use crate::constants::COOKIE_TS_EC; +use crate::cookies::handle_request_cookies; +use crate::ec::cookies::ec_id_has_only_allowed_chars; +use crate::error::TrustedServerError; +use crate::geo::GeoInfo; +use crate::settings::Settings; +use device::DeviceSignals; + +use self::kv::KvIdentityGraph; +use self::kv_types::KvEntry; + +pub use generation::{ + ec_hash, generate_ec_id, is_valid_ec_hash, is_valid_ec_id, normalize_ec_id_for_kv, +}; + +/// Parsed EC identity from an incoming request. +struct RequestEc { + /// EC ID from the `ts-ec` cookie, if present. + cookie_ec: Option, + /// The parsed cookie jar (retained for consent pipeline input). + jar: Option, +} + +/// Parses EC identity from request cookies in a single pass. 
+/// +/// # Errors +/// +/// - [`TrustedServerError::InvalidHeaderValue`] if cookie parsing fails +fn parse_ec_from_request(req: &Request) -> Result> { + let http_req = compat::from_fastly_headers_ref(req); + let jar = handle_request_cookies(&http_req)?; + let cookie_ec = jar + .as_ref() + .and_then(|j| j.get(COOKIE_TS_EC)) + .map(cookie::Cookie::value) + .and_then(|value| request_ec_id_if_allowed(value, "ts-ec cookie")); + + Ok(RequestEc { cookie_ec, jar }) +} + +fn request_ec_id_if_allowed(value: &str, source: &str) -> Option { + if ec_id_has_only_allowed_chars(value) { + return Some(value.to_owned()); + } + + log::warn!("Rejected EC ID from {source} with disallowed characters"); + None +} + +/// Gets an existing EC ID from the request. +/// +/// Attempts to retrieve an existing EC ID from the `ts-ec` cookie. +/// +/// Returns `None` if the cookie does not contain a valid EC ID. +/// +/// # Errors +/// +/// - [`TrustedServerError::InvalidHeaderValue`] if cookie parsing fails +pub fn get_ec_id(req: &fastly::Request) -> Result, Report> { + let parsed = parse_ec_from_request(req)?; + let ec_id = parsed.cookie_ec.filter(|v| is_valid_ec_id(v)); + if let Some(ref id) = ec_id { + log::trace!("Existing EC ID found: {}", log_id(id)); + } + Ok(ec_id) +} + +/// Captures the EC state for a single request lifecycle. +/// +/// Created via [`read_from_request`](Self::read_from_request) during +/// pre-routing, then optionally mutated by +/// [`generate_if_needed`](Self::generate_if_needed) in organic handlers. +#[derive(Debug)] +pub struct EcContext { + /// The EC ID value, if one exists (from request) or was generated. + ec_value: Option, + /// The EC ID from the `ts-ec` cookie, if present on the incoming + /// request. Stored separately from `ec_value` because the header may + /// take precedence, but revocation still needs the cookie value. + cookie_ec_value: Option, + /// Whether an EC ID was found on the incoming request (header or cookie). 
+ ec_was_present: bool, + /// Whether a new EC ID was generated during this request. + ec_generated: bool, + /// The consent context for this request. + consent: ConsentContext, + /// The normalized client IP, captured early before the request body + /// is consumed. `None` when the platform cannot determine client IP. + client_ip: Option, + /// Geo information captured pre-routing for downstream KV writes. + geo_info: Option, + /// Device signals derived from TLS/H2/UA in the adapter layer. + /// Set via [`EcContext::set_device_signals`] before + /// [`EcContext::generate_if_needed`] is called. + device_signals: Option, +} + +impl EcContext { + /// Reads EC state from an incoming request without generating a new ID. + /// + /// This is the first phase of the EC lifecycle. It: + /// - Checks the `ts-ec` cookie for an existing EC ID + /// - Captures the client IP (normalized) for later generation + /// - Builds the full [`ConsentContext`] from cookies, headers, and geo + /// + /// Call this pre-routing on **every** request. + /// + /// # Errors + /// + /// Returns an error if cookie parsing fails. + pub fn read_from_request( + settings: &Settings, + req: &Request, + ) -> Result> { + Self::read_from_request_with_geo(settings, req, None) + } + + /// Reads EC state from an incoming request using pre-extracted geo data. + /// + /// Use this when geo has already been resolved in router prelude to avoid + /// duplicate lookup work. + /// + /// # Errors + /// + /// Returns an error if cookie parsing fails. + pub fn read_from_request_with_geo( + settings: &Settings, + req: &Request, + geo_info: Option<&GeoInfo>, + ) -> Result> { + let parsed = parse_ec_from_request(req)?; + + let ec_value = parsed.cookie_ec.clone().filter(|v| is_valid_ec_id(v)); + let ec_was_present = ec_value.is_some(); + + if let Some(ref id) = ec_value { + log::trace!("Existing EC ID found: {}", log_id(id)); + } + + // Capture the client IP now — the request body may be consumed later. 
+ let client_ip = generation::extract_client_ip(req).ok(); + let http_req = compat::from_fastly_headers_ref(req); + + // Build consent context from request-local cookies, headers, and geo. + let consent = consent_mod::build_consent_context(&ConsentPipelineInput { + jar: parsed.jar.as_ref(), + req: &http_req, + config: &settings.consent, + geo: geo_info, + }); + + log::info!( + "EC context: present={}, cookie_present={}, consent_allowed={}, jurisdiction={}", + ec_was_present, + parsed.cookie_ec.is_some(), + consent::ec_consent_granted(&consent), + consent.jurisdiction, + ); + + Ok(Self { + ec_value, + cookie_ec_value: parsed.cookie_ec, + ec_was_present, + ec_generated: false, + consent, + client_ip, + geo_info: geo_info.cloned(), + device_signals: None, + }) + } + + /// Generates a new EC ID if none exists and consent allows it. + /// + /// This is the second phase of the EC lifecycle. Call this only in + /// organic handlers (publisher proxy, integration proxy, auction) — + /// never in read-only endpoints. + /// + /// If an EC ID already exists (from the request), this is a no-op. + /// If consent does not permit EC creation, this is a no-op. + /// + /// # Errors + /// + /// Returns an error if the client IP is unavailable and generation is + /// needed, or if HMAC generation fails. 
+ pub fn generate_if_needed( + &mut self, + settings: &Settings, + kv: Option<&KvIdentityGraph>, + ) -> Result<(), Report> { + if self.ec_value.is_some() { + return Ok(()); + } + + if !consent::ec_consent_granted(&self.consent) { + log::info!( + "EC generation skipped: consent not granted (jurisdiction={})", + self.consent.jurisdiction, + ); + return Ok(()); + } + + let client_ip = self.client_ip.as_deref().ok_or_else(|| { + Report::new(TrustedServerError::EdgeCookie { + message: "Client IP required for EC generation but unavailable".to_string(), + }) + })?; + + let ec_id = generation::generate_ec_id(settings, client_ip)?; + log::info!("Generated new EC ID: {}", log_id(&ec_id)); + self.ec_value = Some(ec_id); + self.ec_generated = true; + + if let (Some(graph), Some(ec_value)) = (kv, self.ec_value.as_deref()) { + let now = current_timestamp(); + let mut entry = KvEntry::new( + &self.consent, + self.geo_info.as_ref(), + now, + &settings.publisher.domain, + ); + entry.device = self + .device_signals + .as_ref() + .map(DeviceSignals::to_kv_device); + + if let Err(err) = graph.create_or_revive(ec_value, &entry) { + log::error!( + "Failed to create or revive EC entry for id '{}' after generation: {err:?}", + log_id(ec_value), + ); + self.ec_value = None; + self.ec_generated = false; + } + } + + Ok(()) + } + + /// Returns the EC ID value, if present (either from request or generated). + #[must_use] + pub fn ec_value(&self) -> Option<&str> { + self.ec_value.as_deref() + } + + /// Returns whether the `ts-ec` cookie was present on the incoming request. + #[must_use] + pub fn cookie_was_present(&self) -> bool { + self.cookie_ec_value.is_some() + } + + /// Returns whether an EC ID was found in the `ts-ec` cookie on the + /// incoming request. + #[must_use] + pub fn ec_was_present(&self) -> bool { + self.ec_was_present + } + + /// Returns whether a new EC ID was generated during this request. 
+ #[must_use] + pub fn ec_generated(&self) -> bool { + self.ec_generated + } + + /// Returns a reference to the consent context for this request. + #[must_use] + pub fn consent(&self) -> &ConsentContext { + &self.consent + } + + /// Returns a mutable reference to the consent context. + /// + /// Used by `/_ts/api/v1/sync` to apply query-param fallback consent for the current + /// request only when pre-routing consent extraction produced an empty + /// context. + pub fn consent_mut(&mut self) -> &mut ConsentContext { + &mut self.consent + } + + /// Sets the device signals derived from the adapter layer. + /// + /// Must be called before [`generate_if_needed`] so that new entries + /// include the [`KvDevice`] record. The adapter derives these from + /// `req.get_tls_ja4()`, `req.get_client_h2_fingerprint()`, and UA. + /// + /// [`KvDevice`]: super::kv_types::KvDevice + /// [`generate_if_needed`]: Self::generate_if_needed + pub fn set_device_signals(&mut self, signals: DeviceSignals) { + self.device_signals = Some(signals); + } + + /// Returns the device signals, if set. + #[must_use] + pub fn device_signals(&self) -> Option<&DeviceSignals> { + self.device_signals.as_ref() + } + + /// Returns the normalized client IP, if available. + #[must_use] + pub fn client_ip(&self) -> Option<&str> { + self.client_ip.as_deref() + } + + /// Returns the pre-routing geo data, if available. + #[must_use] + pub fn geo_info(&self) -> Option<&GeoInfo> { + self.geo_info.as_ref() + } + + /// Returns whether EC creation is permitted by consent for this request. + #[must_use] + pub fn ec_allowed(&self) -> bool { + consent::ec_consent_granted(&self.consent) + } + + /// Returns the existing EC cookie value for revocation handling. + /// + /// When consent is withdrawn, this value is needed to identify the + /// correct KV entry to tombstone. Returns `None` if no cookie was + /// present on the request. This always returns the cookie value. 
+ #[must_use] + pub fn existing_cookie_ec_id(&self) -> Option<&str> { + self.cookie_ec_value.as_deref() + } + + /// Returns `true` when the request carried a cookie EC and the selected + /// active EC differs from that cookie value. + #[must_use] + pub fn cookie_differs_from_active_ec(&self) -> bool { + matches!( + (self.cookie_ec_value.as_deref(), self.ec_value.as_deref()), + (Some(cookie), Some(active)) if cookie != active + ) + } + + /// Returns the stable EC hash prefix from the active EC value. + #[must_use] + pub fn ec_hash(&self) -> Option<&str> { + self.ec_value.as_deref().map(generation::ec_hash) + } + + /// Creates a test-only `EcContext` with explicit field values. + #[cfg(test)] + #[must_use] + pub fn new_for_test(ec_value: Option, consent: ConsentContext) -> Self { + Self { + ec_was_present: ec_value.is_some(), + cookie_ec_value: ec_value.clone(), + ec_value, + ec_generated: false, + consent, + client_ip: None, + geo_info: None, + device_signals: None, + } + } + + /// Creates a test-only [`EcContext`] with explicit client IP. + #[cfg(test)] + #[must_use] + pub fn new_for_test_with_ip( + ec_value: Option, + consent: ConsentContext, + client_ip: Option, + ) -> Self { + Self { + ec_was_present: ec_value.is_some(), + cookie_ec_value: ec_value.clone(), + ec_value, + ec_generated: false, + consent, + client_ip, + geo_info: None, + device_signals: None, + } + } + + /// Creates a test-only [`EcContext`] with independent cookie and active EC + /// values. Use this to test cookie-mismatch and withdrawal scenarios. + #[cfg(test)] + #[must_use] + pub fn new_for_test_with_cookie( + ec_value: Option, + cookie_ec_value: Option, + ec_was_present: bool, + ec_generated: bool, + consent: ConsentContext, + ) -> Self { + Self { + ec_value, + cookie_ec_value, + ec_was_present, + ec_generated, + consent, + client_ip: None, + geo_info: None, + device_signals: None, + } + } +} + +/// Returns the current Unix timestamp in seconds. 
+/// +/// Uses `std::time::SystemTime` which is supported on `wasm32-wasip1`. +pub(crate) fn current_timestamp() -> u64 { + std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .map(|d| d.as_secs()) + .unwrap_or_else(|err| { + log::error!("SystemTime::now() failed, falling back to epoch 0: {err}"); + 0 + }) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::test_support::tests::create_test_settings; + use fastly::http::HeaderValue; + + fn create_test_request(headers: &[(&str, &str)]) -> Request { + let mut req = Request::new("GET", "http://example.com"); + for &(key, value) in headers { + req.set_header( + key, + HeaderValue::from_str(value).expect("should create valid header value"), + ); + } + req + } + + /// Creates a valid EC ID for testing: `{64hex}.{6alnum}`. + fn valid_ec_id(prefix_char: &str, suffix: &str) -> String { + format!("{}.{suffix}", prefix_char.repeat(64)) + } + + #[test] + fn read_from_request_ignores_header_ec() { + let settings = create_test_settings(); + let ec_id = valid_ec_id("a", "HdrEc1"); + let req = create_test_request(&[("x-ts-ec", &ec_id)]); + + let ec = EcContext::read_from_request(&settings, &req).expect("should read EC context"); + + assert!(ec.ec_value().is_none(), "should ignore EC from header"); + assert!(!ec.ec_was_present(), "should not detect EC from header"); + assert!(!ec.cookie_was_present(), "should not detect cookie"); + assert!(!ec.ec_generated(), "should not mark as generated"); + } + + #[test] + fn read_from_request_with_cookie_ec() { + let settings = create_test_settings(); + let ec_id = valid_ec_id("b", "CkEc01"); + let cookie = format!("ts-ec={ec_id}"); + let req = create_test_request(&[("cookie", &cookie)]); + + let ec = EcContext::read_from_request(&settings, &req).expect("should read EC context"); + + assert_eq!(ec.ec_value(), Some(ec_id.as_str())); + assert!(ec.ec_was_present(), "should detect EC from cookie"); + assert!(ec.cookie_was_present(), "should detect cookie"); + 
assert!(!ec.ec_generated(), "should not mark as generated"); + } + + #[test] + fn read_from_request_cookie_is_authoritative_when_header_present() { + let settings = create_test_settings(); + let header_id = valid_ec_id("a", "Hdr001"); + let cookie_id = valid_ec_id("b", "Ck0001"); + let cookie = format!("ts-ec={cookie_id}"); + let req = create_test_request(&[("x-ts-ec", &header_id), ("cookie", &cookie)]); + + let ec = EcContext::read_from_request(&settings, &req).expect("should read EC context"); + + assert_eq!( + ec.ec_value(), + Some(cookie_id.as_str()), + "should use cookie instead of header" + ); + assert!(ec.cookie_was_present(), "should still detect cookie"); + } + + #[test] + fn read_from_request_no_ec() { + let settings = create_test_settings(); + let req = create_test_request(&[]); + + let ec = EcContext::read_from_request(&settings, &req).expect("should read EC context"); + + assert!(ec.ec_value().is_none(), "should have no EC value"); + assert!(!ec.ec_was_present(), "should not detect EC"); + assert!(!ec.cookie_was_present(), "should not detect cookie"); + } + + #[test] + fn read_from_request_uses_cookie_when_malformed_header_present() { + let settings = create_test_settings(); + let cookie_id = valid_ec_id("c", "FbCk01"); + let cookie = format!("ts-ec={cookie_id}"); + let req = create_test_request(&[("x-ts-ec", "malformed-header"), ("cookie", &cookie)]); + + let ec = EcContext::read_from_request(&settings, &req).expect("should read EC context"); + + assert_eq!( + ec.ec_value(), + Some(cookie_id.as_str()), + "should use cookie when header is malformed" + ); + assert!(ec.cookie_was_present(), "should detect cookie"); + } + + #[test] + fn read_from_request_discards_malformed_header_and_cookie() { + let settings = create_test_settings(); + let req = create_test_request(&[("x-ts-ec", "bad-header"), ("cookie", "ts-ec=bad-cookie")]); + + let ec = EcContext::read_from_request(&settings, &req).expect("should read EC context"); + + assert!( + 
ec.ec_value().is_none(), + "should discard both malformed header and cookie" + ); + assert!( + !ec.ec_was_present(), + "ec_was_present should be false when no valid EC found" + ); + assert!( + ec.cookie_was_present(), + "cookie_was_present should still be true for withdrawal path" + ); + } + + #[test] + fn generate_if_needed_skips_when_ec_exists() { + let settings = create_test_settings(); + let ec_id = valid_ec_id("d", "Exist1"); + let cookie = format!("ts-ec={ec_id}"); + let req = create_test_request(&[("cookie", &cookie)]); + + let mut ec = EcContext::read_from_request(&settings, &req).expect("should read EC context"); + ec.generate_if_needed(&settings, None) + .expect("should not error when EC already exists"); + + assert_eq!( + ec.ec_value(), + Some(ec_id.as_str()), + "should keep existing EC" + ); + assert!(!ec.ec_generated(), "should not mark as generated"); + } + + #[test] + fn existing_cookie_ec_id_returns_cookie_value() { + let settings = create_test_settings(); + + // With cookie present (valid format) + let cookie_ec = valid_ec_id("e", "CkVal1"); + let cookie = format!("ts-ec={cookie_ec}"); + let req = create_test_request(&[("cookie", &cookie)]); + let ec = EcContext::read_from_request(&settings, &req).expect("should read EC context"); + assert_eq!( + ec.existing_cookie_ec_id(), + Some(cookie_ec.as_str()), + "should return cookie EC ID" + ); + + // With only header (no cookie) + let header_ec = valid_ec_id("f", "HdrVl1"); + let req = create_test_request(&[("x-ts-ec", &header_ec)]); + let ec = EcContext::read_from_request(&settings, &req).expect("should read EC context"); + assert!( + ec.existing_cookie_ec_id().is_none(), + "should return None when only header is present" + ); + + // With both header and cookie — should return cookie value + let header_ec2 = valid_ec_id("a", "Hdr002"); + let cookie_ec2 = valid_ec_id("b", "Ck0002"); + let cookie2 = format!("ts-ec={cookie_ec2}"); + let req = create_test_request(&[("x-ts-ec", &header_ec2), ("cookie", 
&cookie2)]); + let ec = EcContext::read_from_request(&settings, &req).expect("should read EC context"); + assert_eq!( + ec.ec_value(), + Some(cookie_ec2.as_str()), + "should use cookie as active EC" + ); + assert_eq!( + ec.existing_cookie_ec_id(), + Some(cookie_ec2.as_str()), + "should return cookie value for revocation even when header is present" + ); + } +} diff --git a/crates/trusted-server-core/src/ec/partner.rs b/crates/trusted-server-core/src/ec/partner.rs new file mode 100644 index 00000000..f836724e --- /dev/null +++ b/crates/trusted-server-core/src/ec/partner.rs @@ -0,0 +1,149 @@ +//! Partner validation helpers and ID hashing. +//! +//! Provides partner ID format validation, reserved name checks, and +//! API key hashing. The actual partner registry is in [`super::registry`]. + +use std::sync::OnceLock; + +use regex::Regex; +use sha2::{Digest, Sha256}; + +/// Regex pattern for valid partner IDs. +/// Lowercase alphanumeric, hyphens, underscores; 1-32 characters. +const PARTNER_ID_PATTERN: &str = r"^[a-z0-9_-]{1,32}$"; + +/// Reserved partner IDs that would collide with managed `X-ts-*` headers. +const RESERVED_PARTNER_IDS: &[&str] = &[ + "ec", + "eids", + "ec-consent", + "eids-truncated", + "synthetic", + "ts", + "version", + "env", +]; + +/// Cached compiled regex for partner ID validation. +static PARTNER_ID_REGEX: OnceLock> = OnceLock::new(); + +fn partner_id_regex() -> Result<&'static Regex, String> { + PARTNER_ID_REGEX + .get_or_init(|| { + Regex::new(PARTNER_ID_PATTERN) + .map_err(|e| format!("internal error compiling partner ID regex: {e}")) + }) + .as_ref() + .map_err(Clone::clone) +} + +/// Validates a partner ID format and checks against reserved names. +/// +/// # Errors +/// +/// Returns a descriptive error string on validation failure. 
+pub fn validate_partner_id(id: &str) -> Result<(), String> { + let re = partner_id_regex()?; + if !re.is_match(id) { + return Err(format!( + "partner ID must match {PARTNER_ID_PATTERN}, got: '{id}'" + )); + } + if RESERVED_PARTNER_IDS.contains(&id) { + return Err(format!("partner ID '{id}' is reserved")); + } + Ok(()) +} + +/// Computes the SHA-256 hex digest of an API key. +#[must_use] +pub fn hash_api_key(api_key: &str) -> String { + let mut hasher = Sha256::new(); + hasher.update(api_key.as_bytes()); + hex::encode(hasher.finalize()) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn validate_partner_id_accepts_valid_ids() { + assert!( + validate_partner_id("ssp_x").is_ok(), + "should accept underscored ID" + ); + assert!( + validate_partner_id("dsp-y").is_ok(), + "should accept hyphenated ID" + ); + assert!( + validate_partner_id("liveramp").is_ok(), + "should accept lowercase alpha" + ); + assert!( + validate_partner_id("id5").is_ok(), + "should accept alphanumeric" + ); + } + + #[test] + fn validate_partner_id_rejects_invalid_ids() { + assert!(validate_partner_id("").is_err(), "should reject empty ID"); + assert!( + validate_partner_id("SSP").is_err(), + "should reject uppercase" + ); + assert!( + validate_partner_id("a".repeat(33).as_str()).is_err(), + "should reject >32 chars" + ); + assert!( + validate_partner_id("has space").is_err(), + "should reject spaces" + ); + } + + #[test] + fn validate_partner_id_rejects_reserved_ids() { + assert!( + validate_partner_id("ec").is_err(), + "should reject reserved 'ec'" + ); + assert!( + validate_partner_id("ts").is_err(), + "should reject reserved 'ts'" + ); + assert!( + validate_partner_id("eids").is_err(), + "should reject reserved 'eids'" + ); + } + + #[test] + fn hash_api_key_produces_hex_digest() { + let hash = hash_api_key("test-key"); + assert_eq!(hash.len(), 64, "should produce 64-char hex digest"); + assert!( + hash.chars().all(|c| c.is_ascii_hexdigit()), + "should only contain hex characters" 
+ ); + } + + #[test] + fn hash_api_key_is_deterministic() { + let hash1 = hash_api_key("same-key"); + let hash2 = hash_api_key("same-key"); + assert_eq!(hash1, hash2, "should produce same hash for same input"); + } + + #[test] + fn hash_api_key_differs_for_different_keys() { + let hash1 = hash_api_key("key-a"); + let hash2 = hash_api_key("key-b"); + assert_ne!( + hash1, hash2, + "should produce different hashes for different inputs" + ); + } +} diff --git a/crates/trusted-server-core/src/ec/prebid_eids.rs b/crates/trusted-server-core/src/ec/prebid_eids.rs new file mode 100644 index 00000000..3ff75fe2 --- /dev/null +++ b/crates/trusted-server-core/src/ec/prebid_eids.rs @@ -0,0 +1,398 @@ +//! Prebid EID cookie ingestion. +//! +//! Parses the `ts-eids` cookie written by the TSJS Prebid integration and +//! syncs matched partner UIDs to the KV identity graph. +//! +//! The current cookie format stores a base64-encoded JSON array of full +//! OpenRTB-style `Eid` objects (`{source, uids:[...]}`). For rollout +//! compatibility we also accept the earlier flattened payload shape +//! (`{source, id, atype}` per entry). + +use base64::{engine::general_purpose::STANDARD as BASE64, Engine as _}; +use serde::Deserialize; +use serde_json::Value as JsonValue; + +use crate::openrtb::{Eid, Uid}; + +use super::kv::KvIdentityGraph; +use super::kv_types::MAX_UID_LENGTH; +use super::registry::PartnerRegistry; + +/// Maximum raw `ts-eids` cookie size accepted before base64 decode. +const MAX_EIDS_COOKIE_BYTES: usize = 8 * 1024; + +/// Legacy flattened `ts-eids` cookie entry. +#[derive(Debug, Deserialize)] +struct LegacyCookieEid { + source: String, + id: String, + #[allow(dead_code)] + atype: u8, +} + +/// OpenRTB-style `ts-eids` cookie entry. 
+#[derive(Debug, Deserialize)] +struct StructuredCookieEid { + source: String, + #[serde(default)] + uids: Vec, +} + +#[derive(Debug, Deserialize)] +struct StructuredCookieUid { + id: String, + #[serde(default)] + atype: Option, + #[serde(default)] + ext: Option, +} + +/// Parses a `ts-eids` cookie value into OpenRTB-style `Eid` entries. +/// +/// Accepts both the current structured cookie format and the earlier legacy +/// flattened format for backwards compatibility. +/// +/// # Errors +/// +/// Returns an error when the cookie exceeds the raw size limit, is not valid +/// base64, or does not contain either supported JSON payload shape. +pub fn parse_prebid_eids_cookie(cookie_value: &str) -> Result, String> { + if eids_cookie_exceeds_size_limit(cookie_value) { + return Err(format!( + "ts-eids cookie too large ({} bytes)", + cookie_value.len() + )); + } + + let bytes = BASE64 + .decode(cookie_value) + .map_err(|e| format!("base64 decode failed: {e}"))?; + + if let Ok(eids) = serde_json::from_slice::>(&bytes) { + return Ok(legacy_cookie_eids_to_openrtb(eids)); + } + + let structured = serde_json::from_slice::>(&bytes) + .map_err(|e| format!("JSON parse failed: {e}"))?; + Ok(structured_cookie_eids_to_openrtb(structured)) +} + +/// Parses a `ts-eids` cookie value and writes matched partner UIDs to KV. +/// +/// `cookie_value` is the raw base64-encoded cookie value, already extracted +/// from the request by the caller. +/// +/// Best-effort: all errors are logged and swallowed so the main request +/// path is never affected. 
+pub fn ingest_prebid_eids( + cookie_value: &str, + ec_id: &str, + kv: &KvIdentityGraph, + registry: &PartnerRegistry, +) { + if registry.is_empty() { + return; + } + + let eids = match parse_prebid_eids_cookie(cookie_value) { + Ok(eids) => eids, + Err(_) => { + log::trace!("Prebid EIDs: failed to decode ts-eids cookie; dropping"); + return; + } + }; + + for eid in &eids { + let Some(partner) = registry.find_by_source_domain(&eid.source) else { + log::debug!("Prebid EIDs: no partner for source '{}'", eid.source); + continue; + }; + + // KV stores one UID per partner. Preserve the previous cookie-ingestion + // behavior by syncing the first valid UID under each source, while + // skipping malformed candidates instead of dropping the whole source. + let Some(uid) = first_valid_uid(&eid.uids) else { + log::debug!( + "Prebid EIDs: no valid uid for partner '{}' from source '{}'", + partner.id, + eid.source, + ); + continue; + }; + + match kv.upsert_partner_id(ec_id, &partner.id, &uid.id) { + Ok(_) => { + log::debug!( + "Prebid EIDs: synced partner '{}' from source '{}'", + partner.id, + eid.source, + ); + } + Err(err) => { + log::warn!( + "Prebid EIDs: failed to sync partner '{}': {err:?}", + partner.id, + ); + } + } + } +} + +fn first_valid_uid(uids: &[Uid]) -> Option<&Uid> { + uids.iter() + .filter(|uid| !uid.id.trim().is_empty()) + .find(|uid| !eid_id_exceeds_size_limit(&uid.id)) +} + +/// `SharedID` EID source domain used for partner registry lookup. +const SHAREDID_SOURCE_DOMAIN: &str = "sharedid.org"; + +/// Ingests a raw `sharedId` cookie value into the KV identity graph. +/// +/// Prebid's `SharedID` module writes a `sharedId` cookie directly in the +/// browser. This function reads that value and stores it under the +/// configured `SharedID` partner. +/// +/// Best-effort: all errors are logged and swallowed. 
+pub fn ingest_sharedid_cookie( + cookie_value: &str, + ec_id: &str, + kv: &KvIdentityGraph, + registry: &PartnerRegistry, +) { + let cookie_value = cookie_value.trim(); + if cookie_value.is_empty() { + return; + } + + if sharedid_cookie_exceeds_size_limit(cookie_value) { + log::debug!( + "SharedID: cookie exceeds MAX_UID_LENGTH ({} bytes)", + cookie_value.len() + ); + return; + } + + let Some(partner) = registry.find_by_source_domain(SHAREDID_SOURCE_DOMAIN) else { + log::debug!("SharedID: no partner configured for source '{SHAREDID_SOURCE_DOMAIN}'"); + return; + }; + + match kv.upsert_partner_id(ec_id, &partner.id, cookie_value) { + Ok(_) => { + log::debug!("SharedID: synced partner '{}'", partner.id); + } + Err(err) => { + log::warn!("SharedID: failed to sync partner '{}': {err:?}", partner.id); + } + } +} + +fn eids_cookie_exceeds_size_limit(cookie_value: &str) -> bool { + cookie_value.len() > MAX_EIDS_COOKIE_BYTES +} + +fn eid_id_exceeds_size_limit(uid: &str) -> bool { + uid.len() > MAX_UID_LENGTH +} + +fn sharedid_cookie_exceeds_size_limit(cookie_value: &str) -> bool { + cookie_value.len() > MAX_UID_LENGTH +} + +fn structured_cookie_eids_to_openrtb(entries: Vec) -> Vec { + let mut eids = Vec::new(); + + for entry in entries { + if entry.source.is_empty() { + continue; + } + + let uids: Vec<_> = entry + .uids + .into_iter() + .filter_map(structured_cookie_uid_to_openrtb) + .collect(); + if uids.is_empty() { + continue; + } + + eids.push(Eid { + source: entry.source, + uids, + }); + } + + eids +} + +fn structured_cookie_uid_to_openrtb(uid: StructuredCookieUid) -> Option { + if uid.id.is_empty() { + return None; + } + + let ext = match uid.ext { + Some(JsonValue::Object(_)) => uid.ext, + _ => None, + }; + + Some(Uid { + id: uid.id, + atype: uid.atype, + ext, + }) +} + +fn legacy_cookie_eids_to_openrtb(entries: Vec) -> Vec { + entries + .into_iter() + .filter(|entry| !entry.source.is_empty() && !entry.id.is_empty()) + .map(|entry| Eid { + source: entry.source, + 
uids: vec![Uid { + id: entry.id, + atype: Some(entry.atype), + ext: None, + }], + }) + .collect() +} + +#[cfg(test)] +mod tests { + use super::*; + use base64::engine::general_purpose::STANDARD as BASE64; + use serde_json::json; + + #[test] + fn parse_prebid_eids_cookie_parses_legacy_flat_payload() { + let eids = vec![ + json!({"source": "id5-sync.com", "id": "ID5_abc", "atype": 1}), + json!({"source": "liveramp.com", "id": "LR_xyz", "atype": 3}), + ]; + let encoded = BASE64.encode(serde_json::to_vec(&eids).expect("should serialize")); + + let decoded = parse_prebid_eids_cookie(&encoded).expect("should decode valid payload"); + assert_eq!(decoded.len(), 2, "should parse both EIDs"); + assert_eq!(decoded[0].source, "id5-sync.com"); + assert_eq!(decoded[0].uids[0].id, "ID5_abc"); + assert_eq!(decoded[1].source, "liveramp.com"); + assert_eq!(decoded[1].uids[0].id, "LR_xyz"); + } + + #[test] + fn parse_prebid_eids_cookie_parses_structured_payload() { + let eids = vec![json!({ + "source": "sharedid.org", + "uids": [ + {"id": "shared_123", "atype": 3}, + {"id": "shared_456", "ext": {"provider": "example"}} + ] + })]; + let encoded = BASE64.encode(serde_json::to_vec(&eids).expect("should serialize")); + + let decoded = parse_prebid_eids_cookie(&encoded).expect("should decode valid payload"); + assert_eq!(decoded.len(), 1, "should parse one structured EID entry"); + assert_eq!(decoded[0].source, "sharedid.org"); + assert_eq!(decoded[0].uids.len(), 2, "should preserve multiple UIDs"); + assert_eq!(decoded[0].uids[0].id, "shared_123"); + assert_eq!(decoded[0].uids[0].atype, Some(3)); + assert_eq!( + decoded[0].uids[1].ext, + Some(json!({"provider": "example"})), + "should preserve UID ext objects" + ); + } + + #[test] + fn parse_prebid_eids_cookie_rejects_invalid_base64() { + let result = parse_prebid_eids_cookie("not-valid-base64!!!"); + assert!(result.is_err(), "should reject invalid base64"); + } + + #[test] + fn parse_prebid_eids_cookie_rejects_invalid_json() { + let 
encoded = BASE64.encode(b"not json"); + let result = parse_prebid_eids_cookie(&encoded); + assert!(result.is_err(), "should reject invalid JSON"); + } + + #[test] + fn ts_eids_cookie_rejects_oversized_payloads() { + let oversized = "x".repeat(MAX_EIDS_COOKIE_BYTES + 1); + let exact_limit = "x".repeat(MAX_EIDS_COOKIE_BYTES); + + assert!( + eids_cookie_exceeds_size_limit(&oversized), + "should reject cookies larger than the raw size cap" + ); + assert!( + !eids_cookie_exceeds_size_limit(&exact_limit), + "should allow cookies exactly at the raw size cap" + ); + } + + #[test] + fn sharedid_cookie_rejects_values_larger_than_uid_limit() { + let oversized = "x".repeat(MAX_UID_LENGTH + 1); + let exact_limit = "x".repeat(MAX_UID_LENGTH); + + assert!( + sharedid_cookie_exceeds_size_limit(&oversized), + "should reject sharedId values larger than MAX_UID_LENGTH" + ); + assert!( + !sharedid_cookie_exceeds_size_limit(&exact_limit), + "should allow sharedId values exactly at MAX_UID_LENGTH" + ); + } + + #[test] + fn prebid_eid_uid_rejects_values_larger_than_uid_limit() { + let oversized = "x".repeat(MAX_UID_LENGTH + 1); + let exact_limit = "x".repeat(MAX_UID_LENGTH); + + assert!( + eid_id_exceeds_size_limit(&oversized), + "should reject EID values larger than MAX_UID_LENGTH" + ); + assert!( + !eid_id_exceeds_size_limit(&exact_limit), + "should allow EID values exactly at MAX_UID_LENGTH" + ); + } + + #[test] + fn first_valid_uid_skips_oversized_and_uses_later_valid_uid() { + let oversized = "x".repeat(MAX_UID_LENGTH + 1); + let uids = vec![ + Uid { + id: oversized, + atype: Some(1), + ext: None, + }, + Uid { + id: "valid-uid".to_owned(), + atype: Some(1), + ext: None, + }, + ]; + + let uid = first_valid_uid(&uids).expect("should find later valid UID"); + assert_eq!(uid.id, "valid-uid", "should skip oversized first UID"); + } + + #[test] + fn first_valid_uid_rejects_whitespace_only_ids() { + let uids = vec![Uid { + id: " ".to_owned(), + atype: Some(1), + ext: None, + }]; + + 
assert!( + first_valid_uid(&uids).is_none(), + "should reject whitespace UID" + ); + } +} diff --git a/crates/trusted-server-core/src/ec/pull_sync.rs b/crates/trusted-server-core/src/ec/pull_sync.rs new file mode 100644 index 00000000..ab85f6ee --- /dev/null +++ b/crates/trusted-server-core/src/ec/pull_sync.rs @@ -0,0 +1,658 @@ +//! Pull sync background dispatch. +//! +//! Launches partner pull-sync requests for organic traffic after the client +//! response has been sent. Dispatch is best-effort and never affects client +//! response status. +//! +//! Pull sync currently fills missing partner UIDs only. Once a partner UID is +//! present in the EC identity entry, it is not periodically refreshed because +//! the entry no longer stores per-partner sync timestamps. + +use fastly::http::request::PendingRequest; +use fastly::http::{header, Method, StatusCode}; +use fastly::Request; +use serde::Deserialize; +use url::Url; + +use crate::backend::BackendConfig; +use crate::settings::Settings; + +use super::generation::{ec_hash, is_valid_ec_id}; +use super::kv::KvIdentityGraph; +use super::kv_types::KvEntry; +use super::rate_limiter::RateLimiter; +use super::registry::{PartnerConfig, PartnerRegistry}; + +// `current_timestamp` is defined in the parent `ec` module. +use super::current_timestamp; +use super::EcContext; + +/// Inputs needed to dispatch pull sync after response flush. +#[derive(Debug, Clone)] +pub struct PullSyncContext { + ec_id: String, +} + +impl PullSyncContext { + /// Returns the EC ID for the request. + #[must_use] + pub fn ec_id(&self) -> &str { + &self.ec_id + } +} + +struct InFlightPull { + partner_id: String, + pending: PendingRequest, +} + +#[derive(Debug, Deserialize)] +struct PullSyncResponse { + uid: Option, +} + +/// Builds post-send pull-sync context from the route EC context. +/// +/// Returns `None` when consent denies EC or there is no active EC ID. 
+#[must_use] +pub fn build_pull_sync_context(ec_context: &EcContext) -> Option { + if !ec_context.ec_allowed() { + return None; + } + + let ec_id_ref = ec_context.ec_value()?; + if !is_valid_ec_id(ec_id_ref) { + log::debug!("Pull sync: skipping dispatch because active EC ID is invalid format"); + return None; + } + + let ec_id = ec_id_ref.to_owned(); + Some(PullSyncContext { ec_id }) +} + +/// Dispatches partner pull-sync requests in the background. +/// +/// This function is best-effort: all errors are logged and swallowed. +pub fn dispatch_pull_sync( + settings: &Settings, + kv: &KvIdentityGraph, + registry: &PartnerRegistry, + rate_limiter: &dyn RateLimiter, + context: &PullSyncContext, +) { + let now = current_timestamp(); + let kv_entry = match kv.get(context.ec_id()) { + Ok(entry) => entry.map(|(entry, _)| entry), + Err(err) => { + log::warn!( + "Pull sync: failed to read identity graph for '{}': {err:?}", + super::log_id(context.ec_id()) + ); + return; + } + }; + + let mut pull_partners = registry.pull_enabled_partners(); + + // Sort by ID for deterministic ordering, then apply a rotating hourly + // offset so that different partners get dispatch priority (§10.3). + pull_partners.sort_by(|a, b| a.id.cmp(&b.id)); + + log::debug!( + "Pull sync: {} pull-enabled partners after filtering", + pull_partners.len(), + ); + + if pull_partners.is_empty() { + return; + } + + // Rotate the partner list so that the starting partner changes each + // hour. This ensures fair distribution when max_concurrency limits + // how many partners are dispatched per request. 
+ let offset = (now / 3600) as usize % pull_partners.len(); + pull_partners.rotate_left(offset); + + let max_concurrency = settings.ec.pull_sync_concurrency.max(1); + let mut in_flight: Vec = Vec::new(); + + for partner in pull_partners { + if !is_partner_pull_eligible(partner, kv_entry.as_ref()) { + continue; + } + + let Some(url) = validated_pull_sync_url(partner) else { + continue; + }; + + let rate_key = pull_rate_limit_key(&partner.id, context.ec_id()); + match rate_limiter.exceeded(&rate_key, partner.pull_sync_rate_limit) { + Ok(true) => { + log::debug!( + "Pull sync: rate-limited partner '{}' for ec_id '{}'", + partner.id, + super::log_id(context.ec_id()) + ); + continue; + } + Ok(false) => {} + Err(err) => { + log::warn!( + "Pull sync: failed to read rate limit for partner '{}': {err:?}", + partner.id + ); + continue; + } + } + + let Some(token) = partner.ts_pull_token.as_ref() else { + log::warn!( + "Pull sync: partner '{}' enabled but missing ts_pull_token", + partner.id + ); + continue; + }; + + let request_url = build_pull_request_url(url, context.ec_id()); + let mut request = Request::new(Method::GET, request_url.as_str()); + request.set_header("authorization", format!("Bearer {}", token.expose())); + + let backend_name = + match BackendConfig::from_url(request_url.as_str(), settings.proxy.certificate_check) { + Ok(name) => name, + Err(err) => { + log::warn!( + "Pull sync: failed to resolve backend for partner '{}': {err:?}", + partner.id + ); + continue; + } + }; + + let pending = match request.send_async(backend_name) { + Ok(pending) => pending, + Err(err) => { + log::warn!( + "Pull sync: failed to dispatch partner '{}': {err:?}", + partner.id + ); + continue; + } + }; + + in_flight.push(InFlightPull { + partner_id: partner.id.clone(), + pending, + }); + + if in_flight.len() >= max_concurrency { + drain_pull_batch(kv, context.ec_id(), &mut in_flight); + } + } + + drain_pull_batch(kv, context.ec_id(), &mut in_flight); +} + +fn 
is_partner_pull_eligible(partner: &PartnerConfig, kv_entry: Option<&KvEntry>) -> bool { + kv_entry + .and_then(|entry| entry.ids.get(&partner.id)) + .is_none() +} + +fn validated_pull_sync_url(partner: &PartnerConfig) -> Option { + let pull_sync_url = partner.pull_sync_url.as_deref()?; + let parsed = match Url::parse(pull_sync_url) { + Ok(url) => url, + Err(err) => { + log::error!( + "Pull sync: partner '{}' has invalid pull_sync_url '{}': {err}", + partner.id, + pull_sync_url + ); + return None; + } + }; + + if parsed.scheme() != "https" { + log::error!( + "Pull sync: partner '{}' pull_sync_url must use HTTPS, got scheme '{}'", + partner.id, + parsed.scheme() + ); + return None; + } + + let Some(hostname) = parsed.host_str() else { + log::error!( + "Pull sync: partner '{}' pull_sync_url has no hostname: {}", + partner.id, + pull_sync_url + ); + return None; + }; + + let hostname = hostname.trim_end_matches('.').to_ascii_lowercase(); + if !partner.pull_sync_allowed_domains.iter().any(|domain| { + domain + .trim() + .trim_end_matches('.') + .eq_ignore_ascii_case(&hostname) + }) { + log::error!( + "Pull sync: partner '{}' URL host '{}' not in pull_sync_allowed_domains", + partner.id, + hostname + ); + return None; + } + + Some(parsed) +} + +fn build_pull_request_url(mut base_url: Url, ec_id: &str) -> Url { + base_url.query_pairs_mut().append_pair("ec_id", ec_id); + base_url +} + +fn pull_rate_limit_key(partner_id: &str, ec_id: &str) -> String { + format!("pull:{partner_id}:{}", ec_hash(ec_id)) +} + +fn drain_pull_batch(kv: &KvIdentityGraph, ec_id: &str, in_flight: &mut Vec) { + for pending in in_flight.drain(..) { + let partner_id = pending.partner_id; + // The Fastly SDK version used by this crate exposes only blocking + // `PendingRequest::wait()` for a single pending request. Pull sync runs + // after `send_to_client()` and relies on the platform compute cap for + // the hard upper bound until a per-request timeout API is available. 
+ let response = match pending.pending.wait() { + Ok(response) => response, + Err(err) => { + log::warn!( + "Pull sync: request failed for partner '{}': {err:?}", + partner_id + ); + continue; + } + }; + + let Some(uid) = extract_pull_uid(response, &partner_id) else { + continue; + }; + + if let Err(err) = kv.upsert_partner_id(ec_id, &partner_id, &uid) { + log::warn!( + "Pull sync: failed to upsert partner '{}' for ec_id '{}': {err:?}", + partner_id, + super::log_id(ec_id) + ); + } + } +} + +/// Maximum response body size accepted from pull sync partners (64 KiB). +/// +/// The expected response is `{"uid":""}`, so 64 KiB is generous. +/// This prevents a misbehaving partner from exhausting WASM memory. +const MAX_PULL_RESPONSE_BYTES: usize = 64 * 1024; + +fn response_content_length_exceeds_limit(response: &fastly::Response, partner_id: &str) -> bool { + let Some(value) = response.get_header(header::CONTENT_LENGTH) else { + return false; + }; + + let Some(value) = value.to_str().ok() else { + log::warn!( + "Pull sync: partner '{}' returned invalid Content-Length header, rejecting", + partner_id + ); + return true; + }; + + let Ok(length) = value.parse::() else { + log::warn!( + "Pull sync: partner '{}' returned malformed Content-Length header, rejecting", + partner_id + ); + return true; + }; + + if length > MAX_PULL_RESPONSE_BYTES { + log::warn!( + "Pull sync: partner '{}' returned oversized Content-Length ({} bytes), rejecting", + partner_id, + length + ); + return true; + } + + false +} + +fn extract_pull_uid(mut response: fastly::Response, partner_id: &str) -> Option { + let status = response.get_status(); + + if status == StatusCode::NOT_FOUND { + log::debug!( + "Pull sync: partner '{}' returned 404, treating as no-op", + partner_id + ); + return None; + } + + if !status.is_success() { + log::warn!( + "Pull sync: partner '{}' returned non-success status {}", + partner_id, + status + ); + return None; + } + + if response_content_length_exceeds_limit(&response, 
partner_id) { + return None; + } + + let body = response.take_body_bytes(); + if body.len() > MAX_PULL_RESPONSE_BYTES { + log::warn!( + "Pull sync: partner '{}' returned oversized response ({} bytes), rejecting", + partner_id, + body.len() + ); + return None; + } + let payload = match serde_json::from_slice::(&body) { + Ok(payload) => payload, + Err(err) => { + log::warn!( + "Pull sync: partner '{}' returned invalid JSON body: {err}", + partner_id + ); + return None; + } + }; + + use super::kv_types::MAX_UID_LENGTH; + + let uid = payload.uid.filter(|value| !value.trim().is_empty()); + match uid { + None => { + log::debug!( + "Pull sync: partner '{}' returned null/empty uid, treating as no-op", + partner_id + ); + None + } + Some(ref value) if value.len() > MAX_UID_LENGTH => { + log::warn!( + "Pull sync: partner '{}' returned uid exceeding {} bytes (got {}), rejecting", + partner_id, + MAX_UID_LENGTH, + value.len() + ); + None + } + _ => uid, + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::consent::types::ConsentContext; + use crate::ec::kv_types::KvEntry; + use crate::redacted::Redacted; + + fn pull_partner(ttl_sec: u64) -> PartnerConfig { + PartnerConfig { + id: "ssp_x".to_owned(), + name: "SSP X".to_owned(), + api_key_hash: "deadbeef".to_owned(), + bidstream_enabled: true, + source_domain: "ssp.example.com".to_owned(), + openrtb_atype: 3, + batch_rate_limit: 60, + pull_sync_enabled: true, + pull_sync_url: Some("https://sync.partner.test/pull".to_owned()), + pull_sync_allowed_domains: vec!["sync.partner.test".to_owned()], + pull_sync_ttl_sec: ttl_sec, + pull_sync_rate_limit: 20, + ts_pull_token: Some(Redacted::new("token".to_owned())), + } + } + + #[test] + fn build_pull_sync_context_returns_context_when_valid() { + let consent = ConsentContext { + jurisdiction: crate::consent::jurisdiction::Jurisdiction::NonRegulated, + ..ConsentContext::default() + }; + let ec_id = format!("{}.ABC123", "a".repeat(64)); + let ec_context = 
EcContext::new_for_test(Some(ec_id), consent); + + let context = build_pull_sync_context(&ec_context) + .expect("should build pull sync context for valid EC"); + assert_eq!( + context.ec_id(), + ec_context.ec_value().expect("ec should be present"), + "should capture the EC ID from context" + ); + } + + #[test] + fn build_pull_sync_context_rejects_invalid_ec_id() { + let consent = ConsentContext { + jurisdiction: crate::consent::jurisdiction::Jurisdiction::NonRegulated, + ..ConsentContext::default() + }; + let ec_context = EcContext::new_for_test(Some("invalid-ec".to_owned()), consent); + + let context = build_pull_sync_context(&ec_context); + assert!( + context.is_none(), + "should reject pull sync context when EC ID format is invalid" + ); + } + + #[test] + fn partner_is_eligible_when_missing_from_entry() { + let partner = pull_partner(3600); + let entry = KvEntry::minimal("other_partner", "uid-1", 100); + + assert!( + is_partner_pull_eligible(&partner, Some(&entry)), + "should dispatch when partner has no stored UID" + ); + } + + #[test] + fn partner_is_not_eligible_when_already_present() { + let partner = pull_partner(3600); + let entry = KvEntry::minimal("ssp_x", "uid-1", 1000); + + assert!( + !is_partner_pull_eligible(&partner, Some(&entry)), + "should skip dispatch when partner already has a stored UID" + ); + } + + #[test] + fn validated_pull_sync_url_rejects_http_scheme() { + let mut partner = pull_partner(3600); + partner.pull_sync_url = Some("http://sync.partner.test/pull".to_owned()); + + let validated = validated_pull_sync_url(&partner); + assert!( + validated.is_none(), + "should reject pull_sync_url with HTTP scheme" + ); + } + + #[test] + fn validated_pull_sync_url_rejects_non_allowlisted_host() { + let mut partner = pull_partner(3600); + partner.pull_sync_url = Some("https://evil.test/pull".to_owned()); + + let validated = validated_pull_sync_url(&partner); + assert!( + validated.is_none(), + "should reject runtime pull_sync_url host outside 
allowlist" + ); + } + + #[test] + fn validated_pull_sync_url_accepts_normalized_allowlist_match() { + let mut partner = pull_partner(3600); + partner.pull_sync_url = Some("https://SYNC.PARTNER.TEST./pull".to_owned()); + partner.pull_sync_allowed_domains = vec!["sync.partner.test".to_owned()]; + + let validated = validated_pull_sync_url(&partner); + assert!( + validated.is_some(), + "should accept allowlist match after hostname normalization" + ); + } + + #[test] + fn build_pull_request_url_appends_ec_id() { + let url = Url::parse("https://sync.partner.test/pull?x=1").expect("should parse URL"); + let result = build_pull_request_url(url, "ecid123"); + + let query = result.query().expect("should have query string"); + assert!(query.contains("x=1"), "should preserve existing query"); + assert!(query.contains("ec_id=ecid123"), "should append ec_id"); + assert!( + !query.contains("ip="), + "should not forward client IP to partners" + ); + } + + #[test] + fn pull_rate_limit_key_uses_ec_hash_only() { + let first_ec_id = format!("{}.ABC123", "a".repeat(64)); + let second_ec_id = format!("{}.XYZ789", "a".repeat(64)); + + let first_key = pull_rate_limit_key("ssp_x", &first_ec_id); + let second_key = pull_rate_limit_key("ssp_x", &second_ec_id); + + assert_eq!( + first_key, second_key, + "should bucket different suffixes for the same EC hash together" + ); + assert_eq!( + first_key, + format!("pull:ssp_x:{}", "a".repeat(64)), + "should key pull-sync rate limiting by partner ID and EC hash" + ); + } + + #[test] + fn extract_pull_uid_treats_404_as_noop() { + let response = fastly::Response::from_status(StatusCode::NOT_FOUND); + + let uid = extract_pull_uid(response, "ssp_x"); + assert!(uid.is_none(), "should treat 404 as no-op"); + } + + #[test] + fn extract_pull_uid_treats_uid_null_as_noop() { + let response = fastly::Response::from_status(StatusCode::OK).with_body("{\"uid\":null}"); + + let uid = extract_pull_uid(response, "ssp_x"); + assert!(uid.is_none(), "should treat 
uid=null as no-op"); + } + + #[test] + fn extract_pull_uid_rejects_oversized_uid() { + let long_uid = "x".repeat(513); + let body = format!("{{\"uid\":\"{long_uid}\"}}"); + let response = fastly::Response::from_status(StatusCode::OK).with_body(body); + + let uid = extract_pull_uid(response, "ssp_x"); + assert!(uid.is_none(), "should reject uid exceeding 512 bytes"); + } + + #[test] + fn extract_pull_uid_reads_uid_from_success_body() { + let response = + fastly::Response::from_status(StatusCode::OK).with_body("{\"uid\":\"abc123\"}"); + + let uid = extract_pull_uid(response, "ssp_x"); + assert_eq!( + uid.as_deref(), + Some("abc123"), + "should parse uid from 200 body" + ); + } + + #[test] + fn extract_pull_uid_rejects_oversized_content_length_before_body_read() { + let response = fastly::Response::from_status(StatusCode::OK) + .with_header( + header::CONTENT_LENGTH, + (MAX_PULL_RESPONSE_BYTES + 1).to_string(), + ) + .with_body("{\"uid\":\"abc123\"}"); + + let uid = extract_pull_uid(response, "ssp_x"); + assert!( + uid.is_none(), + "should reject oversized Content-Length before parsing body" + ); + } + + #[test] + fn extract_pull_uid_accepts_small_body_without_content_length() { + let response = + fastly::Response::from_status(StatusCode::OK).with_body("{\"uid\":\"abc123\"}"); + + let uid = extract_pull_uid(response, "ssp_x"); + assert_eq!( + uid.as_deref(), + Some("abc123"), + "should accept small valid response without Content-Length" + ); + } + + #[test] + fn extract_pull_uid_rejects_body_larger_than_limit() { + let body = format!("{{\"uid\":\"{}\"}}", "x".repeat(MAX_PULL_RESPONSE_BYTES)); + let response = fastly::Response::from_status(StatusCode::OK).with_body(body); + + let uid = extract_pull_uid(response, "ssp_x"); + assert!(uid.is_none(), "should reject body larger than limit"); + } + + #[test] + fn rotating_offset_distributes_partners_across_hours() { + // Simulate 3 partners sorted by ID: alpha, beta, gamma. 
+ let ids = vec!["alpha", "beta", "gamma"]; + + // Hour 0: offset = 0 % 3 = 0 → [alpha, beta, gamma] + let ts_h0: u64 = 100; // within hour 0 + let offset_h0 = (ts_h0 / 3600) as usize % ids.len(); + assert_eq!(offset_h0, 0, "hour 0 should start at index 0"); + + // Hour 1: offset = (3600 / 3600) % 3 = 1 → [beta, gamma, alpha] + let offset_h1 = (3600u64 / 3600) as usize % ids.len(); + assert_eq!(offset_h1, 1, "hour 1 should start at index 1"); + + // Hour 2: offset = (7200 / 3600) % 3 = 2 → [gamma, alpha, beta] + let offset_h2 = (7200u64 / 3600) as usize % ids.len(); + assert_eq!(offset_h2, 2, "hour 2 should start at index 2"); + + // Hour 3: offset = (10800 / 3600) % 3 = 0 → wraps back to [alpha, beta, gamma] + let offset_h3 = (10800u64 / 3600) as usize % ids.len(); + assert_eq!(offset_h3, 0, "hour 3 should wrap back to index 0"); + + // Verify rotate_left produces expected ordering + let mut rotated = ids.clone(); + rotated.rotate_left(offset_h1); + assert_eq!( + rotated, + vec!["beta", "gamma", "alpha"], + "hour 1 rotation should move beta to front" + ); + } +} diff --git a/crates/trusted-server-core/src/ec/rate_limiter.rs b/crates/trusted-server-core/src/ec/rate_limiter.rs new file mode 100644 index 00000000..6157d3c4 --- /dev/null +++ b/crates/trusted-server-core/src/ec/rate_limiter.rs @@ -0,0 +1,174 @@ +//! Rate limiting abstraction for EC sync endpoints. +//! +//! Provides a [`RateLimiter`] trait and its Fastly Edge Rate Limiting +//! implementation [`FastlyRateLimiter`]. Used by batch sync and pull sync +//! for per-partner request rate enforcement. + +use error_stack::Report; +use fastly::erl::{CounterDuration, RateCounter}; + +use crate::error::TrustedServerError; + +/// Name of the Fastly rate counter resource used by sync rate limiting. +pub const RATE_COUNTER_NAME: &str = "counter_store"; + +/// Rate limiter abstraction for sync endpoints. 
+/// +/// Used by batch sync (`/_ts/api/v1/batch-sync`) and pull sync for +/// per-partner request rate enforcement. +pub trait RateLimiter { + /// Returns `true` when the rate limit has been exceeded for the given key. + /// + /// `hourly_limit` is currently approximated via a 60-second Fastly counter + /// window, so the effective budget rounds up to the next whole request per + /// minute. For example, `65/hr` becomes `2/min` (`120/hr` effective), and + /// any positive limit below `60/hr` rounds up to `1/min` (`60/hr` + /// effective). + /// + /// Implementations may use a read-then-increment counter API, so callers + /// should treat this as best-effort throttling: concurrent requests can + /// overshoot the configured limit by the in-flight burst size. + /// + /// # Errors + /// + /// Returns [`TrustedServerError`] on rate counter I/O failure. + fn exceeded(&self, key: &str, hourly_limit: u32) -> Result>; + + /// Returns `true` when the per-minute rate limit has been exceeded. + /// + /// # Errors + /// + /// Returns [`TrustedServerError`] on rate counter I/O failure. + fn exceeded_per_minute( + &self, + key: &str, + per_minute_limit: u32, + ) -> Result> { + // Default implementation maps a per-minute budget to the existing + // hourly API used by pixel sync. + self.exceeded(key, per_minute_limit.saturating_mul(60)) + } +} + +fn hourly_limit_to_per_minute_limit(hourly_limit: u32) -> u32 { + if hourly_limit == 0 { + return 0; + } + + let per_minute_limit = hourly_limit.saturating_add(59) / 60; + per_minute_limit.max(1) +} + +#[cfg(test)] +fn effective_hourly_limit(hourly_limit: u32) -> u32 { + hourly_limit_to_per_minute_limit(hourly_limit).saturating_mul(60) +} + +/// Fastly Edge Rate Limiting implementation of [`RateLimiter`]. +pub struct FastlyRateLimiter { + counter: RateCounter, +} + +impl FastlyRateLimiter { + /// Creates a new rate limiter backed by the named Fastly rate counter. 
+ #[must_use] + pub fn new(counter_name: &str) -> Self { + Self { + counter: RateCounter::open(counter_name), + } + } +} + +impl RateLimiter for FastlyRateLimiter { + fn exceeded(&self, key: &str, hourly_limit: u32) -> Result> { + // Fastly's public rate-counter API currently exposes windows up to 60s. + // Approximate the story's 1h limit by converting to a per-minute budget. + // + // Follow-up: move to exact 1-hour enforcement once platform counters + // expose longer windows or we add a dedicated KV-backed hour bucket. + let per_minute_limit = hourly_limit_to_per_minute_limit(hourly_limit); + if per_minute_limit == 0 { + return Ok(true); + } + + let current = self + .counter + .lookup_count(key, CounterDuration::SixtySecs) + .map_err(|e| { + Report::new(TrustedServerError::KvStore { + store_name: RATE_COUNTER_NAME.to_owned(), + message: format!("Failed to read sync rate counter: {e}"), + }) + })?; + + if current >= per_minute_limit { + return Ok(true); + } + + self.counter.increment(key, 1).map_err(|e| { + Report::new(TrustedServerError::KvStore { + store_name: RATE_COUNTER_NAME.to_owned(), + message: format!("Failed to increment sync rate counter: {e}"), + }) + })?; + + Ok(false) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn zero_hourly_limit_denies_all() { + assert_eq!( + hourly_limit_to_per_minute_limit(0), + 0, + "should preserve deny-all zero limit" + ); + assert_eq!( + effective_hourly_limit(0), + 0, + "should preserve effective zero limit" + ); + } + + #[test] + fn hourly_limit_rounds_up_to_whole_requests_per_minute() { + assert_eq!( + hourly_limit_to_per_minute_limit(65), + 2, + "should round 65/hr up to 2/min" + ); + assert_eq!( + effective_hourly_limit(65), + 120, + "should expose the resulting effective hourly budget" + ); + } + + #[test] + fn small_positive_hourly_limits_round_up_to_sixty_per_hour() { + assert_eq!( + hourly_limit_to_per_minute_limit(1), + 1, + "should round any positive sub-60 hourly limit up to 1/min" + ); + 
assert_eq!( + effective_hourly_limit(1), + 60, + "should enforce a 60/hr effective minimum with the current counter window" + ); + } + + #[test] + fn effective_hourly_limit_stays_within_hourly_plus_fifty_nine() { + for hourly_limit in [1, 10, 59, 60, 61, 65, 119, 120, 121, 600] { + assert!( + effective_hourly_limit(hourly_limit) <= hourly_limit.saturating_add(59), + "effective hourly limit should never overshoot by more than 59 requests" + ); + } + } +} diff --git a/crates/trusted-server-core/src/ec/registry.rs b/crates/trusted-server-core/src/ec/registry.rs new file mode 100644 index 00000000..612fe581 --- /dev/null +++ b/crates/trusted-server-core/src/ec/registry.rs @@ -0,0 +1,546 @@ +//! In-memory partner registry built from `[[ec.partners]]` configuration. +//! +//! Replaces the KV-backed [`PartnerStore`](super::partner::PartnerStore) with +//! a startup-validated, in-memory registry. Three `HashMap` indexes provide +//! O(1) lookup by partner ID, API key hash, and source domain. + +use std::collections::HashMap; + +use error_stack::{Report, ResultExt}; + +use crate::error::TrustedServerError; +use crate::redacted::Redacted; +use crate::settings::EcPartner; + +use super::partner::{hash_api_key, validate_partner_id}; + +/// Minimum length for inbound partner Bearer API tokens. +pub const MIN_API_TOKEN_LENGTH: usize = 32; + +/// Runtime-ready partner configuration with precomputed API key hash. +#[derive(Debug, Clone)] +pub struct PartnerConfig { + /// Unique partner identifier. + pub id: String, + /// Human-readable partner name. + pub name: String, + /// `OpenRTB` `source.domain` for EID entries. + pub source_domain: String, + /// `OpenRTB` `atype` value. + pub openrtb_atype: u8, + /// Whether this partner's UIDs appear in auction `user.eids`. + pub bidstream_enabled: bool, + /// SHA-256 hex of the partner's API token (precomputed at startup). + pub api_key_hash: String, + /// Max batch sync API requests per partner per minute. 
+ pub batch_rate_limit: u32, + /// Whether server-to-server pull sync is enabled. + pub pull_sync_enabled: bool, + /// URL to call for pull sync. + pub pull_sync_url: Option, + /// Allowlist of domains TS may call for this partner's pull sync. + pub pull_sync_allowed_domains: Vec, + /// Legacy seconds between pull sync refreshes retained for config compatibility. + /// + /// Pull sync currently fills missing partner UIDs only, so this value is + /// not used to refresh existing UIDs. + pub pull_sync_ttl_sec: u64, + /// Max pull sync calls per EC hash per partner per hour. + pub pull_sync_rate_limit: u32, + /// Outbound bearer token for pull sync requests. + pub ts_pull_token: Option>, +} + +/// In-memory partner registry with O(1) lookups by ID, API key hash, +/// and source domain. +/// +/// Built once at startup from `[[ec.partners]]` in `trusted-server.toml`. +/// All validation (ID format, duplicate detection, pull sync consistency) +/// happens during construction. +#[derive(Debug, Clone)] +pub struct PartnerRegistry { + by_id: HashMap, + by_api_key_hash: HashMap, + by_source_domain: HashMap, +} + +impl PartnerRegistry { + /// Builds a registry from the config-defined partner list. + /// + /// # Errors + /// + /// Returns [`TrustedServerError::Configuration`] if any partner has an + /// invalid ID, duplicate ID, duplicate API token hash, duplicate source + /// domain, or invalid pull sync configuration. 
+ pub fn from_config(partners: &[EcPartner]) -> Result> { + let mut by_id = HashMap::with_capacity(partners.len()); + let mut by_api_key_hash = HashMap::with_capacity(partners.len()); + let mut by_source_domain = HashMap::with_capacity(partners.len()); + + for partner in partners { + validate_partner_id(&partner.id).map_err(|msg| { + Report::new(TrustedServerError::Configuration { + message: format!("ec.partners: {msg}"), + }) + })?; + + if by_id.contains_key(&partner.id) { + return Err(Report::new(TrustedServerError::Configuration { + message: format!("ec.partners: duplicate partner ID '{}'", partner.id), + })); + } + + validate_api_token(&partner.id, partner.api_token.expose())?; + + let api_key_hash = hash_api_key(partner.api_token.expose()); + + if by_api_key_hash.contains_key(&api_key_hash) { + return Err(Report::new(TrustedServerError::Configuration { + message: format!( + "ec.partners: partner '{}' has an API token that collides \ + with another partner's token hash", + partner.id + ), + })); + } + + let normalized_source = partner.source_domain.to_ascii_lowercase(); + if by_source_domain.contains_key(&normalized_source) { + return Err(Report::new(TrustedServerError::Configuration { + message: format!( + "ec.partners: duplicate source_domain '{}' (partner '{}')", + partner.source_domain, partner.id + ), + })); + } + + let config = build_partner_config(partner, &api_key_hash)?; + + validate_rate_limits(&config).change_context(TrustedServerError::Configuration { + message: format!("ec.partners: invalid rate limits for '{}'", partner.id), + })?; + + if config.pull_sync_enabled { + validate_pull_sync(&config).change_context(TrustedServerError::Configuration { + message: format!("ec.partners: pull sync config invalid for '{}'", partner.id), + })?; + } + + by_api_key_hash.insert(api_key_hash, partner.id.clone()); + by_source_domain.insert(normalized_source, partner.id.clone()); + by_id.insert(partner.id.clone(), config); + } + + Ok(Self { + by_id, + 
by_api_key_hash, + by_source_domain, + }) + } + + /// Returns an empty registry (no partners configured). + #[must_use] + pub fn empty() -> Self { + Self { + by_id: HashMap::new(), + by_api_key_hash: HashMap::new(), + by_source_domain: HashMap::new(), + } + } + + /// Looks up a partner by ID. + #[must_use] + pub fn get(&self, partner_id: &str) -> Option<&PartnerConfig> { + self.by_id.get(partner_id) + } + + /// Looks up a partner by the SHA-256 hex hash of their API token. + #[must_use] + pub fn find_by_api_key_hash(&self, hash: &str) -> Option<&PartnerConfig> { + self.by_api_key_hash + .get(hash) + .and_then(|id| self.by_id.get(id)) + } + + /// Looks up a partner by their `source_domain` (case-insensitive). + #[must_use] + pub fn find_by_source_domain(&self, domain: &str) -> Option<&PartnerConfig> { + self.by_source_domain + .get(&domain.to_ascii_lowercase()) + .and_then(|id| self.by_id.get(id)) + } + + /// Returns all partners with `pull_sync_enabled = true`. + #[must_use] + pub fn pull_enabled_partners(&self) -> Vec<&PartnerConfig> { + self.by_id + .values() + .filter(|p| p.pull_sync_enabled) + .collect() + } + + /// Returns an iterator over all configured partners. + /// + /// Iteration order is unspecified; callers that need determinism should + /// sort by partner ID before consuming the results. + pub fn all(&self) -> impl Iterator { + self.by_id.values() + } + + /// Returns the number of configured partners. + #[must_use] + pub fn len(&self) -> usize { + self.by_id.len() + } + + /// Returns `true` if no partners are configured. 
+ #[must_use] + pub fn is_empty(&self) -> bool { + self.by_id.is_empty() + } +} + +fn validate_api_token(partner_id: &str, api_token: &str) -> Result<(), Report> { + if api_token.trim().is_empty() { + return Err(Report::new(TrustedServerError::Configuration { + message: format!("ec.partners: partner '{partner_id}' api_token must not be empty"), + })); + } + + if api_token.len() < MIN_API_TOKEN_LENGTH { + return Err(Report::new(TrustedServerError::Configuration { + message: format!( + "ec.partners: partner '{partner_id}' api_token must be at least {MIN_API_TOKEN_LENGTH} bytes" + ), + })); + } + + Ok(()) +} + +fn build_partner_config( + partner: &EcPartner, + api_key_hash: &str, +) -> Result> { + Ok(PartnerConfig { + id: partner.id.clone(), + name: partner.name.clone(), + source_domain: partner.source_domain.clone(), + openrtb_atype: partner.openrtb_atype, + bidstream_enabled: partner.bidstream_enabled, + api_key_hash: api_key_hash.to_owned(), + batch_rate_limit: partner.batch_rate_limit, + pull_sync_enabled: partner.pull_sync_enabled, + pull_sync_url: partner.pull_sync_url.clone(), + pull_sync_allowed_domains: partner.pull_sync_allowed_domains.clone(), + pull_sync_ttl_sec: partner.pull_sync_ttl_sec, + pull_sync_rate_limit: partner.pull_sync_rate_limit, + ts_pull_token: partner.ts_pull_token.clone(), + }) +} + +fn validate_rate_limits(config: &PartnerConfig) -> Result<(), Report> { + if config.batch_rate_limit == 0 { + return Err(Report::new(TrustedServerError::Configuration { + message: "batch_rate_limit must be greater than 0".to_owned(), + })); + } + + if config.pull_sync_rate_limit == 0 { + return Err(Report::new(TrustedServerError::Configuration { + message: "pull_sync_rate_limit must be greater than 0".to_owned(), + })); + } + + Ok(()) +} + +fn validate_pull_sync(config: &PartnerConfig) -> Result<(), Report> { + let url_str = config.pull_sync_url.as_deref().unwrap_or(""); + if url_str.is_empty() { + return Err(Report::new(TrustedServerError::Configuration { + 
message: "pull_sync_url is required when pull_sync_enabled is true".to_owned(), + })); + } + + if config + .ts_pull_token + .as_ref() + .map(|token| token.expose().trim().is_empty()) + .unwrap_or(true) + { + return Err(Report::new(TrustedServerError::Configuration { + message: "ts_pull_token is required when pull_sync_enabled is true".to_owned(), + })); + } + + let parsed = url::Url::parse(url_str).map_err(|e| { + Report::new(TrustedServerError::Configuration { + message: format!("pull_sync_url is not a valid URL: {e}"), + }) + })?; + + if parsed.scheme() != "https" { + return Err(Report::new(TrustedServerError::Configuration { + message: format!( + "pull_sync_url must use HTTPS, got scheme '{}'", + parsed.scheme() + ), + })); + } + + let host = parsed + .host_str() + .ok_or_else(|| { + Report::new(TrustedServerError::Configuration { + message: "pull_sync_url has no hostname".to_owned(), + }) + })? + .trim_end_matches('.') + .to_ascii_lowercase(); + + let domain_match = config.pull_sync_allowed_domains.iter().any(|d| { + let normalized = d.trim_end_matches('.').to_ascii_lowercase(); + host == normalized + }); + + if !domain_match { + return Err(Report::new(TrustedServerError::Configuration { + message: format!("pull_sync_url hostname '{host}' not in pull_sync_allowed_domains"), + })); + } + + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::redacted::Redacted; + + fn valid_api_token(label: &str) -> String { + format!("{label}-api-token-32-bytes-minimum") + } + + fn make_partner(id: &str, source_domain: &str, api_token: &str) -> EcPartner { + EcPartner { + id: id.to_owned(), + name: format!("Partner {id}"), + source_domain: source_domain.to_owned(), + openrtb_atype: EcPartner::default_openrtb_atype(), + bidstream_enabled: false, + api_token: Redacted::new(api_token.to_owned()), + batch_rate_limit: EcPartner::default_batch_rate_limit(), + pull_sync_enabled: false, + pull_sync_url: None, + pull_sync_allowed_domains: vec![], + pull_sync_ttl_sec: 
EcPartner::default_pull_sync_ttl_sec(), + pull_sync_rate_limit: EcPartner::default_pull_sync_rate_limit(), + ts_pull_token: None, + } + } + + #[test] + fn empty_config_builds_empty_registry() { + let registry = PartnerRegistry::from_config(&[]).expect("should build empty registry"); + assert!(registry.is_empty(), "should have no partners"); + } + + #[test] + fn lookup_by_id_returns_configured_partner() { + let partners = vec![make_partner( + "ssp_x", + "ssp.example.com", + &valid_api_token("token-a"), + )]; + let registry = PartnerRegistry::from_config(&partners).expect("should build registry"); + + let found = registry.get("ssp_x"); + assert!(found.is_some(), "should find partner by ID"); + assert_eq!( + found.expect("should exist").source_domain, + "ssp.example.com", + "should match source domain" + ); + } + + #[test] + fn lookup_by_api_key_hash_returns_partner() { + let partners = vec![make_partner( + "ssp_x", + "ssp.example.com", + &valid_api_token("my-secret"), + )]; + let registry = PartnerRegistry::from_config(&partners).expect("should build registry"); + + let hash = hash_api_key(&valid_api_token("my-secret")); + let found = registry.find_by_api_key_hash(&hash); + assert!(found.is_some(), "should find partner by API key hash"); + assert_eq!( + found.expect("should exist").id, + "ssp_x", + "should match partner ID" + ); + } + + #[test] + fn lookup_by_source_domain_is_case_insensitive() { + let partners = vec![make_partner( + "ssp_x", + "SSP.Example.Com", + &valid_api_token("token-a"), + )]; + let registry = PartnerRegistry::from_config(&partners).expect("should build registry"); + + let found = registry.find_by_source_domain("ssp.example.com"); + assert!( + found.is_some(), + "should find partner by lowercase source domain" + ); + } + + #[test] + fn duplicate_partner_id_is_rejected() { + let partners = vec![ + make_partner("ssp_x", "a.com", &valid_api_token("token-a")), + make_partner("ssp_x", "b.com", &valid_api_token("token-b")), + ]; + let result = 
PartnerRegistry::from_config(&partners); + assert!(result.is_err(), "should reject duplicate partner ID"); + } + + #[test] + fn duplicate_source_domain_is_rejected() { + let partners = vec![ + make_partner("ssp_a", "same.com", &valid_api_token("token-a")), + make_partner("ssp_b", "same.com", &valid_api_token("token-b")), + ]; + let result = PartnerRegistry::from_config(&partners); + assert!(result.is_err(), "should reject duplicate source domain"); + } + + #[test] + fn reserved_partner_id_is_rejected() { + let partners = vec![make_partner( + "ec", + "ec.example.com", + &valid_api_token("token-a"), + )]; + let result = PartnerRegistry::from_config(&partners); + assert!(result.is_err(), "should reject reserved partner ID 'ec'"); + } + + #[test] + fn pull_enabled_partners_filters_correctly() { + let mut pull_partner = + make_partner("puller", "pull.example.com", &valid_api_token("token-p")); + pull_partner.pull_sync_enabled = true; + pull_partner.pull_sync_url = Some("https://pull.example.com/sync".to_owned()); + pull_partner.pull_sync_allowed_domains = vec!["pull.example.com".to_owned()]; + pull_partner.ts_pull_token = Some(Redacted::new("outbound-token".to_owned())); + + let partners = vec![ + make_partner( + "no_pull", + "nopull.example.com", + &valid_api_token("token-np"), + ), + pull_partner, + ]; + let registry = PartnerRegistry::from_config(&partners).expect("should build registry"); + + let pull_enabled = registry.pull_enabled_partners(); + assert_eq!( + pull_enabled.len(), + 1, + "should have exactly one pull-enabled partner" + ); + assert_eq!( + pull_enabled[0].id, "puller", + "should be the correct partner" + ); + assert_eq!( + pull_enabled[0] + .ts_pull_token + .as_ref() + .expect("should keep pull token") + .expose(), + "outbound-token", + "should preserve the token without unwrapping it in the registry" + ); + } + + #[test] + fn partner_debug_output_redacts_pull_token() { + let mut partner = make_partner("puller", "pull.example.com", 
&valid_api_token("token-p")); + partner.pull_sync_enabled = true; + partner.pull_sync_url = Some("https://pull.example.com/sync".to_owned()); + partner.pull_sync_allowed_domains = vec!["pull.example.com".to_owned()]; + partner.ts_pull_token = Some(Redacted::new("outbound-token".to_owned())); + + let registry = PartnerRegistry::from_config(&[partner]).expect("should build registry"); + let configured = registry + .get("puller") + .expect("should find configured partner"); + + let debug_output = format!("{configured:?}"); + assert!( + !debug_output.contains("outbound-token"), + "should not expose the pull token in debug output" + ); + assert!( + debug_output.contains("[REDACTED]"), + "should render the pull token through Redacted debug output" + ); + } + + #[test] + fn empty_api_token_is_rejected() { + let partner = make_partner("ssp_x", "ssp.example.com", " "); + + let result = PartnerRegistry::from_config(&[partner]); + assert!(result.is_err(), "should reject empty api_token"); + } + + #[test] + fn short_api_token_is_rejected() { + let partner = make_partner("ssp_x", "ssp.example.com", "short-token"); + + let result = PartnerRegistry::from_config(&[partner]); + assert!(result.is_err(), "should reject short api_token"); + } + + #[test] + fn api_token_at_minimum_length_is_accepted() { + let token = "x".repeat(MIN_API_TOKEN_LENGTH); + let partners = vec![make_partner("ssp_x", "ssp.example.com", &token)]; + + let registry = PartnerRegistry::from_config(&partners).expect("should build registry"); + assert!( + registry.get("ssp_x").is_some(), + "should accept minimum-length token" + ); + } + + #[test] + fn zero_batch_rate_limit_is_rejected() { + let mut partner = make_partner("ssp_x", "ssp.example.com", &valid_api_token("token-a")); + partner.batch_rate_limit = 0; + + let result = PartnerRegistry::from_config(&[partner]); + assert!(result.is_err(), "should reject zero batch_rate_limit"); + } + + #[test] + fn zero_pull_sync_rate_limit_is_rejected() { + let mut partner = 
make_partner("puller", "pull.example.com", &valid_api_token("token-p")); + partner.pull_sync_enabled = true; + partner.pull_sync_url = Some("https://pull.example.com/sync".to_owned()); + partner.pull_sync_allowed_domains = vec!["pull.example.com".to_owned()]; + partner.pull_sync_rate_limit = 0; + partner.ts_pull_token = Some(Redacted::new("outbound-token".to_owned())); + + let result = PartnerRegistry::from_config(&[partner]); + assert!(result.is_err(), "should reject zero pull_sync_rate_limit"); + } +} diff --git a/crates/trusted-server-core/src/edge_cookie.rs b/crates/trusted-server-core/src/edge_cookie.rs deleted file mode 100644 index 156e8ee8..00000000 --- a/crates/trusted-server-core/src/edge_cookie.rs +++ /dev/null @@ -1,466 +0,0 @@ -//! Edge Cookie (EC) ID generation using HMAC. -//! -//! This module provides functionality for generating privacy-preserving EC IDs -//! based on the client IP address and a secret key. - -use std::net::IpAddr; - -use edgezero_core::body::Body as EdgeBody; -use error_stack::{Report, ResultExt}; -use fastly::Request as FastlyRequest; -use hmac::{Hmac, Mac}; -use rand::Rng; -use sha2::Sha256; - -use crate::compat; -use crate::constants::{COOKIE_TS_EC, HEADER_X_TS_EC}; -use crate::cookies::{ec_id_has_only_allowed_chars, handle_request_cookies}; -use crate::error::TrustedServerError; -use crate::platform::RuntimeServices; -use crate::settings::Settings; - -type HmacSha256 = Hmac; - -const ALPHANUMERIC_CHARSET: &[u8] = - b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; - -/// Normalizes an IP address for stable EC ID generation. -/// -/// For IPv6 addresses, masks to /64 prefix to handle Privacy Extensions -/// where devices rotate their interface identifier (lower 64 bits). -/// IPv4 addresses are returned unchanged. 
-fn normalize_ip(ip: IpAddr) -> String { - match ip { - IpAddr::V4(ipv4) => ipv4.to_string(), - IpAddr::V6(ipv6) => { - let segments = ipv6.segments(); - // Keep only the first 4 segments (64 bits) for /64 prefix - format!( - "{:x}:{:x}:{:x}:{:x}::", - segments[0], segments[1], segments[2], segments[3] - ) - } - } -} - -/// Generates a random alphanumeric string of the specified length. -fn generate_random_suffix(length: usize) -> String { - let mut rng = rand::thread_rng(); - (0..length) - .map(|_| { - let idx = rng.gen_range(0..ALPHANUMERIC_CHARSET.len()); - ALPHANUMERIC_CHARSET[idx] as char - }) - .collect() -} - -/// Generates a fresh EC ID based on client IP address. -/// -/// Uses only the client IP (not user-agent or other headers) intentionally: -/// EC IDs are meant to be simple, privacy-preserving identifiers — not -/// high-entropy fingerprints. The random suffix provides per-cookie -/// uniqueness for users behind the same NAT/proxy. -/// -/// Creates an HMAC-SHA256-based ID using the configured secret key and -/// the client IP address, then appends a random suffix for additional -/// uniqueness. The resulting format is `{64hex}.{6alnum}`. -/// -/// # Errors -/// -/// - [`TrustedServerError::Ec`] if HMAC generation fails -pub fn generate_ec_id( - settings: &Settings, - services: &RuntimeServices, -) -> Result> { - // Fallback to "unknown" when client IP is unavailable (e.g., local testing). - // All such requests share the same HMAC base; the random suffix provides uniqueness. 
- let client_ip = services - .client_info - .client_ip - .map(normalize_ip) - .unwrap_or_else(|| "unknown".to_string()); - - log::trace!("Input for fresh EC ID: client_ip={}", client_ip); - - let mut mac = HmacSha256::new_from_slice(settings.edge_cookie.secret_key.expose().as_bytes()) - .change_context(TrustedServerError::Ec { - message: "Failed to create HMAC instance".to_string(), - })?; - mac.update(client_ip.as_bytes()); - let hmac_hash = hex::encode(mac.finalize().into_bytes()); - - // Append random 6-character alphanumeric suffix for additional uniqueness - let random_suffix = generate_random_suffix(6); - let ec_id = format!("{hmac_hash}.{random_suffix}"); - - log::trace!("Generated fresh EC ID: {}", ec_id); - - Ok(ec_id) -} - -/// Gets an existing EC ID from the request. -/// -/// Attempts to retrieve an existing EC ID from: -/// 1. The `x-ts-ec` header -/// 2. The `ts-ec` cookie -/// -/// Returns `None` if neither source contains an EC ID. -/// -/// # Errors -/// -/// - [`TrustedServerError::InvalidHeaderValue`] if cookie parsing fails -pub(crate) fn get_ec_id_from_http_request( - req: &http::Request, -) -> Result, Report> { - if let Some(ec_id) = req - .headers() - .get(HEADER_X_TS_EC) - .and_then(|h| h.to_str().ok()) - { - if ec_id_has_only_allowed_chars(ec_id) { - log::trace!("Using existing EC ID from header: {}", ec_id); - return Ok(Some(ec_id.to_string())); - } - log::warn!("Rejected EC ID from x-ts-ec header with disallowed characters"); - } - - match handle_request_cookies(req)? { - Some(jar) => { - if let Some(cookie) = jar.get(COOKIE_TS_EC) { - let value = cookie.value(); - if ec_id_has_only_allowed_chars(value) { - log::trace!("Using existing EC ID from cookie: {}", value); - return Ok(Some(value.to_string())); - } - log::warn!("Rejected EC ID from cookie with disallowed characters"); - } - } - None => { - log::debug!("No cookie header found in request"); - } - } - - Ok(None) -} - -/// Gets an existing EC ID from a Fastly request. 
-/// -/// # Errors -/// -/// - [`TrustedServerError::InvalidHeaderValue`] if cookie parsing fails -pub fn get_ec_id(req: &FastlyRequest) -> Result, Report> { - let http_req = compat::from_fastly_headers_ref(req); - get_ec_id_from_http_request(&http_req) -} - -/// Gets or creates an EC ID from the request. -/// -/// Attempts to retrieve an existing EC ID from: -/// 1. The `x-ts-ec` header -/// 2. The `ts-ec` cookie -/// -/// If neither exists, generates a new EC ID. -/// -/// # Errors -/// -/// Returns an error if ID generation fails. -pub(crate) fn get_or_generate_ec_id_from_http_request( - settings: &Settings, - services: &RuntimeServices, - req: &http::Request, -) -> Result> { - if let Some(id) = get_ec_id_from_http_request(req)? { - return Ok(id); - } - - // If no existing EC ID found, generate a fresh one - let ec_id = generate_ec_id(settings, services)?; - log::trace!("No existing EC ID, generated: {}", ec_id); - Ok(ec_id) -} - -/// Gets or creates an EC ID from a Fastly request. -/// -/// # Errors -/// -/// Returns an error if ID generation fails. 
-pub fn get_or_generate_ec_id( - settings: &Settings, - services: &RuntimeServices, - req: &FastlyRequest, -) -> Result> { - let http_req = compat::from_fastly_headers_ref(req); - get_or_generate_ec_id_from_http_request(settings, services, &http_req) -} - -#[cfg(test)] -mod tests { - use super::*; - use fastly::http::{HeaderName, HeaderValue}; - use std::net::{Ipv4Addr, Ipv6Addr}; - - use crate::platform::test_support::{noop_services, noop_services_with_client_ip}; - use crate::test_support::tests::create_test_settings; - - #[test] - fn test_normalize_ip_ipv4_unchanged() { - let ipv4 = IpAddr::V4(Ipv4Addr::new(192, 168, 1, 100)); - assert_eq!(normalize_ip(ipv4), "192.168.1.100"); - } - - #[test] - fn test_normalize_ip_ipv6_masks_to_64() { - // Full IPv6 address with interface identifier - let ipv6 = IpAddr::V6(Ipv6Addr::new( - 0x2001, 0x0db8, 0x85a3, 0x0000, 0x8a2e, 0x0370, 0x7334, 0x1234, - )); - assert_eq!(normalize_ip(ipv6), "2001:db8:85a3:0::"); - } - - #[test] - fn test_normalize_ip_ipv6_different_suffix_same_prefix() { - // Two IPv6 addresses with same /64 prefix but different interface identifiers - // (simulating Privacy Extensions rotation) - let ipv6_a = IpAddr::V6(Ipv6Addr::new( - 0x2001, 0x0db8, 0xabcd, 0x0001, 0x1111, 0x2222, 0x3333, 0x4444, - )); - let ipv6_b = IpAddr::V6(Ipv6Addr::new( - 0x2001, 0x0db8, 0xabcd, 0x0001, 0xaaaa, 0xbbbb, 0xcccc, 0xdddd, - )); - // Both should normalize to the same /64 prefix - assert_eq!(normalize_ip(ipv6_a), normalize_ip(ipv6_b)); - assert_eq!(normalize_ip(ipv6_a), "2001:db8:abcd:1::"); - } - - fn create_test_request(headers: Vec<(HeaderName, &str)>) -> FastlyRequest { - let mut req = FastlyRequest::new("GET", "http://example.com"); - for (key, value) in headers { - req.set_header( - key, - HeaderValue::from_str(value).expect("should create valid header value"), - ); - } - - req - } - - fn is_ec_id_format(value: &str) -> bool { - let mut parts = value.split('.'); - let hmac_part = match parts.next() { - Some(part) => 
part, - None => return false, - }; - let suffix_part = match parts.next() { - Some(part) => part, - None => return false, - }; - if parts.next().is_some() { - return false; - } - if hmac_part.len() != 64 || suffix_part.len() != 6 { - return false; - } - if !hmac_part.chars().all(|c| c.is_ascii_hexdigit()) { - return false; - } - if !suffix_part.chars().all(|c| c.is_ascii_alphanumeric()) { - return false; - } - true - } - - #[test] - fn test_generate_ec_id() { - let settings: Settings = create_test_settings(); - - let ec_id = generate_ec_id(&settings, &noop_services()).expect("should generate EC ID"); - log::debug!("Generated EC ID: {}", ec_id); - assert!( - is_ec_id_format(&ec_id), - "should match EC ID format: {{64hex}}.{{6alnum}}" - ); - } - - #[test] - fn test_generate_ec_id_uses_client_ip() { - let settings = create_test_settings(); - let ip = IpAddr::V4(Ipv4Addr::new(203, 0, 113, 1)); - - let id_with_ip = generate_ec_id(&settings, &noop_services_with_client_ip(ip)) - .expect("should generate EC ID with client IP"); - let id_without_ip = generate_ec_id(&settings, &noop_services()) - .expect("should generate EC ID without client IP"); - - let hmac_with_ip = id_with_ip.split_once('.').expect("should contain dot").0; - let hmac_without_ip = id_without_ip.split_once('.').expect("should contain dot").0; - - assert_ne!( - hmac_with_ip, hmac_without_ip, - "should produce different HMAC when client IP differs" - ); - } - - #[test] - fn test_is_ec_id_format_accepts_valid_value() { - let value = format!("{}.{}", "a".repeat(64), "Ab12z9"); - assert!( - is_ec_id_format(&value), - "should accept a valid EC ID format" - ); - } - - #[test] - fn test_is_ec_id_format_rejects_invalid_values() { - let missing_suffix = "a".repeat(64); - assert!( - !is_ec_id_format(&missing_suffix), - "should reject missing suffix" - ); - - let invalid_hex = format!("{}.{}", "a".repeat(63) + "g", "Ab12z9"); - assert!( - !is_ec_id_format(&invalid_hex), - "should reject non-hex HMAC content" - ); - - 
let invalid_suffix = format!("{}.{}", "a".repeat(64), "ab-129"); - assert!( - !is_ec_id_format(&invalid_suffix), - "should reject non-alphanumeric suffix" - ); - - let extra_segment = format!("{}.{}.{}", "a".repeat(64), "Ab12z9", "zz"); - assert!( - !is_ec_id_format(&extra_segment), - "should reject extra segments" - ); - } - - #[test] - fn test_get_ec_id_with_header() { - let settings = create_test_settings(); - let req = create_test_request(vec![(HEADER_X_TS_EC, "existing_ec_id")]); - - let ec_id = get_ec_id(&req).expect("should get EC ID"); - assert_eq!(ec_id, Some("existing_ec_id".to_string())); - - let ec_id = get_or_generate_ec_id(&settings, &noop_services(), &req) - .expect("should reuse header EC ID"); - assert_eq!(ec_id, "existing_ec_id"); - } - - #[test] - fn test_get_ec_id_with_cookie() { - let settings = create_test_settings(); - let req = create_test_request(vec![( - fastly::http::header::COOKIE, - &format!("{}=existing_cookie_id", COOKIE_TS_EC), - )]); - - let ec_id = get_ec_id(&req).expect("should get EC ID"); - assert_eq!(ec_id, Some("existing_cookie_id".to_string())); - - let ec_id = get_or_generate_ec_id(&settings, &noop_services(), &req) - .expect("should reuse cookie EC ID"); - assert_eq!(ec_id, "existing_cookie_id"); - } - - #[test] - fn test_get_ec_id_from_http_request_with_header() { - let req = http::Request::builder() - .method("GET") - .uri("http://example.com") - .header(HEADER_X_TS_EC, "existing_http_ec_id") - .body(edgezero_core::body::Body::empty()) - .expect("should build test request"); - - let ec_id = get_ec_id_from_http_request(&req).expect("should get EC ID from http request"); - - assert_eq!(ec_id, Some("existing_http_ec_id".to_string())); - } - - #[test] - fn test_get_or_generate_ec_id_from_http_request_reuses_cookie() { - let settings = create_test_settings(); - let req = http::Request::builder() - .method("GET") - .uri("http://example.com") - .header( - fastly::http::header::COOKIE, - format!("{}=existing_http_cookie_id", 
COOKIE_TS_EC), - ) - .body(edgezero_core::body::Body::empty()) - .expect("should build test request"); - - let ec_id = get_or_generate_ec_id_from_http_request(&settings, &noop_services(), &req) - .expect("should reuse cookie EC ID from http request"); - - assert_eq!(ec_id, "existing_http_cookie_id"); - } - - #[test] - fn test_get_ec_id_none() { - let req = create_test_request(vec![]); - let ec_id = get_ec_id(&req).expect("should handle missing ID"); - assert!(ec_id.is_none()); - } - - #[test] - fn test_get_or_generate_ec_id_generate_new() { - let settings = create_test_settings(); - let req = create_test_request(vec![]); - - let ec_id = get_or_generate_ec_id(&settings, &noop_services(), &req) - .expect("should get or generate EC ID"); - assert!(!ec_id.is_empty()); - } - - #[test] - fn test_get_ec_id_rejects_invalid_header_and_falls_back_to_cookie() { - let req = create_test_request(vec![ - (HEADER_X_TS_EC, "evil;injected"), - ( - fastly::http::header::COOKIE, - &format!("{}=valid_cookie_id", COOKIE_TS_EC), - ), - ]); - - let ec_id = get_ec_id(&req).expect("should handle invalid header gracefully"); - assert_eq!( - ec_id, - Some("valid_cookie_id".to_string()), - "should reject tampered header and fall back to valid cookie" - ); - } - - #[test] - fn test_get_or_generate_ec_id_replaces_invalid_header() { - let settings = create_test_settings(); - let req = create_test_request(vec![(HEADER_X_TS_EC, "evil;injected")]); - - let ec_id = get_or_generate_ec_id(&settings, &noop_services(), &req) - .expect("should generate fresh ID on invalid header"); - assert_ne!( - ec_id, "evil;injected", - "should not use tampered header value" - ); - assert!( - is_ec_id_format(&ec_id), - "should generate a valid EC ID format when header is rejected" - ); - } - - #[test] - fn test_get_ec_id_rejects_invalid_cookie() { - let req = create_test_request(vec![( - fastly::http::header::COOKIE, - &format!("{}=bad"#; - let params = OwnedProcessResponseParams { - content_encoding: String::new(), - 
origin_host: "origin.example.com".to_string(), - origin_url: "https://origin.example.com".to_string(), - request_host: "proxy.example.com".to_string(), - request_scheme: "https".to_string(), - content_type: "text/html".to_string(), - }; - - let mut output = Vec::new(); - stream_publisher_body( - Body::from(html.to_vec()), - &mut output, - ¶ms, - &settings, - ®istry, - ) - .expect("should process RSC push"); - - let processed = String::from_utf8(output).expect("valid UTF-8"); - assert!( - !processed.contains("__ts_rsc_payload_"), - "placeholder must be substituted before reaching output. Got: {processed}" - ); - assert!( - processed.contains("proxy.example.com/page"), - "origin URL must be rewritten in the substituted payload. Got: {processed}" - ); - assert!( - !processed.contains("origin.example.com"), - "origin host must not leak. Got: {processed}" - ); - } } diff --git a/crates/trusted-server-core/src/request_signing/endpoints.rs b/crates/trusted-server-core/src/request_signing/endpoints.rs index 988d747f..5afb175e 100644 --- a/crates/trusted-server-core/src/request_signing/endpoints.rs +++ b/crates/trusted-server-core/src/request_signing/endpoints.rs @@ -601,7 +601,7 @@ mod tests { #[test] fn test_handle_rotate_key_with_empty_body() { let settings = crate::test_support::tests::create_test_settings(); - let req = Request::new(Method::POST, "https://test.com/admin/keys/rotate"); + let req = Request::new(Method::POST, "https://test.com/_ts/admin/keys/rotate"); let mut resp = handle_rotate_key(&settings, &noop_services(), req) .expect("should return a response even when stores are unavailable"); @@ -635,7 +635,7 @@ mod tests { }; let body_json = serde_json::to_string(&req_body).expect("should serialize rotate request"); - let mut req = Request::new(Method::POST, "https://test.com/admin/keys/rotate"); + let mut req = Request::new(Method::POST, "https://test.com/_ts/admin/keys/rotate"); req.set_body(body_json); let mut resp = handle_rotate_key(&settings, 
&noop_services(), req) @@ -664,7 +664,7 @@ mod tests { #[test] fn test_handle_rotate_key_invalid_json() { let settings = crate::test_support::tests::create_test_settings(); - let mut req = Request::new(Method::POST, "https://test.com/admin/keys/rotate"); + let mut req = Request::new(Method::POST, "https://test.com/_ts/admin/keys/rotate"); req.set_body("invalid json"); let result = handle_rotate_key(&settings, &noop_services(), req); @@ -720,7 +720,7 @@ mod tests { let body_json = serde_json::to_string(&req_body).expect("should serialize deactivate request"); - let mut req = Request::new(Method::POST, "https://test.com/admin/keys/deactivate"); + let mut req = Request::new(Method::POST, "https://test.com/_ts/admin/keys/deactivate"); req.set_body(body_json); let mut resp = handle_deactivate_key(&settings, &noop_services(), req) @@ -757,7 +757,7 @@ mod tests { let body_json = serde_json::to_string(&req_body).expect("should serialize deactivate request"); - let mut req = Request::new(Method::POST, "https://test.com/admin/keys/deactivate"); + let mut req = Request::new(Method::POST, "https://test.com/_ts/admin/keys/deactivate"); req.set_body(body_json); let mut resp = handle_deactivate_key(&settings, &noop_services(), req) @@ -790,7 +790,7 @@ mod tests { #[test] fn test_handle_deactivate_key_invalid_json() { let settings = crate::test_support::tests::create_test_settings(); - let mut req = Request::new(Method::POST, "https://test.com/admin/keys/deactivate"); + let mut req = Request::new(Method::POST, "https://test.com/_ts/admin/keys/deactivate"); req.set_body("invalid json"); let result = handle_deactivate_key(&settings, &noop_services(), req); diff --git a/crates/trusted-server-core/src/rsc_flight.rs b/crates/trusted-server-core/src/rsc_flight.rs index 6bd17366..309e9505 100644 --- a/crates/trusted-server-core/src/rsc_flight.rs +++ b/crates/trusted-server-core/src/rsc_flight.rs @@ -1,7 +1,3 @@ -//! RSC flight data processor. -//! -//! 
See [`crate::platform`] module doc for platform notes. - use std::io; use crate::host_rewrite::rewrite_bare_host_at_boundaries; diff --git a/crates/trusted-server-core/src/settings.rs b/crates/trusted-server-core/src/settings.rs index d851aff7..bd960a34 100644 --- a/crates/trusted-server-core/src/settings.rs +++ b/crates/trusted-server-core/src/settings.rs @@ -5,6 +5,7 @@ use serde::{de::DeserializeOwned, Deserialize, Deserializer, Serialize}; use serde_json::Value as JsonValue; use std::collections::HashMap; use std::ops::{Deref, DerefMut}; +use std::str::FromStr; use std::sync::OnceLock; use url::Url; use validator::{Validate, ValidationError}; @@ -19,7 +20,10 @@ pub const ENVIRONMENT_VARIABLE_SEPARATOR: &str = "__"; #[derive(Debug, Default, Clone, Deserialize, Serialize, Validate)] pub struct Publisher { + #[validate(custom(function = validate_publisher_domain))] pub domain: String, + /// Domain for non-EC cookies. EC cookies use a separate computed domain + /// (see [`ec_cookie_domain`](Self::ec_cookie_domain)). #[validate(custom(function = validate_cookie_domain))] pub cookie_domain: String, #[validate(custom(function = validate_no_trailing_slash))] @@ -34,6 +38,17 @@ impl Publisher { /// Known placeholder values that must not be used in production. pub const PROXY_SECRET_PLACEHOLDERS: &[&str] = &["change-me-proxy-secret", "proxy-secret"]; + /// Returns the EC cookie domain, computed as `.{domain}`. + /// + /// Per spec §5.2, EC cookies derive their domain from + /// `publisher.domain` — **not** from `publisher.cookie_domain`. + /// This ensures the EC cookie is always scoped to the publisher's + /// apex domain regardless of how `cookie_domain` is configured. + #[must_use] + pub fn ec_cookie_domain(&self) -> String { + format!(".{}", self.domain) + } + /// Returns `true` if `proxy_secret` matches a known placeholder value /// (case-insensitive). #[must_use] @@ -203,35 +218,216 @@ impl DerefMut for IntegrationSettings { } } -/// Edge Cookie configuration. 
-#[allow(unused)] +/// A partner (SSP, DSP, identity vendor) configured in `[[ec.partners]]`. +/// +/// Partners are defined statically in `trusted-server.toml` rather than +/// registered via API. At startup, each partner's `api_token` is hashed +/// (SHA-256) for O(1) auth lookups; the plaintext is never stored at runtime. +#[derive(Debug, Clone, Deserialize, Serialize, Validate)] +pub struct EcPartner { + /// Unique partner identifier. Must match `^[a-z0-9_-]{1,32}$` and + /// not collide with reserved IDs (`ec`, `ts`, `eids`, etc.). + #[validate(custom(function = EcPartner::validate_id))] + pub id: String, + /// Human-readable partner name. + pub name: String, + /// `OpenRTB` `source.domain` for EID entries (e.g. `"liveramp.com"`). + pub source_domain: String, + /// `OpenRTB` `atype` value (typically 3). + #[serde( + default = "EcPartner::default_openrtb_atype", + deserialize_with = "from_value_or_str" + )] + pub openrtb_atype: u8, + /// Whether this partner's UIDs appear in auction `user.eids`. + #[serde(default, deserialize_with = "from_value_or_str")] + pub bidstream_enabled: bool, + /// Plaintext API token. Hashed at startup for auth lookups. + /// Used by batch sync (inbound) and identify (inbound). + pub api_token: Redacted, + /// Max batch sync API requests per partner per minute. + #[serde( + default = "EcPartner::default_batch_rate_limit", + deserialize_with = "from_value_or_str" + )] + pub batch_rate_limit: u32, + /// Whether server-to-server pull sync is enabled for this partner. + #[serde(default, deserialize_with = "from_value_or_str")] + pub pull_sync_enabled: bool, + /// URL to call for pull sync. Required when `pull_sync_enabled`. + #[serde(default)] + pub pull_sync_url: Option, + /// Allowlist of domains TS may call for this partner's pull sync. + #[serde(default, deserialize_with = "vec_from_seq_or_map")] + pub pull_sync_allowed_domains: Vec, + /// Legacy pull-sync refresh interval retained for config compatibility. 
+ /// + /// EC identity entries no longer store per-partner sync timestamps, so + /// this value is not used by the current fill-missing-only pull sync + /// behavior. + #[serde( + default = "EcPartner::default_pull_sync_ttl_sec", + deserialize_with = "from_value_or_str" + )] + pub pull_sync_ttl_sec: u64, + /// Max pull sync calls per EC hash per partner per hour. + #[serde( + default = "EcPartner::default_pull_sync_rate_limit", + deserialize_with = "from_value_or_str" + )] + pub pull_sync_rate_limit: u32, + /// Outbound bearer token for pull sync requests. + #[serde(default)] + pub ts_pull_token: Option>, +} + +impl EcPartner { + const RESERVED_IDS: &[&str] = &[ + "ec", + "eids", + "ec-consent", + "eids-truncated", + "synthetic", + "ts", + "version", + "env", + ]; + + /// Validates a partner ID for safe use in dynamic headers and cookies. + /// + /// # Errors + /// + /// Returns a validation error when `id` does not match the configured + /// lowercase identifier policy or collides with a reserved name. + pub fn validate_id(id: &str) -> Result<(), ValidationError> { + if id.is_empty() || id.len() > 32 { + return Err(ValidationError::new("invalid_partner_id_length")); + } + if Self::RESERVED_IDS.contains(&id) { + return Err(ValidationError::new("reserved_partner_id")); + } + if !id.bytes().all(|byte| { + byte.is_ascii_lowercase() || byte.is_ascii_digit() || byte == b'_' || byte == b'-' + }) { + return Err(ValidationError::new("invalid_partner_id")); + } + Ok(()) + } + + #[must_use] + pub const fn default_openrtb_atype() -> u8 { + 3 + } + + #[must_use] + pub const fn default_batch_rate_limit() -> u32 { + 60 + } + + #[must_use] + pub const fn default_pull_sync_ttl_sec() -> u64 { + 86400 + } + + #[must_use] + pub const fn default_pull_sync_rate_limit() -> u32 { + 10 + } +} + +/// Edge Cookie (EC) configuration. +/// +/// Mapped from the `[ec]` TOML section. Controls EC identity generation, +/// KV store names, and partner registry. 
#[derive(Debug, Default, Clone, Deserialize, Serialize, Validate)] -pub struct EdgeCookie { - #[validate(custom(function = EdgeCookie::validate_secret_key))] - pub secret_key: Redacted, +pub struct Ec { + /// Publisher passphrase used as HMAC key for EC generation. + #[validate(custom(function = Ec::validate_passphrase))] + pub passphrase: Redacted, + + /// Fastly KV store name for the EC identity graph. + #[serde(default)] + pub ec_store: Option, + + /// Maximum number of concurrent pull-sync requests. + #[serde(default = "Ec::default_pull_sync_concurrency")] + pub pull_sync_concurrency: usize, + + /// Entries with `cluster_size` at or below this value are treated as + /// individual users for identity resolution. B2B publishers should + /// raise this to 50+ since readers are frequently on office networks. + #[serde(default = "Ec::default_cluster_trust_threshold")] + pub cluster_trust_threshold: u32, + + /// Legacy cluster re-check interval retained for config compatibility. + /// + /// EC identity entries no longer store cluster-check timestamps, so this + /// value is not used. `/_ts/api/v1/identify` computes cluster size only + /// when an entry does not already have a stored `cluster_size`. + #[serde(default = "Ec::default_cluster_recheck_secs")] + pub cluster_recheck_secs: u64, + + /// Partners (SSPs, DSPs, identity vendors) for EC identity sync. + #[serde(default, deserialize_with = "vec_from_seq_or_map")] + #[validate(nested)] + pub partners: Vec, } -impl EdgeCookie { +impl Ec { /// Known placeholder values that must not be used in production. - pub const SECRET_KEY_PLACEHOLDERS: &[&str] = &["secret-key", "secret_key", "trusted-server"]; + pub const PASSPHRASE_PLACEHOLDERS: &[&str] = &[ + "secret-key", + "secret_key", + "trusted-server", + "trusted-server-placeholder-secret", + ]; + + /// Default maximum concurrent pull-sync requests. + #[must_use] + pub const fn default_pull_sync_concurrency() -> usize { + 3 + } + + /// Default cluster trust threshold. 
+ #[must_use] + pub const fn default_cluster_trust_threshold() -> u32 { + 10 + } - /// Returns `true` if `secret_key` matches a known placeholder value + /// Default cluster re-check interval (1 hour). + #[must_use] + pub const fn default_cluster_recheck_secs() -> u64 { + 3600 + } + + /// Returns `true` if `passphrase` matches a known placeholder value /// (case-insensitive). #[must_use] - pub fn is_placeholder_secret_key(secret_key: &str) -> bool { - Self::SECRET_KEY_PLACEHOLDERS + pub fn is_placeholder_passphrase(passphrase: &str) -> bool { + Self::PASSPHRASE_PLACEHOLDERS .iter() - .any(|p| p.eq_ignore_ascii_case(secret_key)) + .any(|p| p.eq_ignore_ascii_case(passphrase)) } - /// Validates that the secret key is not empty. + /// Minimum passphrase length for HMAC-SHA256 key strength. + /// + /// The EC passphrase is long-lived keying material for visitor ID + /// derivation. Operators should use a high-entropy random passphrase per + /// the EC setup and key-rotation documentation. + const MIN_PASSPHRASE_LENGTH: usize = 32; + + /// Validates that the passphrase is not empty and meets minimum length. /// /// # Errors /// - /// Returns a validation error if the secret key is empty. - pub fn validate_secret_key(secret_key: &Redacted) -> Result<(), ValidationError> { - if secret_key.expose().is_empty() { - return Err(ValidationError::new("empty_secret_key")); + /// Returns a validation error if the passphrase is empty or shorter + /// than [`Self::MIN_PASSPHRASE_LENGTH`] characters. 
+ pub fn validate_passphrase(passphrase: &Redacted) -> Result<(), ValidationError> { + if passphrase.expose().is_empty() { + return Err(ValidationError::new("empty_passphrase")); + } + if passphrase.expose().len() < Self::MIN_PASSPHRASE_LENGTH { + return Err(ValidationError::new("short_passphrase")); } Ok(()) } @@ -357,6 +553,13 @@ fn default_certificate_check() -> bool { true } +fn is_admin_placeholder_password(password: &str) -> bool { + matches!( + password.trim().to_ascii_lowercase().as_str(), + "changeme" | "password" | "admin" + ) +} + impl Default for Proxy { fn default() -> Self { Self { @@ -417,7 +620,7 @@ pub struct Settings { pub publisher: Publisher, #[serde(default)] #[validate(nested)] - pub edge_cookie: EdgeCookie, + pub ec: Ec, #[serde(default)] pub integrations: IntegrationSettings, #[serde(default, deserialize_with = "vec_from_seq_or_map")] @@ -439,7 +642,6 @@ pub struct Settings { pub debug: DebugConfig, } -#[allow(unused)] impl Settings { /// Creates a new [`Settings`] instance from a pre-built TOML string. /// @@ -458,7 +660,15 @@ impl Settings { settings.proxy.normalize(); settings.consent.validate(); settings.prepare_runtime()?; + + settings.validate().map_err(|err| { + Report::new(TrustedServerError::Configuration { + message: format!("Configuration validation failed: {err}"), + }) + })?; + settings.validate_admin_coverage()?; + settings.validate_admin_handler_passwords()?; Ok(settings) } @@ -504,6 +714,7 @@ impl Settings { settings.prepare_runtime()?; settings.validate_admin_coverage()?; + settings.validate_admin_handler_passwords()?; Ok(settings) } @@ -512,7 +723,7 @@ impl Settings { /// /// # Errors /// - /// Returns a configuration error if any cached runtime artifact cannot be prepared. + /// Returns a configuration error if any handler path regex does not compile. 
pub fn prepare_runtime(&self) -> Result<(), Report> { for handler in &self.handlers { handler.prepare_runtime()?; @@ -521,6 +732,31 @@ impl Settings { Ok(()) } + /// Rejects known placeholder secret values. + /// + /// # Errors + /// + /// Returns [`TrustedServerError::InsecureDefault`] when one or more secret + /// fields still contain a placeholder value. + pub fn reject_placeholder_secrets(&self) -> Result<(), Report> { + let mut insecure_fields: Vec<&str> = Vec::new(); + + if Ec::is_placeholder_passphrase(self.ec.passphrase.expose()) { + insecure_fields.push("ec.passphrase"); + } + if Publisher::is_placeholder_proxy_secret(self.publisher.proxy_secret.expose()) { + insecure_fields.push("publisher.proxy_secret"); + } + + if insecure_fields.is_empty() { + return Ok(()); + } + + Err(Report::new(TrustedServerError::InsecureDefault { + field: insecure_fields.join(", "), + })) + } + /// Resolve the first handler whose regex matches the request path. /// /// # Errors @@ -546,7 +782,8 @@ impl Settings { /// endpoints are always protected by authentication. /// Update [`ADMIN_ENDPOINTS`](Self::ADMIN_ENDPOINTS) when adding new /// admin routes to `crates/trusted-server-adapter-fastly/src/main.rs`. - pub(crate) const ADMIN_ENDPOINTS: &[&str] = &["/admin/keys/rotate", "/admin/keys/deactivate"]; + pub(crate) const ADMIN_ENDPOINTS: &[&str] = + &["/_ts/admin/keys/rotate", "/_ts/admin/keys/deactivate"]; /// Returns admin endpoint paths that no configured handler covers. /// @@ -590,13 +827,34 @@ impl Settings { Err(Report::new(TrustedServerError::Configuration { message: format!( "No handler covers admin endpoint(s): {}. 
\ - Add a [[handlers]] entry with a path regex matching /admin/ \ + Add a [[handlers]] entry with a path regex matching /_ts/admin/ \ to protect admin access.", uncovered.join(", ") ), })) } + fn validate_admin_handler_passwords(&self) -> Result<(), Report> { + for handler in &self.handlers { + let covers_admin = Self::ADMIN_ENDPOINTS + .iter() + .try_fold(false, |covered, path| { + handler.matches_path(path).map(|matches| covered || matches) + })?; + + if covers_admin && is_admin_placeholder_password(handler.password.expose()) { + return Err(Report::new(TrustedServerError::Configuration { + message: format!( + "Admin handler `{}` uses a placeholder password; configure a strong secret", + handler.path + ), + })); + } + } + + Ok(()) + } + /// Retrieves the integration configuration of a specific type. /// /// # Errors @@ -613,6 +871,33 @@ impl Settings { } } +fn validate_publisher_domain(value: &str) -> Result<(), ValidationError> { + if value.trim() != value || value.is_empty() || value.len() > 253 { + return Err(ValidationError::new("invalid_publisher_domain")); + } + if value.starts_with('.') || value.ends_with('.') || value.contains(['/', ':']) { + return Err(ValidationError::new("invalid_publisher_domain")); + } + + for label in value.split('.') { + if label.is_empty() || label.len() > 63 { + return Err(ValidationError::new("invalid_publisher_domain")); + } + let bytes = label.as_bytes(); + if bytes.first() == Some(&b'-') || bytes.last() == Some(&b'-') { + return Err(ValidationError::new("invalid_publisher_domain")); + } + if !bytes + .iter() + .all(|byte| byte.is_ascii_alphanumeric() || *byte == b'-') + { + return Err(ValidationError::new("invalid_publisher_domain")); + } + } + + Ok(()) +} + fn validate_cookie_domain(value: &str) -> Result<(), ValidationError> { // `=` is excluded: it only has special meaning in the name=value pair, // not within the Domain attribute value. 
@@ -650,6 +935,19 @@ fn validate_path(value: &str) -> Result<(), ValidationError> { validation_error }) } +fn from_value_or_str<'de, D, T>(deserializer: D) -> Result +where + D: Deserializer<'de>, + T: DeserializeOwned + FromStr, + T::Err: std::fmt::Display, +{ + let value = JsonValue::deserialize(deserializer)?; + match value { + JsonValue::String(value) => T::from_str(&value).map_err(serde::de::Error::custom), + other => serde_json::from_value(other).map_err(serde::de::Error::custom), + } +} + // Helper: allow Vec fields to deserialize from either a JSON array or a map of numeric indices. // This lets env vars like TRUSTED_SERVER__INTEGRATIONS__PREBID__BIDDERS__0=smartadserver work, which the config env source // represents as an object {"0": "value"} rather than a sequence. Also supports string inputs that are @@ -816,11 +1114,19 @@ mod tests { ); assert_eq!(settings.publisher.domain, "test-publisher.com"); assert_eq!(settings.publisher.cookie_domain, ".test-publisher.com"); + assert_eq!( + settings.publisher.ec_cookie_domain(), + ".test-publisher.com", + "EC cookie domain should be computed as .{{domain}}" + ); assert_eq!( settings.publisher.origin_url, "https://origin.test-publisher.com" ); - assert_eq!(settings.edge_cookie.secret_key.expose(), "test-secret-key"); + assert_eq!( + settings.ec.passphrase.expose(), + "test-secret-key-32-bytes-minimum" + ); settings.validate().expect("Failed to validate settings"); } @@ -832,14 +1138,72 @@ mod tests { r#"origin_url = "https://origin.test-publisher.com/""#, ); - let settings = Settings::from_toml(&toml_str).expect("should parse TOML"); - let result = settings.validate(); + let result = Settings::from_toml(&toml_str); assert!( result.is_err(), "origin_url ending with '/' should fail validation" ); } + #[test] + fn validate_rejects_invalid_publisher_domains() { + for domain in [ + "", + ".example.com", + "example.com.", + "https://example.com", + "bad_domain.com", + ] { + let toml_str = 
crate_test_settings_str().replace( + r#"domain = "test-publisher.com""#, + &format!(r#"domain = "{domain}""#), + ); + + let result = Settings::from_toml(&toml_str); + assert!(result.is_err(), "should reject invalid domain {domain:?}"); + } + } + + #[test] + fn validate_accepts_localhost_publisher_domain() { + let toml_str = crate_test_settings_str().replace( + r#"domain = "test-publisher.com""#, + r#"domain = "localhost""#, + ); + + let settings = Settings::from_toml(&toml_str).expect("should accept localhost domain"); + assert_eq!(settings.publisher.ec_cookie_domain(), ".localhost"); + } + + #[test] + fn validate_rejects_invalid_ec_partner_ids() { + for partner_id in [ + "Upper", + "bad id", + "ec", + "", + "abcdefghijklmnopqrstuvwxyzabcdefg", + ] { + let toml_str = format!( + r#"{} + [[ec.partners]] + id = "{}" + name = "Invalid Partner" + source_domain = "invalid.example.com" + api_token = "invalid-token" + "#, + crate_test_settings_str(), + partner_id, + ); + + let result = Settings::from_toml(&toml_str); + assert!( + result.is_err(), + "should reject invalid partner ID {partner_id:?}" + ); + } + } + #[test] fn prepare_runtime_rejects_invalid_handler_regex() { let toml_str = crate_test_settings_str().replace(r#"path = "^/secure""#, r#"path = "(""#); @@ -867,35 +1231,55 @@ mod tests { } #[test] - fn is_placeholder_secret_key_rejects_all_known_placeholders() { - for placeholder in EdgeCookie::SECRET_KEY_PLACEHOLDERS { + fn is_placeholder_passphrase_rejects_all_known_placeholders() { + for placeholder in Ec::PASSPHRASE_PLACEHOLDERS { assert!( - EdgeCookie::is_placeholder_secret_key(placeholder), - "should detect placeholder secret_key '{placeholder}'" + Ec::is_placeholder_passphrase(placeholder), + "should detect placeholder passphrase '{placeholder}'" ); } } #[test] - fn is_placeholder_secret_key_is_case_insensitive() { + fn is_placeholder_passphrase_is_case_insensitive() { assert!( - EdgeCookie::is_placeholder_secret_key("SECRET-KEY"), - "should detect 
case-insensitive placeholder secret_key" + Ec::is_placeholder_passphrase("SECRET-KEY"), + "should detect case-insensitive placeholder passphrase" ); assert!( - EdgeCookie::is_placeholder_secret_key("Trusted-Server"), - "should detect mixed-case placeholder secret_key" + Ec::is_placeholder_passphrase("Trusted-Server"), + "should detect mixed-case placeholder passphrase" ); } #[test] - fn is_placeholder_secret_key_accepts_non_placeholder() { + fn is_placeholder_passphrase_accepts_non_placeholder() { assert!( - !EdgeCookie::is_placeholder_secret_key("test-secret-key"), - "should accept non-placeholder secret_key" + !Ec::is_placeholder_passphrase("test-secret-key-32-bytes-minimum"), + "should accept non-placeholder passphrase" + ); + } + + #[test] + fn validate_passphrase_rejects_under_32_characters() { + let passphrase = Redacted::new("a".repeat(31)); + + let err = Ec::validate_passphrase(&passphrase).expect_err("should reject short passphrase"); + + assert_eq!( + err.code.as_ref(), + "short_passphrase", + "should report short passphrase validation error" ); } + #[test] + fn validate_passphrase_accepts_32_characters() { + let passphrase = Redacted::new("a".repeat(32)); + + Ec::validate_passphrase(&passphrase).expect("should accept 32-character passphrase"); + } + #[test] fn is_placeholder_proxy_secret_rejects_all_known_placeholders() { for placeholder in Publisher::PROXY_SECRET_PLACEHOLDERS { @@ -1213,7 +1597,7 @@ mod tests { (path_key_0, Some("^/env-handler")), (username_key_0, Some("env-user")), (password_key_0, Some("env-pass")), - (path_key_1, Some("^/admin")), + (path_key_1, Some("^/_ts/admin")), (username_key_1, Some("admin")), (password_key_1, Some("admin-pass")), ], @@ -1229,6 +1613,156 @@ mod tests { ); } + #[test] + fn test_ec_partners_override_with_indexed_env() { + let toml_str = crate_test_settings_str(); + + let origin_key = format!( + "{}{}PUBLISHER{}ORIGIN_URL", + ENVIRONMENT_VARIABLE_PREFIX, + ENVIRONMENT_VARIABLE_SEPARATOR, + 
ENVIRONMENT_VARIABLE_SEPARATOR + ); + let partner_0_id_key = format!( + "{}{}EC{}PARTNERS{}0{}ID", + ENVIRONMENT_VARIABLE_PREFIX, + ENVIRONMENT_VARIABLE_SEPARATOR, + ENVIRONMENT_VARIABLE_SEPARATOR, + ENVIRONMENT_VARIABLE_SEPARATOR, + ENVIRONMENT_VARIABLE_SEPARATOR + ); + let partner_0_name_key = format!( + "{}{}EC{}PARTNERS{}0{}NAME", + ENVIRONMENT_VARIABLE_PREFIX, + ENVIRONMENT_VARIABLE_SEPARATOR, + ENVIRONMENT_VARIABLE_SEPARATOR, + ENVIRONMENT_VARIABLE_SEPARATOR, + ENVIRONMENT_VARIABLE_SEPARATOR + ); + let partner_0_source_domain_key = format!( + "{}{}EC{}PARTNERS{}0{}SOURCE_DOMAIN", + ENVIRONMENT_VARIABLE_PREFIX, + ENVIRONMENT_VARIABLE_SEPARATOR, + ENVIRONMENT_VARIABLE_SEPARATOR, + ENVIRONMENT_VARIABLE_SEPARATOR, + ENVIRONMENT_VARIABLE_SEPARATOR + ); + let partner_0_openrtb_atype_key = format!( + "{}{}EC{}PARTNERS{}0{}OPENRTB_ATYPE", + ENVIRONMENT_VARIABLE_PREFIX, + ENVIRONMENT_VARIABLE_SEPARATOR, + ENVIRONMENT_VARIABLE_SEPARATOR, + ENVIRONMENT_VARIABLE_SEPARATOR, + ENVIRONMENT_VARIABLE_SEPARATOR + ); + let partner_0_bidstream_enabled_key = format!( + "{}{}EC{}PARTNERS{}0{}BIDSTREAM_ENABLED", + ENVIRONMENT_VARIABLE_PREFIX, + ENVIRONMENT_VARIABLE_SEPARATOR, + ENVIRONMENT_VARIABLE_SEPARATOR, + ENVIRONMENT_VARIABLE_SEPARATOR, + ENVIRONMENT_VARIABLE_SEPARATOR + ); + let partner_0_api_token_key = format!( + "{}{}EC{}PARTNERS{}0{}API_TOKEN", + ENVIRONMENT_VARIABLE_PREFIX, + ENVIRONMENT_VARIABLE_SEPARATOR, + ENVIRONMENT_VARIABLE_SEPARATOR, + ENVIRONMENT_VARIABLE_SEPARATOR, + ENVIRONMENT_VARIABLE_SEPARATOR + ); + let partner_1_id_key = format!( + "{}{}EC{}PARTNERS{}1{}ID", + ENVIRONMENT_VARIABLE_PREFIX, + ENVIRONMENT_VARIABLE_SEPARATOR, + ENVIRONMENT_VARIABLE_SEPARATOR, + ENVIRONMENT_VARIABLE_SEPARATOR, + ENVIRONMENT_VARIABLE_SEPARATOR + ); + let partner_1_name_key = format!( + "{}{}EC{}PARTNERS{}1{}NAME", + ENVIRONMENT_VARIABLE_PREFIX, + ENVIRONMENT_VARIABLE_SEPARATOR, + ENVIRONMENT_VARIABLE_SEPARATOR, + ENVIRONMENT_VARIABLE_SEPARATOR, + ENVIRONMENT_VARIABLE_SEPARATOR 
+ ); + let partner_1_source_domain_key = format!( + "{}{}EC{}PARTNERS{}1{}SOURCE_DOMAIN", + ENVIRONMENT_VARIABLE_PREFIX, + ENVIRONMENT_VARIABLE_SEPARATOR, + ENVIRONMENT_VARIABLE_SEPARATOR, + ENVIRONMENT_VARIABLE_SEPARATOR, + ENVIRONMENT_VARIABLE_SEPARATOR + ); + let partner_1_openrtb_atype_key = format!( + "{}{}EC{}PARTNERS{}1{}OPENRTB_ATYPE", + ENVIRONMENT_VARIABLE_PREFIX, + ENVIRONMENT_VARIABLE_SEPARATOR, + ENVIRONMENT_VARIABLE_SEPARATOR, + ENVIRONMENT_VARIABLE_SEPARATOR, + ENVIRONMENT_VARIABLE_SEPARATOR + ); + let partner_1_bidstream_enabled_key = format!( + "{}{}EC{}PARTNERS{}1{}BIDSTREAM_ENABLED", + ENVIRONMENT_VARIABLE_PREFIX, + ENVIRONMENT_VARIABLE_SEPARATOR, + ENVIRONMENT_VARIABLE_SEPARATOR, + ENVIRONMENT_VARIABLE_SEPARATOR, + ENVIRONMENT_VARIABLE_SEPARATOR + ); + let partner_1_api_token_key = format!( + "{}{}EC{}PARTNERS{}1{}API_TOKEN", + ENVIRONMENT_VARIABLE_PREFIX, + ENVIRONMENT_VARIABLE_SEPARATOR, + ENVIRONMENT_VARIABLE_SEPARATOR, + ENVIRONMENT_VARIABLE_SEPARATOR, + ENVIRONMENT_VARIABLE_SEPARATOR + ); + + temp_env::with_vars( + [ + (origin_key, Some("https://origin.test-publisher.com")), + (partner_0_id_key, Some("envpartner0")), + (partner_0_name_key, Some("Env Partner 0")), + (partner_0_source_domain_key, Some("envpartner0.example.com")), + (partner_0_openrtb_atype_key, Some("1")), + (partner_0_bidstream_enabled_key, Some("true")), + (partner_0_api_token_key, Some("env-token-0")), + (partner_1_id_key, Some("envpartner1")), + (partner_1_name_key, Some("Env Partner 1")), + (partner_1_source_domain_key, Some("envpartner1.example.com")), + (partner_1_openrtb_atype_key, Some("3")), + (partner_1_bidstream_enabled_key, Some("false")), + (partner_1_api_token_key, Some("env-token-1")), + ], + || { + let settings = Settings::from_toml_and_env(&toml_str) + .expect("Settings should load indexed EC partners from env"); + + assert_eq!(settings.ec.partners.len(), 2); + assert_eq!(settings.ec.partners[0].id, "envpartner0"); + assert_eq!(settings.ec.partners[0].name, 
"Env Partner 0"); + assert_eq!( + settings.ec.partners[0].source_domain, + "envpartner0.example.com" + ); + assert_eq!(settings.ec.partners[0].openrtb_atype, 1); + assert!(settings.ec.partners[0].bidstream_enabled); + assert_eq!(settings.ec.partners[0].api_token.expose(), "env-token-0"); + assert_eq!(settings.ec.partners[1].id, "envpartner1"); + assert_eq!(settings.ec.partners[1].name, "Env Partner 1"); + assert_eq!( + settings.ec.partners[1].source_domain, + "envpartner1.example.com" + ); + assert_eq!(settings.ec.partners[1].openrtb_atype, 3); + assert!(!settings.ec.partners[1].bidstream_enabled); + assert_eq!(settings.ec.partners[1].api_token.expose(), "env-token-1"); + }, + ); + } + #[test] fn test_invalid_handler_override_fails_during_runtime_preparation() { let toml_str = crate_test_settings_str(); @@ -1936,8 +2470,8 @@ mod tests { origin_url = "https://origin.test-publisher.com" proxy_secret = "unit-test-proxy-secret" - [edge_cookie] - secret_key = "test-secret-key" + [ec] + passphrase = "test-secret-key-32-bytes-minimum" [request_signing] config_store_id = "test-config-store-id" @@ -1957,8 +2491,8 @@ mod tests { .expect("should check admin coverage"); assert_eq!( uncovered, - vec!["/admin/keys/rotate", "/admin/keys/deactivate"], - "should report both admin endpoints as uncovered" + vec!["/_ts/admin/keys/rotate", "/_ts/admin/keys/deactivate",], + "should report all admin endpoints as uncovered" ); } @@ -1970,7 +2504,7 @@ mod tests { .expect("should check admin coverage"); assert!( uncovered.is_empty(), - "should report no uncovered admin endpoints when handler covers /admin" + "should report no uncovered admin endpoints when handler covers /_ts/admin" ); } @@ -1979,7 +2513,7 @@ mod tests { let toml_str = settings_str_without_admin_handler() + r#" [[handlers]] - path = "^/admin/keys/rotate$" + path = "^/_ts/admin/keys/rotate$" username = "admin" password = "secret" "#; @@ -1991,8 +2525,8 @@ mod tests { .expect("should check admin coverage"); assert_eq!( 
uncovered, - vec!["/admin/keys/deactivate"], - "should detect that only deactivate is uncovered" + vec!["/_ts/admin/keys/deactivate"], + "should detect endpoints not covered by the rotate-only handler" ); } @@ -2022,6 +2556,30 @@ mod tests { ); } + #[test] + fn from_toml_rejects_admin_handler_placeholder_password() { + let toml_str = crate_test_settings_str() + .replace(r#"password = "admin-pass""#, r#"password = "changeme""#); + + let result = Settings::from_toml(&toml_str); + assert!( + result.is_err(), + "should reject placeholder password on admin handler" + ); + let err = format!("{:?}", result.expect_err("should reject placeholder")); + assert!( + err.contains("placeholder password"), + "error should mention placeholder admin password, got: {err}" + ); + } + + #[test] + fn from_toml_accepts_non_placeholder_admin_password() { + let settings = Settings::from_toml(&crate_test_settings_str()) + .expect("should accept non-placeholder admin password"); + assert_eq!(settings.handlers.len(), 2, "should parse handlers"); + } + #[test] fn from_toml_rejects_config_without_admin_handler() { let result = Settings::from_toml(&settings_str_without_admin_handler()); @@ -2062,9 +2620,9 @@ mod tests { .lines() .filter_map(|line| { let trimmed = line.trim(); - // Match arms look like: (Method::POST, "/admin/...") => ... - if trimmed.starts_with('(') && trimmed.contains("\"/admin/") { - let start = trimmed.find("\"/admin/")?; + // Match arms look like: (Method::POST, "/_ts/admin/...") => ... 
+ if trimmed.starts_with('(') && trimmed.contains("\"/_ts/admin/") { + let start = trimmed.find("\"/_ts/admin/")?; let rest = &trimmed[start + 1..]; let end = rest.find('"')?; Some(&rest[..end]) diff --git a/crates/trusted-server-core/src/settings_data.rs b/crates/trusted-server-core/src/settings_data.rs index f69fc7ba..2f0a29fd 100644 --- a/crates/trusted-server-core/src/settings_data.rs +++ b/crates/trusted-server-core/src/settings_data.rs @@ -3,7 +3,7 @@ use error_stack::{Report, ResultExt}; use validator::Validate; use crate::error::TrustedServerError; -use crate::settings::{EdgeCookie, Publisher, Settings}; +use crate::settings::Settings; pub use crate::auction_config_types::AuctionConfig; @@ -40,37 +40,126 @@ pub fn get_settings() -> Result> { ); } - if EdgeCookie::is_placeholder_secret_key(settings.edge_cookie.secret_key.expose()) { - log::warn!( - "INSECURE: edge_cookie.secret_key is set to a default placeholder — \ - HMAC-SHA256 signatures can be forged. \ - Override via TRUSTED_SERVER__EDGE_COOKIE__SECRET_KEY at build time" + settings.reject_placeholder_secrets()?; + + Ok(settings) +} + +#[cfg(test)] +mod tests { + use crate::error::TrustedServerError; + use crate::settings::Settings; + use crate::test_support::tests::crate_test_settings_str; + + /// Builds a TOML string with the given secret values swapped in. + /// + /// # Panics + /// + /// Panics if the replacement patterns no longer match the test TOML, + /// which would cause the substitution to silently no-op. 
+ fn toml_with_secrets(passphrase: &str, proxy_secret: &str) -> String { + let original = crate_test_settings_str(); + let after_passphrase = original.replace( + r#"passphrase = "test-secret-key-32-bytes-minimum""#, + &format!(r#"passphrase = "{passphrase}""#), ); + assert_ne!( + after_passphrase, original, + "should have replaced passphrase value" + ); + let result = after_passphrase.replace( + r#"proxy_secret = "unit-test-proxy-secret""#, + &format!(r#"proxy_secret = "{proxy_secret}""#), + ); + assert_ne!( + result, after_passphrase, + "should have replaced proxy_secret value" + ); + result } - if Publisher::is_placeholder_proxy_secret(settings.publisher.proxy_secret.expose()) { - log::warn!( - "INSECURE: publisher.proxy_secret is set to a default placeholder — \ - XChaCha20-Poly1305 encrypted URLs can be decrypted by anyone. \ - Override via TRUSTED_SERVER__PUBLISHER__PROXY_SECRET at build time" + #[test] + fn rejects_placeholder_passphrase() { + let toml = toml_with_secrets("trusted-server-placeholder-secret", "real-proxy-secret"); + let settings = Settings::from_toml(&toml).expect("should parse TOML"); + let err = settings + .reject_placeholder_secrets() + .expect_err("should reject placeholder secret_key"); + let root = err.current_context(); + assert!( + matches!(root, TrustedServerError::InsecureDefault { field } if field.contains("ec.passphrase")), + "error should mention ec.passphrase, got: {root}" ); } - Ok(settings) -} + #[test] + fn rejects_placeholder_proxy_secret() { + let toml = toml_with_secrets( + "production-secret-key-32-bytes-min", + "change-me-proxy-secret", + ); + let settings = Settings::from_toml(&toml).expect("should parse TOML"); + let err = settings + .reject_placeholder_secrets() + .expect_err("should reject placeholder proxy_secret"); + let root = err.current_context(); + assert!( + matches!(root, TrustedServerError::InsecureDefault { field } if field.contains("publisher.proxy_secret")), + "error should mention publisher.proxy_secret, 
got: {root}" + ); + } -#[cfg(test)] -mod tests { - use super::*; + #[test] + fn rejects_both_placeholders_in_single_error() { + let toml = toml_with_secrets( + "trusted-server-placeholder-secret", + "change-me-proxy-secret", + ); + let settings = Settings::from_toml(&toml).expect("should parse TOML"); + let err = settings + .reject_placeholder_secrets() + .expect_err("should reject both placeholder secrets"); + let root = err.current_context(); + match root { + TrustedServerError::InsecureDefault { field } => { + assert!( + field.contains("ec.passphrase"), + "error should mention ec.passphrase, got: {field}" + ); + assert!( + field.contains("publisher.proxy_secret"), + "error should mention publisher.proxy_secret, got: {field}" + ); + } + other => panic!("expected InsecureDefault, got: {other}"), + } + } #[test] - fn get_settings_loads_embedded_toml_successfully() { - // The embedded TOML contains placeholder secrets (e.g. "trusted-server", - // "change-me-proxy-secret"). This is expected — production builds override - // them via TRUSTED_SERVER__* env vars at build time. - let settings = get_settings().expect("should load settings from embedded TOML"); - assert!(!settings.publisher.domain.is_empty()); - assert!(!settings.publisher.cookie_domain.is_empty()); - assert!(!settings.publisher.origin_url.is_empty()); + fn accepts_non_placeholder_secrets() { + let toml = toml_with_secrets( + "production-secret-key-32-bytes-min", + "production-proxy-secret", + ); + let settings = Settings::from_toml(&toml).expect("should parse TOML"); + settings + .reject_placeholder_secrets() + .expect("non-placeholder secrets should pass validation"); + } + + /// Smoke-test the full `get_settings()` pipeline (embedded bytes → UTF-8 → + /// parse → validate → placeholder check). The build-time TOML ships with + /// placeholder secrets, so the expected outcome is an [`InsecureDefault`] + /// error — but reaching that error proves every earlier stage succeeded. 
+ #[test] + fn get_settings_rejects_embedded_placeholder_secrets() { + let err = super::get_settings().expect_err("should reject embedded placeholder secrets"); + assert!( + matches!( + err.current_context(), + TrustedServerError::InsecureDefault { .. } + ), + "should fail with InsecureDefault, got: {err}" + ); } } diff --git a/crates/trusted-server-core/src/storage/kv_store.rs b/crates/trusted-server-core/src/storage/kv_store.rs deleted file mode 100644 index c118005a..00000000 --- a/crates/trusted-server-core/src/storage/kv_store.rs +++ /dev/null @@ -1,570 +0,0 @@ -//! KV Store consent persistence. -//! -//! Stores and retrieves consent data from a KV Store, keyed by EC ID. This -//! provides consent continuity for returning users whose browsers may not -//! have consent cookies on every request. -//! -//! # Storage layout -//! -//! Each entry uses a single JSON body ([`KvConsentEntry`]) containing the raw -//! consent strings, context flags, and a fingerprint for write-on-change -//! detection. -//! -//! # Change detection -//! -//! Writes only occur when consent signals have actually changed. -//! [`consent_fingerprint`] hashes the raw strings into a compact fingerprint -//! stored in the body's `fp` field. On the next request, the existing -//! fingerprint is compared before writing. - -use bytes::Bytes; -use serde::{Deserialize, Serialize}; -use sha2::{Digest, Sha256}; - -use crate::consent::jurisdiction::Jurisdiction; -use crate::consent::types::{ConsentContext, ConsentSource}; -use crate::platform::PlatformKvStore; - -// --------------------------------------------------------------------------- -// KV body (JSON, stored as value) -// --------------------------------------------------------------------------- - -/// Consent data stored in the KV Store body. -/// -/// Contains the raw consent strings needed to reconstruct a [`ConsentContext`]. -/// Decoded data (TCF, GPP, US Privacy) is not stored — it is re-decoded on -/// read to avoid stale decoded state. 
-/// -/// The `fp` field holds the consent fingerprint for write-on-change detection. -/// Entries written before PR5 lack this field; `#[serde(default)]` treats them -/// as having an empty fingerprint, which always triggers a self-healing -/// re-write. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct KvConsentEntry { - /// Fingerprint of consent signals for write-on-change detection. - /// - /// Written by [`save_consent_to_kv`]. Entries written before PR5 lack - /// this field; `#[serde(default)]` treats them as having an empty - /// fingerprint, which always triggers a self-healing re-write. - #[serde(default)] - pub fp: String, - /// Raw TC String from `euconsent-v2` cookie. - #[serde(skip_serializing_if = "Option::is_none")] - pub raw_tc_string: Option, - /// Raw GPP string from `__gpp` cookie. - #[serde(skip_serializing_if = "Option::is_none")] - pub raw_gpp_string: Option, - /// GPP section IDs (decoded or from `__gpp_sid` cookie). - #[serde(skip_serializing_if = "Option::is_none")] - pub gpp_section_ids: Option>, - /// Raw US Privacy string from `us_privacy` cookie. - #[serde(skip_serializing_if = "Option::is_none")] - pub raw_us_privacy: Option, - /// Raw Google Additional Consent (AC) string. - #[serde(skip_serializing_if = "Option::is_none")] - pub raw_ac_string: Option, - - /// Whether GDPR applies to this request. - pub gdpr_applies: bool, - /// Global Privacy Control signal. - pub gpc: bool, - /// Serialized jurisdiction (e.g. `"GDPR"`, `"US-CA"`, `"unknown"`). - pub jurisdiction: String, - - /// When this entry was stored (deciseconds since Unix epoch). - pub stored_at_ds: u64, -} - -// --------------------------------------------------------------------------- -// Conversions -// --------------------------------------------------------------------------- - -/// Builds a [`KvConsentEntry`] from a [`ConsentContext`]. -/// -/// Captures only the raw strings and contextual flags. 
Decoded data is -/// intentionally omitted — it will be re-decoded on read. The `fp` field is -/// initialized to an empty string and must be set by the caller before writing. -#[must_use] -pub fn entry_from_context(ctx: &ConsentContext, now_ds: u64) -> KvConsentEntry { - KvConsentEntry { - fp: String::new(), - raw_tc_string: ctx.raw_tc_string.clone(), - raw_gpp_string: ctx.raw_gpp_string.clone(), - gpp_section_ids: ctx.gpp_section_ids.clone(), - raw_us_privacy: ctx.raw_us_privacy.clone(), - raw_ac_string: ctx.raw_ac_string.clone(), - gdpr_applies: ctx.gdpr_applies, - gpc: ctx.gpc, - jurisdiction: ctx.jurisdiction.to_string(), - stored_at_ds: now_ds, - } -} - -/// Converts a [`KvConsentEntry`] into [`crate::consent::types::RawConsentSignals`] -/// suitable for re-decoding via [`crate::consent::build_context_from_signals`]. -#[must_use] -pub fn signals_from_entry(entry: &KvConsentEntry) -> crate::consent::types::RawConsentSignals { - crate::consent::types::RawConsentSignals { - raw_tc_string: entry.raw_tc_string.clone(), - raw_gpp_string: entry.raw_gpp_string.clone(), - raw_gpp_sid: entry.gpp_section_ids.as_ref().map(|ids| { - ids.iter() - .map(ToString::to_string) - .collect::>() - .join(",") - }), - raw_us_privacy: entry.raw_us_privacy.clone(), - gpc: entry.gpc, - } -} - -/// Reconstructs a [`ConsentContext`] from a KV Store entry. -/// -/// Re-decodes the raw strings to populate structured fields (TCF, GPP, US -/// Privacy). The `source` is set to [`ConsentSource::KvStore`] and the -/// `jurisdiction` is parsed from the stored string representation. -#[must_use] -pub fn context_from_entry(entry: &KvConsentEntry) -> ConsentContext { - let signals = signals_from_entry(entry); - let mut ctx = crate::consent::build_context_from_signals(&signals); - - // Restore context fields that aren't derived from raw signals. 
- ctx.gdpr_applies = entry.gdpr_applies; - ctx.gpc = entry.gpc; - ctx.raw_ac_string = entry.raw_ac_string.clone(); - ctx.jurisdiction = parse_jurisdiction(&entry.jurisdiction); - ctx.source = ConsentSource::KvStore; - - ctx -} - -// --------------------------------------------------------------------------- -// Fingerprinting -// --------------------------------------------------------------------------- - -/// Computes a compact fingerprint of the consent signals for change detection. -/// -/// Returns the first 16 hex characters of a SHA-256 hash computed over all -/// raw consent strings and the GPC flag. This is sufficient for detecting -/// changes without storing full hashes. -#[must_use] -pub fn consent_fingerprint(ctx: &ConsentContext) -> String { - let mut hasher = Sha256::new(); - - // Feed each signal into the hash, separated by a sentinel byte to - // prevent ambiguity (e.g., None+Some("x") vs Some("x")+None). - hash_optional(&mut hasher, ctx.raw_tc_string.as_deref()); - hash_optional(&mut hasher, ctx.raw_gpp_string.as_deref()); - hash_optional(&mut hasher, ctx.raw_us_privacy.as_deref()); - hash_optional(&mut hasher, ctx.raw_ac_string.as_deref()); - hasher.update(if ctx.gpc { b"1" } else { b"0" }); - - // Include GPP section IDs so SID-only changes trigger a KV write. - if let Some(sids) = &ctx.gpp_section_ids { - let mut sorted = sids.clone(); - sorted.sort_unstable(); - for sid in &sorted { - hasher.update(sid.to_string().as_bytes()); - hasher.update(b"\xFF"); - } - } else { - hasher.update(b"\x00"); - } - - let result = hasher.finalize(); - hex::encode(&result[..8]) // 16 hex chars = 8 bytes = 64 bits -} - -/// Feeds an optional string into the hasher with sentinel bytes. -fn hash_optional(hasher: &mut Sha256, value: Option<&str>) { - match value { - Some(s) => { - hasher.update(b"\x01"); - hasher.update(s.as_bytes()); - } - None => hasher.update(b"\x00"), - } -} - -/// Parses a jurisdiction string back into a [`Jurisdiction`] enum. 
-fn parse_jurisdiction(s: &str) -> Jurisdiction { - match s { - "GDPR" => Jurisdiction::Gdpr, - "non-regulated" => Jurisdiction::NonRegulated, - "unknown" => Jurisdiction::Unknown, - s if s.starts_with("US-") => Jurisdiction::UsState(s[3..].to_owned()), - _ => Jurisdiction::Unknown, - } -} - -// --------------------------------------------------------------------------- -// KV Store operations -// --------------------------------------------------------------------------- - -/// Checks whether the stored consent fingerprint matches the current one. -/// -/// Returns `true` when the stored body's `fp` field equals `new_fp`, meaning -/// no write is needed. Returns `false` when the key is absent, the body -/// cannot be deserialized, or the fingerprint differs. -/// -/// Entries written before PR5 have an empty `fp` (via `#[serde(default)]`), -/// which never matches a computed fingerprint and triggers a self-healing -/// re-write. -fn fingerprint_unchanged(store: &dyn PlatformKvStore, key: &str, new_fp: &str) -> bool { - let bytes = match futures::executor::block_on(store.get_bytes(key)) { - Ok(Some(bytes)) => bytes, - _ => return false, - }; - - serde_json::from_slice::(&bytes) - .map(|entry| entry.fp == new_fp) - .unwrap_or(false) -} - -/// Loads consent data from the KV store for a given EC ID. -/// -/// Returns `Some(ConsentContext)` if a valid entry is found, [`None`] if the -/// key does not exist or deserialization fails. Errors are logged but never -/// propagated — KV failures must not break the request pipeline. -/// -/// # Arguments -/// -/// * `store` — KV store opened by the adapter. -/// * `ec_id` — Edge Cookie ID used as the KV key. 
-#[must_use] -pub fn load_consent_from_kv(store: &dyn PlatformKvStore, ec_id: &str) -> Option { - let bytes = match futures::executor::block_on(store.get_bytes(ec_id)) { - Ok(Some(bytes)) => bytes, - Ok(None) => { - log::debug!("Consent KV lookup miss for '{ec_id}'"); - return None; - } - Err(e) => { - log::debug!("Consent KV lookup error for '{ec_id}': {e}"); - return None; - } - }; - - match serde_json::from_slice::(&bytes) { - Ok(entry) => { - log::info!( - "Loaded consent from KV store for '{ec_id}' (stored_at_ds={})", - entry.stored_at_ds - ); - Some(context_from_entry(&entry)) - } - Err(e) => { - log::warn!("Failed to deserialize consent KV entry for '{ec_id}': {e}"); - None - } - } -} - -/// Saves consent data to the KV store, writing only when signals have changed. -/// -/// Compares the fingerprint of current consent signals against the fingerprint -/// embedded in the stored entry. If they match, the write is skipped. -/// The fingerprint is embedded in the body so no KV metadata is required. -/// -/// # Arguments -/// -/// * `store` — KV store opened by the adapter. -/// * `ec_id` — Edge Cookie ID used as the KV key. -/// * `ctx` — Current request's consent context. -/// * `max_age_days` — TTL for the entry, matching `max_consent_age_days`. 
-pub fn save_consent_to_kv( - store: &dyn PlatformKvStore, - ec_id: &str, - ctx: &ConsentContext, - max_age_days: u32, -) { - if ctx.is_empty() { - log::debug!("Skipping consent KV write: consent is empty"); - return; - } - - let fp = consent_fingerprint(ctx); - - if fingerprint_unchanged(store, ec_id, &fp) { - log::debug!("Consent unchanged for '{ec_id}' (fp={fp}), skipping write"); - return; - } - - let mut entry = entry_from_context(ctx, crate::consent::now_deciseconds()); - entry.fp = fp.clone(); - - let body = match serde_json::to_vec(&entry) { - Ok(body) => Bytes::from(body), - Err(e) => { - log::warn!("Failed to serialize consent entry for '{ec_id}': {e}"); - return; - } - }; - - let ttl = std::time::Duration::from_secs(u64::from(max_age_days) * 86_400); - - match futures::executor::block_on(store.put_bytes_with_ttl(ec_id, body, ttl)) { - Ok(()) => { - log::info!("Saved consent to KV store for '{ec_id}' (fp={fp}, ttl={max_age_days}d)"); - } - Err(e) => { - log::warn!("Failed to write consent to KV store for '{ec_id}': {e}"); - } - } -} - -/// Deletes a consent entry from the KV store for a given EC ID. -/// -/// Used when a user revokes consent — the existing EC cookie is being -/// expired, so the persisted consent data must also be removed. -/// -/// Errors are logged but never propagated — KV failures must not -/// break the request pipeline. 
-pub fn delete_consent_from_kv(store: &dyn PlatformKvStore, ec_id: &str) { - match futures::executor::block_on(store.delete(ec_id)) { - Ok(()) => { - log::info!("Deleted consent KV entry for '{ec_id}' (consent revoked)"); - } - Err(e) => { - log::warn!("Failed to delete consent KV entry for '{ec_id}': {e}"); - } - } -} - -// --------------------------------------------------------------------------- -// Tests -// --------------------------------------------------------------------------- - -#[cfg(test)] -fn make_test_context() -> ConsentContext { - ConsentContext { - raw_tc_string: Some("CPXxGfAPXxGfA".to_owned()), - raw_gpp_string: Some("DBACNYA~CPXxGfA".to_owned()), - gpp_section_ids: Some(vec![2, 6]), - raw_us_privacy: Some("1YNN".to_owned()), - raw_ac_string: None, - gdpr_applies: true, - tcf: None, - gpp: None, - us_privacy: None, - expired: false, - gpc: false, - jurisdiction: Jurisdiction::Gdpr, - source: ConsentSource::Cookie, - } -} - -#[cfg(test)] -mod tests { - use super::*; - use crate::consent::jurisdiction::Jurisdiction; - use crate::consent::types::{ConsentContext, ConsentSource}; - - #[test] - fn entry_roundtrip() { - let ctx = make_test_context(); - let entry = entry_from_context(&ctx, 1_000_000); - let json = serde_json::to_string(&entry).expect("should serialize"); - let restored: KvConsentEntry = serde_json::from_str(&json).expect("should deserialize"); - - assert_eq!(restored.raw_tc_string, ctx.raw_tc_string); - assert_eq!(restored.raw_gpp_string, ctx.raw_gpp_string); - assert_eq!(restored.gpp_section_ids, ctx.gpp_section_ids); - assert_eq!(restored.raw_us_privacy, ctx.raw_us_privacy); - assert_eq!(restored.gdpr_applies, ctx.gdpr_applies); - assert_eq!(restored.gpc, ctx.gpc); - assert_eq!(restored.jurisdiction, "GDPR"); - assert_eq!(restored.stored_at_ds, 1_000_000); - } - - #[test] - fn kv_consent_entry_roundtrip_preserves_fp() { - let ctx = make_test_context(); - let fp = consent_fingerprint(&ctx); - let mut entry = entry_from_context(&ctx, 
1_000_000); - entry.fp = fp.clone(); - let json = serde_json::to_string(&entry).expect("should serialize"); - let restored: KvConsentEntry = serde_json::from_str(&json).expect("should deserialize"); - - assert_eq!( - restored.fp, fp, - "should preserve fingerprint through roundtrip" - ); - } - - #[test] - fn entry_fits_in_2000_bytes() { - let ctx = make_test_context(); - let mut entry = entry_from_context(&ctx, 1_000_000); - entry.fp = consent_fingerprint(&ctx); - let json = serde_json::to_string(&entry).expect("should serialize"); - assert!( - json.len() <= 2000, - "entry JSON must fit in 2000 bytes, was {} bytes", - json.len() - ); - } - - #[test] - fn context_roundtrip_via_entry() { - let original = make_test_context(); - let entry = entry_from_context(&original, 1_000_000); - let restored = context_from_entry(&entry); - - assert_eq!(restored.raw_tc_string, original.raw_tc_string); - assert_eq!(restored.raw_gpp_string, original.raw_gpp_string); - assert_eq!(restored.raw_us_privacy, original.raw_us_privacy); - assert_eq!(restored.gdpr_applies, original.gdpr_applies); - assert_eq!(restored.gpc, original.gpc); - assert_eq!(restored.jurisdiction, original.jurisdiction); - assert_eq!(restored.source, ConsentSource::KvStore); - } - - #[test] - fn fingerprint_deterministic() { - let ctx = make_test_context(); - let fp1 = consent_fingerprint(&ctx); - let fp2 = consent_fingerprint(&ctx); - assert_eq!(fp1, fp2, "fingerprint should be deterministic"); - assert_eq!(fp1.len(), 16, "fingerprint should be 16 hex chars"); - } - - #[test] - fn fingerprint_changes_with_different_signals() { - let ctx1 = make_test_context(); - let mut ctx2 = make_test_context(); - ctx2.raw_tc_string = Some("DIFFERENT_TC_STRING".to_owned()); - - assert_ne!( - consent_fingerprint(&ctx1), - consent_fingerprint(&ctx2), - "different TC strings should produce different fingerprints" - ); - } - - #[test] - fn fingerprint_changes_with_gpc() { - let mut ctx1 = make_test_context(); - ctx1.gpc = false; - let 
mut ctx2 = make_test_context(); - ctx2.gpc = true; - - assert_ne!( - consent_fingerprint(&ctx1), - consent_fingerprint(&ctx2), - "different GPC values should produce different fingerprints" - ); - } - - #[test] - fn fingerprint_distinguishes_none_from_empty() { - let mut ctx_none = make_test_context(); - ctx_none.raw_tc_string = None; - let mut ctx_empty = make_test_context(); - ctx_empty.raw_tc_string = Some(String::new()); - - assert_ne!( - consent_fingerprint(&ctx_none), - consent_fingerprint(&ctx_empty), - "None vs empty string should produce different fingerprints" - ); - } - - #[test] - fn signals_from_entry_roundtrip() { - let ctx = make_test_context(); - let entry = entry_from_context(&ctx, 1_000_000); - let signals = signals_from_entry(&entry); - - assert_eq!(signals.raw_tc_string, ctx.raw_tc_string); - assert_eq!(signals.raw_gpp_string, ctx.raw_gpp_string); - assert_eq!(signals.raw_us_privacy, ctx.raw_us_privacy); - assert_eq!(signals.gpc, ctx.gpc); - // gpp_sid is serialized as "2,6" from the section IDs - assert_eq!(signals.raw_gpp_sid, Some("2,6".to_owned())); - } - - #[test] - fn parse_jurisdiction_roundtrip() { - assert_eq!(parse_jurisdiction("GDPR"), Jurisdiction::Gdpr); - assert_eq!( - parse_jurisdiction("US-CA"), - Jurisdiction::UsState("CA".to_owned()) - ); - assert_eq!( - parse_jurisdiction("non-regulated"), - Jurisdiction::NonRegulated - ); - assert_eq!(parse_jurisdiction("unknown"), Jurisdiction::Unknown); - assert_eq!( - parse_jurisdiction("something-else"), - Jurisdiction::Unknown, - "unrecognized jurisdiction should default to Unknown" - ); - } - - #[test] - fn empty_entry_serializes_compact() { - let ctx = ConsentContext::default(); - let entry = entry_from_context(&ctx, 0); - let json = serde_json::to_string(&entry).expect("should serialize"); - // With skip_serializing_if = "Option::is_none", omitted fields keep it small. 
- assert!( - !json.contains("raw_tc_string"), - "None fields should be omitted from JSON" - ); - } - - #[test] - fn entry_preserves_ac_string() { - let mut ctx = make_test_context(); - ctx.raw_ac_string = Some("2~1234.5678~dv.".to_owned()); - let entry = entry_from_context(&ctx, 0); - let restored = context_from_entry(&entry); - - assert_eq!( - restored.raw_ac_string, - Some("2~1234.5678~dv.".to_owned()), - "AC string should survive roundtrip" - ); - } -} - -#[cfg(test)] -mod new_api_tests { - use super::*; - use edgezero_core::key_value_store::NoopKvStore; - - fn noop() -> NoopKvStore { - NoopKvStore - } - - #[test] - fn load_returns_none_when_key_absent() { - let result = load_consent_from_kv(&noop(), "some-ec-id"); - assert!(result.is_none(), "should return None when key is absent"); - } - - #[test] - fn save_does_not_panic_with_noop_store() { - let ctx = make_test_context(); - save_consent_to_kv(&noop(), "some-ec-id", &ctx, 30); - } - - #[test] - fn delete_does_not_panic_with_noop_store() { - delete_consent_from_kv(&noop(), "some-ec-id"); - } - - #[test] - fn kv_consent_entry_missing_fp_deserialises_as_empty() { - let json = r#"{"gdpr_applies":true,"gpc":false,"jurisdiction":"GDPR","stored_at_ds":0}"#; - let entry: KvConsentEntry = - serde_json::from_str(json).expect("should deserialize legacy entry"); - assert_eq!( - entry.fp, - String::new(), - "should default fp to empty string for legacy entries" - ); - } -} diff --git a/crates/trusted-server-core/src/storage/mod.rs b/crates/trusted-server-core/src/storage/mod.rs index 60f55067..62c42ba1 100644 --- a/crates/trusted-server-core/src/storage/mod.rs +++ b/crates/trusted-server-core/src/storage/mod.rs @@ -8,7 +8,6 @@ //! via [`crate::platform::RuntimeServices`]. 
pub(crate) mod config_store; -pub mod kv_store; pub(crate) mod secret_store; pub use config_store::FastlyConfigStore; diff --git a/crates/trusted-server-core/src/streaming_processor.rs b/crates/trusted-server-core/src/streaming_processor.rs index ec5f8ddf..ccb3b25b 100644 --- a/crates/trusted-server-core/src/streaming_processor.rs +++ b/crates/trusted-server-core/src/streaming_processor.rs @@ -5,19 +5,6 @@ //! - Pluggable content processors (text replacement, HTML rewriting, etc.) //! - Memory-efficient streaming //! - UTF-8 boundary handling -//! -//! # Platform notes -//! -//! This module is **platform-agnostic** (verified 2026-03-31; see -//! `docs/superpowers/plans/2026-03-31-pr8-content-rewriting-verification.md`). It has zero -//! `fastly` imports. [`StreamingPipeline::process`] is generic over -//! `R: Read + W: Write` — any reader or writer works, including -//! `fastly::Body` (which implements `std::io::Read`) or standard -//! `std::io::Cursor<&[u8]>`. -//! -//! Future adapters (Cloudflare Workers, Axum, Spin) do not need to implement any compression or -//! streaming interface. See `crate::platform` module doc for the -//! authoritative note. use std::cell::RefCell; use std::io::{self, Read, Write}; diff --git a/crates/trusted-server-core/src/streaming_replacer.rs b/crates/trusted-server-core/src/streaming_replacer.rs index c6048b8e..9c438858 100644 --- a/crates/trusted-server-core/src/streaming_replacer.rs +++ b/crates/trusted-server-core/src/streaming_replacer.rs @@ -2,8 +2,6 @@ //! //! This module provides functionality for replacing patterns in content //! in streaming fashion, handling content that may be split across multiple chunks. -//! -//! See [`crate::platform`] module doc for platform notes. 
// Note: std::io::{Read, Write} were previously used by stream_process function // which has been removed in favor of StreamingPipeline diff --git a/crates/trusted-server-core/src/test_support.rs b/crates/trusted-server-core/src/test_support.rs index 8fdfaa85..39432755 100644 --- a/crates/trusted-server-core/src/test_support.rs +++ b/crates/trusted-server-core/src/test_support.rs @@ -15,7 +15,7 @@ pub mod tests { password = "pass" [[handlers]] - path = "^/admin" + path = "^/_ts/admin" username = "admin" password = "admin-pass" @@ -34,8 +34,8 @@ pub mod tests { enabled = false rewrite_attributes = ["href", "link", "url"] - [edge_cookie] - secret_key = "test-secret-key" + [ec] + passphrase = "test-secret-key-32-bytes-minimum" [request_signing] config_store_id = "test-config-store-id" secret_store_id = "test-secret-store-id" diff --git a/docs/.vitepress/config.mts b/docs/.vitepress/config.mts index d66a820c..36ed84c5 100644 --- a/docs/.vitepress/config.mts +++ b/docs/.vitepress/config.mts @@ -78,6 +78,7 @@ export default withMermaid( text: 'Core Concepts', items: [ { text: 'Edge Cookies', link: '/guide/edge-cookies' }, + { text: 'EC Setup Guide', link: '/guide/ec-setup-guide' }, { text: 'GDPR Compliance', link: '/guide/gdpr-compliance' }, { text: 'Ad Serving', link: '/guide/ad-serving' }, { diff --git a/docs/guide/api-reference.md b/docs/guide/api-reference.md index 880efc83..208cb2c3 100644 --- a/docs/guide/api-reference.md +++ b/docs/guide/api-reference.md @@ -5,6 +5,7 @@ Quick reference for all Trusted Server HTTP endpoints. 
## Endpoint Categories - [First-Party Endpoints](#first-party-endpoints) - Core ad serving and proxying +- [Edge Cookie Endpoints](#edge-cookie-endpoints) - Identity sync and enrichment - [Request Signing](#request-signing-endpoints) - Cryptographic signing and key management - [TSJS Library](#tsjs-library-endpoint) - JavaScript library serving - [Integration Endpoints](#integration-endpoints) - Third-party service proxying @@ -37,7 +38,7 @@ curl "https://edge.example.com/first-party/ad?slot=header-banner&w=728&h=90" **Response Headers:** -- `x-ts-ec` - EC ID (`64hex.6alnum` format) +No EC ID response header is emitted. EC identity is maintained with the `ts-ec` cookie. **Use Cases:** @@ -47,6 +48,72 @@ curl "https://edge.example.com/first-party/ad?slot=header-banner&w=728&h=90" --- +## Edge Cookie Endpoints + +Partners are configured statically in `[[ec.partners]]` and loaded into an in-memory registry at startup. There is no runtime partner-registration endpoint and the legacy browser pixel sync endpoint has been removed; browser-resolved IDs are ingested through Prebid EID cookies. + +--- + +### GET /\_ts/api/v1/identify + +Returns EC identity plus the authenticated partner's UID and EID for the current user. + +**Auth:** Bearer token (`Authorization: Bearer `) + +**Request:** + +- Uses `ts-ec` cookie and consent signals + +**Response (example):** + +```json +{ + "ec": "954d...e0c3.nZ1GxL", + "consent": "ok", + "degraded": false, + "partner_id": "mocktioneer", + "uid": "mock-user-123", + "eid": { + "source": "formally-vital-lion.edgecompute.app", + "uids": [{ "id": "mock-user-123", "atype": 3 }] + } +} +``` + +--- + +### POST /\_ts/api/v1/batch-sync + +Server-to-server batch sync endpoint for writing EC ID to partner UID mappings. Mapping timestamps are retained in the request schema for compatibility, but they no longer order writes because EC identity entries do not store per-partner sync timestamps. Valid mappings use idempotent last-write-wins semantics. 
+ +**Auth:** Bearer token (`Authorization: Bearer `) + +**Request Body:** + +```json +{ + "mappings": [ + { + "ec_id": "954d8e7398dd993f78e3875ca1ef7841249781240e913157c1f2d6a6c960e0c3.nZ1GxL", + "partner_uid": "mock-user-123", + "timestamp": 1775147300 + } + ] +} +``` + +**Response:** + +```json +{ + "accepted": 1, + "rejected": 0, + "errors": [] +} +``` + +--- + ### POST /third-party/ad Client-side auction endpoint for TSJS library. @@ -329,7 +396,7 @@ curl -X POST https://edge.example.com/verify-signature \ --- -### POST /admin/keys/rotate +### POST /\_ts/admin/keys/rotate Generates and activates a new signing key. @@ -359,7 +426,7 @@ If omitted, auto-generates date-based ID (e.g., `ts-2025-01-15-A`). **Example:** ```bash -curl -X POST https://edge.example.com/admin/keys/rotate \ +curl -X POST https://edge.example.com/_ts/admin/keys/rotate \ -u admin:password \ -H "Content-Type: application/json" ``` @@ -374,7 +441,7 @@ See [Key Rotation Guide](./key-rotation.md) for workflow details. --- -### POST /admin/keys/deactivate +### POST /\_ts/admin/keys/deactivate Deactivates or deletes a signing key. @@ -407,7 +474,7 @@ Deactivates or deletes a signing key. **Example:** ```bash -curl -X POST https://edge.example.com/admin/keys/deactivate \ +curl -X POST https://edge.example.com/_ts/admin/keys/deactivate \ -u admin:password \ -H "Content-Type: application/json" \ -d '{"kid":"ts-2025-01-14-A","delete":true}' @@ -547,7 +614,7 @@ Proxies to configured endpoint with `user.id` populated with EC ID. **Response Headers:** -- `x-ts-ec` - EC ID (`64hex.6alnum` format) +No EC ID response header is emitted. EC identity is maintained with the `ts-ec` cookie. 
--- @@ -588,7 +655,7 @@ Endpoints under protected paths require HTTP Basic Authentication: ```toml [[handlers]] -path = "^/admin" +path = "^/_ts/admin" username = "admin" password = "secure-password" ``` @@ -596,13 +663,13 @@ password = "secure-password" **Usage:** ```bash -curl -u admin:secure-password https://edge.example.com/admin/keys/rotate +curl -u admin:secure-password https://edge.example.com/_ts/admin/keys/rotate ``` **Protected Endpoints:** -- `/admin/keys/rotate` -- `/admin/keys/deactivate` +- `/_ts/admin/keys/rotate` +- `/_ts/admin/keys/deactivate` - Any paths matching configured `handlers` patterns --- diff --git a/docs/guide/auction-orchestration.md b/docs/guide/auction-orchestration.md index 3c5fe62b..3a55bc3d 100644 --- a/docs/guide/auction-orchestration.md +++ b/docs/guide/auction-orchestration.md @@ -549,10 +549,7 @@ Auction results are returned in standard OpenRTB format with an `ext.orchestrato } ``` -The response also includes EC ID headers: - -- `X-TS-EC` — The persistent EC user ID -- `X-TS-EC-Fresh` — A fresh ID generated for this session +EC identity is maintained with the `ts-ec` cookie; auction responses do not emit EC ID headers. ## Creative Processing diff --git a/docs/guide/configuration.md b/docs/guide/configuration.md index bfa655d9..82293c37 100644 --- a/docs/guide/configuration.md +++ b/docs/guide/configuration.md @@ -23,8 +23,8 @@ cookie_domain = ".publisher.com" origin_url = "https://origin.publisher.com" proxy_secret = "your-secure-secret-here" -[edge_cookie] -secret_key = "your-hmac-secret" +[ec] +passphrase = "replace-with-32-plus-byte-random-secret" ``` ### Environment Variable Overrides @@ -37,7 +37,7 @@ at runtime. 
# Format: TRUSTED_SERVER__SECTION__FIELD export TRUSTED_SERVER__PUBLISHER__DOMAIN=publisher.com export TRUSTED_SERVER__PUBLISHER__ORIGIN_URL=https://origin.publisher.com -export TRUSTED_SERVER__EDGE_COOKIE__SECRET_KEY=your-secret +export TRUSTED_SERVER__EC__PASSPHRASE=replace-with-32-plus-byte-random-secret ``` ### Generate Secure Secrets @@ -60,7 +60,7 @@ openssl rand -base64 32 | Section | Purpose | | ------------------- | -------------------------------------------- | | `[publisher]` | Domain, origin, proxy settings | -| `[edge_cookie]` | Edge Cookie (EC) ID generation | +| `[ec]` | Edge Cookie (EC) ID generation | | `[proxy]` | Proxy SSRF allowlist | | `[request_signing]` | Ed25519 request signing | | `[auction]` | Auction orchestration | @@ -75,8 +75,8 @@ cookie_domain = ".publisher.com" origin_url = "https://origin.publisher.com" proxy_secret = "change-me-to-secure-value" -[edge_cookie] -secret_key = "your-hmac-secret-key" +[ec] +passphrase = "replace-with-32-plus-byte-random-secret" [request_signing] enabled = true @@ -153,19 +153,22 @@ Core publisher settings for domain, origin, and proxy configuration. 
### `[publisher]` -| Field | Type | Required | Description | -| --------------- | ------ | -------- | ------------------------------------------------------- | -| `domain` | String | Yes | Publisher's domain name | -| `cookie_domain` | String | Yes | Domain for setting cookies (typically with leading dot) | -| `origin_url` | String | Yes | Full URL of publisher origin server | -| `proxy_secret` | String | Yes | Secret key for encrypting/signing proxy URLs | +| Field | Type | Required | Description | +| --------------- | ------ | -------- | ------------------------------------------------------ | +| `domain` | String | Yes | Publisher's apex domain name | +| `cookie_domain` | String | Yes | Domain for non-EC cookies (typically with leading dot) | +| `origin_url` | String | Yes | Full URL of publisher origin server | +| `proxy_secret` | String | Yes | Secret key for encrypting/signing proxy URLs | + +> **Note:** EC cookies (`ts-ec`) derive their domain automatically as `.{domain}` and +> do not use `cookie_domain`. The `cookie_domain` field is used by other cookie helpers. **Example**: ```toml [publisher] domain = "publisher.com" -cookie_domain = ".publisher.com" # Includes subdomains +cookie_domain = ".publisher.com" origin_url = "https://origin.publisher.com" proxy_secret = "change-me-to-secure-random-value" ``` @@ -199,12 +202,12 @@ TRUSTED_SERVER__PUBLISHER__PROXY_SECRET=your-secret-here #### `cookie_domain` -**Purpose**: Domain scope for EC cookies. +**Purpose**: Domain scope for non-EC cookies. **Usage**: -- Set on `ts-ec` cookie -- Controls cookie sharing across subdomains +- Used by non-EC cookie helpers for domain scoping +- EC cookies (`ts-ec`) use a separate computed domain derived from `domain` **Format**: Domain with optional leading dot @@ -263,32 +266,50 @@ Changing `proxy_secret` invalidates all existing signed URLs. Plan rotations car ## EC Configuration -Settings for generating privacy-preserving Edge Cookie identifiers. 
+Settings for generating privacy-preserving Edge Cookie identifiers. The `ec_store` KV store is the only KV-backed EC lifecycle store; it holds identity graph state, minimal consent metadata, partner IDs, and withdrawal tombstones. Consent configuration controls request-local interpretation and forwarding, not separate KV persistence. -### `[edge_cookie]` +### `[ec]` -| Field | Type | Required | Description | -| ------------ | ------ | -------- | ----------------------------- | -| `secret_key` | String | Yes | HMAC secret for ID generation | +| Field | Type | Required | Description | +| ------------------------- | -------------- | -------- | ----------------------------------------------------------------------- | +| `passphrase` | String | Yes | Publisher passphrase used as HMAC key | +| `ec_store` | String or null | No | Fastly KV store name for EC identity graph and withdrawal state | +| `pull_sync_concurrency` | Integer | No | Maximum concurrent pull-sync requests per organic response | +| `cluster_trust_threshold` | Integer | No | Cluster size threshold for identity trust decisions | +| `cluster_recheck_secs` | Integer | No | Legacy compatibility setting; cluster rechecks no longer use timestamps | +| `partners` | Array | No | Static partner registry entries | + +::: tip Partner keying +`id` is the partner namespace used for KV storage and response headers (`x-ts-`). `source_domain` is only used to match incoming OpenRTB EID `source` values during ingestion. 
+::: **Example**: ```toml -[edge_cookie] -secret_key = "your-secure-hmac-secret" +[ec] +passphrase = "replace-with-32-plus-byte-random-secret" +ec_store = "ec_identity_store" + +[[ec.partners]] +id = "mocktioneer" +name = "Mocktioneer SSP" +source_domain = "mocktioneer.example" +api_token = "partner-api-token-32-bytes-minimum" +bidstream_enabled = true ``` **Environment Override**: ```bash -TRUSTED_SERVER__EDGE_COOKIE__SECRET_KEY=your-secret +TRUSTED_SERVER__EC__PASSPHRASE=your-secret +TRUSTED_SERVER__EC__EC_STORE=ec_identity_store ``` ### Field Details -#### `secret_key` +#### `passphrase` -**Purpose**: HMAC secret for EC ID base generation. +**Purpose**: Publisher passphrase used as HMAC key for EC ID generation. **Security**: @@ -436,7 +457,7 @@ Path-based HTTP Basic Authentication. ```toml # Single handler [[handlers]] -path = "^/admin" +path = "^/_ts/admin" username = "admin" password = "secure-password" @@ -456,7 +477,7 @@ password = "api-pass" ```bash # Handler 0 -TRUSTED_SERVER__HANDLERS__0__PATH="^/admin" +TRUSTED_SERVER__HANDLERS__0__PATH="^/_ts/admin" TRUSTED_SERVER__HANDLERS__0__USERNAME="admin" TRUSTED_SERVER__HANDLERS__0__PASSWORD="secure-password" @@ -474,10 +495,10 @@ TRUSTED_SERVER__HANDLERS__1__PASSWORD="api-pass" ```toml # Exact path -path = "^/admin$" # Only /admin +path = "^/_ts/admin$" # Only /_ts/admin # Prefix match -path = "^/admin" # /admin, /admin/users, /admin/settings +path = "^/_ts/admin" # /_ts/admin, /_ts/admin/users, /_ts/admin/settings # Multiple paths path = "^/(admin|secure|private)" @@ -912,8 +933,8 @@ Configuration is validated at startup: **EC Validation**: -- `secret_key` ≥ 1 character -- `secret_key` ≠ known placeholders (`"secret-key"`, `"secret_key"`, `"trusted-server"` — case-insensitive) +- `passphrase` ≥ 1 character +- `passphrase` ≠ known placeholders (`"secret-key"`, `"secret_key"`, `"trusted-server"` — case-insensitive) **Handler Validation**: @@ -970,7 +991,7 @@ TRUSTED_SERVER__PUBLISHER__PROXY_SECRET=$(cat 
/run/secrets/proxy_secret_staging) ```bash # All secrets from environment TRUSTED_SERVER__PUBLISHER__PROXY_SECRET=$(cat /run/secrets/proxy_secret) -TRUSTED_SERVER__EDGE_COOKIE__SECRET_KEY=$(cat /run/secrets/ec_secret) +TRUSTED_SERVER__EC__PASSPHRASE=$(cat /run/secrets/ec_secret) TRUSTED_SERVER__HANDLERS__0__PASSWORD=$(cat /run/secrets/admin_password) ``` @@ -1024,7 +1045,7 @@ trusted-server.dev.toml # Development overrides **"Configuration field '...' is set to a known placeholder value"**: -- `edge_cookie.secret_key` cannot be `"secret-key"`, `"secret_key"`, or `"trusted-server"` (case-insensitive) +- `ec.passphrase` cannot be `"secret-key"`, `"secret_key"`, or `"trusted-server"` (case-insensitive) - `publisher.proxy_secret` cannot be `"change-me-proxy-secret"` (case-insensitive) - Must be non-empty - Change to a secure random value (see generation commands above) @@ -1032,7 +1053,7 @@ trusted-server.dev.toml # Development overrides **"Invalid regex"**: - Handler `path` must be valid regex -- Test pattern: `echo "^/admin" | grep -E "^/admin"` +- Test pattern: `echo "^/_ts/admin" | grep -E "^/_ts/admin"` - Escape special characters: `\.`, `\$`, etc. **"Integration configuration could not be parsed"**: diff --git a/docs/guide/ec-setup-guide.md b/docs/guide/ec-setup-guide.md new file mode 100644 index 00000000..31beb47d --- /dev/null +++ b/docs/guide/ec-setup-guide.md @@ -0,0 +1,213 @@ +# Edge Cookie Setup Guide + +End-to-end setup and verification guide for Edge Cookie (EC) identity flows. + +This guide covers: + +1. Fastly store setup +2. Partner configuration +3. Server-to-server batch sync (`/_ts/api/v1/batch-sync`) +4. Identity verification (`/_ts/api/v1/identify`) +5. 
Auction bidstream verification (`/auction`) + +## Prerequisites + +- Trusted Server deployed and reachable (example: `https://getpurpose.ai`) +- Access to update `trusted-server.toml` / deployment configuration +- Fastly CLI authenticated (for store verification) +- A valid TCF consent string (`euconsent-v2`) for consent-required requests + +## 1) Required Configuration + +Set EC configuration in `trusted-server.toml`: + +```toml +[ec] +passphrase = "replace-with-32-plus-byte-random-secret" +ec_store = "ec_identity_store" + +[[ec.partners]] +id = "mocktioneer" +name = "Mocktioneer SSP" +source_domain = "formally-vital-lion.edgecompute.app" +api_token = "test-batch-sync-key-2026" +bidstream_enabled = true +``` + +Required behavior assumptions: + +- `passphrase` is long-lived HMAC-SHA256 keying material for EC ID derivation; use a high-entropy random value of at least 32 characters +- `ec_store` is linked to the active Fastly service version +- `ec_store` is the only KV-backed EC lifecycle store; it contains identity graph state, minimal consent metadata, partner IDs, and withdrawal tombstones +- Live consent is interpreted from request cookies, headers, geolocation, and policy defaults rather than a separate consent KV store +- Partners are configured statically in `[[ec.partners]]` and loaded into an in-memory registry at startup +- `id` is the canonical key used for stored IDs and `x-ts-`-style headers; `source_domain` only controls EID source matching during ingestion +- Partner has `bidstream_enabled = true` if you want `user.ext.eids` in bidstream + +## 2) Configure Demo Variables + +```bash +TS_BASE_URL="https://getpurpose.ai" +MOCK_SSP_URL="https://formally-vital-lion.edgecompute.app" + +PARTNER_ID="mocktioneer" +PARTNER_NAME="Mocktioneer SSP" +PARTNER_API_KEY="test-batch-sync-key-2026" + +# Optional: use a real browser EC if already present +EC_ID="<64hex.6chars>" + +TCF_CONSENT="" +PARTNER_UID="mock-user-$(date +%s)" +``` + +## 3) Configure Partner + 
+Partners are configured in `trusted-server.toml` and loaded at startup: + +```toml +[[ec.partners]] +id = "mocktioneer" +name = "Mocktioneer SSP" +source_domain = "formally-vital-lion.edgecompute.app" +api_token = "test-batch-sync-key-2026" +bidstream_enabled = true +``` + +Deploy/restart after changing partner configuration. + +## 4) Acquire or Reuse EC Cookie + +If you already have an EC from browser traffic, reuse it. + +Otherwise, attempt generation with consent: + +```bash +curl -si "${TS_BASE_URL}/" \ + -H "Cookie: euconsent-v2=${TCF_CONSENT}" +``` + +Look for: + +- `Set-Cookie: ts-ec=<64hex.6chars>` + +## 5) Batch Sync (S2S) + +Endpoint: `POST /_ts/api/v1/batch-sync` + +Important: request field is `ec_id` (full `{64hex}.{6alnum}` value). The `timestamp` field remains required for API compatibility, but it no longer orders writes because EC identity entries do not store per-partner sync timestamps. Valid mappings are idempotent last-write-wins: unchanged UIDs are accepted without a write, and different UIDs replace the stored value. 
+ +```bash +BATCH_UID="${PARTNER_UID}-batch" +NOW_TS="$(date +%s)" + +curl -X POST "${TS_BASE_URL}/_ts/api/v1/batch-sync" \ + -H "Authorization: Bearer ${PARTNER_API_KEY}" \ + -H "Content-Type: application/json" \ + -d "{ + \"mappings\": [{ + \"ec_id\": \"${EC_ID}\", + \"partner_uid\": \"${BATCH_UID}\", + \"timestamp\": ${NOW_TS} + }] + }" | python3 -m json.tool +``` + +Expected: + +```json +{ + "accepted": 1, + "rejected": 0, + "errors": [] +} +``` + +## 6) Verify Identity + +Endpoint: `GET /_ts/api/v1/identify` + +```bash +curl -s "${TS_BASE_URL}/_ts/api/v1/identify" \ + -H "Authorization: Bearer ${PARTNER_API_KEY}" \ + -H "Cookie: ts-ec=${EC_ID}; euconsent-v2=${TCF_CONSENT}" | python3 -m json.tool +``` + +Expected shape: + +```json +{ + "ec": "", + "consent": "ok", + "degraded": false, + "partner_id": "mocktioneer", + "uid": "mock-user-123", + "eid": { + "source": "formally-vital-lion.edgecompute.app", + "uids": [{ "id": "mock-user-123", "atype": 3 }] + }, + "cluster_size": 12 +} +``` + +## 7) Verify Auction Bidstream Enrichment + +Endpoint: `POST /auction` + +```bash +curl -si -X POST "${TS_BASE_URL}/auction" \ + -H "Cookie: ts-ec=${EC_ID}; euconsent-v2=${TCF_CONSENT}" \ + -H "Content-Type: application/json" \ + -d '{"adUnits":[{"code":"test","mediaTypes":{"banner":{"sizes":[[300,250]]}}}]}' +``` + +Check response headers: + +- `x-ts-ec-consent` +- `x-ts-eids` + +For returning users, ordinary page views should not refresh `Set-Cookie: ts-ec=...`. A `Set-Cookie` header is expected when the EC is newly generated. 
+ +Decode `x-ts-eids`: + +```bash +echo "" | base64 -d | python3 -m json.tool +``` + +Expected decoded payload contains: + +- `source = formally-vital-lion.edgecompute.app` +- `uids[0].id = ` + +## 8) Fastly KV Operational Checks + +List stores: + +```bash +fastly kv-store list +``` + +Check service resource links for active version: + +```bash +fastly resource-link list --service-id --version +``` + +Inspect EC identity entry: + +```bash +fastly kv-store-entry get --store-id --key "${EC_ID}" +``` + +If batch sync returns `ineligible`, check whether the KV entry is missing or has `consent.ok = false` from a withdrawal tombstone. + +## 9) Troubleshooting Quick Map + +| Symptom | Likely Cause | Check | +| ----------------------------------------------------- | ------------------------------ | --------------------------------------------------------------------------- | +| `invalid_token` on batch sync | Wrong partner API key | Confirm the bearer token matches the partner's `api_token` in `[[ec.partners]]`, then redeploy | +| `missing field ec_id` | Wrong request schema | Use `ec_id` field | +| `/_ts/api/v1/identify` returns `{"consent":"denied"}` | No consent for current request | Send consent cookie | +| No `uid` in `/_ts/api/v1/identify` | No successful sync yet | Run batch sync or ensure Prebid EID ingestion has populated the partner UID | + +See also: [Edge Cookies](/guide/edge-cookies), [Configuration](/guide/configuration), [API Reference](/guide/api-reference) diff --git a/docs/guide/edge-cookies.md b/docs/guide/edge-cookies.md index d0e31e2e..db913f89 100644 --- a/docs/guide/edge-cookies.md +++ b/docs/guide/edge-cookies.md @@ -8,6 +8,8 @@ Edge Cookies (EC) are privacy-safe identifiers generated on a first site visit u Trusted Server surfaces the current EC ID via response headers and a first-party cookie. For the exact header and cookie names, see the [API Reference](/guide/api-reference). 
+For full operational onboarding (partner configuration, batch sync, identify, and auction verification), use the [EC Setup Guide](/guide/ec-setup-guide). + ## How They Work ### HMAC-Based Generation @@ -18,9 +20,219 @@ EC IDs use HMAC (Hash-based Message Authentication Code) to generate a determini **IP normalization**: IPv6 addresses are normalized to a /64 prefix before hashing. +### Request Lifecycle + +Every request passes through four phases. EC generation only happens on organic routes (publisher proxy, integration proxy, auction) — read-only endpoints like `/identify` and `/batch-sync` skip generation entirely. During pre-routing, Trusted Server builds consent from request-local cookies, headers, geolocation, and policy defaults; it does not load consent from a separate KV store. + +```mermaid +sequenceDiagram + participant B as Browser + participant TS as Trusted Server + participant KV as KV Store + + B->>TS: Request (ts-ec cookie + consent signals) + Note over TS: Phase 1: Pre-routing
Read EC from cookie
Build consent context
Extract device signals + + alt First Visit (no EC cookie) + Note over TS: Phase 2: Routing (organic only)
generate_if_needed() + TS->>TS: HMAC-SHA256(IP) + random suffix + TS->>KV: Create entry (consent, geo, device) + Note over TS: Phase 3: Finalize
Ingest Prebid EID cookies + TS-->>B: Response + Set-Cookie: ts-ec=... + else Return Visit (EC cookie present) + Note over TS: Phase 2: Routing
EC exists — skip generation + Note over TS: Phase 3: Finalize
Ingest Prebid EID cookies + TS-->>B: Response
(no cookie refresh) + end + + Note over TS,KV: Phase 4: Post-send (background)
Dispatch pull-sync to partners +``` + +### Response Finalization + +After routing completes, the server evaluates consent state and cookie presence to decide what to do with the EC cookie on the response. + +```mermaid +flowchart TD + Start[ec_finalize_response] --> ConsentCheck{Consent
allows EC?} + + ConsentCheck -- "No" --> ExplicitWithdrawal{Explicit
withdrawal?} + ExplicitWithdrawal -- "Yes" --> CookiePresent{Cookie was
present?} + CookiePresent -- "Yes" --> Withdraw["Expire ts-ec cookie
Write withdrawal tombstone in ec_identity_store (24h TTL)
Strip all x-ts-* headers"] + CookiePresent -- "No" --> HeaderOnly["Strip all x-ts-* headers only
(no cookie expiry or KV tombstone)"] + ExplicitWithdrawal -- "No" --> HeaderOnly + + ConsentCheck -- "Yes" --> WasPresent{EC was present
in request?} + WasPresent -- "Yes, not generated" --> Returning["Ingest Prebid EID cookies
No cookie or KV TTL refresh"] + WasPresent -- "No, just generated" --> NewEc["Ingest Prebid EID cookies
Set ts-ec cookie"] +``` + +When consent cannot be verified for the current request — for example, unknown jurisdiction or missing/undecodable consent signals in a regulated region — Trusted Server fails closed for EC use by stripping EC headers, but it does **not** treat that as authoritative revocation of an already-issued EC. + +## Consent Model + +EC creation is gated by jurisdiction. The server detects jurisdiction from geolocation data attached to the request and applies the appropriate consent framework. Live consent comes from request-local signals (`euconsent-v2`, `__gpp`, `__gpp_sid`, `us_privacy`, `Sec-GPC`) plus geolocation and policy defaults; there is no separate consent KV fallback. + +```mermaid +flowchart TD + Start[Detect Jurisdiction] --> J{Jurisdiction?} + + J -- "GDPR
(EU/UK)" --> TCF{TCF string
present?} + TCF -- "Yes" --> P1{Purpose 1
granted?} + P1 -- "Yes" --> Allow([Allow EC]) + P1 -- "No" --> Deny([Deny EC]) + TCF -- "No" --> Deny + + J -- "US State" --> GPC{GPC header
set?} + GPC -- "Yes" --> Deny + GPC -- "No" --> USTCF{TCF from CMP
e.g. Didomi?} + USTCF -- "Yes" --> USP1{Purpose 1
granted?} + USP1 -- "Yes" --> Allow + USP1 -- "No" --> Deny + USTCF -- "No" --> USP{US Privacy
string?} + USP -- "Yes" --> OptOut{Opt-out
sale?} + OptOut -- "No" --> Allow + OptOut -- "Yes" --> Deny + USP -- "No" --> Deny + + J -- "Non-regulated" --> Allow + J -- "Unknown
(no geo data)" --> Deny +``` + +- **GDPR**: Opt-in required. TCF Purpose 1 (store/access device) must be explicitly consented. +- **US State**: Opt-out model with three-tier fallback — GPC always blocks, then TCF if a CMP uses it, then US Privacy string, then fail-closed. +- **Non-regulated**: EC always allowed. +- **Unknown**: Fail-closed when jurisdiction cannot be determined. + +The `ec_identity_store` KV store is the only EC lifecycle store. It holds identity graph state, partner IDs, a minimal consent snapshot used for EC entry metadata, and withdrawal tombstones. Consent interpretation for each request remains based on the live request signals listed above. + +## Partner Sync Channels + +Partner identities flow into the KV identity graph through three channels. Each writes to the same `ids` map in the KV entry via idempotent upsert logic: unchanged UIDs are accepted without a KV write, while different UIDs replace the stored value. + +```mermaid +flowchart LR + subgraph Browser-initiated + Prebid["Prebid EID Cookies
ts-eids + sharedId
Passive cookie ingestion"] + end + + subgraph Server-initiated + Batch["Batch Sync (S2S)
POST /_ts/api/v1/batch-sync
Partner POST + Bearer auth"] + Pull["Pull Sync (Background)
TS calls partner URL
Post-send on organic routes"] + end + + Prebid --> KV[(KV Identity Graph
ids map)] + Batch --> KV + Pull --> KV +``` + +### Prebid EID Cookie Flow + +The `ts-eids` cookie bridges client-side Prebid user ID modules with the server-side identity graph. + +```mermaid +sequenceDiagram + participant Prebid as Prebid.js + participant TSJS as TSJS Prebid Module + participant B as Browser Cookie Jar + participant TS as Trusted Server + participant KV as KV Store + + Prebid->>Prebid: Auction completes + Prebid->>TSJS: bidsBackHandler fires + TSJS->>Prebid: getUserIdsAsEids() + Prebid-->>TSJS: [{source, uids: [{id, atype}]}] + TSJS->>TSJS: Base64 encode full OpenRTB-style EID array
[{source, uids:[{id, atype, ext?}]}] + TSJS->>B: document.cookie = "ts-eids=..." + + Note over B,TS: Next page request + B->>TS: Request with ts-eids cookie + TS->>TS: Base64 decode → parse OpenRTB-style EIDs
match source domains to partners + TS->>KV: upsert_partner_id() per match
(skips write when UID unchanged) +``` + +Current TSJS writers preserve the full OpenRTB-style `{source, uids:[...]}` shape in `ts-eids`. The server remains backward-compatible with earlier flattened `{source, id, atype}` cookies during rollout, but new cookies use the structured `uids[]` form. + +The `sharedId` cookie follows a similar path but is written directly by Prebid's SharedID module rather than by TSJS. The server reads it separately and maps it via the `sharedid.org` source domain. + +### EID Seeding and Prebid Bidstream Forwarding + +EIDs can reach the EC identity graph from either server-side pull sync or browser-side Prebid sync. During a Prebid-routed auction, Trusted Server combines those stored IDs with any same-request EIDs from Prebid.js, applies consent gating, and forwards the merged set to Prebid Server as OpenRTB `user.ext.eids`. Prebid Server then passes those EIDs downstream to demand partners in its OpenRTB requests. + +```mermaid +sequenceDiagram + participant B as Browser / Prebid.js + participant TSJS as TSJS Prebid Module + participant TS as Trusted Server + participant KV as EC KV Identity Graph + participant PS as Prebid Server + participant DSP as Downstream Partners / DSPs + + alt Pull sync seeds partner UID + TS->>DSP: Background pull sync request
(EC ID + consent context) + DSP-->>TS: Partner UID for EC + TS->>KV: Upsert ids[partner_id] = UID + else Prebid sync seeds browser EIDs + B->>B: Prebid User ID modules resolve IDs + B->>TSJS: getUserIdsAsEids() + TSJS->>B: Write ts-eids cookie
Base64 OpenRTB-style EIDs + B->>TS: Next request with ts-eids + TS->>KV: Decode cookie and upsert matched partner UIDs + end + + Note over B,TS: Prebid-routed auction + B->>B: getUserIdsAsEids() for current auction + B->>TS: POST /auction
adUnits + eids[] + ts-ec cookie + TS->>KV: Resolve EC-backed partner IDs + KV-->>TS: Stored partner UIDs + TS->>TS: Convert stored UIDs to EIDs
Merge + dedupe with request eids[]
Apply consent gating + TS->>PS: OpenRTB request
user.ext.eids = merged EID set + PS->>DSP: OpenRTB bid request
user.ext.eids preserved for bidders + DSP-->>PS: OpenRTB bid response + PS-->>TS: OpenRTB seatbid response + TS-->>B: Auction response + x-ts-eids header when available +``` + +The relevant OpenRTB structure forwarded to Prebid Server and downstream partners is: + +```json +{ + "user": { + "id": "", + "ext": { + "eids": [ + { + "source": "id5-sync.com", + "uids": [ + { + "id": "ID5-abc123", + "atype": 1 + } + ] + }, + { + "source": "liveramp.com", + "uids": [ + { + "id": "LR-xyz789", + "atype": 3, + "ext": { + "rtiPartner": "idl" + } + } + ] + } + ] + } + } +} +``` + +Server-resolved EIDs and current-request Prebid EIDs are deduplicated by `source + uid.id`. When a partner UID already exists in KV, pull sync does not periodically refresh it; browser-side Prebid sync can still replace the stored UID if a later `ts-eids` cookie carries a different value for the same configured partner source. + ## Configuration -Configure EC secrets in `trusted-server.toml`. See the full [Configuration Reference](/guide/configuration) for the `[edge_cookie]` section and environment variable overrides. +Configure EC settings in `trusted-server.toml`. See the full [Configuration Reference](/guide/configuration) for the `[ec]` section and environment variable overrides. ## Privacy Considerations @@ -35,8 +247,18 @@ Configure EC secrets in `trusted-server.toml`. See the full [Configuration Refer 2. Rotate secret keys periodically 3. Monitor ID collision rates +## Runtime Behavior Notes + +- Returning requests with consent and an existing `ts-ec` do not refresh the EC cookie or KV TTL. +- Newly generated ECs receive `Set-Cookie: ts-ec=...`. +- When consent is blocked but not explicitly withdrawn, Trusted Server strips EC response headers for that request but leaves any existing `ts-ec` cookie intact; cookie expiry and tombstones happen only on explicit withdrawal. +- `/_ts/api/v1/identify` is read-oriented and returns identity enrichment for the authenticated partner. 
It computes `cluster_size` only when the EC entry does not already store one. +- `/_ts/api/v1/batch-sync` writes mappings into the EC identity graph. Mapping timestamps are retained for API compatibility but no longer order writes; valid mappings use idempotent last-write-wins semantics. +- Pull sync fills missing partner UIDs only. Existing partner UIDs are not periodically refreshed because EC entries no longer store per-partner sync timestamps. + ## Next Steps +- Follow the [EC Setup Guide](/guide/ec-setup-guide) - Learn about [GDPR Compliance](/guide/gdpr-compliance) - Configure [Ad Serving](/guide/ad-serving) - Learn about [Collective Sync](/guide/collective-sync) for cross-publisher data sharing details and diagrams diff --git a/docs/guide/error-reference.md b/docs/guide/error-reference.md index 99f611aa..7d244bb7 100644 --- a/docs/guide/error-reference.md +++ b/docs/guide/error-reference.md @@ -69,7 +69,7 @@ proxy_secret = "change-me-to-random-string" - `publisher.domain` - `publisher.origin_url` - `publisher.proxy_secret` -- `edge_cookie.secret_key` +- `ec.passphrase` --- @@ -141,17 +141,17 @@ Failed to generate EC ID: HMAC error **Solution:** -1. Ensure `secret_key` is set in `trusted-server.toml`: +1. Ensure `passphrase` is set in `trusted-server.toml`: ```toml -[edge_cookie] -secret_key = "your-secure-hmac-secret" +[ec] +passphrase = "replace-with-32-plus-byte-random-secret" ``` 2. Or set via environment variable: ```bash -TRUSTED_SERVER__EDGE_COOKIE__SECRET_KEY=your-secure-hmac-secret +TRUSTED_SERVER__EC__PASSPHRASE=replace-with-32-plus-byte-random-secret ``` --- @@ -245,7 +245,9 @@ curl -w "%{time_total}\n" https://upstream-service.example.com Warning: Cookie not set due to domain mismatch ``` -**Cause:** `publisher.cookie_domain` doesn't match request domain +**Cause:** `publisher.cookie_domain` doesn't match request domain. +Note: EC cookies (`ts-ec`) use a computed domain from `publisher.domain`, +not `cookie_domain`. 
**Solution:** @@ -402,7 +404,7 @@ Signing key not found: ts-2025-01-A 3. Run key rotation to generate new key: ```bash -curl -X POST https://edge.example.com/admin/keys/rotate \ +curl -X POST https://edge.example.com/_ts/admin/keys/rotate \ -u admin:password ``` @@ -425,7 +427,7 @@ curl -X POST https://edge.example.com/admin/keys/rotate \ 1. Initialize keys using rotation endpoint: ```bash -curl -X POST https://edge.example.com/admin/keys/rotate \ +curl -X POST https://edge.example.com/_ts/admin/keys/rotate \ -u admin:password ``` diff --git a/docs/guide/fastly.md b/docs/guide/fastly.md index 2a1edd79..4ca1aa24 100644 --- a/docs/guide/fastly.md +++ b/docs/guide/fastly.md @@ -94,8 +94,45 @@ fastly secret-store create --name signing_keys Note the store IDs - you'll need them for your `trusted-server.toml` configuration. +## Create EC KV Store + +Edge Cookie flows require one KV store: + +- Identity graph store (`ec_store`) - EC identity graph, partner IDs, minimal consent metadata, and withdrawal tombstones + +Partners are configured statically in `[[ec.partners]]` and loaded into an in-memory registry at startup. There is no separate consent KV store. Consent is interpreted from live request cookies, headers, geolocation, and policy defaults. + +Create it: + +```bash +fastly kv-store create --name ec_identity_store +``` + +Configure in `trusted-server.toml`: + +```toml +[ec] +passphrase = "replace-with-32-plus-byte-random-secret" +ec_store = "ec_identity_store" +``` + +Verify stores exist: + +```bash +fastly kv-store list +``` + +Verify stores are linked to your active service version: + +```bash +fastly resource-link list --service-id <service-id> --version <version> +``` + +If EC sync returns `kv_unavailable` or identify responses are degraded, first check that the identity store is present and linked to the active version. Legacy partner/consent KV bindings can be removed once no deployment-specific tooling depends on them.
+ ## Next Steps - Return to [Getting Started](/guide/getting-started) to continue setup - See [Configuration](/guide/configuration) for detailed configuration options +- See [EC Setup Guide](/guide/ec-setup-guide) for end-to-end EC verification - See [Request Signing](/guide/request-signing) for setting up cryptographic signing diff --git a/docs/guide/first-party-proxy.md b/docs/guide/first-party-proxy.md index b978e35f..1b7e2490 100644 --- a/docs/guide/first-party-proxy.md +++ b/docs/guide/first-party-proxy.md @@ -357,9 +357,8 @@ When proxying, Trusted Server automatically appends the `ts-ec` parameter: **Source Priority**: -1. `x-ts-ec` request header -2. `ts-ec` cookie -3. Generate new ID if missing +1. `ts-ec` cookie +2. Generate new ID if missing **Example**: @@ -420,9 +419,9 @@ Configure proxy behavior in `trusted-server.toml`: ```toml [publisher] domain = "publisher.com" +cookie_domain = ".publisher.com" origin_url = "https://origin.publisher.com" proxy_secret = "your-secure-random-secret" -cookie_domain = ".publisher.com" # For ts-ec cookies ``` ### Proxy Allowlist diff --git a/docs/guide/getting-started.md b/docs/guide/getting-started.md index ce4b328d..a4ed418c 100644 --- a/docs/guide/getting-started.md +++ b/docs/guide/getting-started.md @@ -72,5 +72,6 @@ fastly compute publish ## Next Steps - Learn about [Edge Cookies](/guide/edge-cookies) +- Follow the [EC Setup Guide](/guide/ec-setup-guide) - Understand [GDPR Compliance](/guide/gdpr-compliance) - Configure [Ad Serving](/guide/ad-serving) diff --git a/docs/guide/integration-guide.md b/docs/guide/integration-guide.md index fb8c99da..79576b8b 100644 --- a/docs/guide/integration-guide.md +++ b/docs/guide/integration-guide.md @@ -328,6 +328,16 @@ When the integration is enabled, the `IntegrationAttributeRewriter` removes any The NPM integration lives in `crates/js/lib/src/integrations/prebid/index.ts`. 
Tests typically assert that publisher references disappear and the deferred `tsjs-prebid.min.js` tag is present. +**5. Hybrid EID forwarding** + +For Prebid-routed auctions, Trusted Server now forwards identity using a hybrid model: + +- TSJS reads current-request EIDs from `pbjs.getUserIdsAsEids()` and includes them in the `/auction` payload. +- The edge resolves additional EIDs from the EC/KV identity graph. +- The auction handler merges and deduplicates both sets. +- The Prebid provider forwards the merged result to Prebid Server as `user.ext.eids`. +- The `ts-eids` cookie is still ingested after the response so future requests can benefit from those IDs even without fresh browser-side resolution. + Reusing these patterns makes it straightforward to convert additional legacy flows (for example, Next.js rewrites) into first-class integrations. ## Future Improvements diff --git a/docs/guide/integrations/prebid.md b/docs/guide/integrations/prebid.md index aa5fb538..af5f3f76 100644 --- a/docs/guide/integrations/prebid.md +++ b/docs/guide/integrations/prebid.md @@ -298,6 +298,111 @@ The build script (`build-all.mjs`) validates that each adapter exists in `prebid Adding a new client-side bidder requires both a config change (`client_side_bidders`) **and** a rebuild with the adapter included in `TSJS_PREBID_ADAPTERS`. Without the adapter in the bundle, the bidder is silently dropped from both server-side and client-side auctions. ::: +## User ID Modules + +Prebid.js can expose publisher-configured User ID Module output via +`pbjs.getUserIdsAsEids()`. The TSJS Prebid shim reads those current-request +EIDs after auctions and forwards them to Trusted Server when they are available. + +User ID submodule inclusion is deterministic for attested builds. The module +preset is checked in at +`crates/js/lib/src/integrations/prebid/user_id_modules.json`, and +`build-all.mjs` generates `src/integrations/prebid/_user_ids.generated.ts` from +that preset. 
`TSJS_PREBID_USER_ID_MODULES` is intentionally ignored for +production builds so publisher-specific ID choices do not change the attested JS +artifact. + +This is deliberate: Trusted Server replaces the publisher's Prebid.js bundle so +we can install the `trustedServer` adapter and route auctions through `/auction`, +but publishers often have custom or opaque Prebid builds. It is difficult to +know every User ID submodule needed for a publisher before runtime, and making +that list an environment-driven build input would produce different JS bytes per +publisher. Those publisher-specific bundles would undermine deployment +attestation because the trusted artifact hash would vary based on integration +configuration rather than code changes. Keeping a broad, reviewed preset in +source control makes the auction flow predictable while keeping the generated +bundle stable across publishers. + +The current preset includes common ID modules such as Yahoo ConnectID, Criteo, +LiveIntent, SharedID, UID2, ID5, LiveRamp IdentityLink, PubProvidedID, and +Unified ID / TDID. LiveIntent is imported through a local ESM shim because the +public Prebid wrapper contains a CommonJS `require(...)` mode switch that is not +safe for the TSJS IIFE bundle. + +Example EID source mapping: + +| EID source | Included module | +| ----------------------------------------------------------------- | ---------------------- | +| `yahoo.com` | `connectIdSystem` | +| `criteo.com` | `criteoIdSystem` | +| `liveintent.com`, `bidswitch.net`, `openx.net`, `pubmatic.com`, … | `liveIntentIdSystem` | +| `pubcid.org` | `sharedIdSystem` | +| `adserver.org` with `rtiPartner = TDID` | `unifiedIdSystem` | +| `uidapi.com` | `uid2IdSystem` | +| `id5-sync.com` | `id5IdSystem` | +| `liveramp.com` | `identityLinkIdSystem` | + +For local experiments only, `TSJS_PREBID_USER_ID_MODULES_DEV_OVERRIDE` can +replace the preset. Do not use that override for trusted deployments because it +changes the bundle hash. 
+ +This is separate from `TSJS_PREBID_ADAPTERS`, which continues to control +client-side bidder adapter modules. + +## Identity Forwarding + +Trusted Server uses a **hybrid EID forwarding model** for Prebid-routed auctions: + +1. **Current-request EIDs from Prebid.js** are read from `pbjs.getUserIdsAsEids()` in the browser and sent in the `/auction` request body. +2. **Server-side EIDs from the EC/KV identity graph** are resolved on the edge from the current EC ID. +3. Trusted Server **merges and deduplicates** both sets before calling Prebid Server. +4. The merged result is forwarded downstream as `user.ext.eids` in the OpenRTB request. +5. The `ts-eids` cookie is still ingested after the response so later requests can reuse the IDs even when the current auction does not provide them again. + +This means Prebid auctions get same-request transparency for browser-resolved IDs without giving up the durability of the server-managed EC identity graph. + +### Identity flow + +```mermaid +sequenceDiagram + participant B as Browser / Prebid.js + participant T as Trusted Server /auction + participant K as EC + KV identity graph + participant P as Prebid Server + + B->>B: User ID modules resolve EIDs + B->>T: POST /auction\n(adUnits + current-request eids) + T->>K: Resolve EC-backed partner IDs + K-->>T: KV-derived EIDs + T->>T: Merge + dedupe client + KV EIDs + T->>T: Apply consent gating + T->>P: OpenRTB request\nuser.ext.eids = merged set + P-->>T: OpenRTB bid response + T-->>B: Auction response + T->>K: Ingest ts-eids cookie for future requests +``` + +### Merge and deduplication rules + +- Client-request EIDs and KV-resolved EIDs are merged by `source` +- UIDs are deduplicated by `source + id` +- If the same UID appears in both places, it is sent only once downstream +- Distinct UIDs under the same source are preserved +- Consent gating is applied to the **merged** set before forwarding + +### What reaches Prebid Server + +The downstream Prebid Server request includes: 
+ +- `user.id` when EC forwarding is allowed +- `user.ext.eids` containing the merged, deduplicated EID set +- forwarded browser cookies (subject to consent-forwarding mode) + +In practice, this gives operators both: + +- **same-request identity transparency** for Prebid User ID Module output, and +- **future-request continuity** through cookie ingestion and KV-backed partner resolution. + ## Endpoints ### GET /first-party/ad @@ -365,6 +470,7 @@ The `to_openrtb()` method in `PrebidAuctionProvider` builds OpenRTB requests: - Sets `tagid` from the slot ID - Adds site metadata with publisher domain, page URL, `site.ref` from the Referer header, and `site.publisher` from the domain - Injects EC ID in the user object +- Merges current-request browser EIDs with KV-resolved EIDs and forwards the deduplicated result as `user.ext.eids` - Forwards user consent string and sets the GDPR flag based on geo and consent presence - Translates the `Sec-GPC` header to a US Privacy string (`us_privacy`) - Extracts `DNT` and `Accept-Language` headers into device fields diff --git a/docs/guide/key-rotation.md b/docs/guide/key-rotation.md index d4467bc8..ec96d1ea 100644 --- a/docs/guide/key-rotation.md +++ b/docs/guide/key-rotation.md @@ -26,6 +26,10 @@ Key rotation is the process of generating new signing keys and transitioning fro - **Incident-based**: Immediately if compromise suspected - **Before major releases**: Ensure fresh keys for new deployments +## Edge Cookie HMAC Passphrase + +The Edge Cookie `ec.passphrase` is long-lived HMAC-SHA256 keying material used to derive visitor EC IDs. Use a high-entropy random value of at least 32 characters; shorter values are rejected at settings validation. Rotating this passphrase changes derived EC IDs and requires rebuilding or allowing expiry of the existing EC identity graph. + ## Prerequisites Before you can rotate keys, you need to set up the required Fastly stores and API credentials. 
@@ -240,14 +244,14 @@ You should see a JWKS response with your public keys. ### Using the Rotation Endpoint -**Endpoint**: `POST /admin/keys/rotate` +**Endpoint**: `POST /_ts/admin/keys/rotate` #### Automatic Key ID (Recommended) Let Trusted Server generate a date-based key ID: ```bash -curl -X POST https://your-domain/admin/keys/rotate \ +curl -X POST https://your-domain/_ts/admin/keys/rotate \ -H "Content-Type: application/json" \ -d '{}' ``` @@ -276,7 +280,7 @@ curl -X POST https://your-domain/admin/keys/rotate \ Specify a custom key identifier: ```bash -curl -X POST https://your-domain/admin/keys/rotate \ +curl -X POST https://your-domain/_ts/admin/keys/rotate \ -H "Content-Type: application/json" \ -d '{"kid": "production-2024-q1"}' ``` @@ -356,14 +360,14 @@ Deactivate old keys after: ### Deactivation Endpoint -**Endpoint**: `POST /admin/keys/deactivate` +**Endpoint**: `POST /_ts/admin/keys/deactivate` #### Deactivate (Keep in Storage) Remove from active rotation but keep in storage: ```bash -curl -X POST https://your-domain/admin/keys/deactivate \ +curl -X POST https://your-domain/_ts/admin/keys/deactivate \ -H "Content-Type: application/json" \ -d '{ "kid": "ts-2024-01-15", @@ -388,7 +392,7 @@ curl -X POST https://your-domain/admin/keys/deactivate \ Remove from storage completely: ```bash -curl -X POST https://your-domain/admin/keys/deactivate \ +curl -X POST https://your-domain/_ts/admin/keys/deactivate \ -H "Content-Type: application/json" \ -d '{ "kid": "ts-2024-01-15", @@ -476,14 +480,14 @@ Regular rotation on a fixed schedule: ```bash #!/bin/bash # Rotate signing keys -curl -X POST https://your-domain/admin/keys/rotate +curl -X POST https://your-domain/_ts/admin/keys/rotate # Wait 30 days grace period sleep $((30 * 24 * 60 * 60)) # Deactivate old key OLD_KEY=$(date -d '90 days ago' +ts-%Y-%m-%d) -curl -X POST https://your-domain/admin/keys/deactivate \ +curl -X POST https://your-domain/_ts/admin/keys/deactivate \ -d "{\"kid\": \"$OLD_KEY\", \"delete\": 
true}" ``` @@ -647,13 +651,13 @@ If a key is compromised: 1. **Immediate**: Rotate to new key ```bash -curl -X POST /admin/keys/rotate +curl -X POST /_ts/admin/keys/rotate ``` 2. **Urgent**: Deactivate compromised key ```bash -curl -X POST /admin/keys/deactivate \ +curl -X POST /_ts/admin/keys/deactivate \ -d '{"kid": "compromised-key", "delete": false}' ``` @@ -664,7 +668,7 @@ curl -X POST /admin/keys/deactivate \ 5. **Cleanup**: Delete compromised key after investigation ```bash -curl -X POST /admin/keys/deactivate \ +curl -X POST /_ts/admin/keys/deactivate \ -d '{"kid": "compromised-key", "delete": true}' ``` diff --git a/docs/guide/onboarding.md b/docs/guide/onboarding.md index 83203d35..5d4d5b32 100644 --- a/docs/guide/onboarding.md +++ b/docs/guide/onboarding.md @@ -40,7 +40,7 @@ Welcome to the Trusted Server project! This guide keeps internal onboarding note | `crates/trusted-server-adapter-fastly/src/main.rs` | Request routing entry point | | `crates/trusted-server-core/src/publisher.rs` | Publisher origin handling | | `crates/trusted-server-core/src/proxy.rs` | First-party proxy implementation | -| `crates/trusted-server-core/src/edge_cookie.rs` | EC ID generation | +| `crates/trusted-server-core/src/ec/` | EC identity subsystem | | `crates/trusted-server-core/src/integrations/registry.rs` | Integration module pattern | | `trusted-server.toml` | Application configuration | @@ -146,7 +146,7 @@ Use this checklist to track your onboarding progress: - [ ] Read through `main.rs` to understand request routing - [ ] Trace a request through `publisher.rs` and `proxy.rs` -- [ ] Understand EC ID generation in `edge_cookie.rs` +- [ ] Understand the EC identity subsystem in `ec/` - [ ] Review an existing integration (e.g., `prebid.rs`) ### Documentation & Contribution diff --git a/docs/guide/testing.md b/docs/guide/testing.md index cfa7ea51..fb12ac77 100644 --- a/docs/guide/testing.md +++ b/docs/guide/testing.md @@ -21,7 +21,7 @@ cargo test Tests are organized 
alongside source code in `#[cfg(test)]` modules: ```rust -// crates/trusted-server-core/src/edge_cookie.rs +// crates/trusted-server-core/src/ec/generation.rs #[cfg(test)] mod tests { use super::*; @@ -88,7 +88,7 @@ curl http://localhost:7676/.well-known/trusted-server.json ### EC ID Tests -From `crates/trusted-server-core/src/edge_cookie.rs`: +From `crates/trusted-server-core/src/ec/mod.rs`: ```rust #[test] diff --git a/docs/superpowers/plans/2026-04-15-sourcepoint-gpp-consent.md b/docs/superpowers/plans/2026-04-15-sourcepoint-gpp-consent.md new file mode 100644 index 00000000..8c9de843 --- /dev/null +++ b/docs/superpowers/plans/2026-04-15-sourcepoint-gpp-consent.md @@ -0,0 +1,695 @@ +# Sourcepoint GPP Consent for Edge Cookie Generation — Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Enable EC generation for sites using Sourcepoint by mirroring localStorage consent into cookies (client) and recognizing GPP US `sale_opt_out` as a consent signal (server). + +**Architecture:** New JS-only `sourcepoint` integration auto-discovers `_sp_user_consent_*` in localStorage and writes `__gpp` / `__gpp_sid` cookies. Server-side, `GppConsent` gains a `us_sale_opt_out: Option` field extracted from any GPP US section (IDs 7–23). `allows_ec_creation()` checks this field between the existing TCF and `us_privacy` branches. 
+ +**Tech Stack:** TypeScript (Vitest, jsdom), Rust (iab_gpp crate for GPP section decoding) + +**Spec:** `docs/superpowers/specs/2026-04-15-sourcepoint-gpp-consent-design.md` + +--- + +## File Map + +| File | Action | Responsibility | +|---|---|---| +| `crates/trusted-server-core/src/consent/types.rs` | Modify | Add `us_sale_opt_out: Option` to `GppConsent` | +| `crates/trusted-server-core/src/consent/gpp.rs` | Modify | Decode US sections, extract `sale_opt_out` | +| `crates/trusted-server-core/src/consent/mod.rs` | Modify | Add GPP US branch in `allows_ec_creation()`, tests | +| `crates/js/lib/src/integrations/sourcepoint/index.ts` | Create | localStorage auto-discovery, cookie mirroring | +| `crates/js/lib/test/integrations/sourcepoint/index.test.ts` | Create | Vitest tests for cookie mirroring | + +--- + +## Task 1: Add `us_sale_opt_out` field to `GppConsent` + +**Files:** +- Modify: `crates/trusted-server-core/src/consent/types.rs:297-305` + +- [ ] **Step 1: Add the field** + +In `crates/trusted-server-core/src/consent/types.rs`, add `us_sale_opt_out` to `GppConsent`: + +```rust +/// Decoded GPP (Global Privacy Platform) consent data. +/// +/// Wraps the `iab_gpp` crate's decoded output with our domain types. +#[derive(Debug, Clone)] +pub struct GppConsent { + /// GPP header version. + pub version: u8, + /// Active section IDs present in the GPP string. + pub section_ids: Vec, + /// Decoded EU TCF v2.2 section (if present in GPP, section ID 2). + pub eu_tcf: Option, + /// Whether the user opted out of sale of personal information via a US GPP + /// section (IDs 7–23). + /// + /// - `Some(true)` — a US section is present and `sale_opt_out == OptedOut` + /// - `Some(false)` — a US section is present and user did not opt out + /// - `None` — no US section exists in the GPP string + pub us_sale_opt_out: Option, +} +``` + +- [ ] **Step 2: Fix compilation — update all `GppConsent` construction sites** + +There are existing places that construct `GppConsent`. 
Each needs the new field. Search for them: + +In `crates/trusted-server-core/src/consent/gpp.rs` (~line 74), update `decode_gpp_string`: + +```rust + Ok(GppConsent { + version: 1, + section_ids, + eu_tcf, + us_sale_opt_out: None, // placeholder — Task 2 fills this in + }) +``` + +In `crates/trusted-server-core/src/consent/mod.rs`, find every test that constructs `GppConsent` (search for `GppConsent {`). Add `us_sale_opt_out: None` to each. There are instances around lines 720, 883, and 965: + +```rust + gpp: Some(GppConsent { + version: 1, + section_ids: vec![2], + eu_tcf: Some(...), + us_sale_opt_out: None, + }), +``` + +- [ ] **Step 3: Verify compilation** + +Run: `cargo check --workspace` +Expected: compiles with no errors. + +- [ ] **Step 4: Run tests to confirm nothing broke** + +Run: `cargo test --workspace` +Expected: all existing tests pass. + +- [ ] **Step 5: Commit** + +```bash +git add crates/trusted-server-core/src/consent/types.rs \ + crates/trusted-server-core/src/consent/gpp.rs \ + crates/trusted-server-core/src/consent/mod.rs +git commit -m "Add us_sale_opt_out field to GppConsent" +``` + +--- + +## Task 2: Decode US sale opt-out from GPP sections + +**Files:** +- Modify: `crates/trusted-server-core/src/consent/gpp.rs` + +- [ ] **Step 1: Write the failing test for US sale opt-out extraction** + +Add to the `#[cfg(test)] mod tests` block in `crates/trusted-server-core/src/consent/gpp.rs`: + +```rust + // A GPP string with UsNat section (section ID 7). + // Header "DBABLA" encodes: version=1, section IDs=[7] (UsNat). + // The section string encodes a UsNat v1 core with sale_opt_out=DidNotOptOut (2). + #[test] + fn decodes_us_sale_opt_out_not_opted_out() { + // Build a real GPP string with UsNat section using iab_gpp parsing. + // "DBABLA~BVQqAAAAAgA.QA" is the example from the issue (Sourcepoint payload). 
+ let result = decode_gpp_string("DBABLA~BVQqAAAAAgA.QA"); + match &result { + Ok(gpp) => { + assert_eq!( + gpp.us_sale_opt_out, + Some(false), + "should extract sale_opt_out=false from UsNat section" + ); + } + Err(e) => { + // If the specific GPP string doesn't parse, test with section ID presence. + // The important thing is that the decode_us_sale_opt_out function is wired up. + panic!("GPP decode failed: {e}"); + } + } + } + + #[test] + fn no_us_section_returns_none() { + // GPP_TCF_AND_USP has section IDs [2, 6] — no US sections (7–23). + let result = decode_gpp_string(GPP_TCF_AND_USP).expect("should decode GPP"); + assert_eq!( + result.us_sale_opt_out, None, + "should return None when no US section (7-23) is present" + ); + } +``` + +- [ ] **Step 2: Run test to verify it fails** + +Run: `cargo test --workspace -p trusted-server-core -- consent::gpp::tests::decodes_us_sale_opt_out` +Expected: FAIL — `us_sale_opt_out` is hardcoded to `None`. + +- [ ] **Step 3: Implement `decode_us_sale_opt_out`** + +In `crates/trusted-server-core/src/consent/gpp.rs`, add after `decode_tcf_from_gpp`: + +```rust +/// GPP section IDs that represent US state/national privacy sections. +/// +/// Range 7–23 per the GPP v1 specification: +/// 7=UsNat, 8=UsCa, 9=UsVa, 10=UsCo, 11=UsUt, 12=UsCt, 13=UsFl, +/// 14=UsMt, 15=UsOr, 16=UsTx, 17=UsDe, 18=UsIa, 19=UsNe, 20=UsNh, +/// 21=UsNj, 22=UsTn, 23=UsMn. +const US_SECTION_ID_RANGE: std::ops::RangeInclusive<u16> = 7..=23; + +/// Extracts the `sale_opt_out` signal from the first US section in a parsed +/// GPP string. +/// +/// Iterates through section IDs looking for any in the US range (7–23). +/// For the first match, decodes the section and extracts `sale_opt_out`. +/// +/// Returns `Some(true)` if the user opted out of sale, `Some(false)` if they +/// did not, or `None` if no US section is present.
+fn decode_us_sale_opt_out(parsed: &iab_gpp::v1::GPPString) -> Option<bool> { + use iab_gpp::sections::us_common::OptOut; + use iab_gpp::sections::Section; + + let us_section_id = parsed + .section_ids() + .find(|id| US_SECTION_ID_RANGE.contains(&(**id as u16)))?; + + match parsed.decode_section(*us_section_id) { + Ok(section) => { + let sale_opt_out = match &section { + Section::UsNat(s) => match &s.core { + iab_gpp::sections::usnat::Core::V1(c) => &c.sale_opt_out, + iab_gpp::sections::usnat::Core::V2(c) => &c.sale_opt_out, + }, + Section::UsCa(s) => &s.core.sale_opt_out, + Section::UsVa(s) => &s.core.sale_opt_out, + Section::UsCo(s) => &s.core.sale_opt_out, + Section::UsUt(s) => &s.core.sale_opt_out, + Section::UsCt(s) => &s.core.sale_opt_out, + Section::UsFl(s) => &s.core.sale_opt_out, + Section::UsMt(s) => &s.core.sale_opt_out, + Section::UsOr(s) => &s.core.sale_opt_out, + Section::UsTx(s) => &s.core.sale_opt_out, + Section::UsDe(s) => &s.core.sale_opt_out, + Section::UsIa(s) => &s.core.sale_opt_out, + Section::UsNe(s) => &s.core.sale_opt_out, + Section::UsNh(s) => &s.core.sale_opt_out, + Section::UsNj(s) => &s.core.sale_opt_out, + Section::UsTn(s) => &s.core.sale_opt_out, + Section::UsMn(s) => &s.core.sale_opt_out, + // Non-US sections — should not reach here given the ID filter. + _ => return None, + }; + Some(*sale_opt_out == OptOut::OptedOut) + } + Err(e) => { + log::warn!("Failed to decode US GPP section {us_section_id}: {e}"); + None + } + } +} +``` + +- [ ] **Step 4: Wire it into `decode_gpp_string`** + +In the same file, replace the placeholder in `decode_gpp_string`: + +```rust + let us_sale_opt_out = decode_us_sale_opt_out(&parsed); + + Ok(GppConsent { + version: 1, + section_ids, + eu_tcf, + us_sale_opt_out, + }) +``` + +- [ ] **Step 5: Run tests** + +Run: `cargo test --workspace -p trusted-server-core -- consent::gpp::tests` +Expected: all GPP tests pass, including the two new ones.
+ +- [ ] **Step 6: Commit** + +```bash +git add crates/trusted-server-core/src/consent/gpp.rs +git commit -m "Decode US sale opt-out from GPP sections" +``` + +--- + +## Task 3: Add GPP US branch to `allows_ec_creation()` + +**Files:** +- Modify: `crates/trusted-server-core/src/consent/mod.rs` + +- [ ] **Step 1: Write failing tests** + +Add to the `#[cfg(test)] mod tests` block in `crates/trusted-server-core/src/consent/mod.rs`: + +```rust + #[test] + fn ec_allowed_us_state_gpp_no_sale_opt_out() { + let ctx = ConsentContext { + jurisdiction: Jurisdiction::UsState("TN".to_owned()), + gpp: Some(GppConsent { + version: 1, + section_ids: vec![7], + eu_tcf: None, + us_sale_opt_out: Some(false), + }), + ..ConsentContext::default() + }; + assert!( + allows_ec_creation(&ctx), + "US state + GPP US sale_opt_out=false should allow EC" + ); + } + + #[test] + fn ec_blocked_us_state_gpp_sale_opted_out() { + let ctx = ConsentContext { + jurisdiction: Jurisdiction::UsState("TN".to_owned()), + gpp: Some(GppConsent { + version: 1, + section_ids: vec![7], + eu_tcf: None, + us_sale_opt_out: Some(true), + }), + ..ConsentContext::default() + }; + assert!( + !allows_ec_creation(&ctx), + "US state + GPP US sale_opt_out=true should block EC" + ); + } + + #[test] + fn ec_blocked_us_state_gpc_overrides_gpp_us() { + let ctx = ConsentContext { + jurisdiction: Jurisdiction::UsState("TN".to_owned()), + gpc: true, + gpp: Some(GppConsent { + version: 1, + section_ids: vec![7], + eu_tcf: None, + us_sale_opt_out: Some(false), + }), + ..ConsentContext::default() + }; + assert!( + !allows_ec_creation(&ctx), + "GPC should block EC even when GPP US says no opt-out" + ); + } + + #[test] + fn ec_us_state_tcf_takes_priority_over_gpp_us() { + let ctx = ConsentContext { + jurisdiction: Jurisdiction::UsState("TN".to_owned()), + tcf: Some(make_tcf_with_storage(true)), + gpp: Some(GppConsent { + version: 1, + section_ids: vec![7], + eu_tcf: None, + us_sale_opt_out: Some(true), + }), + 
..ConsentContext::default() + }; + assert!( + allows_ec_creation(&ctx), + "TCF consent should take priority over GPP US opt-out" + ); + } + + #[test] + fn ec_us_state_gpp_us_takes_priority_over_us_privacy() { + let ctx = ConsentContext { + jurisdiction: Jurisdiction::UsState("TN".to_owned()), + gpp: Some(GppConsent { + version: 1, + section_ids: vec![7], + eu_tcf: None, + us_sale_opt_out: Some(false), + }), + us_privacy: Some(UsPrivacy { + version: 1, + notice_given: PrivacyFlag::Yes, + opt_out_sale: PrivacyFlag::Yes, + lspa_covered: PrivacyFlag::NotApplicable, + }), + ..ConsentContext::default() + }; + assert!( + allows_ec_creation(&ctx), + "GPP US should take priority over us_privacy opt-out" + ); + } + + #[test] + fn ec_us_state_gpp_no_us_section_falls_through_to_us_privacy() { + let ctx = ConsentContext { + jurisdiction: Jurisdiction::UsState("CA".to_owned()), + gpp: Some(GppConsent { + version: 1, + section_ids: vec![2], + eu_tcf: None, + us_sale_opt_out: None, + }), + us_privacy: Some(UsPrivacy { + version: 1, + notice_given: PrivacyFlag::Yes, + opt_out_sale: PrivacyFlag::No, + lspa_covered: PrivacyFlag::NotApplicable, + }), + ..ConsentContext::default() + }; + assert!( + allows_ec_creation(&ctx), + "GPP without US section should fall through to us_privacy" + ); + } +``` + +- [ ] **Step 2: Run tests to verify they fail** + +Run: `cargo test --workspace -p trusted-server-core -- consent::tests::ec_allowed_us_state_gpp` +Expected: FAIL — the GPP US branch doesn't exist yet, so `ec_allowed_us_state_gpp_no_sale_opt_out` fails (falls through to fail-closed). + +- [ ] **Step 3: Add the GPP US branch to `allows_ec_creation()`** + +In `crates/trusted-server-core/src/consent/mod.rs`, update `allows_ec_creation()`. 
The `UsState` arm currently reads: + +```rust + jurisdiction::Jurisdiction::UsState(_) => { + if ctx.gpc { + return false; + } + if let Some(tcf) = effective_tcf(ctx) { + return tcf.has_storage_consent(); + } + if let Some(usp) = &ctx.us_privacy { + return usp.opt_out_sale != PrivacyFlag::Yes; + } + false + } +``` + +Insert the GPP US check between TCF and us_privacy: + +```rust + jurisdiction::Jurisdiction::UsState(_) => { + if ctx.gpc { + return false; + } + if let Some(tcf) = effective_tcf(ctx) { + return tcf.has_storage_consent(); + } + // Check GPP US section for sale opt-out. + if let Some(gpp) = &ctx.gpp { + if let Some(opted_out) = gpp.us_sale_opt_out { + return !opted_out; + } + } + if let Some(usp) = &ctx.us_privacy { + return usp.opt_out_sale != PrivacyFlag::Yes; + } + false + } +``` + +- [ ] **Step 4: Run all tests** + +Run: `cargo test --workspace` +Expected: all tests pass, including the six new EC gating tests. + +- [ ] **Step 5: Commit** + +```bash +git add crates/trusted-server-core/src/consent/mod.rs +git commit -m "Recognize GPP US sale opt-out in EC consent gating" +``` + +--- + +## Task 4: Create Sourcepoint JS integration + +**Files:** +- Create: `crates/js/lib/src/integrations/sourcepoint/index.ts` + +- [ ] **Step 1: Write the test file first** + +Create `crates/js/lib/test/integrations/sourcepoint/index.test.ts`: + +```typescript +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; + +import { mirrorSourcepointConsent } from '../../../src/integrations/sourcepoint'; + +describe('integrations/sourcepoint', () => { + beforeEach(() => { + // Clear cookies and localStorage before each test. 
+ document.cookie.split(';').forEach((c) => { + const name = c.split('=')[0].trim(); + if (name) document.cookie = `${name}=; expires=Thu, 01 Jan 1970 00:00:00 GMT; path=/`; + }); + localStorage.clear(); + }); + + afterEach(() => { + localStorage.clear(); + }); + + it('mirrors __gpp and __gpp_sid from _sp_user_consent_* localStorage', () => { + const payload = { + gppData: { + gppString: 'DBABLA~BVQqAAAAAgA.QA', + applicableSections: [7], + }, + }; + localStorage.setItem('_sp_user_consent_36026', JSON.stringify(payload)); + + const result = mirrorSourcepointConsent(); + + expect(result).toBe(true); + expect(document.cookie).toContain('__gpp=DBABLA~BVQqAAAAAgA.QA'); + expect(document.cookie).toContain('__gpp_sid=7'); + }); + + it('handles multiple applicable sections', () => { + const payload = { + gppData: { + gppString: 'DBABLA~BVQqAAAAAgA.QA', + applicableSections: [7, 8], + }, + }; + localStorage.setItem('_sp_user_consent_99999', JSON.stringify(payload)); + + mirrorSourcepointConsent(); + + expect(document.cookie).toContain('__gpp_sid=7,8'); + }); + + it('returns false when no _sp_user_consent_* key exists', () => { + localStorage.setItem('unrelated_key', 'value'); + + const result = mirrorSourcepointConsent(); + + expect(result).toBe(false); + expect(document.cookie).not.toContain('__gpp='); + expect(document.cookie).not.toContain('__gpp_sid='); + }); + + it('returns false for malformed JSON in localStorage', () => { + localStorage.setItem('_sp_user_consent_12345', 'not-json!!!'); + + const result = mirrorSourcepointConsent(); + + expect(result).toBe(false); + expect(document.cookie).not.toContain('__gpp='); + }); + + it('returns false when gppData is missing from payload', () => { + localStorage.setItem('_sp_user_consent_12345', JSON.stringify({ otherField: true })); + + const result = mirrorSourcepointConsent(); + + expect(result).toBe(false); + expect(document.cookie).not.toContain('__gpp='); + }); + + it('returns false when gppString is empty', () => { + 
const payload = { + gppData: { + gppString: '', + applicableSections: [7], + }, + }; + localStorage.setItem('_sp_user_consent_12345', JSON.stringify(payload)); + + const result = mirrorSourcepointConsent(); + + expect(result).toBe(false); + expect(document.cookie).not.toContain('__gpp='); + }); +}); +``` + +- [ ] **Step 2: Run tests to verify they fail** + +Run: `cd crates/js/lib && npx vitest run test/integrations/sourcepoint/index.test.ts` +Expected: FAIL — module `../../../src/integrations/sourcepoint` does not exist. + +- [ ] **Step 3: Implement the integration** + +Create `crates/js/lib/src/integrations/sourcepoint/index.ts`: + +```typescript +import { log } from '../../core/log'; + +const SP_CONSENT_PREFIX = '_sp_user_consent_'; + +interface SourcepointGppData { + gppString: string; + applicableSections: number[]; +} + +interface SourcepointConsentPayload { + gppData?: SourcepointGppData; +} + +function findSourcepointConsent(): SourcepointConsentPayload | null { + for (let i = 0; i < localStorage.length; i++) { + const key = localStorage.key(i); + if (!key?.startsWith(SP_CONSENT_PREFIX)) continue; + + const raw = localStorage.getItem(key); + if (!raw) continue; + + try { + return JSON.parse(raw) as SourcepointConsentPayload; + } catch { + log.debug('sourcepoint: failed to parse localStorage value', { key }); + return null; + } + } + return null; +} + +function writeCookie(name: string, value: string): void { + document.cookie = `${name}=${encodeURIComponent(value).replace(/%2C/g, ',')}; path=/; SameSite=Lax`; // keep ',' literal so __gpp_sid stays comma-separated +} + +/// Reads Sourcepoint consent from localStorage and mirrors it into +/// `__gpp` and `__gpp_sid` cookies for Trusted Server to read. +/// +/// Returns `true` if cookies were written, `false` otherwise. 
+export function mirrorSourcepointConsent(): boolean { + if (typeof localStorage === 'undefined' || typeof document === 'undefined') { + return false; + } + + const payload = findSourcepointConsent(); + if (!payload?.gppData) { + log.debug('sourcepoint: no GPP data found in localStorage'); + return false; + } + + const { gppString, applicableSections } = payload.gppData; + if (!gppString) { + log.debug('sourcepoint: gppString is empty'); + return false; + } + + writeCookie('__gpp', gppString); + + if (Array.isArray(applicableSections) && applicableSections.length > 0) { + writeCookie('__gpp_sid', applicableSections.join(',')); + } + + log.info('sourcepoint: mirrored GPP consent to cookies', { + gppLength: gppString.length, + sections: applicableSections, + }); + + return true; +} + +if (typeof window !== 'undefined') { + mirrorSourcepointConsent(); +} + +export default mirrorSourcepointConsent; +``` + +- [ ] **Step 4: Run tests** + +Run: `cd crates/js/lib && npx vitest run test/integrations/sourcepoint/index.test.ts` +Expected: all 6 tests pass. + +- [ ] **Step 5: Run the full JS test suite** + +Run: `cd crates/js/lib && npx vitest run` +Expected: all tests pass (existing + new). + +- [ ] **Step 6: Format** + +Run: `cd crates/js/lib && npm run format` +Expected: no formatting issues. + +- [ ] **Step 7: Commit** + +```bash +git add crates/js/lib/src/integrations/sourcepoint/index.ts \ + crates/js/lib/test/integrations/sourcepoint/index.test.ts +git commit -m "Add Sourcepoint JS integration for GPP consent cookie mirroring" +``` + +--- + +## Task 5: Final verification + +**Files:** None (verification only) + +- [ ] **Step 1: Build the JS bundles** + +Run: `cd crates/js/lib && node build-all.mjs` +Expected: builds successfully, `dist/tsjs-sourcepoint.js` appears in the output. + +- [ ] **Step 2: Full Rust build** + +Run: `cargo build --workspace` +Expected: compiles with no errors. 
+ +- [ ] **Step 3: Full Rust test suite** + +Run: `cargo test --workspace` +Expected: all tests pass. + +- [ ] **Step 4: Clippy** + +Run: `cargo clippy --workspace --all-targets --all-features -- -D warnings` +Expected: no warnings. + +- [ ] **Step 5: Rust format check** + +Run: `cargo fmt --all -- --check` +Expected: no formatting issues. + +- [ ] **Step 6: Full JS test suite** + +Run: `cd crates/js/lib && npx vitest run` +Expected: all tests pass. + +- [ ] **Step 7: JS format check** + +Run: `cd crates/js/lib && npm run format` +Expected: no formatting issues. diff --git a/docs/superpowers/specs/2026-03-11-production-readiness-report-design.md b/docs/superpowers/specs/2026-03-11-production-readiness-report-design.md index 5203e5ff..e9bf5d16 100644 --- a/docs/superpowers/specs/2026-03-11-production-readiness-report-design.md +++ b/docs/superpowers/specs/2026-03-11-production-readiness-report-design.md @@ -51,10 +51,10 @@ match config. ### C-2: Admin endpoints unprotected unless handler regex covers them -`/admin/keys/rotate` and `/admin/keys/deactivate` are always routed. The +`/_ts/admin/keys/rotate` and `/_ts/admin/keys/deactivate` are always routed. The `enforce_basic_auth` gate only triggers for paths that match a configured `handlers[].path` regex. The default config (`^/secure`) does not cover -`/admin/*`. An operator who doesn't add an explicit admin handler has +`/_ts/admin/*`. An operator who doesn't add an explicit admin handler has **publicly-accessible key rotation/deletion endpoints**. **Refs:** @@ -64,7 +64,7 @@ match config. - `settings.rs:381` -- `handlers` parsing - `trusted-server.toml:1` -- default handler only covers `^/secure` -**Recommendation:** Either hard-require auth for `/admin/*` paths regardless of +**Recommendation:** Either hard-require auth for `/_ts/admin/*` paths regardless of handler config, or validate at startup that an admin handler exists. 
--- diff --git a/docs/superpowers/specs/2026-03-19-edgezero-migration-design.md b/docs/superpowers/specs/2026-03-19-edgezero-migration-design.md index 0ae8c906..c48a947d 100644 --- a/docs/superpowers/specs/2026-03-19-edgezero-migration-design.md +++ b/docs/superpowers/specs/2026-03-19-edgezero-migration-design.md @@ -46,7 +46,7 @@ These decisions are finalized and reflected in this plan: 2. **Migrate all integrations** including GPT and Google Tag Manager as first-class scope. 3. **Admin key routes must be supported on all adapters** — - `/admin/keys/rotate` and `/admin/keys/deactivate` are required on Fastly, + `/_ts/admin/keys/rotate` and `/_ts/admin/keys/deactivate` are required on Fastly, Axum, and Cloudflare (no disabled-route mode). 4. **Temporary Fastly compatibility adapter is required** — `compat.rs` lives in trusted-server during migration (created in PR 11, deleted in PR 15), @@ -1357,7 +1357,7 @@ Changes: - Local development without Viceroy - Mock stores for local KV/config/secret - Implement required admin key routes - (`/admin/keys/rotate`, `/admin/keys/deactivate`) — core signing logic + (`/_ts/admin/keys/rotate`, `/_ts/admin/keys/deactivate`) — core signing logic composes the Axum store primitives (local config/secret providers) - Add `.env.dev` or local config file for Axum-specific **non-secret** settings only (listen address, mock store paths, log level). 
@@ -1387,7 +1387,7 @@ Changes: - Construct `RuntimeServices` with Cloudflare-backed trait implementations - Wrangler configuration - Implement required admin key routes - (`/admin/keys/rotate`, `/admin/keys/deactivate`) — core signing logic + (`/_ts/admin/keys/rotate`, `/_ts/admin/keys/deactivate`) — core signing logic composes the Cloudflare store primitives (Workers API bindings) - Add `wrangler.toml` with bindings for KV, secrets, and config - Add integration tests: route smoke tests, admin key route tests, @@ -1421,7 +1421,7 @@ Changes: - Route parity validation for all routes currently in `crates/trusted-server-adapter-fastly/src/main.rs` (`/static/tsjs=*`, `/.well-known/trusted-server.json`, - `/verify-signature`, `/admin/keys/rotate`, `/admin/keys/deactivate`, + `/verify-signature`, `/_ts/admin/keys/rotate`, `/_ts/admin/keys/deactivate`, `/auction`, `/first-party/*`, integration routes, and publisher fallback) - Cross-adapter behavior parity tests (Fastly vs Axum vs Cloudflare) for: response status/body, required headers, cookie behavior, and request-signing diff --git a/docs/superpowers/specs/2026-03-24-ssc-prd-design.md b/docs/superpowers/specs/2026-03-24-ssc-prd-design.md index 7f88ef15..7ed0a6cc 100644 --- a/docs/superpowers/specs/2026-03-24-ssc-prd-design.md +++ b/docs/superpowers/specs/2026-03-24-ssc-prd-design.md @@ -68,8 +68,8 @@ Today, regular cookies don't suffice for publisher and partner needs. 
Additional - Implement real-time consent withdrawal: delete cookie and KV entry when consent is revoked - Build a server-side identity graph in Fastly KV Store that accumulates resolved partner IDs over time - Provide three KV write paths: real-time pixel sync redirects, S2S batch push from partners, and TS-initiated S2S pull from partner resolution endpoints -- Expose two bidstream integration modes: header decoration (`/identify`) and full auction orchestration (`/auction`) -- Expose a publisher-authenticated `/admin/partners/register` endpoint for partner provisioning without direct KV access +- Expose two bidstream integration modes: header decoration (`/_ts/api/v1/identify`) and full auction orchestration (`/auction`) +- Expose a publisher-authenticated `/_ts/admin/v1/partners/register` endpoint for partner provisioning without direct KV access ### Non-Goals @@ -116,9 +116,9 @@ TS Lite is a runtime configuration of the existing Trusted Server binary. It is | `GET /first-party/proxy-rebuild` | Enabled | Disabled | | HTML injection pipeline | Enabled | Disabled | | GTM integration | Enabled | Disabled | -| `GET /sync` | Disabled | **Enabled** | -| `GET /identify` | Disabled | **Enabled** | -| `POST /api/v1/sync` | Disabled | **Enabled** | +| `GET /_ts/api/v1/sync` | Disabled | **Enabled** | +| `GET /_ts/api/v1/identify` | Disabled | **Enabled** | +| `POST /_ts/api/v1/batch-sync` | Disabled | **Enabled** | | `GET /.well-known/trusted-server.json` | Enabled | Enabled | When a disabled route is requested, TS returns `404` with the header `X-ts-error: feature-disabled`. @@ -337,19 +337,19 @@ The existing `counter_store` and `opid_store` settings (currently defined but un The EC cookie is deterministic (derived from IP + publisher salt) and lives in the browser. It does not depend on KV Store availability. KV Store holds identity enrichment only — resolved partner UIDs accumulated over time. 
The degraded behavior policy follows from this: **EC always works; enrichment degrades gracefully.** -| Operation | KV unavailable or error | Rationale | -| --------------------------------- | --------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| EC cookie creation | Set the cookie. Skip the KV entry creation silently. Log the failure at `warn` level. | The cookie is the identity anchor — it does not require KV. The KV entry will be created on the next request once KV recovers. | -| EC cookie refresh (existing user) | Refresh the cookie. Skip the KV `last_seen` update silently. Log at `warn`. | Same as above — the cookie continues working. Stale `last_seen` is acceptable. | -| `/sync` KV write | Redirect to `return` with `ts_synced=0&ts_reason=write_failed`. | The browser redirect must not be blocked by KV availability. This case is already specified in Section 9.4. | -| `/identify` KV read | Return `200` with `ec` hash (from cookie) and `degraded: true`. Set `uids: {}` and `eids: []`. | The EC hash is still valid and useful for attribution and analytics. Empty uids signal that enrichment is unavailable, not that the user has no synced partners. `degraded: true` lets callers distinguish transient KV failure from a genuinely unenriched user. | -| S2S batch write (`/api/v1/sync`) | Return `207` with all mappings rejected, `reason: "kv_unavailable"`. | The request was valid; the failure is infrastructure. Partners should retry the batch. | -| S2S pull sync write (async) | Discard the resolved uid. Log at `warn`. Retry will occur on the next qualifying request per the `pull_sync_ttl_sec` window. | Async path — no user-facing impact. 
| -| Consent withdrawal KV delete | Expire the cookie immediately. Log the KV delete failure at `error` level. Retry the KV delete on the next request for this user. | Cookie deletion is the primary enforcement mechanism. KV delete failure must not block or delay the cookie expiry. | +| Operation | KV unavailable or error | Rationale | +| ------------------------------------ | --------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| EC cookie creation | Set the cookie. Skip the KV entry creation silently. Log the failure at `warn` level. | The cookie is the identity anchor — it does not require KV. The KV entry will be created on the next request once KV recovers. | +| EC cookie refresh (existing user) | Refresh the cookie. Skip the KV `last_seen` update silently. Log at `warn`. | Same as above — the cookie continues working. Stale `last_seen` is acceptable. | +| `/_ts/api/v1/sync` KV write | Redirect to `return` with `ts_synced=0&ts_reason=write_failed`. | The browser redirect must not be blocked by KV availability. This case is already specified in Section 9.4. | +| `/_ts/api/v1/identify` KV read | Return `200` with `ec` hash (from cookie) and `degraded: true`. Set `uids: {}` and `eids: []`. | The EC hash is still valid and useful for attribution and analytics. Empty uids signal that enrichment is unavailable, not that the user has no synced partners. `degraded: true` lets callers distinguish transient KV failure from a genuinely unenriched user. | +| S2S batch write (`/_ts/api/v1/batch-sync`) | Return `207` with all mappings rejected, `reason: "kv_unavailable"`. | The request was valid; the failure is infrastructure. 
Partners should retry the batch. | +| S2S pull sync write (async) | Discard the resolved uid. Log at `warn`. Retry will occur on the next qualifying request per the `pull_sync_ttl_sec` window. | Async path — no user-facing impact. | +| Consent withdrawal KV delete | Expire the cookie immediately. Log the KV delete failure at `error` level. Retry the KV delete on the next request for this user. | Cookie deletion is the primary enforcement mechanism. KV delete failure must not block or delay the cookie expiry. | -**`degraded: true` in `/identify` responses** +**`degraded: true` in `/_ts/api/v1/identify` responses** -When a KV read fails, the `/identify` response includes `"degraded": true` in the JSON body alongside an empty `uids` and `eids`. The `ec` field is still populated from the cookie. Callers should proceed with identity-only targeting (EC hash) and omit partner UID parameters from downstream requests. +When a KV read fails, the `/_ts/api/v1/identify` response includes `"degraded": true` in the JSON body alongside an empty `uids` and `eids`. The `ec` field is still populated from the cookie. Callers should proceed with identity-only targeting (EC hash) and omit partner UID parameters from downstream requests. ```json { @@ -461,7 +461,7 @@ This is the primary real-time write path for building the identity graph from ex ### 9.2 Endpoint ``` -GET /sync +GET /_ts/api/v1/sync ``` ### 9.3 Parameters @@ -505,7 +505,7 @@ Partners should treat `ts_synced=0` as a signal that the mapping was not stored. 
**Acceptance criteria:** -- [ ] `GET /sync?partner=ssp_x&uid=abc&return=https://sync.ssp.com/ack` returns a redirect to the `return` URL within 50ms (excluding KV write time) +- [ ] `GET /_ts/api/v1/sync?partner=ssp_x&uid=abc&return=https://sync.ssp.com/ack` returns a redirect to the `return` URL within 50ms (excluding KV write time) - [ ] KV entry for the EC hash contains `ids.ssp_x.uid = "abc"` after a successful sync; response redirects to `return` with `ts_synced=1` - [ ] If no `ts-ec` cookie is present, redirects to `return` with `ts_synced=0&ts_reason=no_ec`; no KV write performed - [ ] If consent is absent or invalid, redirects to `return` with `ts_synced=0&ts_reason=no_consent`; no KV write performed @@ -524,17 +524,17 @@ The S2S batch sync API allows partners to push ID mappings to Trusted Server in ### 10.2 Endpoint ``` -POST /api/v1/sync +POST /_ts/api/v1/batch-sync ``` ### 10.3 Authentication -Partners authenticate with a rotatable API key. Key rotation must not require redeploying the binary. Partner provisioning is handled via the `/admin/partners/register` endpoint (see Section 15, Open Questions). +Partners authenticate with a rotatable API key. Key rotation must not require redeploying the binary. Partner provisioning is handled via the `/_ts/admin/v1/partners/register` endpoint (see Section 15, Open Questions). 
### 10.4 Request ``` -POST /api/v1/sync +POST /_ts/api/v1/batch-sync Content-Type: application/json Authorization: Bearer @@ -593,7 +593,7 @@ Before writing a mapping, Trusted Server checks the KV metadata for the given EC **Acceptance criteria:** -- [ ] `POST /api/v1/sync` with a valid Bearer token and a batch of up to 1000 mappings returns a response within 5 seconds +- [ ] `POST /_ts/api/v1/batch-sync` with a valid Bearer token and a batch of up to 1000 mappings returns a response within 5 seconds - [ ] Accepted mappings are written to the corresponding KV identity graph entries within 1 second - [ ] Mappings for unknown `ec_hash` values are rejected with `ec_hash_not_found` - [ ] Mappings for users with withdrawn consent are rejected with `consent_withdrawn` @@ -721,22 +721,22 @@ The following fields are added to the partner record schema (Section 13.3): Trusted Server exposes two modes for injecting EC identity into the bidstream. Publishers choose the mode that fits their existing ad stack. -### 12.2 Mode A: Identity resolution (`/identify`) +### 12.2 Mode A: Identity resolution (`/_ts/api/v1/identify`) -Trusted Server exposes `/identify` as a standalone identity resolution endpoint for callers that need EC identity and resolved partner UIDs outside of TS's own auction orchestration. TS builds the OpenRTB request in Mode B — `/identify` is not part of that path. It serves three distinct use cases: +Trusted Server exposes `/_ts/api/v1/identify` as a standalone identity resolution endpoint for callers that need EC identity and resolved partner UIDs outside of TS's own auction orchestration. TS builds the OpenRTB request in Mode B — `/_ts/api/v1/identify` is not part of that path. It serves three distinct use cases: **Use case 1 — Attribution and analytics** Any server-side or browser-side system that needs to tag an event, impression, or conversion with the user's EC hash. Examples: analytics pipelines, attribution platforms, reporting dashboards. 
**Use case 2 — Publisher ad server outbid context** -After TS's auction completes and winners are delivered to the publisher's ad server endpoint, the publisher's ad server may need EC identity and resolved partner UIDs to evaluate whether to accept the programmatic winner or outbid with a direct-sold placement. For this use case, TS includes the EC identity in the winner notification payload directly (see Section 12.3) — a separate `/identify` call is only needed if the publisher's ad server receives the winner through a path that does not carry TS headers. +After TS's auction completes and winners are delivered to the publisher's ad server endpoint, the publisher's ad server may need EC identity and resolved partner UIDs to evaluate whether to accept the programmatic winner or outbid with a direct-sold placement. For this use case, TS includes the EC identity in the winner notification payload directly (see Section 12.3) — a separate `/_ts/api/v1/identify` call is only needed if the publisher's ad server receives the winner through a path that does not carry TS headers. **Use case 3 — Client-side wrappers for non-TS SSPs** -Some SSPs run client-side header bidding wrappers (e.g., Amazon TAM, certain Index Exchange configurations) that do not participate in TS's server-side auction orchestration. A Prebid.js module or custom wrapper script calls `/identify` from the browser to obtain the EC hash and resolved partner UIDs, then injects those values into bid requests sent to those SSPs. This ensures non-TS demand sources bid with the same identity enrichment as TS-orchestrated bids, enabling a fair comparison at winner selection. +Some SSPs run client-side header bidding wrappers (e.g., Amazon TAM, certain Index Exchange configurations) that do not participate in TS's server-side auction orchestration. 
A Prebid.js module or custom wrapper script calls `/_ts/api/v1/identify` from the browser to obtain the EC hash and resolved partner UIDs, then injects those values into bid requests sent to those SSPs. This ensures non-TS demand sources bid with the same identity enrichment as TS-orchestrated bids, enabling a fair comparison at winner selection. -> **Prerequisite for use case 3:** For a non-TS SSP to receive a useful UID from `/identify`, that SSP must already be a registered partner in `partner_store` and must have a resolved uid in the KV identity graph for this user (via pixel sync, S2S batch, or S2S pull). Without a prior sync, `/identify` returns no uid for that partner. +> **Prerequisite for use case 3:** For a non-TS SSP to receive a useful UID from `/_ts/api/v1/identify`, that SSP must already be a registered partner in `partner_store` and must have a resolved uid in the KV identity graph for this user (via pixel sync, S2S batch, or S2S pull). Without a prior sync, `/_ts/api/v1/identify` returns no uid for that partner. -**Endpoint:** `GET /identify` +**Endpoint:** `GET /_ts/api/v1/identify` **When to call:** Once per auction event — not per-pageview. For use case 3, call before sending bid requests to non-TS SSPs. @@ -744,7 +744,7 @@ Some SSPs run client-side header bidding wrappers (e.g., Amazon TAM, certain Ind **Pattern 1 — Browser-direct (recommended for use cases 1 and 3)** -A script on the publisher's page calls `/identify` via `fetch()`. Because `ec.publisher.com` is same-site with the publisher's domain, the browser sends the `ts-ec` cookie and consent cookies automatically. No forwarding required. +A script on the publisher's page calls `/_ts/api/v1/identify` via `fetch()`. Because `ec.publisher.com` is same-site with the publisher's domain, the browser sends the `ts-ec` cookie and consent cookies automatically. No forwarding required. 
```js const identity = await fetch('https://ec.publisher.com/identify').then((r) => @@ -773,7 +773,7 @@ A server-side caller must forward the following from the original browser reques #### Cookie and consent handling -`/identify` follows the EC retrieval priority from Section 6.4. It does not generate a new EC — if no EC is present, the response body contains `consent: denied` and empty identity fields. Consent is evaluated per Section 7.1. `/identify` never sets or modifies cookies. +`/_ts/api/v1/identify` follows the EC retrieval priority from Section 6.4. It does not generate a new EC — if no EC is present, the response body contains `consent: denied` and empty identity fields. Consent is evaluated per Section 7.1. `/_ts/api/v1/identify` never sets or modifies cookies. #### Response @@ -850,7 +850,7 @@ Trusted Server owns the full auction path in Mode B. TS builds the OpenRTB reque **EC context in winner notification to publisher's ad server:** -When TS delivers auction winners to the publisher's ad server endpoint, the response includes EC identity so the publisher's ad server has full context for its outbid decision without needing to call `/identify` separately: +When TS delivers auction winners to the publisher's ad server endpoint, the response includes EC identity so the publisher's ad server has full context for its outbid decision without needing to call `/_ts/api/v1/identify` separately: | Header | Value | | ----------------- | ------------------------------------------------------------ | @@ -895,21 +895,21 @@ Each partner registered in `partner_store` declares: ### 12.6 User stories -**As a publisher using Mode A for analytics/attribution**, I want to call `/identify` from a browser script so that I can tag events and impressions with the user's EC hash and resolved partner UIDs using URL parameters. 
+**As a publisher using Mode A for analytics/attribution**, I want to call `/_ts/api/v1/identify` from a browser script so that I can tag events and impressions with the user's EC hash and resolved partner UIDs using URL parameters. **Acceptance criteria:** -- [ ] `GET /identify` returns `200` with a valid JSON body within 30ms when EC is present and consent is valid +- [ ] `GET /_ts/api/v1/identify` returns `200` with a valid JSON body within 30ms when EC is present and consent is valid - [ ] `uids` object contains one key per partner with `bidstream_enabled: true` and a resolved UID; partners with no resolved UID are omitted - [ ] If consent is denied, response is `403 Forbidden` with body `{"consent": "denied"}` - [ ] If no EC is present, response is `204 No Content` with no body - [ ] Response headers `X-ts-ec`, `X-ts-eids`, `X-ts-`, and `X-ts-ec-consent` are present on `200` responses as supplementary signals -**As a publisher using a client-side wrapper for non-TS SSPs**, I want to call `/identify` from my Prebid.js configuration so that SSPs outside TS's auction receive the same identity enrichment as TS-orchestrated bids, enabling a fair winner comparison. +**As a publisher using a client-side wrapper for non-TS SSPs**, I want to call `/_ts/api/v1/identify` from my Prebid.js configuration so that SSPs outside TS's auction receive the same identity enrichment as TS-orchestrated bids, enabling a fair winner comparison. 
**Acceptance criteria:** -- [ ] `GET /identify` called from the browser returns resolved UIDs for all registered partners with a KV entry for this user +- [ ] `GET /_ts/api/v1/identify` called from the browser returns resolved UIDs for all registered partners with a KV entry for this user - [ ] A partner with no KV entry for this user is omitted from `uids` — no empty or null entries - [ ] Response is available within 30ms so it does not block Prebid.js auction timeout @@ -933,7 +933,7 @@ The following capabilities must be configurable without redeploying the binary: - **Publisher passphrase** — the HMAC key used for EC hash generation; same value across all of the publisher's domains; shared with trusted partners to form an identity-federated consortium - **Identity graph store** — the KV store backing the EC hash → identity graph - **Partner registry store** — the KV store backing partner configuration and API key validation -- **Partner records** — each partner's allowed sync domains, bidstream settings, pull sync configuration, and API credentials; managed via `/admin/partners/register` without redeployment +- **Partner records** — each partner's allowed sync domains, bidstream settings, pull sync configuration, and API credentials; managed via `/_ts/admin/v1/partners/register` without redeployment The exact configuration format (TOML keys, KV schema, JSON field names) is an engineering decision and will be documented in the technical design doc. @@ -945,11 +945,11 @@ The following documentation changes are required alongside the EC feature: - **Rename SyntheticID → Edge Cookie** across the entire `docs/` GitHub Pages site. The underlying concept is the same but the product name changes. 
- **New integration guides**, one per customer type: - - Publisher (full TS): enabling EC in `trusted-server.toml`, partner onboarding via `/admin/partners/register` + - Publisher (full TS): enabling EC in `trusted-server.toml`, partner onboarding via `/_ts/admin/v1/partners/register` - SSP: pixel sync integration guide, sync pixel URL format, callback handling, optional pull resolution endpoint - DSP: S2S batch API reference, authentication, conflict resolution behavior, optional pull resolution endpoint - Identity Provider: registering as a partner, `source_domain` and `openrtb_atype` configuration, sync patterns -- **API reference** for the four new endpoints: `GET /sync`, `GET /identify`, `POST /api/v1/sync`, and the partner-side pull resolution contract +- **API reference** for the four new endpoints: `GET /_ts/api/v1/sync`, `GET /_ts/api/v1/identify`, `POST /_ts/api/v1/batch-sync`, and the partner-side pull resolution contract - **Pull sync integration guide**: partner requirements for exposing a resolution endpoint, authentication, expected response shape, rate limit behavior - **Consent enforcement guide**: how TCF and GPP signals are read, precedence rules, what happens on withdrawal @@ -957,10 +957,10 @@ The following documentation changes are required alongside the EC feature: ## 15. Open Questions -| # | Question | Owner | Status | -| --- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------- | --------------------------------------------------------------------------- | -| 1 | Partner provisioning: TS will expose a `/admin/partners/register` endpoint authenticated at the publisher level (bearer token issued per publisher Fastly service), so publishers can onboard SSP/DSP partners without touching KV directly. 
Engineering to define the exact auth mechanism. | Engineering | **Resolved** — `/admin/partners/register` endpoint, publisher-authenticated | -| 2 | Should TS Lite expose a `GET /health` endpoint so partners can programmatically verify their service is running and their partner config is active in KV? | Product | **N/A** — TS Lite deferred (see Section 5) | +| # | Question | Owner | Status | +| --- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | ----------- | --------------------------------------------------------------------------------------- | +| 1 | Partner provisioning: TS will expose a `/_ts/admin/v1/partners/register` endpoint authenticated at the publisher level, so publishers can onboard SSP/DSP partners without touching KV directly. | Engineering | **Resolved** — `/_ts/admin/v1/partners/register` endpoint protected by admin basic auth | +| 2 | Should TS Lite expose a `GET /health` endpoint so partners can programmatically verify their service is running and their partner config is active in KV? | Product | **N/A** — TS Lite deferred (see Section 5) | --- @@ -970,7 +970,7 @@ The following documentation changes are required alongside the EC feature: | ------------------------------- | --------------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------- | | EC match rate (returning users) | >90% within 30 days | Fastly real-time logs: ratio of requests with existing `ts-ec` cookie vs. 
new EC generations | | Consent enforcement accuracy | 0 ECs created for opted-out EU/UK users | Log audit: verify no `ts-ec` `Set-Cookie` in responses where consent signal is absent | -| KV sync latency (pixel sync) | p99 <75ms end-to-end | Fastly log timing on `/sync` endpoint | +| KV sync latency (pixel sync) | p99 <75ms end-to-end | Fastly log timing on `/_ts/api/v1/sync` endpoint | | S2S batch API throughput | >500 mappings/sec sustained | Load test prior to partner onboarding | | S2S pull sync resolution rate | >30% of pull calls return a non-null uid within 60 days of first partner go-live | Fastly log: pull call outcomes per partner | | Identity graph fill rate | >50% of EC hashes with at least 1 resolved partner ID within 60 days of partner go-live | KV scan sample | diff --git a/docs/superpowers/specs/2026-03-24-ssc-technical-spec-design.md b/docs/superpowers/specs/2026-03-24-ssc-technical-spec-design.md index ad47c65d..36f5ce40 100644 --- a/docs/superpowers/specs/2026-03-24-ssc-technical-spec-design.md +++ b/docs/superpowers/specs/2026-03-24-ssc-technical-spec-design.md @@ -3,7 +3,13 @@ **Status:** Draft **Author:** Engineering **PRD reference:** `docs/internal/ssc-prd.md` -**Last updated:** 2026-03-18 +**Last updated:** 2026-04-14 + +> **Supersession note (issue #666):** Sections in this historical design spec +> that describe a separate `consent_store` or consent KV fallback are obsolete. +> Current runtime behavior interprets live consent from request cookies, headers, +> geolocation, and policy defaults. `ec_identity_store` is the only KV-backed EC +> lifecycle store and holds identity graph state plus withdrawal tombstones. --- @@ -16,12 +22,13 @@ 5. [Cookie and Header Handling](#5-cookie-and-header-handling) 6. [Consent Enforcement](#6-consent-enforcement) 7. [KV Store Identity Graph](#7-kv-store-identity-graph) -8. [Pixel Sync Endpoint (`GET /sync`)](#8-pixel-sync-endpoint-get-sync) -9. 
[S2S Batch Sync API (`POST /api/v1/sync`)](#9-s2s-batch-sync-api-post-apiv1sync) + 7A. [Device Signals and Bot Gate](#7a-device-signals-and-bot-gate) +8. [Prebid EID Cookie Ingestion](#8-prebid-eid-cookie-ingestion) +9. [S2S Batch Sync API (`POST /_ts/api/v1/batch-sync`)](#9-s2s-batch-sync-api-post-apiv1sync) 10. [S2S Pull Sync (TS-Initiated)](#10-s2s-pull-sync-ts-initiated) -11. [Identity Resolution Endpoint (`GET /identify`)](#11-identity-resolution-endpoint-get-identify) +11. [Identity Resolution Endpoint (`GET /_ts/api/v1/identify`)](#11-identity-resolution-endpoint-get-identify) 12. [Bidstream Decoration (`/auction` Mode B)](#12-bidstream-decoration-auction-mode-b) -13. [Partner Registry and Admin Endpoint](#13-partner-registry-and-admin-endpoint) +13. [Partner Registry (Config-Based)](#13-partner-registry-config-based) 14. [Configuration](#14-configuration) 15. [Constants and Header Names](#15-constants-and-header-names) 16. [Error Handling](#16-error-handling) @@ -39,12 +46,12 @@ EC is the full replacement for SyntheticID. The PRD explicitly states backward c **Prerequisites (must be merged before this epic begins):** -- **SyntheticID → Edge Cookie rename** — [PR #479](https://github.com/IABTechLab/trusted-server/pull/479) renames SyntheticID to Edge Cookie (EC) across all code paths: `synthetic.rs` → `edge_cookie.rs`, `COOKIE_SYNTHETIC_ID` → `COOKIE_EC_ID`, `X-Synthetic-*` → `X-ts-ec`/`X-ts-ec-fresh` headers, `settings.synthetic` → `settings.edge_cookie`, and simplifies EC generation to IP-only HMAC-SHA256 (removing Handlebars templating). It also renames `ConsentPipelineInput.synthetic_id` to `ec_id`, updates consent KV helper parameters/docs, and handles consent-store key migration (old SyntheticID keys orphaned, TTL expiry cleans them up). **This PR must be merged before implementation of this spec begins.** The spec assumes a codebase where SyntheticID no longer exists. 
Verify before starting: +- **SyntheticID removal** — [PR #479](https://github.com/IABTechLab/trusted-server/pull/479) removes SyntheticID from all active code paths: `get_or_generate_synthetic_id()`, `COOKIE_SYNTHETIC_ID`, `X-Synthetic-*` headers, `synthetic.rs` module, `settings.synthetic` config, and all SyntheticID generation/cookie code from `publisher.rs`, `endpoints.rs`, and `registry.rs`. **This PR must be merged before implementation of this spec begins.** The spec assumes a codebase where SyntheticID no longer exists. Verify before starting: - `grep -r 'synthetic_id' crates/` returns no hits outside test fixtures - `grep -r 'X-Synthetic' crates/` returns no hits - `trusted-server.toml` has no `[synthetic]` section - - `ConsentPipelineInput` uses `ec_id`, not `synthetic_id` -- **Consent implementation** — The consent pipeline (`build_consent_context()`, `ConsentContext`, `allows_ec_creation()`, TCF/GPP/US-Privacy decoding) is implemented and available as a stable interface before this epic. PR `#380` merged to `main`. EC calls `allows_ec_creation()` directly — no new gating functions are introduced. Note: EC changes the _phase order_ relative to the old SyntheticID flow — consent is evaluated before EC generation, so first-visit consent KV persistence is deferred to the second request (see §6.1.1 for full analysis). + - `ConsentPipelineInput` uses `identity_key`, not `synthetic_id` +- **Consent implementation** — The consent pipeline (`build_consent_context()`, `ConsentContext`, `allows_ec_creation()`, TCF/GPP/US-Privacy decoding) is implemented and available as a stable interface before this epic. PR `#380` merged to `main`. EC calls `allows_ec_creation()` directly — no new gating functions are introduced. Consent is evaluated from live request cookies, headers, geolocation, and policy defaults before EC generation. 
**Deferred from this spec (not in scope):** @@ -66,8 +73,21 @@ Browser Request │ extract GeoInfo → enforce auth → route_request │ └──────────┬──────────────────────────────────────┘ │ -Two-phase model (matches existing codebase pattern): - +Phase 0 — bot gate (pure in-memory, no KV I/O): + ┌─────────────────────────────────────────────────┐ + │ derive_device_signals(req) │ + │ - UA → is_mobile, platform_class │ + │ - req.get_tls_ja4() → ja4_class (Section 1) │ + │ - req.get_client_h2_fingerprint() → h2_fp_hash │ + │ - (ja4_class, h2_fp_hash) → known_browser │ + │ │ + │ !looks_like_browser()? │ + │ → suppress KV graph (None), skip ec_finalize, │ + │ skip pull sync. Request still proxied to │ + │ origin — bot receives valid HTML but leaves │ + │ no trace in the identity graph. │ + └──────┬────────────────────────────────────────────┘ + │ Phase 1 — pre-routing (like `GeoInfo::from_request()`): ┌─────────────────────────────────────────┐ │ EcContext::read_from_request() │ @@ -75,6 +95,9 @@ Phase 1 — pre-routing (like `GeoInfo::from_request()`): │ - build_consent_context() → ConsentContext │ │ - allows_ec_creation(consent) │ │ No generation. No cookie writes. │ + │ │ + │ ec_context.set_device_signals(signals) │ + │ (passed through to KvEntry on creation) │ └──────┬──────────────────────────────────┘ │ Phase 2 — inside organic handlers only: @@ -84,20 +107,20 @@ Phase 2 — inside organic handlers only: handle_publisher_request() integration_registry.handle_proxy() calls ec_context.generate_if_needed() calls ec_context.generate_if_needed() -EC route handlers (GET /sync, GET /identify, POST /auction, -POST /api/v1/sync, POST /admin/*) NEVER call generate_if_needed(). -`/identify`, `/auction`, `POST /api/v1/sync`, and `POST /admin/*` -use `EcContext` in read-only form. 
`GET /sync` is the one exception: -it never bootstraps an EC, but it may replace `ec_context.consent` -with a locally-decoded fallback consent context for that request only -when the optional `consent` query param is the sole available signal. +EC route handlers (GET /_ts/api/v1/identify, POST /auction, +POST /_ts/api/v1/batch-sync) NEVER call generate_if_needed(). +`/_ts/api/v1/identify`, `/auction`, and `POST /_ts/api/v1/batch-sync` +use `EcContext` in read-only form. /auction reads EC identity but never bootstraps it — the publisher page-load path generates the EC before any auction request arrives. ec_finalize_response() — after every handler: - - consent withdrawn + cookie present? → clear_ec_on_response() + tombstone - - returning-user mismatch? → set_ec_on_response() [reconcile cookie to header EC] - - ec_generated == true? → set_ec_on_response() [new cookie only] + - !allows_ec_creation(&consent)? → strip EC response headers + - explicit withdrawal + cookie present? → also expire the cookie and write tombstones + - returning user with consent? → set x-ts-ec header only (no cookie/KV TTL refresh) + - ec_generated == true? → set EC cookie + x-ts-ec header + - Prebid EID ingestion: reads `ts-eids` cookie, matches source domains + via PartnerRegistry, writes changed partner UIDs to KV (same UID = no write) ``` EC state flows through an `EcContext` struct created once per request and passed through handlers. 
@@ -112,23 +135,28 @@ New files in `crates/trusted-server-core/src/`: crates/trusted-server-core/src/ ec/ mod.rs — EcContext, pub re-exports - identity.rs — EC generation (HMAC-SHA256, IP normalization) - cookie.rs — create_ec_cookie(), delete_ec_cookie(), set_ec_on_response() - finalize.rs — ec_finalize_response() (cookie write/delete, last_seen, tombstone) - kv.rs — KvIdentityGraph, read/write/delete identity entries - partner.rs — PartnerRecord, PartnerStore, load_partner() - sync_pixel.rs — handle_sync() handler - sync_batch.rs — handle_batch_sync() handler + generation.rs — EC generation (HMAC-SHA256, IP normalization) + cookies.rs — set_ec_cookie(), expire_ec_cookie() + consent.rs — EC consent gating helpers + device.rs — DeviceSignals derivation, UA/JA4/H2 parsing, known browser allowlist + eids.rs — OpenRTB EID construction helpers + finalize.rs — ec_finalize_response() (cookie write/delete, tombstone, EID ingestion) + kv.rs — KvIdentityGraph, read/write/delete identity entries, cluster evaluation + kv_types.rs — KvEntry, KvGeo, KvConsent, KvPubProperties, KvNetwork, KvDevice, KvMetadata + partner.rs — Partner validation helpers (ID format, API key hashing) + registry.rs — PartnerRegistry (in-memory, config-based, O(1) indexes) + rate_limiter.rs — RateLimiter trait and Fastly ERL implementation + prebid_eids.rs — ingest_prebid_eids() — ts-eids cookie parsing and KV sync + batch_sync.rs — handle_batch_sync() handler pull_sync.rs — PullSyncDispatcher, dispatch_background() identify.rs — handle_identify() handler - admin.rs — handle_register_partner() handler ``` Existing files modified: | File | Change | | -------------------------------------------------- | ----------------------------------------------------- | -| `crates/trusted-server-core/src/settings.rs` | Add `EdgeCookie` settings struct | +| `crates/trusted-server-core/src/settings.rs` | Add `Ec` and `EcPartner` settings structs | | `crates/trusted-server-core/src/constants.rs` | Add EC header/cookie 
name constants | | `crates/trusted-server-core/src/error.rs` | Add `EdgeCookie` error variant | | `crates/trusted-server-core/src/auction/` | Inject EC into `user.id`, `user.eids`, `user.consent` | @@ -138,7 +166,7 @@ Existing files modified: ## 4. EC Identity Generation -### 4.1 Module: `ec/identity.rs` +### 4.1 Module: `ec/generation.rs` The EC generation mirrors the SyntheticID approach (`synthetic.rs`) but strips volatile inputs. @@ -161,9 +189,12 @@ pub fn generate_ec(passphrase: &str, ip: IpAddr) -> Result String; -/// Extracts the stable 64-character hex prefix from a full EC value. +/// Extracts the stable 64-character hex prefix from a full EC ID. /// -/// The prefix is used as the KV store key. The `.suffix` is discarded. +/// This is primarily used for logging and debugging. All +/// EC identity KV operations use the **full EC ID** (including the +/// `.suffix`) as the key, not just this prefix. The suffix provides uniqueness +/// for users behind the same NAT/proxy infrastructure. /// /// Returns `None` if the value is not in `{64-hex}.{6-alnum}` format. pub fn ec_hash(ec_value: &str) -> Option<&str>; @@ -179,7 +210,7 @@ pub fn ec_hash(ec_value: &str) -> Option<&str>; **Output format:** `{64-char lowercase hex}.{6-char random alphanumeric}` -The random suffix is generated with `fastly::rand` (same approach as SyntheticID). Once set in a cookie the full value is preserved; only the hash prefix is used as the KV key. +The random suffix is generated with `fastly::rand` (same approach as SyntheticID). Once set in a cookie, the full value (hash + suffix) is preserved and used as the KV store key for the EC identity graph. The suffix provides uniqueness for users behind the same NAT/proxy who share the same IP-derived hash. **IPv6 /64 prefix:** Split on `:`, take first 4 groups, join with `:`. Example: `2001:db8:85a3:0000:0000:8a2e:0370:7334` → `2001:db8:85a3:0`. 
@@ -203,14 +234,16 @@ When both header and cookie are present, the **header wins** as `ec_value` (used - `ec_value` = header value (authoritative for handler reads) - `cookie_ec_value` = cookie value (tracked separately for withdrawal) -On consent **withdrawal** (`!allows_ec_creation && cookie_was_present`): +On **explicit consent withdrawal** (`has_explicit_ec_withdrawal(&consent) && cookie_was_present`): - Delete the browser cookie (always, based on `cookie_was_present`) - Tombstone the **cookie-derived** hash: `kv.write_withdrawal_tombstone(ec_hash(cookie_ec_value))` - If the header-derived hash differs, also tombstone it: `kv.write_withdrawal_tombstone(ec_hash(ec_value))` - This matches the existing SyntheticID behavior where revocation targets the cookie value (`publisher.rs:515`), not the header value. -On **non-withdrawal** paths (last_seen, handler reads): use `ec_value` (header-derived) as the active identity. When `cookie_ec_value` is set (mismatch), `ec_finalize_response()` overwrites the browser cookie with the header-derived `ec_value` via `set_ec_on_response()`. This reconciles the browser identity to match the publisher-forwarded identity and prevents persistent oscillation between two ECs on subsequent requests. +If `allows_ec_creation(&consent)` is `false` but there is **no explicit withdrawal signal** (for example, unknown jurisdiction or missing/undecodable consent in a regulated regime), the response strips EC-related headers only. It does **not** delete the cookie or write tombstones. + +On **non-withdrawal** paths (handler reads and response headers): use `ec_value` (header-derived) as the active identity. Returning-user responses set `x-ts-ec` for the active identity but do not refresh or repair the browser cookie. Cookie writes are reserved for newly generated ECs; cookie deletion is reserved for explicit consent withdrawal. 
**Validation:** Both the header and cookie values are validated independently via `ec_hash()` (`{64-hex}.{6-alnum}` format). If the header is present but malformed, it is discarded and the cookie value is used instead (if valid). A malformed header must not suppress a valid cookie — bad forwarding infrastructure should not break returning-user identity. `cookie_was_present` is set based on the raw cookie existing, regardless of validity — an invalid cookie value is still a cookie that needs to be cleared on withdrawal. @@ -220,18 +253,16 @@ Generation (step 3 above becoming a new EC) happens only inside organic handlers ```rust /// Per-request Edge Cookie state. Constructed pre-routing once per request. -/// Organic handlers call `generate_if_needed()` to mint new ECs. `/sync` is the -/// one EC route that may replace `consent` with a locally-decoded fallback for -/// the remainder of that request only. +/// Organic handlers call `generate_if_needed()` to mint new ECs. pub struct EcContext { - /// Full EC value (`hash.suffix`), if present on request or generated this request. + /// Full EC ID (`{64-hex}.{6-alnum}`), if present on request or generated this request. pub ec_value: Option, /// Whether the `ts-ec` **cookie** was present on the inbound request. /// This is the only field that gates consent-withdrawal cookie deletion — /// the PRD's delete branch is conditioned on the cookie, not on X-ts-ec header. pub cookie_was_present: bool, /// The cookie's EC value, if different from `ec_value` (header won priority). - /// Used only for withdrawal: tombstone targets the cookie-derived hash to match + /// Used only for withdrawal: tombstone targets the cookie-derived EC ID to match /// existing SyntheticID revocation behavior (`publisher.rs:515`). /// `None` when cookie absent or cookie == header value. pub cookie_ec_value: Option, @@ -249,6 +280,11 @@ pub struct EcContext { /// Stored here so pull sync can use it after `req` has been consumed by routing. 
/// `None` only if Fastly's `get_client_ip_addr()` returns `None`. pub client_ip: Option, + /// Device signals derived from TLS/H2/UA in the adapter layer. + /// Set via `set_device_signals()` after `read_from_request()` returns. + /// Converted to `KvDevice` and stored on new entries in `generate_if_needed()`. + /// `None` when the adapter does not provide signals (e.g., test environments). + pub device_signals: Option, } impl EcContext { @@ -256,16 +292,9 @@ impl EcContext { /// Does not write to the **EC identity KV store**. Called pre-routing, like /// `GeoInfo::from_request()` in the current `main.rs`. /// - /// Calls `build_consent_context()` with the EC hash (when present) passed - /// via `ConsentPipelineInput.ec_id` (renamed from `synthetic_id` - /// in PR #479). - /// - /// When an EC hash is available (returning user), this enables the consent - /// pipeline's KV fallback (read) and KV persistence (write to the - /// **consent** KV store). On a first visit (no EC cookie), `ec_hash` is - /// `None` and no consent KV interaction occurs; consent is evaluated purely - /// from request cookies/headers. This means consent is not persisted to - /// consent KV until the user's second request. See §6.1.1. + /// Calls `build_consent_context()` with request-local cookies, headers, + /// settings, and geo data. There is no separate consent KV fallback; live + /// consent is interpreted from the current request. pub fn read_from_request( req: &Request, settings: &Settings, @@ -292,19 +321,30 @@ impl EcContext { kv: &KvIdentityGraph, ); + /// Sets device signals derived from the adapter layer (TLS/H2/UA). + /// Must be called before `generate_if_needed()` so new entries include `KvDevice`. + pub fn set_device_signals(&mut self, signals: DeviceSignals); + + /// Returns the device signals, if set. + pub fn device_signals(&self) -> Option<&DeviceSignals>; + /// Returns the stable 64-char hex prefix, or `None` if no EC. 
+ /// + /// Note: This extracts only the prefix for display/logging purposes. All KV + /// operations use the full EC ID (via `ec_value()`), not just this hash. pub fn ec_hash(&self) -> Option<&str>; } ``` -**`ec_finalize_response()` behavior** (signature: `ec_finalize_response(settings, geo, ec_context, kv, response)`): +**`ec_finalize_response()` behavior** (signature: `ec_finalize_response(settings, ec_context, kv, registry, eids_cookie, response)`): -1. If `!allows_ec_creation(&consent) && cookie_was_present`: call `clear_ec_on_response()` (deletes cookie **and** strips any handler-built `X-ts-ec`, `X-ts-eids`, `X-ts-ec-consent`, `x-ts-eids-truncated`, and `X-ts-` response headers) and write withdrawal tombstones for each valid known EC hash (cookie-derived and, when different, header-derived). This runs on **every route** — consent withdrawal is always real-time enforced. Keyed on `cookie_was_present`, not `ec_was_present`, because only a cookie-held EC can be deleted by the browser. When the cookie is malformed and there is no valid header-derived hash, no tombstone is written. -2. If `ec_was_present == true && ec_generated == false && allows_ec_creation(&consent)`: call `kv.update_last_seen()` (debounced). If `cookie_ec_value.is_some()`, also call `set_ec_on_response()` to reconcile the browser cookie to the authoritative header-derived EC. -3. If `ec_generated == true`: call `set_ec_on_response()` — sets `Set-Cookie` and `X-ts-ec`. KV create already happened inside `generate_if_needed()`; `ec_finalize_response()` does NOT write KV beyond tombstones and `last_seen`. -4. Handler-built response headers (`X-ts-ec`, `X-ts-eids` set directly by `/identify`) are preserved on non-withdrawal paths only. +1. If `!allows_ec_creation(&consent)`: call `clear_ec_headers_on_response()` to strip any handler-built `X-ts-ec`, `X-ts-eids`, `X-ts-ec-consent`, `x-ts-eids-truncated`, and `X-ts-` response headers. 
This runs on **every route**, including fail-closed cases where consent cannot be verified. +2. If `has_explicit_ec_withdrawal(&consent) && cookie_was_present`: additionally expire the cookie and write withdrawal tombstones for each valid known EC ID (cookie-derived and, when different, header-derived). Keyed on `cookie_was_present`, not `ec_was_present`, because only a cookie-held EC can be deleted by the browser. When the cookie is malformed and there is no valid header-derived EC ID, no tombstone is written. +3. If `ec_was_present == true && ec_generated == false && allows_ec_creation(&consent)`: ingest Prebid EIDs from the `ts-eids` cookie if present (see section 8) and set the `x-ts-ec` response header only. Ordinary returning-user requests do not refresh the EC cookie and do not write KV solely to extend TTL. +4. If `ec_generated == true`: set `Set-Cookie` and `X-ts-ec`. KV create already happened inside `generate_if_needed()`; `ec_finalize_response()` does NOT write KV beyond explicit-withdrawal tombstones and Prebid EID ingestion. Also ingest Prebid EIDs from the `ts-eids` cookie if present. +5. Handler-built response headers (`X-ts-ec` set directly by `/_ts/api/v1/identify`) are preserved only when consent currently allows EC. -**Note on `kv_degraded`:** Not on `EcContext` — `read_from_request()` does not read KV. Handlers track degraded state locally. `/identify` returns `degraded: true` in the JSON body on KV read failure; the auction handler treats a failed read as `eids: []`. +**Note on `kv_degraded`:** Not on `EcContext` — `read_from_request()` does not read KV. Handlers track degraded state locally. `/_ts/api/v1/identify` returns `degraded: true` in the JSON body on KV read failure; the auction handler treats a failed read as `eids: []`. 
```` @@ -324,7 +364,7 @@ impl EcContext { | Max-Age | `31536000` (1 year) | | HttpOnly | No | -### 5.2 Module: `ec/cookie.rs` +### 5.2 Module: `ec/cookies.rs` The `cookie_domain` parameter passed to all functions below is computed as `format!(".{}", settings.publisher.domain)`. Do **not** use @@ -341,24 +381,52 @@ pub fn create_ec_cookie(ec_value: &str, cookie_domain: &str) -> String; pub fn delete_ec_cookie(cookie_domain: &str) -> String; // Sets Max-Age=0 with same Domain/Path/Secure/SameSite attributes. +/// Sets only the `X-ts-ec` response header on a response. +pub fn set_ec_header_on_response(response: &mut Response, ec_value: &str); + /// Sets the EC cookie and `X-ts-ec` response header on a response. -pub fn set_ec_on_response(response: &mut Response, ec_value: &str, cookie_domain: &str); +pub fn set_ec_cookie_and_header_on_response(response: &mut Response, ec_value: &str, cookie_domain: &str); /// Removes the EC cookie and strips all EC-related response headers: /// `X-ts-ec`, `X-ts-eids`, `X-ts-ec-consent`, `x-ts-eids-truncated`, -/// and any `X-ts-` headers. Called on consent withdrawal to -/// prevent leaking EC identity in handler-built headers. +/// and any `X-ts-` headers. Called on explicit consent +/// withdrawal to prevent leaking EC identity in handler-built headers. pub fn clear_ec_on_response(response: &mut Response, cookie_domain: &str); ```` ### 5.3 Response header -`X-ts-ec: {ec_hash.suffix}` is set by `set_ec_on_response()`, which is called by `ec_finalize_response()` in two cases: (1) `ec_generated == true` (new EC minted this request), or (2) `cookie_ec_value.is_some()` (header/cookie mismatch reconciliation — overwrites cookie to match header). It is also set explicitly by `/identify` and `/auction` handlers on their own response paths when an EC is present. It is **not** set on ordinary returning-user requests where the cookie already matches the header (or no header is present). 
+`X-ts-ec: {64-hex}.{6-alnum}` is set when an EC is available for the response. In current behavior, returning users (`ec_was_present == true && ec_generated == false && allows_ec_creation(&consent)`) receive the header only; newly generated ECs (`ec_generated == true`) receive both the header and `Set-Cookie`. `/_ts/api/v1/identify` and `/auction` also set EC-related headers on their response paths. This header is added to `INTERNAL_HEADERS` in `constants.rs` so it is stripped before proxying to downstream backends, consistent with existing `X-ts-*` handling. ### 5.4 Per-request EC lifecycle +**Phase 0 — bot gate** (always runs, all routes — pure in-memory, no KV I/O): + +``` +derive_device_signals(req) + ua = req.get_header_str("user-agent") + ja4 = req.get_tls_ja4() // Fastly SDK — full JA4 hash + h2_fp = req.get_client_h2_fingerprint() // Fastly SDK — raw H2 SETTINGS string + + DeviceSignals::derive(ua, ja4, h2_fp) + is_mobile = parse_is_mobile(ua) // 0=desktop, 1=mobile, 2=unknown + ja4_class = extract_ja4_section1(ja4) // split on '_', take [0] + platform_class = parse_platform_class(ua) // mac/windows/ios/android/linux/None + h2_fp_hash = sha256(h2_fp)[..6].hex() // 12 hex chars + known_browser = evaluate_known_browser(ja4_class, h2_fp_hash) // allowlist match + + is_real_browser = looks_like_browser() // ja4_class.is_some() && platform_class.is_some() + + if !is_real_browser: + log::debug("Bot gate: blocking EC operations") + kv_graph = None // suppress all KV operations + // ec_finalize_response() will be skipped + // pull sync will be skipped + // request still proxied to origin normally +``` + **Phase 1 — pre-routing** (always runs, all routes): ``` @@ -371,12 +439,13 @@ EcContext::read_from_request() If neither valid: ec_value = None ec_was_present = ec_value.is_some() cookie_was_present = ts-ec cookie raw key exists (regardless of validity) - ec_hash = ec_value.as_deref().and_then(ec_hash) // None on first visit or malformed - 
build_consent_context(jar, req, config, geo, ec_hash) → consent: ConsentContext - // ec_hash is the identity key for consent KV (renamed from synthetic_id in PR #479). - // When ec_hash is Some: consent KV fallback read + consent KV write (to consent store, not EC store). - // When ec_hash is None (first visit): no consent KV interaction — cookies/headers only. + ec_id = ec_value.as_deref() // None on first visit or malformed + build_consent_context(jar, req, config, geo, ec_id) → consent: ConsentContext + // Consent is interpreted from request-local cookies, headers, settings, and geo. + // No separate consent KV fallback or persistence runs in the EC lifecycle. ec_generated = false + + ec_context.set_device_signals(device_signals) // for KvDevice on creation ``` **Phase 2 — inside organic handlers only** (`handle_publisher_request`, `handle_proxy`): @@ -389,41 +458,42 @@ ec_context.generate_if_needed(settings, &kv) // best-effort — never 500s → generate_ec(passphrase, ip) → ec_value = Some(new_ec) → ec_generated = true - → kv.create_or_revive(ec_hash, &entry) (best-effort, log warn if fails) + → kv.create_or_revive(new_ec, &entry) (best-effort, log warn if fails) // create_or_revive overwrites a tombstone (ok=false) on re-consent // no-ops if a live entry (ok=true) already exists ``` -**`ec_finalize_response(settings, geo, ec_context, &kv, response)` — always runs, all routes:** +**`ec_finalize_response(settings, ec_context, &kv, registry, eids_cookie, response)` — runs only when `is_real_browser == true`:** ``` - ├── !allows_ec_creation(&consent) && cookie_was_present? - │ → clear_ec_on_response() (delete cookie + strip ALL EC headers from response) - │ → // Tombstone all known valid EC hashes. May be 0, 1, or 2 hashes. 
- │ if let Some(cookie_hash) = cookie_ec_value.and_then(|v| ec_hash(&v)): - │ kv.write_withdrawal_tombstone(cookie_hash) // cookie-derived hash - │ if let Some(header_hash) = ec_value.and_then(|v| ec_hash(&v)): - │ if Some(header_hash) != cookie_hash: - │ kv.write_withdrawal_tombstone(header_hash) // header-derived hash (if different) - │ // When cookie is malformed and no valid header exists: no tombstone written. - │ // Cookie deletion is still the authoritative enforcement mechanism. - │ // Tombstone fails? log error, do NOT block — no retry possible on browser path. + // Bot gate: when !looks_like_browser(), this entire block is skipped. + // The response is proxied to origin without any cookie writes or KV operations. + + ├── !allows_ec_creation(&consent)? + │ → clear_ec_headers_on_response() (strip ALL EC headers from response) + │ → has_explicit_ec_withdrawal(&consent) && cookie_was_present? + │ → expire_ec_cookie() + │ → // Tombstone all known valid EC IDs. May be 0, 1, or 2 IDs. + │ if let Some(cookie_ec_id) = cookie_ec_value.filter(|v| is_valid_ec_id(v)): + │ kv.write_withdrawal_tombstone(cookie_ec_id) // cookie-derived EC ID + │ if let Some(header_ec_id) = ec_value.filter(|v| is_valid_ec_id(v)): + │ if Some(header_ec_id) != cookie_ec_id: + │ kv.write_withdrawal_tombstone(header_ec_id) // header-derived EC ID (if different) + │ // When cookie is malformed and no valid header exists: no tombstone written. + │ // Cookie deletion is still the authoritative enforcement mechanism. + │ // Tombstone fails? log error, do NOT block — no retry possible on browser path. + │ → return │ ├── ec_was_present == true && ec_generated == false && allows_ec_creation(&consent)? - │ → kv.update_last_seen(ec_hash, now()) (returning user — debounced at 300s) - │ → if cookie_ec_value.is_some(): - │ // Header and cookie disagree — reconcile by overwriting cookie with header value. 
- │ // Prevents persistent split identity where user oscillates between two ECs - │ // depending on whether the forwarded header is present on subsequent requests. - │ set_ec_on_response() (Set-Cookie with ec_value, the header-derived identity) + │ → set_ec_header_on_response() (returning user — no cookie/KV TTL refresh) │ └── ec_generated == true? - → set_ec_on_response() (Set-Cookie + X-ts-ec on response) + → set_ec_cookie_and_header_on_response() (Set-Cookie + X-ts-ec on response) ``` -EC route handlers (`GET /sync`, `GET /identify`, `POST /api/v1/sync`, `POST /admin/*`) never call `generate_if_needed()`. `ec_finalize_response()` will still delete the cookie on those routes if consent is withdrawn — that is intentional. +EC route handlers (`GET /_ts/api/v1/identify`, `POST /_ts/api/v1/batch-sync`) never call `generate_if_needed()`. `ec_finalize_response()` will still delete the cookie on those routes if consent is explicitly withdrawn — that is intentional. -**Cookie write rule:** `Set-Cookie` is written in exactly two cases: (1) `ec_generated == true` (first-time generation), or (2) `cookie_ec_value.is_some()` (header/cookie mismatch — reconcile cookie to match the header-derived identity). There is no cookie refresh or Max-Age reset on ordinary returning users where cookie already matches. The PRD defers a blanket refresh-on-every-request strategy to a future iteration. +**Cookie write rule:** `Set-Cookie` is written for newly generated ECs and consent-withdrawal deletion only. Ordinary returning requests set `x-ts-ec` but do not refresh the cookie `Max-Age`. --- @@ -442,41 +512,51 @@ Consent decoding shipped in `#380` (already merged). This spec treats the follow ### 6.1.1 EC consent gating EC reuses the existing `allows_ec_creation(&ConsentContext) -> bool` function -from the consent module (`consent/mod.rs`). 
No parallel gating function is -introduced — EC calls `allows_ec_creation()` directly for all consent decisions -(EC generation, withdrawal detection, sync gating). - -There is no EC-specific consent gate and no behavior change to -`allows_ec_creation()` in this spec. Shared consent-policy semantics stay in -the consent module; EC only consumes that existing decision. +from the consent module (`consent/mod.rs`) for EC generation, header emission, +and other "may this request use ECs right now?" decisions. -**Consent pipeline integration:** +Explicit withdrawal semantics use a separate +`has_explicit_ec_withdrawal(&ConsentContext) -> bool` helper. This narrower +signal distinguishes authoritative opt-outs from fail-closed cases where EC use +must be blocked for the current request but an already-issued EC must not be +revoked (for example, unknown jurisdiction or missing/undecodable consent in a +regulated regime). -`EcContext::read_from_request()` calls `build_consent_context()` with the EC hash as the identity key, passed via `ConsentPipelineInput.ec_id` (renamed from `synthetic_id` in PR #479). The consent pipeline's KV persistence and fallback behavior works with EC hashes: +There is no new consent source or KV lookup in this spec. Shared +consent-policy semantics stay in the consent module; EC consumes the existing +request-local decision plus the explicit-withdrawal helper. -- **Returning user** (EC cookie present → `ec_hash` is `Some`): consent KV fallback read is available when consent cookies are absent; consent KV write persists cookie-sourced consent for future requests. Note: `build_consent_context()` calls `try_kv_write()` internally, so phase 1 writes to the **consent** KV store (not the EC identity store). -- **First visit** (no EC cookie → `ec_hash` is `None`): no consent KV interaction. Consent is evaluated purely from request cookies/headers. The gap: consent is not persisted to consent KV on the first request. 
This is accepted — in regulated jurisdictions (GDPR, US state), consent cookies/headers must be present for `allows_ec_creation()` to return `true`, so there is always a signal to persist on the next request. In non-regulated jurisdictions, `allows_ec_creation()` returns `true` without consent signals, so there is nothing to persist anyway. Consent KV persistence begins on the second request when the EC cookie is present. +**Consent pipeline integration:** -**Consent store keying:** Old consent KV entries under SyntheticID keys become orphaned after PR #479 ships. New entries are keyed by EC hash. Orphaned entries expire via TTL — no explicit migration is performed. +`EcContext::read_from_request()` calls `build_consent_context()` with request-local cookies, headers, settings, and geo data. Current runtime behavior does not use a separate consent KV store or consent KV fallback. Consent is interpreted from live request signals on every request; the EC identity store only keeps the minimal `KvEntry.consent` snapshot and withdrawal tombstones for S2S enforcement. -**Rollout impact:** At cutover, returning users who relied on consent KV fallback (consent cookies absent, consent loaded from KV under SyntheticID key) will lose that fallback until a new EC-keyed consent entry is written on a subsequent request where consent cookies are present. This is a one-time window: once the EC cookie is set and a request with consent cookies arrives, the consent KV entry is written under the EC hash and fallback works again. The window duration depends on how quickly users return with consent cookies. This is accepted — consent cookies are the primary signal; KV fallback is a secondary mechanism for when cookies are blocked or absent. +All downstream EC logic uses `allows_ec_creation(&self.consent)` for creation/forwarding decisions and `has_explicit_ec_withdrawal(&self.consent)` for cookie-expiry/tombstone decisions. 
No consent decoding or KV-backed gating logic is added in this epic. -All downstream EC logic calls `allows_ec_creation(&self.consent)`. No consent decoding or gating logic is added in this epic. +### 6.2 Consent withdrawal — explicit delete path -### 6.2 Consent withdrawal — KV delete +When `allows_ec_creation(&consent)` returns `false`, Trusted Server **always** +strips EC-related response headers for that request. This covers both explicit +revocation and fail-closed cases. -When `allows_ec_creation(&consent)` returns `false` for a user whose **`ts-ec` cookie** is present (`cookie_was_present == true`). A user identified only by the `X-ts-ec` request header is not subject to cookie deletion — there is no cookie to expire. +Cookie expiry and tombstone writes happen only when +`has_explicit_ec_withdrawal(&consent)` returns `true` **and** the request +carried a **`ts-ec` cookie** (`cookie_was_present == true`). A user identified +only by the `X-ts-ec` request header is not subject to cookie deletion or +`tombstoning` on this path — there is no browser cookie to revoke. -1. Issue `Set-Cookie: ts-ec=; Max-Age=0; ...` and strip all EC response headers (synchronous — must not fail silently). This always happens when `cookie_was_present == true`. -2. Write tombstone for each valid EC hash available (`cookie_ec_value` and/or `ec_value`). When neither is valid (malformed cookie, no header), **no tombstone is written** — cookie deletion alone is the enforcement mechanism. When at least one valid hash exists: `kv.write_withdrawal_tombstone(hash)` sets `consent.ok = false`, clears partner IDs, TTL 24h — approximately 25ms per write. +1. Strip all EC response headers (synchronous — must not fail silently) whenever `!allows_ec_creation(&consent)`. +2. If `has_explicit_ec_withdrawal(&consent) && cookie_was_present == true`, issue `Set-Cookie: ts-ec=; Max-Age=0; ...`. +3. 
In that same explicit-withdrawal + cookie-present case, write a tombstone for each valid EC ID available (`cookie_ec_value` and/or `ec_value`). When neither is valid (malformed cookie, no header), **no tombstone is written** — cookie deletion alone is the browser-side enforcement mechanism. When at least one valid EC ID exists: `kv.write_withdrawal_tombstone(ec_id)` sets `consent.ok = false`, clears partner IDs, TTL 24h — approximately 25ms per write. -The tombstone write runs in the request path (not async) to ensure real-time enforcement. Using a tombstone rather than a hard delete preserves the `consent_withdrawn` signal for batch sync clients for 24 hours — otherwise batch sync cannot distinguish consent withdrawal from an EC that never existed. +The tombstone write runs in the request path (not async) to ensure real-time enforcement for authoritative withdrawals. Using a tombstone rather than a hard delete preserves the `consent_withdrawn` signal for batch sync clients for 24 hours — otherwise batch sync cannot distinguish consent withdrawal from an EC that never existed. If the tombstone write fails: -- Log at `error` level with EC hash -- Do not block the response — cookie deletion is the primary enforcement mechanism -- **No retry is possible on the browser path.** Once the cookie is deleted, subsequent browser requests carry no EC value (`ec_hash()` returns `None`), so there is no hash to tombstone. A failed tombstone means batch sync clients may see `ec_hash_not_found` (after TTL expiry) rather than `consent_withdrawn` — this is accepted degradation. The cookie deletion remains the authoritative enforcement mechanism. +- Log at `error` level with EC ID +- Do not block the response — cookie deletion is the primary enforcement mechanism on explicit-withdrawal paths +- **No retry is possible on the browser path.** Once the cookie is deleted, subsequent browser requests carry no EC value (`ec_value` returns `None`), so there is no EC ID to tombstone. 
A failed tombstone means batch sync clients may see `ec_id_not_found` (after TTL expiry) rather than `consent_withdrawn` — this is accepted degradation. + +Fail-closed / unverifiable-consent cases keep the cookie intact and do not write tombstones; they only suppress EC use on that request. --- @@ -484,37 +564,54 @@ If the tombstone write fails: ### 7.1 Module: `ec/kv.rs` -Two KV stores are used. Their names are configured in `trusted-server.toml`: +One KV store is used for the identity graph. Its name is configured in `trusted-server.toml`: + +| Store | TOML key | Purpose | +| -------------- | ------------- | --------------------- | +| Identity graph | `ec.ec_store` | EC ID → identity JSON | -| Store | TOML key | Purpose | -| ---------------- | ------------------ | ---------------------------------- | -| Identity graph | `ec.ec_store` | EC hash → identity JSON | -| Partner registry | `ec.partner_store` | Partner ID → config + API key hash | +Partners are defined in config (`[[ec.partners]]` in TOML) and loaded into an in-memory `PartnerRegistry` at startup. There is no KV-backed partner store. ### 7.2 Identity graph schema -**KV key:** 64-character hex hash (the stable prefix from `ec_value`, without `.suffix`). +**KV key:** Full EC ID in `{64-char hex}.{6-char alphanumeric}` format. The random suffix is intentionally included to provide uniqueness for users behind the same NAT/proxy infrastructure who would otherwise share identical IP-derived hash prefixes. 
**KV value (JSON, max ~5KB):** ```json { "v": 1, - "created": 1741824000, - "last_seen": 1741910400, + "created": 1775162556, "consent": { "tcf": "CP...", "gpp": "DBA...", "ok": true, - "updated": 1741910400 + "updated": 1775162556 }, "geo": { "country": "US", - "region": "CA" + "region": "TN", + "asn": 7922, + "dma": 659 + }, + "device": { + "is_mobile": 0, + "ja4_class": "t13d1516h2", + "platform_class": "mac", + "h2_fp_hash": "a3f9d21c8b04", + "known_browser": true + }, + "pub_properties": { + "origin_domain": "autoblog.com", + "seen_domains": ["autoblog.com"] + }, + "network": { + "cluster_size": 2 }, "ids": { - "ssp_x": { "uid": "abc123", "synced": 1741824000 }, - "liveramp": { "uid": "LR_xyz", "synced": 1741890000 } + "id5": { "uid": "ID5*qe8VHv..." }, + "trade_desk": { "uid": "226fb4b3-..." }, + "liveramp_ats": { "uid": "Ag2z1TDA..." } } } ``` @@ -522,12 +619,19 @@ Two KV stores are used. Their names are configured in `trusted-server.toml`: **KV metadata (max 2048 bytes, readable without streaming body):** ```json -{ "ok": true, "country": "US", "v": 1 } +{ + "ok": true, + "country": "US", + "v": 1, + "cluster_size": 2, + "is_mobile": 0, + "known_browser": true +} ``` -The `ok` field in metadata is a **historical consent record for S2S consumers only** — it is set to `false` by `write_withdrawal_tombstone()` so that batch sync clients (`POST /api/v1/sync`) can return `consent_withdrawn` rather than `ec_hash_not_found` during the 24-hour tombstone TTL. +The `ok` field in metadata is a **historical consent record for S2S consumers only** — it is set to `false` by `write_withdrawal_tombstone()` so that batch sync clients (`POST /_ts/api/v1/batch-sync`) can return `consent_withdrawn` rather than `ec_id_not_found` during the 24-hour tombstone TTL. -**`consent.ok` is NOT used to make the withdrawal decision on the main request path.** Consent withdrawal is determined entirely from `allows_ec_creation(&ec_context.consent)` on the current request. 
When withdrawal is detected, the cookie is deleted and `write_withdrawal_tombstone()` is called in-path (setting `ok = false`, 24h TTL — see §6.2). Engineers must not add a KV read to the consent withdrawal hot path based on this field. +**`consent.ok` is NOT used to make the withdrawal decision on the main request path.** Withdrawal enforcement is driven by current request-local consent: `allows_ec_creation(&ec_context.consent)` decides whether EC use and EC response headers are allowed on this request, and `has_explicit_ec_withdrawal(&ec_context.consent)` decides whether to expire the cookie and call `write_withdrawal_tombstone()` in-path (setting `ok = false`, 24h TTL — see §6.2). Engineers must not add a KV read to the consent withdrawal hot path based on this field. **Rust types:** @@ -535,9 +639,18 @@ The `ok` field in metadata is a **historical consent record for S2S consumers on pub struct KvEntry { pub v: u8, pub created: u64, - pub last_seen: u64, pub consent: KvConsent, pub geo: KvGeo, + /// Creation-time publisher property metadata. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub pub_properties: Option, + /// Device class signals. Written once on creation — never updated. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub device: Option, + /// Network cluster disambiguation. Written only by /identify. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub network: Option, + #[serde(default, skip_serializing_if = "HashMap::is_empty")] pub ids: HashMap, } @@ -550,24 +663,81 @@ pub struct KvConsent { pub struct KvGeo { pub country: String, + #[serde(default, skip_serializing_if = "Option::is_none")] pub region: Option, + /// Autonomous System Number (e.g. 7922 = Comcast). + /// Primary signal for distinguishing home ISP vs. corporate VPN. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub asn: Option, + /// DMA/metro code (e.g. 807 = San Francisco). 
+ #[serde(default, skip_serializing_if = "Option::is_none")] + pub dma: Option, } pub struct KvPartnerId { pub uid: String, - pub synced: u64, +} + +/// Publisher property metadata captured when an EC entry is created. +pub struct KvPubProperties { + /// Apex domain where this EC entry was first created. + pub origin_domain: String, + /// Bounded set of publisher apex domains seen for this EC entry. + /// Capped at 50 entries. + #[serde(default, skip_serializing_if = "BTreeSet::is_empty")] + pub seen_domains: BTreeSet, +} + +/// Coarse device signals derived from TLS handshake and UA. +/// Written once on creation — never updated after. +pub struct KvDevice { + /// 0 = desktop, 1 = mobile, 2 = unknown (non-standard client). + pub is_mobile: u8, + /// JA4 Section 1 only (e.g. "t13d1516h2" = Chrome). + #[serde(default, skip_serializing_if = "Option::is_none")] + pub ja4_class: Option, + /// Coarse OS family: "mac", "windows", "ios", "android", "linux". + #[serde(default, skip_serializing_if = "Option::is_none")] + pub platform_class: Option, + /// SHA256 prefix (12 hex chars) of H2 SETTINGS fingerprint. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub h2_fp_hash: Option, + /// true = known browser, false = known bot, None = unknown. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub known_browser: Option, +} + +/// Network cluster disambiguation data. +/// Written only by /identify — too expensive for organic hot path. +pub struct KvNetwork { + /// Number of distinct EC suffixes sharing this hash prefix. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub cluster_size: Option, } pub struct KvMetadata { pub ok: bool, pub country: String, pub v: u8, + /// Mirrors KvNetwork::cluster_size. None = not yet evaluated. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub cluster_size: Option, + /// Mirrors KvDevice::is_mobile. Enables propagation gating without body read. 
+ #[serde(default, skip_serializing_if = "Option::is_none")] + pub is_mobile: Option, + /// Mirrors KvDevice::known_browser. Buyer-facing quality signal. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub known_browser: Option, } ``` +All new fields use `Option` types or `serde(default)`, so existing entries +deserialize without error. No schema version bump is needed — v1 has not +shipped yet. + ### 7.3 TTL -All KV writes use `time_to_live_sec = 31536000` (1 year), matching the cookie `Max-Age`. +New live entries use `time_to_live_sec = 31536000` (1 year), matching the initial cookie `Max-Age`. Ordinary returning-user page views do not refresh the EC cookie and do not write the KV entry solely to extend TTL. Real data mutations (for example, a changed partner UID or first cluster-size evaluation) still write the live entry with the live-entry TTL. Withdrawal tombstones use a 24-hour TTL. ### 7.4 Conflict resolution — atomic read-modify-write @@ -584,22 +754,34 @@ impl KvIdentityGraph { pub fn new(store_name: impl Into) -> Self; /// Reads the full entry, returning the generation marker for CAS writes. + /// + /// # Arguments + /// + /// * `ec_id` — The full EC ID (`{64-hex}.{6-alnum}`) used as the KV key. pub fn get( &self, - ec_hash: &str, + ec_id: &str, ) -> Result, Report>; /// Reads only the metadata fields (consent flag, country). + /// + /// # Arguments + /// + /// * `ec_id` — The full EC ID (`{64-hex}.{6-alnum}`) used as the KV key. pub fn get_metadata( &self, - ec_hash: &str, + ec_id: &str, ) -> Result, Report>; /// Creates a new entry. Returns `Ok(())` if successful, `Err` if the key /// already exists (concurrent create) or on KV error. + /// + /// # Arguments + /// + /// * `ec_id` — The full EC ID (`{64-hex}.{6-alnum}`) used as the KV key. pub fn create( &self, - ec_hash: &str, + ec_id: &str, entry: &KvEntry, ) -> Result<(), Report>; @@ -615,9 +797,13 @@ impl KvIdentityGraph { /// Called by `generate_if_needed()` instead of `create()`. 
This ensures that /// re-consent recovery is immediate — a user who withdraws and then re-consents /// within the 24-hour tombstone window gets a fresh identity entry without delay. + /// + /// # Arguments + /// + /// * `ec_id` — The full EC ID (`{64-hex}.{6-alnum}`) used as the KV key. pub fn create_or_revive( &self, - ec_hash: &str, + ec_id: &str, entry: &KvEntry, ) -> Result<(), Report>; @@ -627,214 +813,486 @@ impl KvIdentityGraph { /// /// If the key does not exist, creates a minimal live entry first: /// `consent.ok = true`, `consent.tcf = None`, `consent.gpp = None`, - /// `created = synced`, `last_seen = synced`, `geo.country = "ZZ"`, - /// `geo.region = None`, and `ids = { partner_id: ... }`. + /// `created = now`, `geo.country = "ZZ"`, `geo.region = None`, + /// and `ids = { partner_id: ... }`. /// /// This recovery path is intentional: it materializes the graph later when /// the initial best-effort `create_or_revive()` on EC generation failed. /// Batch sync still performs its explicit existence/tombstone check before - /// calling this method, so `POST /api/v1/sync` retains its `ec_hash_not_found` + /// calling this method, so `POST /_ts/api/v1/batch-sync` retains its `ec_id_not_found` /// contract. + /// + /// # Arguments + /// + /// * `ec_id` — The full EC ID (`{64-hex}.{6-alnum}`) used as the KV key. pub fn upsert_partner_id( &self, - ec_hash: &str, + ec_id: &str, partner_id: &str, uid: &str, - synced: u64, ) -> Result<(), Report>; - /// Updates `last_seen` timestamp, but only if the stored value is more than - /// 300 seconds older than `timestamp`. This debounce prevents KV write - /// thrashing under bursty traffic — Fastly KV enforces a 1 write/sec limit - /// per key. Callers should log `warn` on failure and continue. - pub fn update_last_seen( + /// Upserts a partner ID only when the KV entry already exists. Used by + /// S2S batch sync. Returns `Unchanged` when the existing UID matches, + /// avoiding a KV write. 
Different UIDs overwrite the stored value; mapping + /// timestamps are not used for ordering because they are no longer stored + /// in the EC identity entry. + pub fn upsert_partner_id_if_exists( &self, - ec_hash: &str, - timestamp: u64, - ) -> Result<(), Report>; + ec_id: &str, + partner_id: &str, + uid: &str, + ) -> Result>; + + /// Counts the number of KV keys sharing a hash prefix via the list API. + /// Uses a single-page list with `limit(100)`. Returns the count, or + /// `None` if the list exceeds 100 keys (clearly a large network). + pub fn count_hash_prefix_keys( + &self, + hash_prefix: &str, + ) -> Result, Report>; + + /// Evaluates the network cluster size for an EC entry. + /// + /// Returns a stored `cluster_size` without a list call when present. If + /// missing, calls `count_hash_prefix_keys()` and writes the result to + /// `entry.network` via CAS. Returns the cluster size for inclusion in + /// the `/_ts/api/v1/identify` response. + pub fn evaluate_cluster( + &self, + ec_id: &str, + entry: &KvEntry, + generation: u64, + ) -> Result, Report>; /// Writes a withdrawal tombstone for consent enforcement. /// /// Instead of hard-deleting the KV entry, this overwrites it with /// `consent.ok = false`, clears all partner IDs, and sets a 24-hour TTL. - /// The tombstone allows batch sync clients (`POST /api/v1/sync`) to return - /// `consent_withdrawn` rather than `ec_hash_not_found` for the tombstone TTL. + /// The tombstone allows batch sync clients (`POST /_ts/api/v1/batch-sync`) to return + /// `consent_withdrawn` rather than `ec_id_not_found` for the tombstone TTL. /// /// After the 24-hour TTL expires, the entry is gone. Any subsequent `get()` - /// returns `None` (`ec_hash_not_found`) — the distinction is time-bounded. + /// returns `None` (`ec_id_not_found`) — the distinction is time-bounded. /// /// Caller must handle `Err` by logging at `error` level; the cookie deletion /// in `ec_finalize_response()` is the primary enforcement mechanism. 
+ /// + /// # Arguments + /// + /// * `ec_id` — The full EC ID (`{64-hex}.{6-alnum}`) used as the KV key. pub fn write_withdrawal_tombstone( &self, - ec_hash: &str, + ec_id: &str, ) -> Result<(), Report>; /// Hard-deletes the entry. Used only for data deletion requests (IAB deletion /// framework — deferred). For consent withdrawal, use `write_withdrawal_tombstone()`. - pub fn delete(&self, ec_hash: &str) -> Result<(), Report>; + /// + /// # Arguments + /// + /// * `ec_id` — The full EC ID (`{64-hex}.{6-alnum}`) used as the KV key. + pub fn delete(&self, ec_id: &str) -> Result<(), Report>; } ``` -`MAX_CAS_RETRIES = 3`. If all retries fail on a generation conflict, return `Err` — callers handle per-endpoint policy (§8.3 step 7 for pixel sync, §9.4 for batch sync). +`MAX_CAS_RETRIES = 5`. If all retries fail on a generation conflict, return `Err` — callers handle per-endpoint policy (§9.4 for batch sync, §8.4 for Prebid EID ingestion). ### 7.5 KV degraded behavior | Operation | KV unavailable | Action | | ---------------------------------- | -------------- | ---------------------------------------------------------------------------------------------- | | EC cookie creation | KV error | Set cookie. Skip KV create. Log `warn`. | -| `/sync` KV write | KV error | Redirect with `ts_synced=0&ts_reason=write_failed`. | -| `/identify` KV read | KV error | Return `200` with `ec` set, `degraded: true`, empty `uids`/`eids`. | -| `POST /api/v1/sync` | KV error | Return `207` with all mappings rejected, `reason: "kv_unavailable"`. | +| Prebid EID ingestion KV write | KV error | Skip write. Log `warn`. Retry on next qualifying request. | +| `/_ts/api/v1/identify` KV read | KV error | Return `200` with `ec` set, `degraded: true`, empty `uid`/`eid`. | +| `POST /_ts/api/v1/batch-sync` | KV error | Return `207` with all mappings rejected, `reason: "kv_unavailable"`. | | Pull sync KV write | KV error | Discard uid. Log `warn`. Retry on next qualifying request. 
| | Consent withdrawal tombstone write | KV error | Delete cookie (primary enforcement). Log `error`. Next request: no cookie → no EC regenerated. | --- -## 8. Pixel Sync Endpoint (`GET /sync`) +## 7A. Device Signals and Bot Gate + +### 7A.1 Overview + +Device signals provide coarse, non-PII browser classification derived from +the TLS handshake and User-Agent header at the Fastly edge. They serve two +purposes: + +1. **Bot gate** — block all KV identity operations for unrecognized clients + (bots, scrapers, non-standard HTTP clients). The request is still proxied + to the publisher origin normally — the bot receives valid HTML but leaves + no trace in the identity graph. +2. **Device class record** — store a write-once `KvDevice` on each EC entry + for future cross-browser propagation decisions and buyer-facing device + quality scoring. + +All signal derivation is pure in-memory computation — no KV I/O. It runs on +every request before EC context creation. + +### 7A.2 Signal derivation -### 8.1 Module: `ec/sync_pixel.rs` +No Client Hints are used — JA4 and UA platform parsing provide equivalent or +superior signal for every browser including Safari and Firefox, which do not +send Client Hints. + +**`is_mobile`** — derived in priority order: + +| Condition | Value | +| ---------------------------------------------- | -------------------------------------------------------------------------- | +| UA contains `iPhone`, `iPad`, or `Android` | `1` — confirmed mobile | +| UA contains `Macintosh`, `Windows`, or `Linux` | `0` — confirmed desktop | +| Neither pattern matches | `2` — genuinely unknown (rare; typically bots or heavily hardened clients) | + +Note: `is_mobile: 2` in practice signals a non-standard client rather than +Safari, since Safari always produces a recognizable UA platform string. 
+ +**`platform_class`** — coarse OS family parsed from UA (checked in order): + +| UA segment | `platform_class` | +| ---------------------------------- | ---------------- | +| `iPhone` or `iPad` | `ios` | +| `Android` (checked before `Linux`) | `android` | +| `Macintosh` | `mac` | +| `Windows NT` | `windows` | +| `Linux` (non-Android) | `linux` | +| No match | `None` | + +**`ja4_class`** — Section 1 of the JA4 fingerprint only (e.g. `t13d1516h2`). +Available via `req.get_tls_ja4()` in the Fastly Compute Rust SDK. The full +JA4 format is `section1_section2_section3` separated by underscores; we split +on `_` and take `[0]`. Section 1 identifies browser family (cipher count, +extension count, ALPN) without uniquely fingerprinting a device. The full JA4 +is never stored. + +**`h2_fp_hash`** — first 12 hex characters of SHA256 of the raw HTTP/2 +SETTINGS fingerprint string, available via `req.get_client_h2_fingerprint()`. +Used alongside `ja4_class` to confirm browser family and detect bots. + +**`known_browser`** — set `true` when `ja4_class` + `h2_fp_hash` match a +known legitimate browser pattern from the allowlist below. `None` when +unknown. Both signals must be present for a match — if either is `None`, +returns `None`. + +### 7A.3 Known browser fingerprint allowlist + +Empirically derived from Fastly Compute production responses (2026-04-03): + +| Browser | `ja4_class` | `h2_fp` raw string | `known_browser` | +| ----------------------------------- | ------------ | -------------------------------- | --------------- | +| Chrome/Mac (v146) | `t13d1516h2` | `1:65536;2:0;4:6291456;6:262144` | `true` | +| Safari/Mac (v26) + Safari/iOS (v26) | `t13d2013h2` | `2:0;3:100;4:2097152` | `true` | +| Firefox/Mac (v149) | `t13d1717h2` | `1:65536;2:0;4:131072;5:16384` | `true` | + +Safari Mac and Safari iOS share identical TLS/H2 stacks — distinguished only +by `platform_class` (`mac` vs `ios`) and `is_mobile` (`0` vs `1`). 
+ +This allowlist will expand as new browser versions are observed in production. +Entries not matching any allowlist row get `known_browser: None` (not `false`) +unless they match a confirmed bot pattern. + +The allowlist comparison works by hashing the known raw H2 SETTINGS strings +at evaluation time and comparing against the request's `h2_fp_hash`. The list +is small (3 entries) so the cost is negligible. + +### 7A.4 Bot gate behavior + +The bot gate checks for **signal presence** rather than matching against a +hardcoded fingerprint allowlist. Real browsers always produce a valid TLS +fingerprint (`ja4_class`) and a recognizable UA platform string +(`platform_class`). Raw HTTP clients (curl, Python requests, Go net/http, +headless scrapers) typically lack one or both. + +The gate uses `DeviceSignals::looks_like_browser()`: ```rust -pub async fn handle_sync( - settings: &Settings, - kv: &KvIdentityGraph, - partner_store: &PartnerStore, - req: &Request, - ec_context: &mut EcContext, -) -> Result>; +pub fn looks_like_browser(&self) -> bool { + self.ja4_class.is_some() && self.platform_class.is_some() +} ``` -### 8.2 Query parameters +| Condition | EC operations | Example | +| ------------------------------------------------ | ------------- | -------------------------------- | +| `ja4_class` present AND `platform_class` present | **Allowed** | Any real browser on any OS | +| `ja4_class` missing OR `platform_class` missing | **Blocked** | curl, Python requests, Googlebot | + +`known_browser` (the fingerprint allowlist match) is still computed and stored +on `KvDevice` for analytics and future buyer-facing quality scoring, but it +does **not** gate identity operations. This avoids blocking legitimate browsers +whose JA4/H2 fingerprints are not yet in the allowlist. + +**Implementation in the Fastly adapter:** + +1. 
After `GeoInfo::from_request()`, call `derive_device_signals(req)` which + reads `User-Agent`, `req.get_tls_ja4()`, and + `req.get_client_h2_fingerprint()`. +2. If `!looks_like_browser()`: + - `kv_graph` is set to `None` (suppresses all KV reads and writes) + - `ec_finalize_response()` is skipped (no cookie set/deleted) + - Pull sync is skipped + - The request proceeds through normal routing — organic requests are + proxied to publisher origin, API endpoints respond normally (but + without EC identity data) +3. If `looks_like_browser()`: proceed normally. Device signals are set + on `EcContext` via `set_device_signals()` so they flow through to + `KvEntry` creation. + +**Current bot response:** the request is served normally (proxied to origin) +without any KV operations or cookie writes. The bot receives a valid HTML +response but leaves no trace in the identity graph. + +### 7A.5 `DeviceSignals` struct -| Parameter | Required | Description | -| --------- | -------- | ---------------------------------------------------------------------------- | -| `partner` | Yes | Partner ID — must exist in `partner_store` | -| `uid` | Yes | Partner's user ID for this user | -| `return` | Yes | Redirect-back URL (must match partner's `allowed_return_domains`) | -| `consent` | No | Fallback TCF/GPP string if `ec_context.consent.is_empty()` after pre-routing | +```rust +/// Device signals derived from a single request. +/// Computed in the Fastly adapter from raw TLS/H2/UA data. +pub struct DeviceSignals { + pub is_mobile: u8, + pub ja4_class: Option, + pub platform_class: Option, + pub h2_fp_hash: Option, + pub known_browser: Option, +} + +impl DeviceSignals { + /// Derives all device signals from raw request data. + pub fn derive(ua: &str, ja4: Option<&str>, h2_fp: Option<&str>) -> Self; -### 8.3 Flow + /// Returns true when ja4_class and platform_class are both present. + /// Used by the bot gate — see §7A.4. 
+ pub fn looks_like_browser(&self) -> bool; + /// Converts to KvDevice for KV storage. + pub fn to_kv_device(&self) -> KvDevice; +} ``` -1. Parse query params. Missing required params → 400. - -2. Require a valid cookie-held EC. - If `cookie_was_present == false` OR `ec_context.ec_hash().is_none()` - (cookie missing or malformed) → redirect to - {return}?ts_synced=0&ts_reason=no_ec - -3. Look up partner record in partner_store. - Not found → 400. - -4. Validate return URL host against partner.allowed_return_domains. - - Exact hostname match only — no suffix or wildcard. - - Mismatch → 400. - -5. Evaluate consent. Use `ec_context.consent` (built pre-routing via - `build_consent_context()`). The optional `consent` query param is a **fallback - only** — used solely when `ec_context.consent.is_empty()` returns `true`. - This is the actual contract from the consent module. It is broader than - “no cookies or headers on the wire”: if consent KV fallback, decoded objects, - GPP section IDs, AC string, raw US privacy, or GPC already populated the - context, `is_empty()` is `false` and the query param is ignored entirely. - - When the fallback applies: decode the query param into a **locally-built** - `ConsentContext` (same TCF/GPP/USP decoders, same jurisdiction inputs), then - assign that value into `ec_context.consent` for the remainder of this request. - This makes the sync write decision and `ec_finalize_response()` use the same - effective consent view, avoiding a same-request “write partner ID, then - withdraw EC” conflict. Do NOT re-call `build_consent_context()` — that would - trigger `try_kv_write()` and persist the query-param consent to the consent KV - store, which is not intended. The decoded fallback applies only to this `/sync` - request; it is not written to the consent KV store and does not change any - future request unless the client sends real consent cookies/headers again. 
- - `!allows_ec_creation(...)` → redirect to {return}?ts_synced=0&ts_reason=no_consent - -6. Check anti-stuffing rate limit (sync_rate_limit per EC hash per partner per hour). - Exceeded → `429 Too Many Requests` (no redirect — the `return` URL is never called). - -7. kv.upsert_partner_id(ec_hash, partner_id, uid, now()) - If the root KV entry is missing (e.g. initial `create_or_revive()` failed on - the organic page load), `upsert_partner_id()` creates a minimal live entry and - then writes `ids[partner_id]`. This is the recovery path for best-effort EC - creation misses. - KV write failure → redirect to {return}?ts_synced=0&ts_reason=write_failed - -8. Success → redirect to {return}?ts_synced=1 + +### 7A.6 `KvDevice` write policy + +`KvDevice` is written to `KvEntry.device` only during `generate_if_needed()` +(new EC creation). It is never updated after creation — device signals are a +first-seen record of how this EC entry was established. + +Existing entries (created before device signals were implemented) will have +`device: None`. Downstream consumers must handle `None` as "pre-device-signals +entry" rather than "unknown device." + +### 7A.7 Publisher property metadata (`KvPubProperties`) + +`KvPubProperties` records the publisher domain where the EC entry was created. +Earlier drafts treated `seen_domains` as mutable domain history, but the current +implementation avoids recurring organic-request KV writes. New entries seed only +the creation domain and runtime requests do not append domains. Legacy +map-shaped records with per-domain visit objects are accepted on read and +reserialized as a domain list on future writes. + +```rust +pub struct KvPubProperties { + pub origin_domain: String, + pub seen_domains: BTreeSet, +} ``` -`ts_synced` values: +**Written:** on `KvEntry::new()` / `create_or_revive()` for the creation domain +only. Ordinary returning-user requests do not update this structure. 
-| Value | Meaning | -| ------------------------------------ | ----------------------------- | -| `ts_synced=1` | KV write succeeded | -| `ts_synced=0&ts_reason=no_ec` | No valid EC cookie present | -| `ts_synced=0&ts_reason=no_consent` | Consent absent or denied | -| `ts_synced=0&ts_reason=write_failed` | KV write failed after retries | +**Cap:** `seen_domains` sets are capped at 50 entries (`MAX_SEEN_DOMAINS`) +during validation so old or malformed records cannot grow unbounded. -Rate limit exceeded returns `429 Too Many Requests` directly — the partner's `return` URL is not called in this case. +### 7A.8 Network cluster disambiguation (`KvNetwork`) -### 8.4 Return URL construction +Tracks how many distinct EC entries share the same hash prefix. A high count +indicates a shared network (corporate VPN, campus); a low count indicates an +individual or household. + +```rust +pub struct KvNetwork { + pub cluster_size: Option, +} +``` -Append `ts_synced` (and optional `ts_reason`) to the `return` URL: +**Written:** only by the `/_ts/api/v1/identify` endpoint, never on the organic proxy path. +The prefix-match list API call required to compute `cluster_size` is too +expensive for the hot path. -- If the URL already has a query string, append `&ts_synced=...` -- If not, append `?ts_synced=...` +**Evaluation:** `evaluate_cluster()` on `KvIdentityGraph`: -Do not modify any other query parameters on the `return` URL. 
+- Returns the stored `cluster_size` without a prefix-list call when present +- If `cluster_size` is missing, calls `count_hash_prefix_keys()` with `limit(100)` — a single list-page call +- Writes the computed result to `entry.network` via best-effort CAS +- `cluster_recheck_secs` is retained only as a legacy compatibility setting because no cluster-check timestamp is stored in the EC identity entry + +**Threshold guidance:** + +| Cluster size | Likely scenario | +| ------------ | ----------------------------------------- | +| 1–3 | Individual / household | +| 4–10 | Small shared space (family, small office) | +| 11–50 | Medium office, hotel, coworking | +| 50+ | Corporate VPN, university, campus | + +**Default trust threshold:** entries with `cluster_size <= 10` are treated as +individual users for identity resolution purposes. Configurable per publisher +via `trusted-server.toml`: + +```toml +[ec] +cluster_trust_threshold = 10 # default +# cluster_recheck_secs is legacy compatibility; cluster_size is computed once per entry +``` -### 8.5 Security +### 7A.9 Geo extensions (`KvGeo`) -- `return` URL validated by exact hostname match against `partner.allowed_return_domains`. No subdomain wildcard matching. -- No HMAC signature required on inbound sync request. -- Rate limit: `partner.sync_rate_limit` writes per EC hash per partner per hour. Default: 100. Configurable per partner in `partner_store`. +`KvGeo` is extended with two non-PII network signals available from Fastly's +`geo_lookup()` on the client IP: + +- **`asn: Option`** — Autonomous System Number (e.g. `7922` = Comcast). + Primary signal for distinguishing home ISP vs. corporate VPN. Populated from + `GeoInfo::asn` which reads `fastly::geo::Geo::as_number()`. A value of `0` + from the Fastly API is mapped to `None`. +- **`dma: Option`** — DMA/metro code (e.g. `807` = San Francisco). + Market-level targeting signal; not personal data. Populated from + `GeoInfo::metro_code` when non-zero. 
+ +Both fields are written on initial `KvEntry::new()` from `GeoInfo`. Never +updated after creation — geo is a first-seen signal, not a real-time one. + +### 7A.10 IP address storage policy + +Raw IP addresses are personal data under GDPR (CJEU _Breyer v. Germany_, 2016) +and must not be stored in KV entries. The EC hash already derives from the IP +without persisting it. + +Permitted IP-derived signals (written at creation time): + +- `geo.country` — ISO 3166-1 alpha-2 +- `geo.region` — ISO 3166-2 subdivision +- `geo.asn` — ASN number (network identifier, not personal data) +- `geo.dma` — DMA/metro code (market identifier, not personal data) + +### 7A.11 Privacy rationale + +`ja4_class` (Section 1 only) and `platform_class` are category signals, not +unique device identifiers. They are equivalent in precision to `geo.country` +— they identify a class of client, not an individual. The full JA4 fingerprint +(Sections 2 and 3) is never stored, as it approaches unique device +identification and would require explicit consent basis under GDPR Art. 4(1). --- -## 9. S2S Batch Sync API (`POST /api/v1/sync`) +## 8. Prebid EID Cookie Ingestion + +> **Note:** The pixel sync endpoint (`GET /_ts/api/v1/sync`) has been removed. Partner ID sync from the browser is now handled via the Prebid EID cookie, which is written client-side by the TSJS Prebid integration and ingested server-side in `ec_finalize_response()`. -### 9.1 Module: `ec/sync_batch.rs` +### 8.1 Module: `ec/prebid_eids.rs` ```rust -pub async fn handle_batch_sync( - settings: &Settings, +/// Parses a `ts-eids` cookie value and writes matched partner UIDs to KV. +/// +/// Best-effort: all errors are logged and swallowed so the main request +/// path is never affected. 
+pub fn ingest_prebid_eids( + cookie_value: &str, + ec_id: &str, kv: &KvIdentityGraph, - partner_store: &PartnerStore, + registry: &PartnerRegistry, +); +``` + +### 8.2 Cookie format + +| Attribute | Value | +| ---------- | -------------------------------------------------------------------------------------------- | +| Name | `ts-eids` | +| Format | Base64-encoded (standard RFC 4648) JSON array of OpenRTB-style EIDs (`{source, uids:[...]}`) | +| Max size | JS writer targets 3 KB; backend parser accepts up to 8 KiB raw cookie length | +| Written by | TSJS Prebid integration (client-side JS) | +| Read by | `ec_finalize_response()` (server-side, via `ingest_prebid_eids()`) | + +**Example decoded value:** + +```json +[ + { + "source": "uidapi.com", + "uids": [{ "id": "A4A...", "atype": 3 }] + }, + { + "source": "liveramp.com", + "uids": [{ "id": "LR_xyz", "atype": 3 }] + } +] +``` + +### 8.3 JS side + +The TSJS Prebid integration calls `pbjs.getUserIdsAsEids()` in the `bidsBackHandler` callback after each auction. The returned OpenRTB-style EID array is base64-encoded and written to the `ts-eids` cookie. This runs entirely client-side — no server round-trip is needed for the write. Current writers preserve the full `{source, uids:[...]}` shape; the backend remains backward-compatible with the earlier flattened `{source, id, atype}` payload during rollout. + +### 8.4 Backend side + +`ingest_prebid_eids()` is called from `ec_finalize_response()` on both returning-user and new-EC paths when a `ts-eids` cookie is present and consent is granted. The flow: + +1. Base64-decode the cookie value. +2. JSON-parse into OpenRTB-style `Eid` entries; if that parse fails, fall back to the earlier flattened `{source, id, atype}` payload for backward compatibility. +3. For each EID entry: + a. Look up `registry.find_by_source_domain(&eid.source)`. Skip if no match. + b. Find the first non-empty UID in `eid.uids`. Skip the source if none is present. + c. Skip oversized UID values. + d. 
Call `kv.upsert_partner_id(ec_id, &partner.id, &uid.id)`. The upsert skips the KV write when the stored UID already matches. +4. All errors are logged and swallowed — EID ingestion never blocks the response. + +### 8.5 Source domain matching + +Source domains are matched via `PartnerRegistry.find_by_source_domain()`, which performs a case-insensitive lookup against the `source_domain` field configured on each partner in `[[ec.partners]]`. The registry builds a `by_source_domain` HashMap at startup for O(1) lookups. + +### 8.6 Write suppression + +EC identity entries no longer store per-partner sync timestamps. Instead of a +time-based debounce, `upsert_partner_id()` skips the KV write when the stored UID +already matches the incoming UID. Different UIDs replace the stored value. + +--- + +## 9. S2S Batch Sync API (`POST /_ts/api/v1/batch-sync`) + +### 9.1 Module: `ec/batch_sync.rs` + +```rust +pub fn handle_batch_sync( + kv: &KvIdentityGraph, + registry: &PartnerRegistry, + rate_limiter: &dyn RateLimiter, req: Request, ) -> Result>; ``` ### 9.2 Authentication -`Authorization: Bearer ` header required. Auth flow: +`Authorization: Bearer ` header required. Auth flow: -1. Compute `sha256_hex(api_key)`. -2. Look up `partner_store.find_by_api_key_hash(hash)` — uses the `apikey:{hash}` secondary index (§13.1) for O(1) lookup instead of scanning all partners. -3. If the index returns a partner, verify the partner's stored `api_key_hash` matches the computed hash (constant-time comparison). This guards against stale index entries from key rotation. -4. If no match or verification fails → `401 Unauthorized` with no body processing. -5. If KV lookup fails (store unavailable) → `503 Service Unavailable`. +1. Compute `sha256_hex(api_token)`. +2. Look up `registry.find_by_api_key_hash(hash)` — the `PartnerRegistry` maintains a `by_api_key_hash` HashMap built at startup from `[[ec.partners]]` config for O(1) lookup. +3. If no match → `401 Unauthorized` with no body processing. 
-Key rotation does not require binary redeployment — partners update via `/admin/partners/register`, which handles old API-key index cleanup (§13.1). +Key rotation requires updating the `api_token` in `[[ec.partners]]` TOML and redeploying. ### 9.2.1 API-key rate limiting -After successful auth, check the API-key level rate limit: `partner.batch_rate_limit` requests per partner per minute (default 60). Uses the same Fastly rate-limiting API as pixel sync (§14.3), with key `batch:{partner_id}`. +After successful auth, check the API-key level rate limit: `partner.batch_rate_limit` requests per partner per minute (default 60). Uses Fastly's Edge Rate Limiting API (§14.3), with key `batch:{partner_id}`. Exceeded → `429 Too Many Requests` with body `{ "error": "rate_limit_exceeded" }`. No mappings are processed. ### 9.3 Request format ``` -POST /api/v1/sync +POST /_ts/api/v1/batch-sync Content-Type: application/json Authorization: Bearer { "mappings": [ { - "ec_hash": "<64-character hex hash>", + "ec_id": "", "partner_uid": "abc123", "timestamp": 1741824000 } @@ -846,13 +1304,13 @@ Maximum batch size: 1000 mappings. Requests exceeding this receive `400 Bad Requ ### 9.4 Processing -The authenticated partner's ID (from the `PartnerRecord` resolved via API key in §9.2) determines the `ids[partner_id]` namespace for all writes in this batch. A partner can only write to their own namespace. +The authenticated partner's ID (from the `PartnerConfig` resolved via API key hash in §9.2) determines the `ids[partner_id]` namespace for all writes in this batch. A partner can only write to their own namespace. For each mapping: -1. Validate `ec_hash` format (must be exactly 64 lowercase hex characters). Invalid format → reject with `reason: "invalid_ec_hash"`. -2. Read KV metadata for `ec_hash`. If not found → reject with `reason: "ec_hash_not_found"`. If `consent.ok = false` → reject with `reason: "consent_withdrawn"`. -3. 
`kv.upsert_partner_id(ec_hash, partner_id, partner_uid, timestamp)`. The upsert internally skips the write if the existing `ids[partner_id].synced ≥ timestamp` (idempotent — counted as accepted, no error). On KV failure → reject all remaining mappings with `reason: "kv_unavailable"`, return `207`. +1. Validate `ec_id` format (must match `{64-hex}.{6-alnum}` pattern). Invalid format → reject with `reason: "invalid_ec_id"`. +2. Read KV metadata for `ec_id`. If not found → reject with `reason: "ec_id_not_found"`. If `consent.ok = false` → reject with `reason: "consent_withdrawn"`. +3. `kv.upsert_partner_id_if_exists(ec_id, partner_id, partner_uid)`. Mapping `timestamp` is retained for API compatibility but is not used for ordering. The upsert skips the write if the existing UID already matches (counted as accepted). A different UID overwrites the stored value. On KV failure → reject all remaining mappings with `reason: "kv_unavailable"`, return `207`. ### 9.5 Response format @@ -861,7 +1319,7 @@ For each mapping: "accepted": 998, "rejected": 2, "errors": [ - { "index": 45, "reason": "ec_hash_not_found" }, + { "index": 45, "reason": "ec_id_not_found" }, { "index": 72, "reason": "consent_withdrawn" } ] } @@ -875,7 +1333,6 @@ HTTP status rules: | Some accepted, some rejected | `207 Multi-Status` | | All rejected (auth valid, batch valid) | `207 Multi-Status` with `accepted: 0` | | Auth invalid | `401 Unauthorized` | -| Auth KV lookup failed (store down) | `503 Service Unavailable` | | Malformed JSON or > 1000 mappings | `400 Bad Request` | | KV entirely unavailable | `207 Multi-Status`, all rejected with `kv_unavailable` | @@ -893,10 +1350,10 @@ pub struct BatchSyncError { #[derive(Debug, derive_more::Display)] pub enum BatchSyncRejection { - #[display("invalid_ec_hash")] - InvalidEcHash, - #[display("ec_hash_not_found")] - EcHashNotFound, + #[display("invalid_ec_id")] + InvalidEcId, + #[display("ec_id_not_found")] + EcIdNotFound, #[display("consent_withdrawn")] 
ConsentWithdrawn, #[display("kv_unavailable")] @@ -932,7 +1389,7 @@ impl PullSyncDispatcher { &self, ec_context: &EcContext, client_ip: IpAddr, - partners: &[PartnerRecord], + partners: &[&PartnerConfig], kv: &KvIdentityGraph, ); } @@ -940,9 +1397,9 @@ impl PullSyncDispatcher { /// Fires a single partner pull request via `send_async()`, waits for the /// response via `PendingRequest::wait()`, and writes the result to KV. fn pull_one_partner( - ec_hash: &str, + ec_id: &str, ip: IpAddr, - partner: &PartnerRecord, + partner: &PartnerConfig, kv: &KvIdentityGraph, ); ``` @@ -951,15 +1408,17 @@ fn pull_one_partner( A pull sync is dispatched for a partner when all of the following are true on a request: -1. The request was routed to an **organic handler** (`handle_publisher_request` or `integration_registry.handle_proxy`). Pull sync never fires on EC route handlers (`/sync`, `/identify`, `/api/v1/sync`, `/admin/*`) or `/auction`. This matches the PRD requirement that pull calls must not happen during the pixel sync flow. +1. The request was routed to an **organic handler** (`handle_publisher_request` or `integration_registry.handle_proxy`). Pull sync never fires on EC route handlers (`/_ts/api/v1/identify`, `/_ts/api/v1/batch-sync`) or `/auction`. 2. A valid EC is present (`ec_context.ec_hash().is_some()`). This includes an EC newly generated on the current organic request — pull sync may run immediately after first-page EC creation because the response cookie is flushed before the background dispatch starts. 3. `allows_ec_creation(&ec_context.consent) == true` 4. `partner.pull_sync_enabled == true` -5. Either: no entry exists for this partner in the KV graph, or the existing `synced` timestamp is older than `partner.pull_sync_ttl_sec` (default 86400 seconds) -6. Rate limit not exceeded: `partner.pull_sync_rate_limit` calls per EC hash per partner per hour (default 10) +5. The partner UID is missing from the KV graph. 
If `ids[partner_id]` is already present, pull sync is skipped. +6. Rate limit not exceeded: `partner.pull_sync_rate_limit` calls per EC ID per partner per hour (default 10) + +`partner.pull_sync_ttl_sec` is retained for configuration compatibility, but is not used by the current fill-missing-only behavior because EC entries no longer store per-partner sync timestamps. ### 10.3 Execution model @@ -967,18 +1426,18 @@ Pull calls are dispatched using Fastly's background task / `send_async` model af Maximum concurrent pull calls per request: `settings.ec.pull_sync_concurrency` (default 3). -**Architectural divergence from PRD:** The PRD describes excess partner calls being queued and dispatched on subsequent requests for the same user. A persistent queue is not implementable in the stateless Fastly WASM edge environment — there is no cross-request mutable state. This spec adapts the intent using a stateless rotating offset: sort qualifying partners by ID, then use `(unix_timestamp_secs / 3600) % partner_count` as the starting index (wrapping). This ensures different partners are prioritized across different requests without persisted state. Partners not called on a given request remain eligible on the next qualifying request per their `pull_sync_ttl_sec` condition. The practical outcome (all partners eventually called) matches the PRD intent; the mechanism differs due to the platform constraint. +**Architectural divergence from PRD:** The PRD describes excess partner calls being queued and dispatched on subsequent requests for the same user. A persistent queue is not implementable in the stateless Fastly WASM edge environment — there is no cross-request mutable state. This spec adapts the intent using a stateless rotating offset: sort qualifying partners by ID, then use `(unix_timestamp_secs / 3600) % partner_count` as the starting index (wrapping). This ensures different missing partners are prioritized across requests without persisted queue state. 
Once a partner UID is stored, that partner is no longer eligible for pull sync under the current fill-missing-only behavior. ### 10.4 Outbound request ``` -GET {partner.pull_sync_url}?ec_hash={64-char-hex}&ip={ip_address} +GET {partner.pull_sync_url}?ec_id={64-hex}.{6-alnum} Authorization: Bearer {partner.ts_pull_token} ``` -Before dispatching, `pull_sync.rs` validates that `pull_sync_url`'s hostname is present in `partner.pull_sync_allowed_domains`. If not, the call is skipped and an `error` is logged — this is a configuration error that should not occur at runtime if admin validation is working correctly (§13.2 step 3). +Before dispatching, `pull_sync.rs` validates that `pull_sync_url`'s hostname is present in `partner.pull_sync_allowed_domains`. If not, the call is skipped and an `error` is logged — this is a configuration error that should not occur at runtime if startup validation in `PartnerRegistry::from_config()` is working correctly. -Only the EC hash and IP are sent. No consent strings, geo data, or other partner IDs are included. +Only the full EC ID is sent. No client IP, consent strings, geo data, or other partner IDs are included. **Expected partner responses:** @@ -993,69 +1452,97 @@ Any other non-200 response is treated as a transient failure. No retry. The next ### 10.5 KV write on success -On a non-null `uid`: call `kv.upsert_partner_id(ec_hash, partner_id, uid, now())`. If the root entry is missing, the upsert creates a minimal live entry first (same recovery path as `/sync`). On KV failure: log `warn` and discard the result. Retry occurs on the next qualifying request. - -The write updates `ids[partner_id].synced` to the current timestamp, resetting the `pull_sync_ttl_sec` window. +On a non-null `uid`: call `kv.upsert_partner_id(ec_id, partner_id, uid)`. If the root entry is missing, the upsert creates a minimal live entry first. If the same UID is already stored, the upsert skips the KV write. On KV failure: log `warn` and discard the result. 
Retry occurs on the next qualifying request while the partner UID remains missing. --- -## 11. Identity Resolution Endpoint (`GET /identify`) +## 11. Identity Resolution Endpoint (`GET /_ts/api/v1/identify`) ### 11.1 Module: `ec/identify.rs` ```rust -pub async fn handle_identify( +pub fn handle_identify( settings: &Settings, kv: &KvIdentityGraph, - partner_store: &PartnerStore, + registry: &PartnerRegistry, req: &Request, ec_context: &EcContext, ) -> Result>; ``` -### 11.2 Call patterns +### 11.2 Authentication + +**Bearer token required.** The `Authorization: Bearer ` header identifies the requesting partner. Auth flow: + +1. Parse the Bearer token from the `Authorization` header. +2. Compute `sha256_hex(api_token)`. +3. Look up `registry.find_by_api_key_hash(hash)` — O(1) in-memory lookup. +4. If no match → `401 Unauthorized` with `{ "error": "invalid_token" }`. -**Browser-direct:** The browser sends the request to `ec.publisher.com/identify`. Cookies and consent cookies are sent automatically (same-site). No special header forwarding required. +The authenticated partner determines which UID is returned — each partner sees only their own synced UID for the given EC, not all partners' UIDs. -**Server-side proxy (for use case 2):** The publisher's origin server must forward: +### 11.2.1 Call patterns + +**Browser-direct:** The browser sends the request to `ec.publisher.com/_ts/api/v1/identify` with the partner's API token in the `Authorization` header. Cookies (including `ts-ec` and consent cookies) are sent automatically (same-site). 
+ +**Server-side proxy:** The publisher's origin server must forward: | Header | Required | | --------------------------------------------------------- | -------------------------------------- | +| `Authorization: Bearer ` | Yes | | `Cookie: ts-ec=` or `X-ts-ec: ` | Yes | | `Cookie: euconsent-v2=` or `Cookie: __gpp=` | Yes for EU/UK/US users | | `X-consent-advertising: ` | Optional — takes precedence if present | ### 11.3 EC and consent handling -`/identify` follows `EcContext` retrieval priority (Section 4.2). It does **not** -generate a new EC, and the handler itself does not write cookies. However, -`ec_finalize_response()` still runs after the handler: on consent withdrawal it -deletes the EC cookie, and on header/cookie mismatch it may reconcile the cookie -to the authoritative header-derived EC. +`/_ts/api/v1/identify` follows `EcContext` retrieval priority (Section 4.2). It does **not** +generate a new EC, and the handler itself does not write cookies. After the +handler, `ec_finalize_response()` may still delete the EC cookie on consent +withdrawal. Ordinary returning-user responses set the `x-ts-ec` header only; +they do not refresh or repair the browser cookie. Consent is evaluated using the same logic as Section 6. ### 11.4 Response -**`200 OK` — EC present, consent granted:** +**`401 Unauthorized` — missing or invalid Bearer token:** + +```json +{ "error": "invalid_token" } +``` + +This is checked first, before consent or EC presence. 
+ +**`200 OK` — EC present, consent granted, partner UID resolved:** ```json { "ec": "a1b2c3...AbC123", "consent": "ok", "degraded": false, - "uids": { - "uid2": "A4A...", - "liveramp": "LR_xyz" - }, - "eids": [ - { "source": "uidapi.com", "uids": [{ "id": "A4A...", "atype": 3 }] }, - { "source": "liveramp.com", "uids": [{ "id": "LR_xyz", "atype": 3 }] } - ] + "partner_id": "liveramp", + "uid": "LR_xyz", + "eid": { "source": "liveramp.com", "uids": [{ "id": "LR_xyz", "atype": 3 }] }, + "cluster_size": 2 } ``` -`uids` contains one key per partner with `bidstream_enabled: true` and a resolved UID in the KV graph. Partners with no resolved UID for this user are omitted. +The response is scoped to the requesting partner only. `partner_id` identifies which partner was authenticated. `uid` is the partner's resolved UID for this EC. `eid` is the OpenRTB 2.6 EID object for this partner. `cluster_size` is included when the network cluster has been evaluated (see §7A.8); absent when not yet evaluated. + +**`200 OK` — EC present, consent granted, no UID for this partner:** + +```json +{ + "ec": "a1b2c3...AbC123", + "consent": "ok", + "degraded": false, + "partner_id": "liveramp", + "cluster_size": null +} +``` + +`uid` and `eid` are omitted when the partner has no synced UID for this EC. **`200 OK` — KV unavailable (degraded):** @@ -1064,8 +1551,8 @@ Consent is evaluated using the same logic as Section 6. "ec": "a1b2c3...AbC123", "consent": "ok", "degraded": true, - "uids": {}, - "eids": [] + "partner_id": "liveramp", + "cluster_size": null } ``` @@ -1078,8 +1565,8 @@ This case occurs by design when `create_or_revive()` fails on EC generation (bes "ec": "a1b2c3...AbC123", "consent": "ok", "degraded": false, - "uids": {}, - "eids": [] + "partner_id": "liveramp", + "cluster_size": null } ``` @@ -1091,7 +1578,7 @@ Note: `degraded` is `false` because the KV read succeeded (it returned `None`, m { "consent": "denied" } ``` -Consent is evaluated **before** EC presence. 
If `!allows_ec_creation(&consent)`, return `403` immediately — do not fall through to the `204` branch. This ensures consent denial is always surfaced, even for users with no EC. +Consent is evaluated **after** auth but **before** EC presence. If `!allows_ec_creation(&consent)`, return `403` immediately — do not fall through to the `204` branch. This ensures consent denial is always surfaced, even for users with no EC. **`204 No Content` — no EC present, consent not denied.** No body. @@ -1099,18 +1586,15 @@ Consent is evaluated **before** EC presence. If `!allows_ec_creation(&consent)`, Set on `200` responses only: -| Header | Value | -| ------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `X-ts-ec` | `{ec_hash.suffix}` | -| `X-ts-eids` | Standard base64 (RFC 4648, with `=` padding) of the JSON array of OpenRTB 2.6 `user.eids` objects. Capped at **4 KB** after encoding. If the encoded value exceeds 4 KB, the array is truncated (fewest partners first — highest `synced` timestamp retained) until it fits, and a `x-ts-eids-truncated: true` header is added. | -| `X-ts-` | Resolved UID per partner (e.g., `X-ts-uid2`). One header per partner with a resolved UID. **Capped at 20 partners** — partners sorted by most-recently synced; excess partners are omitted silently. | -| `X-ts-ec-consent` | `ok` (always — denied consent returns `403`, not `200`) | +| Header | Value | +| --------- | --------------------------------- | +| `X-ts-ec` | `{64-hex}.{6-alnum}` — full EC ID | -These are supplementary — callers should read the JSON body as the primary contract. 
The 4 KB cap on `X-ts-eids` and the 20-partner cap on `X-ts-` headers reflect typical proxy and browser total-header-budget constraints. Both caps apply independently. +The JSON body is the primary contract. The `X-ts-ec` header is supplementary for proxy-layer consumers. ### 11.6 Performance target -`/identify` must respond within 30ms (excluding network latency) when EC is present and KV read succeeds. This requires the KV read to be on the fast path with no retries. +`/_ts/api/v1/identify` must respond within 30ms (excluding network latency) when EC is present and KV read succeeds. This requires the KV read to be on the fast path with no retries. CORS headers must be set to allow browser-direct calls from the publisher's page. The `Access-Control-Allow-Origin` header is dynamically reflected from the `Origin` request header if the origin is an exact match or a subdomain of `settings.publisher.domain`: @@ -1122,20 +1606,18 @@ CORS headers must be set to allow browser-direct calls from the publisher's page Access-Control-Allow-Origin: Access-Control-Allow-Credentials: true Access-Control-Allow-Methods: GET, OPTIONS -Access-Control-Allow-Headers: Cookie, X-ts-ec, X-consent-advertising -Access-Control-Expose-Headers: X-ts-ec, X-ts-eids, X-ts-ec-consent, X-ts-eids-truncated, +Access-Control-Allow-Headers: Authorization, X-ts-ec +Access-Control-Max-Age: 600 Vary: Origin ``` -**`Access-Control-Expose-Headers` note:** The dynamic `X-ts-` headers must be enumerated per-response, not as a static constant. The handler builds the expose list by iterating the partner IDs that have resolved UIDs in the response. `x-ts-eids-truncated` is always included in the expose list (browser JS should be able to detect truncation even when it occurs). - **Origin validation logic:** CORS headers are only relevant when the `Origin` request header is present (browser requests always send it; server-side proxy calls typically do not). -- **No `Origin` header present:** Process normally. 
No CORS headers added. No `403`. This is the server-side proxy path from §11.2.1 — origin-server calls forwarding `Cookie`, consent headers, and `Authorization`. - **`Origin` header present, hostname matches `publisher.domain` or ends with `.{publisher.domain}` and scheme is `https`:** Reflect origin in `Access-Control-Allow-Origin`. Add `Vary: Origin`. - **`Origin` header present but does not match:** Return `403`. No body. -Browser `fetch()` with `credentials: "include"` sends an `OPTIONS` preflight. The router handles `OPTIONS /identify` identically — returns `200 OK` with the CORS headers above and no body. +Browser `fetch()` with `credentials: "include"` sends an `OPTIONS` preflight. The router handles `OPTIONS /_ts/api/v1/identify` identically — returns `200 OK` with the CORS headers above and no body. --- @@ -1176,7 +1658,7 @@ let (user_id, eids) = match ec_context.ec_hash() { Some(hash) => { let kv_entry = kv.get(hash).ok().flatten(); let eids = match kv_entry { - Some((entry, _gen)) => build_eids_from_kv(&entry, partner_store), + Some((entry, _gen)) => build_eids_from_kv(&entry, &registry), None => vec![], // KV read failed or no entry — degrade gracefully }; (ec_context.ec_value.clone(), eids) @@ -1224,7 +1706,7 @@ The current `/auction` path returns a JSON response inline to the JS caller (`en | Header | Value | | --------------------- | ------------------------------------------------------------------------------------------------------------------ | -| `X-ts-ec` | `{ec_hash.suffix}` — when EC is present | +| `X-ts-ec` | `{64-hex}.{6-alnum}` — full EC ID, when EC is present | | `X-ts-eids` | Standard base64 (RFC 4648) of OpenRTB 2.6 `user.eids` JSON array. Capped at 4 KB — same truncation rules as §11.5.
| | `X-ts-eids-truncated` | `true` — present only when `X-ts-eids` was truncated | | `X-ts-ec-consent` | `ok` — only present when consent granted; on withdrawal `ec_finalize_response()` strips all EC headers | @@ -1233,200 +1715,221 @@ The current `/auction` path returns a JSON response inline to the JS caller (`en --- -## 13. Partner Registry and Admin Endpoint +## 13. Partner Registry (Config-Based) + +### 13.1 Overview + +Partners are defined in `[[ec.partners]]` TOML configuration and loaded into an in-memory `PartnerRegistry` at startup. There is no KV-backed partner store and no admin registration endpoint. Partner changes require a config update and redeployment. + +### 13.2 Module: `ec/partner.rs` + +Contains only validation helpers and API key hashing. The full partner data model and registry live in `ec/registry.rs`. + +```rust +/// Validates a partner ID format and checks against reserved names. +/// +/// # Errors +/// +/// Returns a descriptive error string on validation failure. +pub fn validate_partner_id(id: &str) -> Result<(), String>; +// Must match `^[a-z0-9_-]{1,32}$`. Reserved names rejected: +// `ec`, `eids`, `ec-consent`, `eids-truncated`, `synthetic`, `ts`, `version`, `env`. -### 13.1 Module: `ec/partner.rs` +/// Computes the SHA-256 hex digest of an API key. +pub fn hash_api_key(api_key: &str) -> String; +``` + +### 13.3 Module: `ec/registry.rs` ```rust -pub struct PartnerRecord { - /// Partner identifier. Must match `^[a-z0-9_-]{1,32}$` (lowercase, no spaces). - /// Used to build `X-ts-` response headers — header-safety is required. - /// Reserved names that would collide with existing managed headers are rejected - /// at registration: `ec`, `eids`, `ec-consent`, `eids-truncated`, `synthetic`, `ts`, `version`, `env`. +/// Runtime-ready partner configuration with precomputed API key hash. 
+#[derive(Debug, Clone)] +pub struct PartnerConfig { pub id: String, pub name: String, - pub allowed_return_domains: Vec, - pub api_key_hash: String, // SHA-256 hex of the partner's API key + pub source_domain: String, + pub openrtb_atype: u8, pub bidstream_enabled: bool, - pub source_domain: String, // OpenRTB source (e.g., "liveramp.com") - pub openrtb_atype: u8, // typically 3 - pub sync_rate_limit: u32, // per EC hash per partner per hour - pub batch_rate_limit: u32, // API-key level: requests per partner per minute (default 60) + pub api_key_hash: String, // SHA-256 hex, precomputed at startup + pub batch_rate_limit: u32, // requests per partner per minute (default 60) pub pull_sync_enabled: bool, - pub pull_sync_url: Option, // required when pull_sync_enabled; validated at registration - pub pull_sync_allowed_domains: Vec, // allowlist of domains TS may call for this partner - pub pull_sync_ttl_sec: u64, // default 86400 - pub pull_sync_rate_limit: u32, // default 10 - pub ts_pull_token: Option, // required when pull_sync_enabled; outbound bearer token -} - -pub struct PartnerStore { - store_name: String, + pub pull_sync_url: Option, + pub pull_sync_allowed_domains: Vec, + pub pull_sync_ttl_sec: u64, // default 86400 + pub pull_sync_rate_limit: u32, // default 10 + pub ts_pull_token: Option, // outbound bearer token for pull sync } -impl PartnerStore { - pub fn new(store_name: impl Into) -> Self; - - /// Looks up a partner by ID. Returns `None` if not found. - pub fn get(&self, partner_id: &str) -> Result, Report>; - - /// Verifies an API key against the stored hash for a given partner. - /// Uses constant-time comparison. - pub fn verify_api_key(&self, partner_id: &str, api_key: &str) -> bool; - - /// Writes or updates a partner record. - /// Returns `true` if this was a new partner (create), `false` if an existing - /// partner was updated. The pre-read needed for index maintenance (old API key - /// deletion) also determines this. 
- pub fn upsert(&self, record: &PartnerRecord) -> Result>; - - /// Looks up the partner owning a given API key hash (for batch sync auth). - /// Uses the `apikey:{hash}` secondary index for O(1) lookup, then verifies the - /// stored `api_key_hash` matches (guards against stale index from key rotation). - pub fn find_by_api_key_hash(&self, hash: &str) -> Result, Report>; - - /// Returns all partner records with `pull_sync_enabled == true`. - /// Used by the pull sync dispatcher after each organic request. Implementations - /// must re-check `pull_sync_enabled` on the fetched record before returning it, - /// because the `_pull_enabled` secondary index is best-effort and may be stale. - pub fn pull_enabled_partners(&self) -> Result, Report>; +/// In-memory partner registry with O(1) lookups by ID, API key hash, +/// and source domain. +/// +/// Built once at startup from `[[ec.partners]]` in `trusted-server.toml`. +/// All validation happens during construction. +pub struct PartnerRegistry { + by_id: HashMap, + by_api_key_hash: HashMap, + by_source_domain: HashMap, } -``` -**Storage layout:** Partner records are stored as JSON values in `partner_store` KV, keyed by `partner_id`. Two operations require access patterns beyond single-key lookup: +impl PartnerRegistry { + /// Builds a registry from the config-defined partner list. + /// + /// # Errors + /// + /// Returns `TrustedServerError::Configuration` if any partner has an + /// invalid ID, duplicate ID, duplicate API token hash, duplicate source + /// domain, or invalid pull sync configuration. + pub fn from_config(partners: &[EcPartner]) -> Result>; -1. **`find_by_api_key_hash(hash)`** — batch sync auth needs to find the partner owning a given API key hash. Implementation: maintain a secondary index entry `apikey:{sha256_hex} → partner_id` in the same KV store. Written on `upsert()`, looked up on batch auth. 
**On key rotation:** `upsert()` must read the existing record first, and if the `api_key_hash` has changed, delete the old `apikey:{old_hash}` index entry before writing the new one. This prevents old API keys from remaining valid after rotation. + /// Returns an empty registry (no partners configured). + pub fn empty() -> Self; -2. **`pull_enabled_partners()`** — pull sync needs all partners with `pull_sync_enabled == true`. Implementation: maintain an index entry `_pull_enabled → [partner_id_1, partner_id_2, ...]` (JSON array of partner IDs) in the same KV store. Updated on `upsert()` when `pull_sync_enabled` changes. The dispatcher reads this list, then does individual `get()` calls for each partner record. This bounds the number of KV reads to `1 + pull_partner_count` per organic request. + /// Looks up a partner by ID. + pub fn get(&self, partner_id: &str) -> Option<&PartnerConfig>; -**Consistency model:** These index writes are **best-effort, not atomic** — Fastly KV does not support multi-key transactions. `upsert()` writes in order: (1) primary record, (2) old API-key index deletion (if key changed), (3) new API-key index, (4) `_pull_enabled` list. If the process fails mid-sequence, indexes may be stale. All readers handle this defensively: + /// Looks up a partner by the SHA-256 hex hash of their API token. + pub fn find_by_api_key_hash(&self, hash: &str) -> Option<&PartnerConfig>; -- `find_by_api_key_hash()`: if the index points to a partner whose stored `api_key_hash` does not match the lookup hash, treat as auth failure (stale index from a rotation). -- `pull_enabled_partners()`: if a listed partner ID returns `None` from `get()`, skip it silently. If the fetched record has `pull_sync_enabled == false`, also skip it silently — that is a stale `_pull_enabled` index entry. -- The `_pull_enabled` list is vulnerable to lost updates under concurrent registrations. This is accepted — partner registration is a low-frequency admin operation (not a hot path). 
If lost updates become an issue, a CAS-based read-modify-write can be added later. + /// Looks up a partner by their `source_domain` (case-insensitive). + /// Used by Prebid EID ingestion to match EID sources to partners. + pub fn find_by_source_domain(&self, domain: &str) -> Option<&PartnerConfig>; -### 13.2 Admin endpoint (`POST /admin/partners/register`) + /// Returns all partners with `pull_sync_enabled = true`. + pub fn pull_enabled_partners(&self) -> Vec<&PartnerConfig>; -**Module:** `ec/admin.rs` + /// Returns an iterator over all configured partners. + pub fn all(&self) -> impl Iterator; -> **Codebase invariant — requires test update:** `Settings::ADMIN_ENDPOINTS` in `settings.rs` lists routes that must be covered by a `[[handlers]]` Basic Auth entry. The existing test at `settings.rs:1504-1530` scans `main.rs` for **every** `/admin/` route string and asserts it appears in `ADMIN_ENDPOINTS`. When `/admin/partners/register` is added to `main.rs`, this test will fail. -> -> **Required changes:** -> -> 1. Do **NOT** add `/admin/partners/register` to `ADMIN_ENDPOINTS` — it uses bearer-token-in-handler auth. -> 2. Update the admin-route-scan test (`settings.rs:1504-1530`) to maintain an exclusion list of bearer-token-authed admin routes (e.g., `const BEARER_AUTH_ADMIN_ROUTES: &[&str] = &["/admin/partners/register"]`) and skip those when asserting `ADMIN_ENDPOINTS` coverage. -> 3. Narrow the `[[handlers]]` pattern in `trusted-server.toml` from `"^/admin"` to `"^/admin/keys"` so that `/admin/partners/register` is not intercepted by `enforce_basic_auth()` before reaching its bearer-token handler. + /// Returns the number of configured partners. + pub fn len(&self) -> usize; -```rust -pub async fn handle_register_partner( - settings: &Settings, - partner_store: &PartnerStore, - req: Request, -) -> Result>; + /// Returns true if no partners are configured. 
+ pub fn is_empty(&self) -> bool; +} ``` -Authentication: `Authorization: Bearer ` header, validated inside the handler against `settings.ec.admin_token_hash` (SHA-256 constant-time comparison). This is a publisher-level admin credential — separate from partner API keys, and enforced in-handler (not via `[[handlers]]` Basic Auth). Returns `401 Unauthorized` with no body if the token is missing or invalid. - -**Request:** +### 13.4 TOML configuration -``` -POST /admin/partners/register -Authorization: Bearer -Content-Type: application/json +Partners are defined in `trusted-server.toml` as `[[ec.partners]]` array entries: -{ - "id": "ssp_x", - "name": "SSP Example", - "allowed_return_domains": ["sync.example-ssp.com"], - "api_key": "raw_key_to_hash_and_store", - "bidstream_enabled": true, - "source_domain": "example-ssp.com", - "openrtb_atype": 3, - "sync_rate_limit": 100, - "batch_rate_limit": 60, - "pull_sync_enabled": false, - "pull_sync_url": null, - "pull_sync_allowed_domains": [], - "pull_sync_ttl_sec": 86400, - "pull_sync_rate_limit": 10, - "ts_pull_token": null -} +```toml +[[ec.partners]] +id = "liveramp" +name = "LiveRamp ATS" +source_domain = "liveramp.com" +openrtb_atype = 3 +bidstream_enabled = true +api_token = "partner-api-token-here" +batch_rate_limit = 60 +pull_sync_enabled = true +pull_sync_url = "https://api.liveramp.com/resolve" +pull_sync_allowed_domains = ["api.liveramp.com"] +pull_sync_ttl_sec = 86400 +pull_sync_rate_limit = 10 +ts_pull_token = "outbound-bearer-token" + +[[ec.partners]] +id = "uid2" +name = "UID 2.0" +source_domain = "uidapi.com" +openrtb_atype = 3 +bidstream_enabled = true +api_token = "uid2-api-token" +batch_rate_limit = 60 ``` -**Processing:** +### 13.5 Startup validation -1. Validate `Authorization: Bearer `: SHA-256 hash the token and compare against `settings.ec.admin_token_hash` using constant-time comparison. `401` if missing or invalid. -2. 
Validate required fields (`id`, `name`, `allowed_return_domains`, `api_key`, `source_domain`). `400` on failure. - Validate `id` format: must match `^[a-z0-9_-]{1,32}$`. Must not be a reserved name - (`ec`, `eids`, `ec-consent`, `eids-truncated`, `synthetic`, `ts`, `version`, `env`). `400` with descriptive message on failure. -3. If `pull_sync_enabled == true`, validate that both `pull_sync_url` and `ts_pull_token` are present and non-empty. `400` with `"pull_sync_url and ts_pull_token are required when pull_sync_enabled is true"` if either is missing. - If `pull_sync_url` is set, validate that its hostname is present in `pull_sync_allowed_domains`. `400` on failure with `"pull_sync_url domain must be in pull_sync_allowed_domains"`. This prevents TS from being directed to call arbitrary URLs — the allowlist must be declared in the same registration payload. -4. Hash `api_key` with SHA-256 before writing — never store plaintext. -5. `let created = partner_store.upsert(record)?`. `503` on KV failure. - `upsert()` returns `true` for a new partner, `false` for an update. -6. Return `201 Created` if new partner (`created == true`), or `200 OK` if update - (`created == false`). Use an explicit response DTO — do NOT serialize the full - `PartnerRecord` (which contains `api_key_hash` and `ts_pull_token`). +`PartnerRegistry::from_config()` validates during construction: -**Response:** +1. Each partner ID matches `^[a-z0-9_-]{1,32}$` and is not reserved. +2. No duplicate partner IDs. +3. No duplicate API token hashes (collision detection). +4. No duplicate source domains. +5. Rate limits are within valid bounds. +6. If `pull_sync_enabled`, both `pull_sync_url` and `ts_pull_token` must be present. +7. If `pull_sync_url` is set, its hostname must be in `pull_sync_allowed_domains`. 
-```json -{ - "id": "ssp_x", - "name": "SSP Example", - "pull_sync_enabled": false, - "bidstream_enabled": true, - "created": true -} -``` - -The response confirms the registration succeeded and echoes key fields. `api_key_hash`, `ts_pull_token`, and `api_key` are never returned. `PartnerRecord` does not have a `registered_at` field — use the `created` boolean to signal first registration vs. upsert update. +Any validation failure causes a startup error (`TrustedServerError::Configuration`). --- ## 14. Configuration -### 14.1 New `EdgeCookie` settings struct +### 14.1 `Ec` settings struct Added to `crates/trusted-server-core/src/settings.rs`: ```rust #[derive(Debug, Clone, Deserialize, Serialize, Validate)] -pub struct EdgeCookie { +pub struct Ec { /// Publisher passphrase used as HMAC key for EC generation. /// Must be identical across all of the publisher's owned domains. /// Publishers sharing this value with partners form an identity-federated consortium. - #[validate(custom(function = EdgeCookie::validate_passphrase))] - pub passphrase: String, + #[validate(custom(function = Ec::validate_passphrase))] + pub passphrase: Redacted, /// Fastly KV store name for the EC identity graph. - #[validate(length(min = 1))] - pub ec_store: String, - - /// Fastly KV store name for the partner registry. - #[validate(length(min = 1))] - pub partner_store: String, - - /// SHA-256 hex of the publisher admin token for `POST /admin/partners/register`. - /// The plaintext token is provided in the `Authorization: Bearer` header; - /// it is never stored in plaintext. - #[validate(custom(function = EdgeCookie::validate_sha256_hex))] - pub admin_token_hash: String, + #[serde(default)] + pub ec_store: Option, /// Maximum concurrent pull sync calls dispatched per request. 
- #[validate(range(min = 1))] - #[serde(default = "EdgeCookie::default_pull_sync_concurrency")] + #[serde(default = "Ec::default_pull_sync_concurrency")] pub pull_sync_concurrency: usize, + + /// Network cluster trust threshold. Entries with `cluster_size <= threshold` + /// are treated as individual users for identity resolution purposes. + /// B2B publishers should raise this to 50+ for office-heavy audiences. + #[serde(default = "Ec::default_cluster_trust_threshold")] + pub cluster_trust_threshold: u32, + + /// Seconds between cluster size re-evaluations per entry. + /// Avoids repeated list-prefix API calls on every /identify request. + #[serde(default = "Ec::default_cluster_recheck_secs")] + pub cluster_recheck_secs: u64, + + /// Partners (SSPs, DSPs, identity vendors) for EC identity sync. + #[serde(default)] + pub partners: Vec, } -impl EdgeCookie { +impl Ec { fn validate_passphrase(passphrase: &str) -> Result<(), ValidationError>; - // Rejects "passphrase" or empty string as placeholder. - - fn validate_sha256_hex(value: &str) -> Result<(), ValidationError>; - // Requires exactly 64 lowercase hex characters. + // Rejects known placeholder values as non-production passphrases. 
fn default_pull_sync_concurrency() -> usize { 3 } + fn default_cluster_trust_threshold() -> u32 { 10 } + fn default_cluster_recheck_secs() -> u64 { 3600 } +} +``` + +The `EcPartner` struct (see §13.4 for TOML format): + +```rust +#[derive(Debug, Clone, Deserialize, Serialize)] +pub struct EcPartner { + pub id: String, + pub name: String, + pub source_domain: String, + #[serde(default = "EcPartner::default_openrtb_atype")] + pub openrtb_atype: u8, // default 3 + #[serde(default)] + pub bidstream_enabled: bool, + pub api_token: Redacted, // hashed at startup + #[serde(default = "EcPartner::default_batch_rate_limit")] + pub batch_rate_limit: u32, // default 60 + #[serde(default)] + pub pull_sync_enabled: bool, + #[serde(default)] + pub pull_sync_url: Option, + #[serde(default)] + pub pull_sync_allowed_domains: Vec, + #[serde(default = "EcPartner::default_pull_sync_ttl_sec")] + pub pull_sync_ttl_sec: u64, // default 86400 + #[serde(default = "EcPartner::default_pull_sync_rate_limit")] + pub pull_sync_rate_limit: u32, // default 10 + #[serde(default)] + pub ts_pull_token: Option>, } ``` @@ -1436,11 +1939,11 @@ Added to `Settings`: pub struct Settings { // ... existing fields ... #[validate(nested)] - pub ec: EdgeCookie, // Required — omitting [ec] is a startup error + pub ec: Ec, // Required — omitting [ec] is a startup error } ``` -`EdgeCookie` does not derive `Default` — omitting the `[ec]` section from TOML is a deserialization error at startup. This is intentional: `passphrase`, `ec_store`, `partner_store`, and `admin_token_hash` have no safe defaults. The `#[validate(nested)]` attribute ensures `EdgeCookie::validate_passphrase()` runs when `settings.validate()` is called at startup (`settings_data.rs:28`), matching the pattern used by `Publisher` and `Rewrite` in the existing `Settings` struct (`Synthetic` is removed in PR #479). +`Ec` does not derive `Default` — omitting the `[ec]` section from TOML is a deserialization error at startup. 
This is intentional: `passphrase` has no safe default. The `#[validate(nested)]` attribute ensures `Ec::validate_passphrase()` runs when `settings.validate()` is called at startup, matching the pattern used by `Publisher` and `Rewrite` in the existing `Settings` struct. ### 14.2 TOML configuration example @@ -1448,24 +1951,42 @@ pub struct Settings { [ec] passphrase = "publisher-chosen-secret" ec_store = "ec_identity_store" -partner_store = "ec_partner_store" -admin_token_hash = "sha256-hex-of-publisher-admin-token" pull_sync_concurrency = 3 +# cluster_trust_threshold = 10 # raise to 50+ for B2B publishers +# cluster_recheck_secs = 3600 # legacy compatibility; cluster_size is computed once per entry + +[[ec.partners]] +id = "liveramp" +name = "LiveRamp ATS" +source_domain = "liveramp.com" +api_token = "partner-api-token-here" +bidstream_enabled = true +batch_rate_limit = 60 +pull_sync_enabled = true +pull_sync_url = "https://api.liveramp.com/resolve" +pull_sync_allowed_domains = ["api.liveramp.com"] +ts_pull_token = "outbound-bearer-token" + +[[ec.partners]] +id = "uid2" +name = "UID 2.0" +source_domain = "uidapi.com" +api_token = "uid2-api-token" +bidstream_enabled = true ``` ### 14.3 Rate Limit Storage -Pixel sync and pull sync rate limits (per EC hash per partner per hour) cannot use in-memory state in a WASM/Fastly Compute environment — there is no shared memory across requests. +Batch sync and pull sync rate limits cannot use in-memory state in a WASM/Fastly Compute environment — there is no shared memory across requests. -**Implementation:** Use Fastly's Edge Rate Limiting API (`fastly::erl::RateCounter`), which provides distributed per-key counting without KV latency and is designed for high-frequency counting without per-key write limits. 
+**Implementation:** Use Fastly's Edge Rate Limiting API (`fastly::erl::RateCounter`), which provides distributed per-key counting without KV latency and is designed for high-frequency counting without per-key write limits. The `RateLimiter` trait abstracts this for testability. | Counter | Key format | Window | | ---------- | ----------------------------- | -------- | -| Pixel sync | `{partner_id}:{ec_hash}` | 1 hour | -| Pull sync | `pull:{partner_id}:{ec_hash}` | 1 hour | | Batch sync | `batch:{partner_id}` | 1 minute | +| Pull sync | `pull:{partner_id}:{ec_hash}` | 1 hour | -Engineering must confirm `fastly::erl::RateCounter` availability in the target before implementation of Steps 7, 9, and 10 is considered complete. Do NOT silently skip rate limiting in production if ERL is unavailable. Do NOT fall back to KV-based counters — they would hit the same 1 write/sec/key limit that necessitates `update_last_seen()` debouncing, and would thrash under real sync traffic. If ERL is unavailable, the rate-limited routes are blocked on an approved alternative counting mechanism. +Engineering must confirm `fastly::erl::RateCounter` availability in the target before implementation is considered complete. Do NOT silently skip rate limiting in production if ERL is unavailable. Do NOT fall back to KV-based counters — they would hit the same 1 write/sec/key limit that motivated removing recurring organic-request KV writes, and would thrash under real sync traffic. If ERL is unavailable, the rate-limited routes are blocked on an approved alternative counting mechanism. 
### 14.4 Deprecation note @@ -1478,33 +1999,25 @@ Engineering must confirm `fastly::erl::RateCounter` availability in the target b New constants in `crates/trusted-server-core/src/constants.rs`: ```rust -// EC cookie name -pub const COOKIE_EC: &str = "ts-ec"; - -// EC response header -pub const HEADER_X_TS_EC: &str = "x-ts-ec"; - -// Supplementary identity headers -pub const HEADER_X_TS_EIDS: &str = "x-ts-eids"; -pub const HEADER_X_TS_EC_CONSENT: &str = "x-ts-ec-consent"; -pub const HEADER_X_TS_EIDS_TRUNCATED: &str = "x-ts-eids-truncated"; - -// Consent cookies (must match existing constants in constants.rs) -pub const COOKIE_TCF: &str = "euconsent-v2"; -pub const COOKIE_GPP: &str = "__gpp"; -pub const COOKIE_GPP_SID: &str = "__gpp_sid"; -pub const COOKIE_US_PRIVACY: &str = "us_privacy"; - -// No EC-specific geo/IP header constants — use req.get_client_ip_addr() and GeoInfo::from_request(req). +// EC cookie names +pub const COOKIE_TS_EC: &str = "ts-ec"; +pub const COOKIE_TS_EIDS: &str = "ts-eids"; + +// EC response headers +pub const HEADER_X_TS_EC: HeaderName = HeaderName::from_static("x-ts-ec"); +pub const HEADER_X_TS_EIDS: HeaderName = HeaderName::from_static("x-ts-eids"); +pub const HEADER_X_TS_EC_CONSENT: HeaderName = HeaderName::from_static("x-ts-ec-consent"); +pub const HEADER_X_TS_EIDS_TRUNCATED: HeaderName = HeaderName::from_static("x-ts-eids-truncated"); ``` -The following EC headers must be added to `INTERNAL_HEADERS` in `constants.rs` to ensure they are stripped before proxying to downstream backends: +The following EC headers are included in `INTERNAL_HEADERS` in `constants.rs` to ensure they are stripped before proxying to downstream backends: -- `HEADER_X_TS_EC` (`x-ts-ec`) -- `HEADER_X_TS_EIDS` (`x-ts-eids`) -- `HEADER_X_TS_EC_CONSENT` (`x-ts-ec-consent`) -- `HEADER_X_TS_EIDS_TRUNCATED` (`x-ts-eids-truncated`) -- Dynamic `X-ts-` headers — these cannot be registered statically because partners are added at runtime via `/admin/partners/register`. 
The `INTERNAL_HEADERS` filter **must use prefix stripping** (`x-ts-` prefix match) rather than enumerating partner IDs. A startup snapshot would miss partners registered after deployment. The current filter in `http_util.rs` uses explicit header names — extend it to also strip any header matching the `x-ts-` prefix pattern. +- `x-ts-ec` +- `x-ts-eids` +- `x-ts-ec-consent` +- `x-ts-eids-truncated` + +The `INTERNAL_HEADERS` filter uses `x-ts-` prefix stripping in `http_util.rs` to also strip any dynamic `X-ts-` headers without needing to enumerate partner IDs. --- @@ -1525,14 +2038,14 @@ pub enum TrustedServerError { // Maps to StatusCode::INTERNAL_SERVER_ERROR (500) // Used for: EC-specific handler errors only (not organic-path generation) - /// Partner not found in partner_store. + /// Partner not found in registry. #[display("Partner not found: {partner_id}")] PartnerNotFound { partner_id: String }, // Maps to StatusCode::BAD_REQUEST (400) /// Partner API key authentication failed. - #[display("Invalid API key for partner: {partner_id}")] - PartnerAuthFailed { partner_id: String }, + #[display("Invalid API key")] + PartnerAuthFailed, // Maps to StatusCode::UNAUTHORIZED (401) } ``` @@ -1544,23 +2057,17 @@ pub enum TrustedServerError { New routes added to `route_request()` in `crates/trusted-server-adapter-fastly/src/main.rs`: ```rust -// EC sync pixel — no auth required (partner validation is internal) -(GET, "/sync") → handle_sync(settings, &kv, &partner_store, &req, &mut ec_context) - -// EC identity resolution — no auth required (consent-gated) -(GET, "/identify") → handle_identify(settings, &kv, &partner_store, &req, &ec_context) +// EC identity resolution — Bearer token auth (internal to handler) +(GET, "/_ts/api/v1/identify") → handle_identify(settings, &kv, &registry, &req, &ec_context) // CORS preflight for /identify — must be registered explicitly, current router dispatches by exact method/path -(OPTIONS, "/identify") → cors_preflight_identify(settings, &req)
+(OPTIONS, "/_ts/api/v1/identify") → cors_preflight_identify(settings, &req) // S2S batch sync — partner API key auth (internal to handler) -(POST, "/api/v1/sync") → handle_batch_sync(settings, &kv, &partner_store, req) - -// Partner registration — publisher admin auth enforced in-handler (Bearer token) -(POST, "/admin/partners/register") → handle_register_partner(settings, &partner_store, req) +(POST, "/_ts/api/v1/batch-sync") → handle_batch_sync(&kv, &registry, &limiter, req) ``` -Route ordering: EC routes are inserted before the fallback `handle_publisher_request()`. The `/admin/partners/register` route uses bearer-token auth in-handler (not `[[handlers]]` Basic Auth). The current `trusted-server.toml` has `path = "^/admin"` which catches **all** `/admin/*` paths via `enforce_basic_auth()` before routing — this would block bearer-token requests to `/admin/partners/register`. **Required change:** narrow the existing `[[handlers]]` pattern from `"^/admin"` to `"^/admin/keys"` so it covers only `/admin/keys/rotate` and `/admin/keys/deactivate` (the routes in `Settings::ADMIN_ENDPOINTS`). `/admin/partners/register` then passes through `enforce_basic_auth()` unchallenged and reaches the bearer-token handler. +Route ordering: EC routes are inserted before the fallback `handle_publisher_request()`. ### 17.1 EC integration in `main.rs` @@ -1574,9 +2081,17 @@ EC follows the same pre-routing pattern as `GeoInfo::from_request()` (line 70). This is a supported Fastly Compute pattern — `Response::send_to_client()` flushes the response to the client immediately and allows the WASM invocation to continue. This is not a small wiring change; it restructures how the application returns responses. ```rust -async fn route_request(...) -> Result<(), Error> { +fn route_request(...) -> Result<(), Error> { let geo_info = GeoInfo::from_request(&req); + // Phase 0 — bot gate (pure in-memory, no KV I/O). See §7A.
+ let device_signals = derive_device_signals(&req); + let is_real_browser = device_signals.looks_like_browser(); + if !is_real_browser { + log::debug!("Bot gate: blocking EC operations (ja4={:?}, platform={:?})", + device_signals.ja4_class, device_signals.platform_class); + } + // Pre-routing — read only, no generation (matches GeoInfo pattern). // EcContext stores client_ip internally (same req.get_client_ip_addr() // already called by GeoInfo::from_request() above). @@ -1584,20 +2099,35 @@ async fn route_request(...) -> Result<(), Error> { let mut ec_context = match ec_context_result { Ok(ctx) => ctx, Err(e) => { - // Pre-routing failure — no route matched yet, but we still need to - // send an HTTP error response. Construct one and flush immediately. log::error!("EcContext initialization failed: {e:?}"); let mut response = to_error_response(&e); response.send_to_client(); return Ok(()); } }; - let kv = KvIdentityGraph::new(&settings.ec.ec_store); - let partner_store = PartnerStore::new(&settings.ec.partner_store); - let pull_sync_dispatcher = PullSyncDispatcher::new(settings.ec.pull_sync_concurrency); + + // Pass device signals through for KvDevice on creation. + ec_context.set_device_signals(device_signals); + + // Build partner registry from config at startup. + let registry = PartnerRegistry::from_config(&settings.ec.partners)?; + + // Extract ts-eids cookie before routing consumes the request. + let eids_cookie = extract_cookie_value(&req, COOKIE_TS_EIDS); + + // Bot gate: suppress all KV operations for unrecognized clients. + let kv = if is_real_browser { + settings.ec.ec_store.as_deref().map(KvIdentityGraph::new) + } else { + None + }; + let limiter = FastlyRateLimiter::new(RATE_COUNTER_NAME); if let Some(mut response) = enforce_basic_auth(settings, &req) { - ec_finalize_response(settings, geo_info.as_ref(), &ec_context, &kv, &mut response); + // Bot gate: skip EC cookie writes for unrecognized clients. 
+ if is_real_browser { + ec_finalize_response(settings, &ec_context, kv.as_ref(), &registry, eids_cookie.as_deref(), &mut response); + } response.send_to_client(); return Ok(()); } @@ -1609,49 +2139,37 @@ async fn route_request(...) -> Result<(), Error> { // is_organic tracks whether pull sync should fire (organic routes only — §10.2). let mut is_organic = false; let result = match (method, path.as_str()) { - // EC-specific routes — all read-only except /sync which takes &mut. - // /sync may assign fallback consent into ec_context.consent when the - // query param is the only signal — see §8.3. - (GET, "/sync") => handle_sync(settings, &kv, &partner_store, &req, &mut ec_context).await, - (GET, "/identify") => handle_identify(settings, &kv, &partner_store, &req, &ec_context).await, - (OPTIONS, "/identify") => cors_preflight_identify(settings, &req), - (POST, "/api/v1/sync") => handle_batch_sync(settings, &kv, &partner_store, req).await, - (POST, "/admin/partners/register") => handle_register_partner(settings, &partner_store, req).await, - - // /auction — EC-read-only; never generates EC. - // NOTE: handle_auction signature changes from (settings, orchestrator, req) to - // (settings, orchestrator, &kv, req, &ec_context) — this is a call-graph change, - // not just wiring. See §12 for the full auction integration.
- (POST, "/auction") => handle_auction(settings, orchestrator, &kv, req, &ec_context).await, - - // Organic routes — generate EC if needed (best-effort, never 500s), then dispatch + (GET, "/_ts/api/v1/identify") => handle_identify(settings, kv.as_ref(), &registry, &req, &ec_context), + (OPTIONS, "/_ts/api/v1/identify") => cors_preflight_identify(settings, &req), + (POST, "/_ts/api/v1/batch-sync") => handle_batch_sync(kv.as_ref(), &registry, &limiter, req), + (POST, "/auction") => handle_auction(settings, orchestrator, kv.as_ref(), req, &ec_context), + (m, path) if integration_registry.has_route(&m, path) => { is_organic = true; - ec_context.generate_if_needed(settings, &kv); - integration_registry.handle_proxy(&m, path, settings, req, &ec_context).await + ec_context.generate_if_needed(settings, kv.as_ref()); + integration_registry.handle_proxy(&m, path, settings, req, &ec_context) }, _ => { is_organic = true; - ec_context.generate_if_needed(settings, &kv); + ec_context.generate_if_needed(settings, kv.as_ref()); handle_publisher_request(settings, integration_registry, req, &ec_context) }, }; - // Unwrap result — errors become error responses (matches existing pattern) let mut response = result.unwrap_or_else(|e| to_error_response(&e)); - // finalize_response runs on every route — enforces cookie write/deletion/last_seen - ec_finalize_response(settings, geo_info.as_ref(), &ec_context, &kv, &mut response); + // Bot gate: skip EC cookie writes and finalize for unrecognized clients. + if is_real_browser { + ec_finalize_response(settings, &ec_context, kv.as_ref(), &registry, eids_cookie.as_deref(), &mut response); + } - // Flush response to client; WASM continues for background pull sync. response.send_to_client(); - // Background pull sync — organic routes only (§10.2). Never fires on /sync, - // /identify, /auction, /api/v1/sync, or /admin/* routes. - // Fires outbound HTTP calls via send_async(), blocks on PendingRequest::wait().
- if is_organic { - if let (Some(ip), Ok(pull_partners)) = (ec_context.client_ip, partner_store.pull_enabled_partners()) { - pull_sync_dispatcher.dispatch_background(&ec_context, ip, &pull_partners, &kv); + // Background pull sync — organic routes only, real browsers only (§7A.4, §10.2). + if is_real_browser && is_organic { + if let Some(ip) = ec_context.client_ip { + let pull_partners = registry.pull_enabled_partners(); + pull_sync_dispatcher.dispatch_background(&ec_context, ip, &pull_partners, kv.as_ref()); } } @@ -1659,7 +2177,7 @@ async fn route_request(...) -> Result<(), Error> { } ``` -The existing `finalize_response()` in `main.rs` becomes `ec_finalize_response()` with the extended signature that accepts `ec_context` and `kv`. The `#[fastly::main]` entrypoint changes to call `route_request()` and return `Ok(())` (the response is already sent via `send_to_client()`). +The existing `finalize_response()` in `main.rs` becomes `ec_finalize_response()` with the extended signature that accepts `ec_context`, `kv`, `registry`, and `eids_cookie`. The `#[fastly::main]` entrypoint changes to call `route_request()` and return `Ok(())` (the response is already sent via `send_to_client()`). The `PartnerRegistry` is built once at startup via `PartnerRegistry::from_config(&settings.ec.partners)` and passed by reference throughout the request lifecycle. `PullSyncDispatcher::dispatch_background` uses `Request::send_async()` to fire outbound HTTP calls, then calls `PendingRequest::wait()` (blocking) on each handle under `settings.ec.pull_sync_concurrency` concurrency. No async runtime is needed — this is synchronous blocking code running after `send_to_client()` has flushed the response. The Fastly WASM invocation stays alive until `dispatch_background` returns. This does not add latency to the user-facing response. @@ -1673,53 +2191,55 @@ Follow the project's **Arrange-Act-Assert** pattern. 
Test both happy paths and e Each module in `ec/` has a `#[cfg(test)]` module covering: -| Module | Key test cases | -| --------------- | --------------------------------------------------------------------------------------------------------- | -| `identity.rs` | IPv4/IPv6 normalization, /64 truncation, HMAC determinism, output format | -| `finalize.rs` | `ec_finalize_response()`: cookie write on generation, deletion on withdrawal, `update_last_seen` debounce | -| `cookie.rs` | Cookie string format, Max-Age=0 for deletion, domain derivation | -| `kv.rs` | Serialization/deserialization roundtrip, CAS merge logic, metadata extraction | -| `partner.rs` | API key hash verification (constant-time), record serialization | -| `sync_pixel.rs` | All `ts_synced` redirect codes, 429 rate limit, return URL construction | -| `sync_batch.rs` | Status code selection (200/207/401/400/429), per-mapping rejection reasons, API-key rate limit | -| `pull_sync.rs` | Trigger conditions, null/404 no-op, dispatch limit | -| `identify.rs` | All response codes (200/403/204), degraded flag, `uids` filtering | +| Module | Key test cases | +| ---------------- | --------------------------------------------------------------------------------------------------------------------- | +| `generation.rs` | IPv4/IPv6 normalization, /64 truncation, HMAC determinism, output format | +| `finalize.rs` | `ec_finalize_response()`: cookie write on generation, deletion on withdrawal, returning-user EC header, EID ingestion | +| `cookies.rs` | Cookie string format, Max-Age=0 for deletion, domain derivation | +| `kv.rs` | Serialization/deserialization roundtrip, CAS merge logic, metadata extraction | +| `partner.rs` | Partner ID validation, API key hashing | +| `registry.rs` | `from_config()` validation, duplicate detection, O(1) lookups by ID/hash/domain | +| `prebid_eids.rs` | Base64 decode, JSON parse, source domain matching, debounce | +| `batch_sync.rs` | Status code selection (200/207/401/400/429), 
per-mapping rejection reasons, API-key rate limit | +| `pull_sync.rs` | Trigger conditions, null/404 no-op, dispatch limit | +| `identify.rs` | Bearer auth (200/401/403/204), scoped partner response, degraded flag, CORS | ### 18.2 Integration tests KV behavior is tested with Viceroy (local Fastly Compute simulator) using real KV store operations. Key scenarios: -- Consent withdrawal: cookie deletion + tombstone write (`write_withdrawal_tombstone()`) + all EC response headers stripped — in same request +- Explicit consent withdrawal: cookie deletion + tombstone write (`write_withdrawal_tombstone()`) + all EC response headers stripped — in same request - Concurrent writes: CAS retry logic under simulated generation conflicts - KV degraded: EC cookie still set when KV `create_or_revive()` fails (best-effort) -- Sync-then-identify flow: pixel sync writes partner ID, then `/identify` returns it +- Prebid EID ingestion: `ts-eids` cookie parsed, source domain matched, partner UID written to KV +- Batch sync then identify: batch sync writes partner UID, then `/_ts/api/v1/identify` returns it for that partner **Eventually-consistent caveat:** Fastly KV does not guarantee read-after-write consistency. The sync→identify scenario may not be immediately visible on production — Viceroy may behave differently. Tests for this flow should use retry with backoff (up to 1s) and be documented as Viceroy-only consistency. Do not write assertions that assume immediate visibility after a KV write. ### 18.3 JS tests (if applicable) -If any JS changes are made for EC (e.g., publisher-side `/identify` fetch helper in `crates/js/`), use Vitest with `vi.hoisted()` for mocks. +If any JS changes are made for EC (e.g., publisher-side `/_ts/api/v1/identify` fetch helper in `crates/js/`), use Vitest with `vi.hoisted()` for mocks. --- ## 19. Implementation Order -Suggested order to minimize risk and allow incremental testing. Each step should pass `cargo test --workspace` before the next begins. 
- -| Step | Scope | Deliverable | -| ---- | --------------------------------------------------------- | ---------------------------------------------------------------------------------------------- | -| 1 | `ec/identity.rs` + constants + settings | `generate_ec()`, `normalize_ip()`, `EcContext` | -| 2 | `ec/finalize.rs` | `ec_finalize_response()` (cookie write, deletion, tombstone, last_seen) | -| 3 | `ec/cookie.rs` | Cookie creation, deletion, response header | -| 4 | `ec/kv.rs` | `KvIdentityGraph` CRUD with CAS | -| 5 | `ec/partner.rs` + `ec/admin.rs` | `PartnerStore`, `/admin/partners/register` | -| 6 | EC middleware in `main.rs`, `publisher.rs`, `registry.rs` | `EcContext::read_from_request()` pre-routing, `generate_if_needed()`, `ec_finalize_response()` | -| 7 | `ec/sync_pixel.rs` | `GET /sync` handler + route | -| 8 | `ec/identify.rs` | `GET /identify` handler + route | -| 9 | `ec/sync_batch.rs` | `POST /api/v1/sync` handler + route | -| 10 | `ec/pull_sync.rs` | Background pull sync dispatch (blocking, after `send_to_client()`) | -| 11 | Auction integration | EC injection into `user.id`, `user.eids`, `user.consent` | -| 12 | End-to-end integration tests | Viceroy-based flow tests | +Implementation was completed in the following order. Each step passed `cargo test --workspace` before the next began. 
+ +| Step | Scope | Deliverable | +| ---- | --------------------------------------------------------- | ------------------------------------------------------------------------------------------------- | +| 1 | `ec/generation.rs` + constants + settings | `generate_ec()`, `normalize_ip()`, `EcContext` | +| 2 | `ec/cookies.rs` | Cookie creation, deletion, response header | +| 3 | `ec/kv.rs` + `ec/kv_types.rs` | `KvIdentityGraph` CRUD with CAS | +| 4 | `ec/finalize.rs` | `ec_finalize_response()` (cookie write on generation, deletion, tombstone, returning-user header) | +| 5 | `ec/partner.rs` + `ec/registry.rs` | `PartnerRegistry` (config-based), partner validation helpers | +| 6 | EC middleware in `main.rs`, `publisher.rs`, `registry.rs` | `EcContext::read_from_request()` pre-routing, `generate_if_needed()`, `ec_finalize_response()` | +| 7 | `ec/prebid_eids.rs` | Prebid EID cookie ingestion (replaces pixel sync) | +| 8 | `ec/identify.rs` | `GET /_ts/api/v1/identify` handler + route (Bearer auth, scoped response) | +| 9 | `ec/batch_sync.rs` + `ec/rate_limiter.rs` | `POST /_ts/api/v1/batch-sync` handler + route | +| 10 | `ec/pull_sync.rs` | Background pull sync dispatch (blocking, after `send_to_client()`) | +| 11 | Auction integration | EC injection into `user.id`, `user.eids`, `user.consent` | +| 12 | End-to-end integration tests | Viceroy-based flow tests | --- @@ -1733,7 +2253,7 @@ and auction decoration — without relying on third-party cookies. **Done when:** All 12 stories below are complete, `cargo test --workspace` and `cargo clippy` pass with no warnings, and the end-to-end Viceroy flow tests -cover the full sync → identify → auction path. +cover the full EID ingestion → identify → auction path. **Spec ref:** This document. PRD: `docs/internal/ssc-prd.md`. @@ -1744,7 +2264,7 @@ cover the full sync → identify → auction path. Implement the core EC data types, generation logic, and per-request context struct that all subsequent stories depend on. 
-**Scope:** `ec/identity.rs`, `ec/mod.rs`, `trusted-server.toml` `[ec]` section, +**Scope:** `ec/generation.rs`, `ec/mod.rs`, `trusted-server.toml` `[ec]` section, `Settings` struct update. **Acceptance criteria:** @@ -1764,7 +2284,7 @@ struct that all subsequent stories depend on. Calls `build_consent_context()` with the EC hash as identity key and stores the result as `consent: ConsentContext` (see §6.1.1). Does not generate. Does not write to EC identity KV. (Note: `build_consent_context()` may write - to the consent KV store when an EC hash is available.) + using the request-local consent context.) - `EcContext::generate_if_needed(settings, kv)` generates a new EC when `ec_value == None && allows_ec_creation(&consent)`, sets `ec_generated = true`, and writes the initial KV entry via `kv.create_or_revive()` (best-effort). @@ -1774,8 +2294,8 @@ struct that all subsequent stories depend on. without setting `ec_generated`. It never returns an error — organic traffic must not 500 on EC failure. - `[ec]` settings block parses from TOML: `passphrase`, `ec_store`, - `partner_store`, `admin_token_hash`, `pull_sync_concurrency`. -- All unit tests in `identity.rs` pass (HMAC determinism, format, IP normalization). + `pull_sync_concurrency`, `partners`. +- All unit tests in `generation.rs` pass (HMAC determinism, format, IP normalization). **Spec ref:** §2, §3, §4, §5.4, §14.1 @@ -1784,26 +2304,19 @@ struct that all subsequent stories depend on. ### Story 2 — EC finalize response Implement `ec_finalize_response()` — the post-routing function that enforces -cookie writes, deletions, tombstones, and last-seen updates on every response. +cookie writes on generation, cookie deletion on withdrawal, tombstones, returning-user `x-ts-ec` headers, and EID ingestion on responses. **Scope:** `ec/finalize.rs` (new file) **Acceptance criteria:** - `ec_finalize_response(settings, geo, ec_context, kv, response)` runs on every route. 
-- Consent gating uses the existing `allows_ec_creation()` — no new gating function. -- When `!allows_ec_creation(&consent) && cookie_was_present`: calls - `clear_ec_on_response()` (deletes cookie and strips all EC response headers) - and writes tombstone for each valid EC hash available. When the cookie is - malformed and no valid header exists, no tombstone is written — cookie - deletion alone enforces withdrawal (see §6.2). -- When `ec_was_present && !ec_generated && allows_ec_creation(&consent)`: calls - `kv.update_last_seen(ec_hash, now())` (debounced at 300s). If `cookie_ec_value` - is set (header/cookie mismatch), also calls `set_ec_on_response()` to reconcile - the browser cookie to the header-derived identity. -- When `ec_generated == true`: calls `set_ec_on_response()`. -- Unit tests cover all four branches: withdrawal (with and without valid hash), - returning-user last_seen + mismatch reconciliation, and new-EC generation. +- Consent gating uses `allows_ec_creation()` for current-request EC usage and `has_explicit_ec_withdrawal()` for cookie-expiry/tombstone decisions. +- When `!allows_ec_creation(&consent)`: strips all EC response headers. +- When `has_explicit_ec_withdrawal(&consent) && cookie_was_present`: additionally expires the cookie and writes tombstones for each valid EC ID available. When the cookie is malformed and no valid header exists, no tombstone is written — cookie deletion alone enforces withdrawal (see §6.2). +- When `ec_was_present && !ec_generated && allows_ec_creation(&consent)`: sets the `x-ts-ec` response header only. It does not refresh the EC cookie, repair header/cookie mismatches, or write KV solely to extend TTL. +- When `ec_generated == true`: calls `set_ec_cookie_and_header_on_response()`. +- Unit tests cover explicit-withdrawal, fail-closed header stripping, returning-user header behavior, and new-EC generation. 
**Spec ref:** §5.4, §6.2 @@ -1814,7 +2327,7 @@ cookie writes, deletions, tombstones, and last-seen updates on every response. Implement the low-level functions that create and delete the `ts-ec` cookie and set EC response headers. These are called by `ec_finalize_response()` (Story 2). -**Scope:** `ec/cookie.rs` +**Scope:** `ec/cookies.rs` **Acceptance criteria:** @@ -1822,7 +2335,7 @@ and set EC response headers. These are called by `ec_finalize_response()` (Story `Max-Age=31536000`, `SameSite=Lax; Secure`. `HttpOnly` is NOT set (JS on the publisher page must be able to read the cookie). - `delete_ec_cookie()` produces a cookie with `Max-Age=0`, same attributes. -- `set_ec_on_response()` sets `Set-Cookie` and `X-ts-ec` response headers. +- `set_ec_header_on_response()` sets only `X-ts-ec`; `set_ec_cookie_and_header_on_response()` sets both `Set-Cookie` and `X-ts-ec`. - `clear_ec_on_response()` sets `Set-Cookie` with `Max-Age=0` **and** strips all EC-related response headers: `X-ts-ec`, `X-ts-eids`, `X-ts-ec-consent`, `x-ts-eids-truncated`, and any `X-ts-` headers. This prevents @@ -1854,19 +2367,14 @@ CAS-based concurrent write protection and consent withdrawal delete. - `KvIdentityGraph::create_or_revive(ec_hash, &entry)` creates a new entry OR overwrites an existing tombstone (`consent.ok = false`) with a fresh entry; no-ops if a live entry already exists. Called by `generate_if_needed()`. -- `KvIdentityGraph::update_last_seen(ec_hash, timestamp)` updates `last_seen` - without overwriting partner IDs (CAS merge), and only writes if the stored - value is more than 300s older than `timestamp` (debounce to avoid 1 write/sec - KV limit). Callers pass `now()` as `timestamp`. +- Returning-user page views do not update a last-seen field; EC entries no longer store `last_seen` or mutable publisher-domain visit timestamps. - `KvIdentityGraph::write_withdrawal_tombstone(ec_hash)` sets `consent.ok = false`, clears partner IDs, and applies a 24-hour TTL (see §6.2). 
Returns `Result` — callers log `error` on failure and continue (cookie deletion is the primary enforcement mechanism). - `KvIdentityGraph::delete(ec_hash)` hard-deletes the entry — used only for IAB data deletion requests, not for consent withdrawal (which uses tombstones). -- `kv.upsert_partner_id(ec_hash, partner_id, uid, timestamp)` writes to - `ids[partner_id]`, creating a minimal live root entry first if the key is - absent, and skips if existing `synced >= timestamp` (idempotent). +- `kv.upsert_partner_id(ec_hash, partner_id, uid)` writes to `ids[partner_id]`, creating a minimal live root entry first if the key is absent, and skips writes when the existing UID already matches (idempotent). - KV schema matches §7 exactly (JSON roundtrip test). - Unit tests cover CAS merge logic, tombstone write, tombstone error handling, serialization/deserialization roundtrip, metadata extraction. @@ -1875,39 +2383,31 @@ CAS-based concurrent write protection and consent withdrawal delete. --- -### Story 5 — Partner registry and admin endpoint +### Story 5 — Partner registry (config-based) -Implement `PartnerRecord`, `PartnerStore`, and the admin registration endpoint -that operators use to onboard ID sync partners. +Implement partner ID validation, API key hashing, and the in-memory +`PartnerRegistry` that replaces the KV-backed `PartnerStore`. -**Scope:** `ec/partner.rs`, `ec/admin.rs`, router update +**Scope:** `ec/partner.rs`, `ec/registry.rs` **Acceptance criteria:** -- `PartnerRecord` contains all fields from §13.1 including +- `validate_partner_id()` enforces `^[a-z0-9_-]{1,32}$` and rejects reserved + names (`ec`, `eids`, `ec-consent`, `eids-truncated`, `synthetic`, `ts`, + `version`, `env`). +- `hash_api_key()` computes SHA-256 hex of the plaintext API token. +- `PartnerConfig` contains all fields from §13.3 including `pull_sync_allowed_domains` and `batch_rate_limit`. -- `PartnerStore::get()`, `upsert()`, `find_by_api_key_hash()` operate on - `partner_store` KV. 
-- `pull_enabled_partners()` re-checks `pull_sync_enabled == true` on fetched - records so stale `_pull_enabled` index entries do not dispatch disabled partners. -- API key stored as SHA-256 hex; plaintext never written to KV. -- `verify_api_key()` uses constant-time comparison. -- `POST /admin/partners/register` validates `Authorization: Bearer ` inside - the handler against `settings.ec.admin_token_hash` (constant-time SHA-256 comparison). - Returns `401` if missing or invalid — before any request body is read. -- Admin endpoint validates: `pull_sync_url` hostname must be in - `pull_sync_allowed_domains` when set — returns `400` otherwise. -- Returns `201 Created` on new partner or `200 OK` on update, with an explicit - response DTO (see §13.2 step 6 — do NOT serialize full `PartnerRecord`). - Returns `400` on validation failure; `503` on KV failure. -- `/admin/partners/register` is **NOT** added to `Settings::ADMIN_ENDPOINTS` — - it uses bearer-token-in-handler auth, not `[[handlers]]` Basic Auth. -- The admin-route-scan test (`settings.rs:1504-1530`) must be updated to exclude - bearer-token-authed routes from its `ADMIN_ENDPOINTS` assertion. Add an exclusion - list (see §13.2 codebase invariant note). -- The `[[handlers]]` pattern in `trusted-server.toml` must be narrowed from - `"^/admin"` to `"^/admin/keys"` (see §13.2). -- Unit tests cover API key hash verification and record serialization. +- `PartnerRegistry::from_config()` builds the registry from `Vec<PartnerConfig>` + with O(1) `by_id`, `by_api_key_hash`, and `by_source_domain` indexes. +- Startup validation catches: invalid IDs, duplicate IDs, duplicate API token + hashes, duplicate source domains, invalid pull sync configuration. +- `get()`, `find_by_api_key_hash()`, `find_by_source_domain()` return + `Option<&PartnerConfig>`. +- `pull_enabled_partners()` returns only partners with `pull_sync_enabled = true`. +- No admin endpoint — partner changes require config update and redeployment.
+- Unit tests cover partner ID validation, hash computation, registry + construction, and duplicate detection. **Spec ref:** §13 @@ -1925,22 +2425,17 @@ Wire `EcContext` into the request pipeline following the two-phase model - `EcContext::read_from_request()` is called before the route match on every request, passed the existing `geo_info` (no duplicate geo header parsing). -- EC route handlers receive `ec_context` without EC generation. `/identify`, - `/auction`, `/api/v1/sync`, and `/admin/*` use read-only `&EcContext` and - never mutate it. **Exception:** `/sync` receives `&mut EcContext`; when the - consent query-param fallback applies (`ec_context.consent.is_empty()`), it - assigns the locally-decoded consent into `ec_context.consent` so that both - the sync write decision and `ec_finalize_response()` share the same effective - consent view. This prevents a same-request "write partner ID, then withdraw - EC" conflict. See §8.3 for full details. +- EC route handlers receive `ec_context` without EC generation. `/_ts/api/v1/identify`, + `/auction`, and `/_ts/api/v1/batch-sync` use read-only `&EcContext` and + never mutate it. - `/auction` consumes EC identity but never bootstraps it. - `handle_publisher_request()` and `integration_registry.handle_proxy()` call `ec_context.generate_if_needed(settings, &kv)` before their handler logic (best-effort, never 500s). - `ec_finalize_response()` receives `ec_context` and `kv` and: - - Deletes the EC cookie and writes a withdrawal tombstone when `!allows_ec_creation(&consent) && cookie_was_present` (runs on all routes). - - Calls `kv.update_last_seen(ec_hash, now())` when `ec_was_present == true && ec_generated == false && allows_ec_creation(&consent)` (returning user with valid consent). - - Calls `set_ec_on_response()` when `ec_context.ec_generated == true`, and also - on returning-user mismatch reconciliation when `cookie_ec_value.is_some()`. + - Strips EC response headers whenever `!allows_ec_creation(&consent)`. 
+ - Additionally deletes the EC cookie and writes a withdrawal tombstone when `has_explicit_ec_withdrawal(&consent) && cookie_was_present` (runs on all routes). + - Sets `x-ts-ec` header when `ec_was_present == true && ec_generated == false && allows_ec_creation(&consent)` (returning user with valid consent). Also ingests Prebid EIDs from `ts-eids` cookie. + - Calls `set_ec_cookie_and_header_on_response()` when `ec_context.ec_generated == true` (newly generated ECs). Returning-user mismatch repair is not performed. Also ingests Prebid EIDs. - `route_request()` return type changes from `Result` to `Result<(), Error>`; response is flushed via `response.send_to_client()` instead of being returned. The `#[fastly::main]` entrypoint must also change to match. @@ -1962,102 +2457,84 @@ Wire `EcContext` into the request pipeline following the two-phase model --- -### Story 7 — Pixel sync (`GET /sync`) - -Implement the pixel-based ID sync endpoint that partners use to write their -user ID against an EC hash. +### Story 7 — Prebid EID cookie ingestion -**Scope:** `ec/sync_pixel.rs`, router update +Implement the server-side ingestion of the `ts-eids` cookie, which replaces +the pixel sync endpoint as the browser-side ID sync mechanism. +**Scope:** `ec/prebid_eids.rs`, `ec/finalize.rs` update **Acceptance criteria:** -- Missing required query params (`partner`, `uid`, `return`) → `400`. -- No valid `ts-ec` cookie (missing or malformed) → redirect to - `{return}?ts_synced=0&ts_reason=no_ec`. -- Unknown `partner` ID → `400`. -- `return` URL hostname not in `partner.allowed_return_domains` → `400`. -- Consent uses `ec_context.consent`. The optional `consent` query param is a fallback - only: it is used exclusively when `ec_context.consent.is_empty()` returns `true` - — meaning no consent signals of any kind are present (no TCF string, no GPP - string, no US Privacy string, no AC string, no GPC, no decoded consent objects). 
- Use the `ConsentContext::is_empty()` method directly; do not reimplement the - check from this description. If consent KV fallback or any other pre-routing - source has already populated `ec_context.consent`, `is_empty()` is `false` and - the param is ignored. - When the fallback applies, decode the consent string locally into a - `ConsentContext` and **assign it into `ec_context.consent`** so that both - the sync write and `ec_finalize_response()` share the same effective consent - (prevents a same-request "write partner ID, then withdraw EC" conflict). - Do NOT re-call `build_consent_context()` (that would trigger consent KV writes). - Denied or absent → redirect to `{return}?ts_synced=0&ts_reason=no_consent`. -- Rate limit exceeded → `429 Too Many Requests` (no redirect). -- KV write failure → redirect to `{return}?ts_synced=0&ts_reason=write_failed`. -- `kv.upsert_partner_id()` creates a minimal live root entry first when the EC - exists in the cookie but the identity graph key is still missing because the - original best-effort `create_or_revive()` failed on generation. -- Success → redirect to `{return}?ts_synced=1`. -- Return URL construction correctly appends `&` or `?` based on existing query string. -- Rate counter key: `{partner_id}:{ec_hash}`, 1-hour window, via `fastly::erl::RateCounter`. -- Unit tests cover all redirect/response codes and return URL construction. +- `ingest_prebid_eids(cookie_value, ec_id, kv, registry)` decodes a base64 JSON + array of OpenRTB-style `{source, uids:[...]}` objects and syncs matched partners to KV. The backend also accepts the earlier flattened `{source, id, atype}` payload for backward compatibility. +- Source domain matching via `registry.find_by_source_domain()` (case-insensitive). +- Sources with no non-empty UID are skipped. +- Idempotent write suppression: if the stored UID already matches the incoming UID, the write is skipped for that partner. 
+- KV write via `kv.upsert_partner_id()` — best-effort, errors logged at `warn`. +- Called from `ec_finalize_response()` on both returning-user and new-EC paths + when a `ts-eids` cookie is present and consent is granted. +- JS writer target size: 3 KB; backend parser raw-cookie limit: 8 KiB. +- All errors are logged and swallowed — never blocks the response. +- Unit tests cover base64 decode, JSON parse, source domain matching, size limits, + and empty/oversized UID handling. **Spec ref:** §8 --- -### Story 8 — Identity lookup (`GET /identify`) +### Story 8 — Identity lookup (`GET /_ts/api/v1/identify`) -Implement the browser-facing endpoint that publishers call to retrieve the EC -hash and synced partner UIDs for the current user. +Implement the partner-facing endpoint that authenticated partners call to +retrieve their own synced UID for the current EC. **Scope:** `ec/identify.rs`, router update **Acceptance criteria:** +- **Bearer token required.** Missing or invalid `Authorization: Bearer` → `401` + with `{ "error": "invalid_token" }`. Auth uses `registry.find_by_api_key_hash()`. - `!allows_ec_creation(consent)` (consent denied, regardless of EC presence) → `403 Forbidden`. - When EC is present but consent is denied, the handler returns `403` and - `ec_finalize_response()` deletes the cookie and writes a tombstone. + When the denial is an explicit withdrawal signal and a `ts-ec` cookie was present, `ec_finalize_response()` also deletes the cookie and writes a tombstone. Fail-closed / unverifiable-consent cases still return `403`, but they strip EC headers only. - No EC present (`ec_was_present == false`) and consent not denied → `204 No Content`. -- Valid EC, consent granted, KV read succeeds with entry → `200` with full JSON body - including `ec`, `consent`, `uids`, `eids`. -- Valid EC, consent granted, KV read succeeds but no entry (never synced or - `create_or_revive()` failed on generation) → `200` with `degraded: false`, - empty `uids`/`eids`. 
This is not an error — see §11.4. -- `uids` filtered to partners where `bidstream_enabled = true` and consent - granted. -- KV read error (store unavailable) → `200` with `degraded: true` and empty - `uids`/`eids`. +- Valid EC, consent granted, KV read succeeds with entry → `200` with scoped JSON body + including `ec`, `consent`, `partner_id`, `uid` (single partner's UID), `eid` + (single partner's OpenRTB EID object), `cluster_size`. +- Valid EC, consent granted, KV read succeeds but no entry for this partner → + `200` with `degraded: false`, `uid` and `eid` absent. Not an error — see §11.4. +- KV read error (store unavailable) → `200` with `degraded: true`, `uid` and + `eid` absent. +- Response scoped to the authenticated partner only — no multi-partner `uids`/`eids` maps. +- `X-ts-ec` response header set on `200` responses. - No `Origin` header (server-side proxy): process normally, no CORS headers, no `403`. - `Origin` header present and matches `publisher.domain` or subdomain: reflect in `Access-Control-Allow-Origin` + `Vary: Origin`. - `Origin` header present but does not match: `403`, no body. -- `OPTIONS /identify` preflight → `200` with CORS headers, no body. -- `generate_if_needed()` is never called — no new EC is generated. The handler - itself does not write cookies, but `ec_finalize_response()` may still delete - the cookie on withdrawal or reconcile it on header/cookie mismatch. +- `Access-Control-Allow-Headers` includes `Authorization, X-ts-ec`. +- `OPTIONS /_ts/api/v1/identify` preflight → `200` with CORS headers, no body. +- `generate_if_needed()` is never called — no new EC is generated. - Response time target: 30ms p95 (documented, not gate). -- Unit tests cover all response codes, degraded flag, `uids` filtering, - CORS origin validation. +- Unit tests cover Bearer auth (200/401/403/204), scoped partner response, + degraded flag, CORS origin validation. 
**Spec ref:** §11 --- -### Story 9 — S2S batch sync (`POST /api/v1/sync`) +### Story 9 — S2S batch sync (`POST /_ts/api/v1/batch-sync`) Implement the server-to-server batch sync endpoint for partners to bulk-write their UIDs against a list of EC hashes. -**Scope:** `ec/sync_batch.rs`, router update +**Scope:** `ec/batch_sync.rs`, `ec/rate_limiter.rs`, router update **Acceptance criteria:** -- Missing or invalid `Authorization: Bearer` → `401`. Auth uses index-based - lookup via `find_by_api_key_hash()` (§9.2) with constant-time hash verification. -- Auth KV lookup failure (store unavailable) → `503 Service Unavailable`. +- Missing or invalid `Authorization: Bearer` → `401`. Auth uses in-memory + lookup via `registry.find_by_api_key_hash()` (§9.2). - API-key rate limit exceeded (`batch_rate_limit` per partner per minute) → `429` with `{ "error": "rate_limit_exceeded" }`. - More than 1000 mappings → `400`. -- Per-mapping rejections: `invalid_ec_hash`, `ec_hash_not_found`, +- Per-mapping rejections: `invalid_ec_id`, `ec_id_not_found`, `consent_withdrawn`, `kv_unavailable`. - KV write failure aborts remaining mappings with `kv_unavailable`; partial results returned as `207`. @@ -2085,7 +2562,7 @@ runtime). Only fires on organic routes (§10.2). - Dispatch only when: EC present (including an EC generated on the current organic request), consent granted, `pull_sync_enabled = true`, and either no - existing partner entry or existing `synced` is older than `pull_sync_ttl_sec`. + existing partner entry; existing partner UIDs are not refreshed by pull sync. - Rate limit: `pull_sync_rate_limit` per EC hash per partner per hour; counter key `pull:{partner_id}:{ec_hash}`. - Maximum concurrent pulls per request: `settings.ec.pull_sync_concurrency` @@ -2100,7 +2577,7 @@ runtime). Only fires on organic routes (§10.2). - Dispatch runs after `send_to_client()` — does not add latency to the user-facing response. Uses `send_async()` + `PendingRequest::wait()` (blocking). 
- Only fires on organic routes (`handle_publisher_request`, `handle_proxy`) — - never on `/sync`, `/identify`, `/auction`, `/api/v1/sync`, or `/admin/*`. + never on `/_ts/api/v1/identify`, `/_ts/api/v1/batch-sync`, or `/auction`. - Unit tests cover trigger conditions, null/404 no-op, domain allowlist check, dispatch limit enforcement. @@ -2143,21 +2620,21 @@ across multiple handlers in a single simulated environment. **Acceptance criteria:** -- **Full flow:** First-party page load → EC generated → pixel sync writes - partner UID → `/identify` returns that UID → auction includes EID. +- **Full flow:** First-party page load → EC generated → Prebid EID cookie + ingestion writes partner UID → `/_ts/api/v1/identify` returns that UID + (scoped to authenticated partner) → auction includes EID. - **Consent withdrawal:** Request with denied consent clears EC cookie and writes a KV tombstone (`consent.ok = false`, 24h TTL) in the same request; subsequent - `/identify` with consent still denied returns `403` (consent denied → §11.4); + `/_ts/api/v1/identify` with consent still denied returns `403` (consent denied → §11.4); batch sync returns `consent_withdrawn` within the tombstone TTL. - **KV create failure:** EC cookie is still set when `create_or_revive()` fails - (best-effort). Subsequent `/identify` returns `200` with `degraded: false` and + (best-effort). Subsequent `/_ts/api/v1/identify` returns `200` with `degraded: false` and empty `uids`/`eids` (KV read succeeds — entry simply does not exist). -- **KV read failure:** `/identify` returns `200` with `degraded: true` and empty +- **KV read failure:** `/_ts/api/v1/identify` returns `200` with `degraded: true` and empty `uids`/`eids` (store unavailable, entry might exist but can't be read). - **Concurrent writes:** Two simultaneous EC creates for the same hash resolve without data loss (CAS retry). 
-- **Rate limits:** Pixel sync returns `429` after `sync_rate_limit` is - exceeded; batch sync returns `429` after `batch_rate_limit` is exceeded. +- **Rate limits:** Batch sync returns `429` after `batch_rate_limit` is exceeded. - **Pull sync no-op:** Partner returning `{ "uid": null }` produces no KV write and no error log. - All tests pass under `cargo test --workspace` with Viceroy. diff --git a/docs/superpowers/specs/2026-04-15-sourcepoint-gpp-consent-design.md b/docs/superpowers/specs/2026-04-15-sourcepoint-gpp-consent-design.md new file mode 100644 index 00000000..725857db --- /dev/null +++ b/docs/superpowers/specs/2026-04-15-sourcepoint-gpp-consent-design.md @@ -0,0 +1,171 @@ +# Sourcepoint GPP Consent for Edge Cookie Generation + +**Issue:** #640 +**Date:** 2026-04-15 +**Status:** Approved + +## Problem + +Edge Cookie (EC) generation fails for sites using Sourcepoint when consent is +stored only in `localStorage` and not surfaced via the standard cookies Trusted +Server reads. Sourcepoint stores US consent under `_sp_user_consent_*` keys in +`localStorage`, including a full GPP string and applicable section IDs. + +Today, Trusted Server only reads consent from `euconsent-v2`, `__gpp`, +`__gpp_sid`, `us_privacy` cookies and the `Sec-GPC` header. Even if `__gpp` / +`__gpp_sid` were present, the server only decodes the EU TCF v2 section from +GPP — it does not use GPP US sections as a consent signal for EC gating. + +This creates two gaps: + +1. **Transport gap:** The server cannot read browser `localStorage`, so no + consent reaches the backend unless client code mirrors it into cookies. +2. **Semantics gap:** Even with `__gpp` / `__gpp_sid` cookies present, current + US-state EC gating does not recognize GPP US sections as valid consent. + +## Approach + +Thin GPP pass-through: mirror Sourcepoint localStorage consent into standard +cookies on the client, and extend server-side EC gating to recognize GPP US +`sale_opt_out` as a consent signal. 
No compatibility bridge (`us_privacy` +derivation) — both client and server changes ship together. + +## Design + +### 1. Client-side: Sourcepoint JS integration + +New JS-only integration at `crates/js/lib/src/integrations/sourcepoint/index.ts`. +No Rust-side `IntegrationRegistration` (same pattern as `creative`). + +**On page load:** + +1. Scan `localStorage` keys matching `_sp_user_consent_*`. +2. Take the first valid match, parse the JSON value. +3. Extract `gppData.gppString` and `gppData.applicableSections` from the payload. +4. Write first-party cookies: + - `__gpp=<gppString>` (path `/`, `SameSite=Lax`) + - `__gpp_sid=<applicableSections>` (path `/`, `SameSite=Lax`) + - `_ts_gpp_src=sp` marker (path `/`, `SameSite=Lax`) +5. Log what was written for debugging. + +Cookies are session-scoped (no `max-age` / `expires`) since the source of truth +stays in `localStorage` and we re-mirror on each page load. The marker cookie +tracks Trusted Server's Sourcepoint-owned writes so the integration only clears +`__gpp` / `__gpp_sid` values that it previously mirrored; this avoids clobbering +cookies written by other CMPs. This design assumes a single active Sourcepoint +property per page; if multiple `_sp_user_consent_*` entries coexist, the first +valid one wins. The integration runs immediately, performs bounded first-load +retries, and re-mirrors on page focus/visibility refresh so session cookies do +not remain stale after mid-session consent updates. + +### 2.
Server-side: GPP US section decoding + +**`crates/trusted-server-core/src/consent/types.rs`** — extend `GppConsent`: + +```rust +pub struct GppConsent { + pub version: u8, + pub section_ids: Vec, + pub eu_tcf: Option, + pub us_sale_opt_out: Option<bool>, // new +} +``` + +- `Some(true)` — a US section is present and `sale_opt_out == OptedOut` +- `Some(false)` — a US section is present and `sale_opt_out != OptedOut` +- `None` — no US section exists in the GPP string + +**`crates/trusted-server-core/src/consent/gpp.rs`** — add `decode_us_sale_opt_out`: + +Checks for any US section ID (7–23) in the parsed `GPPString`. For the first +match, decodes the section via `iab_gpp` and extracts `sale_opt_out`. Maps +`OptOut::OptedOut` to `true`, everything else to `false`. + +The `iab_gpp` crate uses different structs per state (`UsNat`, `UsCa`, `UsTn`, +etc.) but they all have `sale_opt_out: OptOut` via `us_common`. We match on the +decoded `Section` enum to extract it. + +### 3. Server-side: EC gating update + +**`crates/trusted-server-core/src/consent/mod.rs`** — update `allows_ec_creation()` +for `Jurisdiction::UsState(_)`. + +New precedence chain: + +``` +GPC → TCF → GPP US sale_opt_out → us_privacy → fail-closed +``` + +Insert between the existing TCF and `us_privacy` branches: + +```rust +// Check GPP US section for sale opt-out. +if let Some(gpp) = &ctx.gpp { + if let Some(opted_out) = gpp.us_sale_opt_out { + return !opted_out; + } +} +``` + +Semantics: + +- GPC still short-circuits at the top and blocks EC creation. +- TCF still takes priority for CMPs like Didomi. In US-state jurisdictions, an + effective TCF Purpose 1 signal is treated as the authoritative EC storage + consent decision and is evaluated before GPP US sale opt-out. +- GPP US `sale_opt_out != OptedOut` → EC allowed when no effective TCF signal is + present. +- GPP US `sale_opt_out == OptedOut` → EC blocked when no effective TCF signal is + present. +- No GPP US section → falls through to `us_privacy`.
+ +The TCF-before-GPP precedence is intentional rather than accidental: it preserves +existing CMP behavior where TCF Purpose 1 is the explicit storage/access signal +for the EC cookie itself. Publishers that need US-section-wins behavior should +raise that as a separate consent-policy configuration change. + +### 4. Files touched + +| File | Change | +|---|---| +| `crates/js/lib/src/integrations/sourcepoint/index.ts` | New — localStorage auto-discovery, cookie mirroring | +| `crates/js/lib/test/integrations/sourcepoint/index.test.ts` | New — Vitest tests | +| `crates/trusted-server-core/src/consent/types.rs` | Add `us_sale_opt_out: Option<bool>` to `GppConsent` | +| `crates/trusted-server-core/src/consent/gpp.rs` | Add US section decoding, extract `sale_opt_out` | +| `crates/trusted-server-core/src/consent/mod.rs` | Add GPP US branch in `allows_ec_creation()`, tests | + +No config changes and no new crate dependencies. `IntegrationRegistry` includes +`sourcepoint` in the JS-only always-shipped module list; the client-side marker +cookie prevents the always-shipped module from clearing or overwriting other +CMPs' GPP cookies. + +### 5. Testing + +**JS (Vitest):** + +- Mirrors `__gpp` and `__gpp_sid` from `_sp_user_consent_*` localStorage +- No cookies written when no `_sp_user_consent_*` key exists +- Graceful handling of malformed JSON in localStorage + +**Rust — EC gating (`consent/mod.rs`):** + +- EC allowed: US state + GPP `us_sale_opt_out = Some(false)` +- EC blocked: US state + GPP `us_sale_opt_out = Some(true)` +- EC blocked: GPC overrides permissive GPP US +- TCF takes priority over GPP US when both present +- GPP US takes priority over `us_privacy` when both present +- No GPP US section falls through to `us_privacy` +- No signals → fail-closed + +**Rust — GPP decoding (`consent/gpp.rs`):** + +- Extracts `us_sale_opt_out` from GPP string with UsNat section (ID 7) +- `us_sale_opt_out` is `None` when GPP has no US sections + +### 6.
Non-goals + +- No `us_privacy` compatibility bridge (skipped per decision) +- No richer US GPP field extraction (sharing, targeted advertising opt-outs) +- No publisher configuration for Sourcepoint property ID (auto-discovery) +- No Sourcepoint CMP API integration (localStorage-only approach) +- No consent-policy knob for making GPP US sale opt-out override TCF Purpose 1 diff --git a/fastly.toml b/fastly.toml index 9d6c0f26..2ea512a6 100644 --- a/fastly.toml +++ b/fastly.toml @@ -19,17 +19,32 @@ build = """ [local_server] address = "127.0.0.1:7676" - + [local_server.backends] [local_server.kv_stores] + [[local_server.kv_stores.counter_store]] + key = "placeholder" + data = "placeholder" + + [[local_server.kv_stores.opid_store]] + key = "placeholder" + data = "placeholder" + [[local_server.kv_stores.creative_store]] key = "placeholder" data = "placeholder" - [[local_server.kv_stores.consent_store]] + [[local_server.kv_stores.ec_identity_store]] key = "placeholder" data = "placeholder" + + # Pre-seeded test EC entry for local script testing (test-prebid-eids.sh). + # Matches the TEST_EC_ID used in that script. + [[local_server.kv_stores.ec_identity_store]] + key = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa.test01" + data = '{"v":1,"created":1700000000,"last_seen":1700000000,"consent":{"ok":true,"updated":1700000000},"geo":{"country":"US"}}' + [local_server.secret_stores] [[local_server.secret_stores.signing_keys]] key = "ts-2025-10-A" diff --git a/scripts/batch-sync.sh b/scripts/batch-sync.sh new file mode 100755 index 00000000..34d3ece1 --- /dev/null +++ b/scripts/batch-sync.sh @@ -0,0 +1,135 @@ +#!/usr/bin/env bash +# +# Call the Trusted Server EC batch sync endpoint with a single mapping. 
+# +# Examples: +# ./scripts/batch-sync.sh \ +# --base-url https://edge.example.com \ +# --api-key "$PARTNER_API_KEY" \ +# --ec-id "$EC_ID" \ +# --partner-uid "$PARTNER_UID" +# +# Environment fallbacks: +# TS_BASE_URL, PARTNER_API_KEY, EC_ID, PARTNER_UID, TIMESTAMP +# +set -euo pipefail + +usage() { + cat <<'EOF' +Usage: scripts/batch-sync.sh [options] + +Required, unless provided via environment: + --base-url URL Trusted Server base URL (env: TS_BASE_URL) + --api-key KEY Partner Bearer token (env: PARTNER_API_KEY) + --ec-id EC_ID Full EC ID: 64hex.6alnum (env: EC_ID) + --partner-uid UID Partner user ID to store (env: PARTNER_UID) + +Optional: + --timestamp SECONDS Unix timestamp (env: TIMESTAMP, default: now) + -h, --help Show this help + +Example: + scripts/batch-sync.sh \ + --base-url https://example.com \ + --api-key $PARTNER_API_KEY \ + --ec-id $EC_ID \ + --partner-uid $PARTNER_UID +EOF +} + +BASE_URL="${TS_BASE_URL:-}" +API_KEY="${PARTNER_API_KEY:-}" +EC_ID="${EC_ID:-}" +PARTNER_UID="${PARTNER_UID:-}" +TIMESTAMP="${TIMESTAMP:-$(date +%s)}" + +while [ "$#" -gt 0 ]; do + case "$1" in + --base-url) + BASE_URL="${2:-}" + shift 2 + ;; + --api-key) + API_KEY="${2:-}" + shift 2 + ;; + --ec-id) + EC_ID="${2:-}" + shift 2 + ;; + --partner-uid) + PARTNER_UID="${2:-}" + shift 2 + ;; + --timestamp) + TIMESTAMP="${2:-}" + shift 2 + ;; + -h|--help) + usage + exit 0 + ;; + *) + echo "Unknown argument: $1" >&2 + usage >&2 + exit 2 + ;; + esac +done + +missing=() +[ -n "$BASE_URL" ] || missing+=("--base-url") +[ -n "$API_KEY" ] || missing+=("--api-key") +[ -n "$EC_ID" ] || missing+=("--ec-id") +[ -n "$PARTNER_UID" ] || missing+=("--partner-uid") + +if [ "${#missing[@]}" -gt 0 ]; then + echo "Missing required option(s): ${missing[*]}" >&2 + usage >&2 + exit 2 +fi + +ENDPOINT="${BASE_URL%/}/_ts/api/v1/batch-sync" +BODY="$(python3 - "$EC_ID" "$PARTNER_UID" "$TIMESTAMP" <<'PY' +import json +import sys + +ec_id, partner_uid, timestamp = sys.argv[1:] +try: + timestamp = int(timestamp) +except
ValueError: + print("timestamp must be an integer Unix timestamp", file=sys.stderr) + sys.exit(2) + +print(json.dumps({ + "mappings": [ + { + "ec_id": ec_id, + "partner_uid": partner_uid, + "timestamp": timestamp, + } + ] +})) +PY +)" + +RESPONSE_FILE="$(mktemp)" +trap 'rm -f "$RESPONSE_FILE"' EXIT + +echo "POST $ENDPOINT" >&2 +HTTP_STATUS="$(curl -sS \ + -o "$RESPONSE_FILE" \ + -w "%{http_code}" \ + -X POST "$ENDPOINT" \ + -H "Authorization: Bearer ${API_KEY}" \ + -H "Content-Type: application/json" \ + -d "$BODY")" + +cat "$RESPONSE_FILE" +echo + +echo "HTTP $HTTP_STATUS" >&2 +case "$HTTP_STATUS" in + 2*) exit 0 ;; + *) exit 1 ;; +esac diff --git a/scripts/integration-tests-browser.sh b/scripts/integration-tests-browser.sh index 888adb13..46555fcf 100755 --- a/scripts/integration-tests-browser.sh +++ b/scripts/integration-tests-browser.sh @@ -32,7 +32,7 @@ echo "==> Validating shared integration-test dependency versions..." echo "==> Building WASM binary (origin=http://127.0.0.1:$ORIGIN_PORT)..." TRUSTED_SERVER__PUBLISHER__ORIGIN_URL="http://127.0.0.1:$ORIGIN_PORT" \ TRUSTED_SERVER__PUBLISHER__PROXY_SECRET="integration-test-proxy-secret" \ -TRUSTED_SERVER__EDGE_COOKIE__SECRET_KEY="integration-test-secret-key" \ +TRUSTED_SERVER__EC__PASSPHRASE="integration-test-ec-secret-padded-32" \ TRUSTED_SERVER__PROXY__CERTIFICATE_CHECK=false \ cargo build --package trusted-server-adapter-fastly --release --target wasm32-wasip1 diff --git a/scripts/integration-tests.sh b/scripts/integration-tests.sh index 3b9ec974..6f40f62b 100755 --- a/scripts/integration-tests.sh +++ b/scripts/integration-tests.sh @@ -53,7 +53,7 @@ fi echo "==> Building WASM binary (origin=http://127.0.0.1:$ORIGIN_PORT)..." 
TRUSTED_SERVER__PUBLISHER__ORIGIN_URL="http://127.0.0.1:$ORIGIN_PORT" \ TRUSTED_SERVER__PUBLISHER__PROXY_SECRET="integration-test-proxy-secret" \ -TRUSTED_SERVER__EDGE_COOKIE__SECRET_KEY="integration-test-secret-key" \ +TRUSTED_SERVER__EC__PASSPHRASE="integration-test-ec-secret-padded-32" \ TRUSTED_SERVER__PROXY__CERTIFICATE_CHECK=false \ cargo build --package trusted-server-adapter-fastly --release --target wasm32-wasip1 diff --git a/trusted-server.toml b/trusted-server.toml index f57e9146..7b5d5a87 100644 --- a/trusted-server.toml +++ b/trusted-server.toml @@ -4,9 +4,9 @@ username = "user" password = "pass" [[handlers]] -path = "^/admin" +path = "^/_ts/admin" username = "admin" -password = "changeme" +password = "replace-with-admin-password-32-bytes" [publisher] domain = "test-publisher.com" @@ -14,8 +14,45 @@ cookie_domain = ".test-publisher.com" origin_url = "https://origin.test-publisher.com" proxy_secret = "change-me-proxy-secret" -[edge_cookie] -secret_key = "trusted-server" +[ec] +passphrase = "local-dev-passphrase-32-bytes-min" +ec_store = "ec_id_store" +pull_sync_concurrency = 3 +# cluster_trust_threshold = 10 # Entries with cluster_size <= this are individual users +# cluster_recheck_secs = 3600 # Re-evaluate cluster_size after this many seconds + +# [[ec.partners]] +# id = "liveramp" +# name = "LiveRamp" +# source_domain = "liveramp.com" +# openrtb_atype = 3 +# bidstream_enabled = true +# api_token = "partner-api-token-32-bytes-minimum" +# batch_rate_limit = 60 +# pull_sync_enabled = false + +[[ec.partners]] +id = "sharedid" +name = "Prebid SharedID" +source_domain = "sharedid.org" +openrtb_atype = 1 +bidstream_enabled = true +api_token = "sharedid-internal-token-32-bytes" + +# Integration test partners (used by crates/integration-tests) +[[ec.partners]] +id = "inttest" +name = "Integration Test Partner" +source_domain = "inttest.example.com" +bidstream_enabled = true +api_token = "inttest-api-key-1-32-bytes-minimum" + +[[ec.partners]] +id = "inttest2" 
+name = "Integration Test Partner 2" +source_domain = "inttest2.example.com" +bidstream_enabled = true +api_token = "inttest2-api-key-2-32-bytes-minimum" # Custom headers to be included in every response # Allows publishers to include tags such as X-Robots-Tag: noindex @@ -149,8 +186,9 @@ rewrite_script = true # mode = "restrictive" # "restrictive" | "newest" | "permissive" # freshness_threshold_days = 30 -# KV Store consent persistence (requires a KV store named "consent_store" in fastly.toml) -# consent_store = "consent_store" +# Consent is interpreted from request cookies, headers, geolocation, and these +# policy settings. EC identity lifecycle state and withdrawal tombstones are +# stored in the KV store configured by [ec].ec_store. # Rewrite configuration for creative HTML/CSS processing # [rewrite]