leocamello · leocamello · Feb 14, 2026 · Feb 14, 2026 · Feb 14, 2026 · Feb 14, 2026
diff --git a/.github/agents/copilot-instructions.md b/.github/agents/copilot-instructions.md
@@ -3,6 +3,8 @@
 Auto-generated from all feature plans. Last updated: 2026-02-14
 
 ## Active Technologies
+- Rust 1.75 (stable) + axum 0.7, tokio 1.x (full features), tracing 0.1, tracing-subscriber 0.3 (with json feature) (011-structured-logging)
+- N/A (in-memory only, stateless by design) (011-structured-logging)
 
 - Rust 1.75+ (stable toolchain) (010-web-dashboard)
 
@@ -23,6 +25,7 @@ cargo test [ONLY COMMANDS FOR ACTIVE TECHNOLOGIES][ONLY COMMANDS FOR ACTIVE TECH
 Rust 1.75+ (stable toolchain): Follow standard conventions
 
 ## Recent Changes
+- 011-structured-logging: Added Rust 1.75 (stable) + axum 0.7, tokio 1.x (full features), tracing 0.1, tracing-subscriber 0.3 (with json feature)
 
 - 010-web-dashboard: Added Rust 1.75+ (stable toolchain)
 

diff --git a/README.md b/README.md
@@ -27,6 +27,7 @@ Nexus is a distributed LLM model serving orchestrator that unifies heterogeneous
 - 🔄 **Transparent Failover**: Automatically retries with fallback backends
 - 🔌 **OpenAI-Compatible**: Works with any OpenAI API client
 - ⚡ **Zero Config**: Just run it - works out of the box with Ollama
+- 📊 **Structured Logging**: Queryable JSON logs for every request with correlation IDs ([quickstart](specs/011-structured-logging/quickstart.md))
 
 ## Supported Backends
 

diff --git a/nexus.example.toml b/nexus.example.toml
@@ -68,6 +68,22 @@ priority = 1
 # api_key_env = "OPENAI_API_KEY"
 
 [logging]
+# Global log level: trace | debug | info | warn | error
 level = "info"
-# format: pretty | json
+
+# Log format: pretty (human-readable) | json (for log aggregators)
+# Use json for production with ELK, Loki, Splunk, CloudWatch, etc.
 format = "pretty"
+
+# Component-specific log levels (optional)
+# Useful for debugging specific parts of the system without noise
+# [logging.component_levels]
+# routing = "debug"    # Detailed routing decisions
+# api = "info"         # API request handling
+# health = "warn"      # Health check results
+
+# Content logging (opt-in, defaults to false)
+# WARNING: When true, request message content will be logged (privacy risk!)
+# Only enable for local debugging. Never in production with user data.
+# enable_content_logging = false
+
diff --git a/specs/011-structured-logging/checklists/requirements.md b/specs/011-structured-logging/checklists/requirements.md
@@ -0,0 +1,119 @@
+# Specification Quality Checklist: Structured Request Logging
+
+**Purpose**: Validate specification completeness and quality before proceeding to planning  
+**Created**: 2026-02-14  
+**Feature**: [spec.md](../spec.md)
+
+## Content Quality
+
+- [x] No implementation details (languages, frameworks, APIs)
+- [x] Focused on user value and business needs
+- [x] Written for non-technical stakeholders
+- [x] All mandatory sections completed
+
+## Requirement Completeness
+
+- [x] No [NEEDS CLARIFICATION] markers remain
+- [x] Requirements are testable and unambiguous
+- [x] Success criteria are measurable
+- [x] Success criteria are technology-agnostic (no implementation details)
+- [x] All acceptance scenarios are defined
+- [x] Edge cases are identified
+- [x] Scope is clearly bounded
+- [x] Dependencies and assumptions identified
+
+## Feature Readiness
+
+- [x] All functional requirements have clear acceptance criteria
+- [x] User scenarios cover primary flows
+- [x] Feature meets measurable outcomes defined in Success Criteria
+- [x] No implementation details leak into specification
+
+## Validation Results
+
+### Content Quality Assessment
+
+✅ **No implementation details**: Spec avoids implementation specifics. Only mentions existing `tracing` crate as context (FR-011) which is appropriate since it states building on existing infrastructure. All requirements are implementation-agnostic.
+
+✅ **Focused on user value**: All user stories clearly articulate operator value - visibility, traceability, debugging, compliance, integration.
+
+✅ **Non-technical language**: Written for platform operators and stakeholders. Technical terms (JSON, correlation ID, log aggregators) are necessary domain concepts, not implementation details.
+
+✅ **Mandatory sections complete**: All required sections present with comprehensive content.
+
+### Requirement Completeness Assessment
+
+✅ **No clarification markers**: Spec is fully specified with no [NEEDS CLARIFICATION] markers. All details are concrete and actionable.
+
+✅ **Testable requirements**: All 15 functional requirements are testable:
+- FR-001: Can verify log entry exists for each request
+- FR-002: Can verify unique ID assigned and persists
+- FR-003-005: Can verify all required fields are present
+- FR-006-007: Can verify configuration options work
+- FR-008-009: Can verify content is/isn't logged based on config
+- FR-010: Can verify non-blocking behavior
+- FR-011-015: Can verify technical constraints met
+
+✅ **Measurable success criteria**: All 10 success criteria have concrete metrics:
+- SC-001: 100% coverage
+- SC-002: 10 seconds to search
+- SC-003: 1ms accuracy
+- SC-004: Zero instances
+- SC-005: 95% auto-indexed
+- SC-006: 5 seconds to take effect
+- SC-007: 10,000 req/min with <1ms overhead
+- SC-008: 60-80% volume reduction
+- SC-009: 90% issue diagnosis success
+- SC-010: Sub-second query times
+
+✅ **Technology-agnostic success criteria**: Success criteria focus on user outcomes (search time, accuracy, log volume, diagnosis success) rather than implementation. Only SC-005 mentions specific tools (ELK, Loki) but as integration targets, not implementation details.
+
+✅ **Acceptance scenarios defined**: All 6 user stories have detailed acceptance scenarios with Given/When/Then format covering happy path, error cases, and variations.
+
+✅ **Edge cases identified**: Comprehensive edge cases covering:
+- Logging system failures
+- Long-running streams
+- Request ID collisions
+- No backend available
+- Malformed requests before routing
+
+✅ **Scope clearly bounded**: 
+- Explicitly states logs are emitted, not stored (references Principle VIII)
+- Explicitly excludes response body content (references Principle III)
+- Focuses on request logging, not general application logging
+- Clear priority levels (P1-P3) define what's essential vs. enhancement
+
+✅ **Dependencies identified**: 
+- References existing tracing infrastructure
+- References existing LoggingConfig in src/config/mod.rs
+- References completions handler and routing logic locations
+- Aligns with Constitution principles III, VIII, X
+
+### Feature Readiness Assessment
+
+✅ **Acceptance criteria for all requirements**: Each functional requirement maps to acceptance scenarios in user stories:
+- FR-001-005 (core logging): User Story 1
+- FR-002, retry tracking: User Story 2
+- FR-005 (route_reason): User Story 3
+- FR-008-009 (privacy): User Story 4
+- FR-006-007 (configuration): User Stories 5-6
+
+✅ **User scenarios cover primary flows**: 
+- P1 stories (1-2) cover essential functionality: basic logging and correlation
+- P2 stories (3-4) cover important but non-critical: routing visibility and privacy
+- P3 stories (5-6) cover enhancements: component-level config and aggregator integration
+- Proper prioritization enables incremental delivery
+
+✅ **Measurable outcomes defined**: All 10 success criteria are concrete and verifiable with specific metrics and thresholds.
+
+✅ **No implementation leakage**: Spec maintains clean separation between requirements (what) and implementation (how). Mentions of existing codebase are contextual, not prescriptive.
+
+## Notes
+
+- **Specification Status**: ✅ READY FOR PLANNING
+- **Quality Score**: 16/16 items passed (100%)
+- **Recommendations**: 
+  - Spec is comprehensive and well-structured
+  - Ready to proceed to `/speckit.plan` phase
+  - No clarifications needed from stakeholders
+  - Consider starting with P1 user stories for MVP
diff --git a/specs/011-structured-logging/contracts/log-schema.json b/specs/011-structured-logging/contracts/log-schema.json
@@ -0,0 +1,203 @@
+{
+  "$schema": "http://json-schema.org/draft-07/schema#",
+  "$id": "https://nexus.dev/schemas/request-log-entry.json",
+  "title": "Nexus Request Log Entry",
+  "description": "Structured log entry schema for Nexus request logging. All fields are emitted as part of tracing spans in JSON format.",
+  "type": "object",
+  "required": [
+    "timestamp",
+    "level",
+    "target",
+    "request_id",
+    "model",
+    "status",
+    "latency_ms",
+    "stream",
+    "retry_count"
+  ],
+  "properties": {
+    "timestamp": {
+      "type": "string",
+      "format": "date-time",
+      "description": "Request start time in RFC3339 format (UTC timezone)",
+      "example": "2024-01-15T14:32:01.234Z"
+    },
+    "level": {
+      "type": "string",
+      "enum": ["TRACE", "DEBUG", "INFO", "WARN", "ERROR"],
+      "description": "Log level severity"
+    },
+    "target": {
+      "type": "string",
+      "description": "Module path where log originated (e.g., nexus::api::completions)",
+      "pattern": "^[a-z_][a-z0-9_]*(::[a-z_][a-z0-9_]*)*$",
+      "example": "nexus::api::completions"
+    },
+    "request_id": {
+      "type": "string",
+      "format": "uuid",
+      "description": "Unique correlation ID for request (UUID v4), persists across retries",
+      "example": "550e8400-e29b-41d4-a716-446655440000"
+    },
+    "model": {
+      "type": "string",
+      "minLength": 1,
+      "maxLength": 128,
+      "description": "Requested model name (may be alias)",
+      "example": "gpt-4"
+    },
+    "actual_model": {
+      "type": "string",
+      "minLength": 1,
+      "maxLength": 128,
+      "description": "Resolved model name after alias/fallback resolution (omitted if same as model)",
+      "example": "llama3:70b"
+    },
+    "backend": {
+      "type": "string",
+      "description": "Selected backend identifier, or 'none' if routing failed",
+      "example": "ollama-local"
+    },
+    "backend_type": {
+      "type": "string",
+      "enum": ["local", "cloud", "unknown"],
+      "description": "Backend category"
+    },
+    "status": {
+      "type": "string",
+      "enum": ["received", "routing", "success", "error", "retry", "fallback", "exhausted", "timeout"],
+      "description": "Request outcome status"
+    },
+    "status_code": {
+      "type": "integer",
+      "minimum": 100,
+      "maximum": 599,
+      "description": "HTTP status code",
+      "example": 200
+    },
+    "error_message": {
+      "type": "string",
+      "description": "Error description if status indicates failure",
+      "example": "Backend timeout after 30s"
+    },
+    "latency_ms": {
+      "type": "integer",
+      "minimum": 0,
+      "maximum": 300000,
+      "description": "Total request duration in milliseconds",
+      "example": 1234
+    },
+    "tokens_prompt": {
+      "type": "integer",
+      "minimum": 0,
+      "maximum": 1000000,
+      "description": "Input token count from backend response",
+      "example": 150
+    },
+    "tokens_completion": {
+      "type": "integer",
+      "minimum": 0,
+      "maximum": 1000000,
+      "description": "Output token count from backend response",
+      "example": 85
+    },
+    "tokens_total": {
+      "type": "integer",
+      "minimum": 0,
+      "maximum": 2000000,
+      "description": "Total tokens used (prompt + completion)",
+      "example": 235
+    },
+    "stream": {
+      "type": "boolean",
+      "description": "Whether request uses streaming mode",
+      "example": false
+    },
+    "route_reason": {
+      "type": "string",
+      "description": "Explanation of backend selection decision",
+      "examples": [
+        "highest_score:0.95",
+        "round_robin:index_3",
+        "fallback:primary_unhealthy",
+        "only_healthy_backend"
+      ]
+    },
+    "retry_count": {
+      "type": "integer",
+      "minimum": 0,
+      "maximum": 10,
+      "description": "Number of retry attempts (0 for first attempt)",
+      "example": 0
+    },
+    "fallback_chain": {
+      "type": "string",
+      "description": "Comma-separated list of backends attempted in order, empty if no fallbacks",
+      "example": "ollama-local,vllm-remote"
+    },
+    "prompt_preview": {
+      "type": "string",
+      "maxLength": 100,
+      "description": "First 100 characters of prompt (only when enable_content_logging=true)",
+      "example": "You are a helpful assistant. Explain quantum computing in simple terms."
+    }
+  },
+  "examples": [
+    {
+      "timestamp": "2024-01-15T14:32:01.234Z",
+      "level": "INFO",
+      "target": "nexus::api::completions",
+      "request_id": "550e8400-e29b-41d4-a716-446655440000",
+      "model": "gpt-4",
+      "actual_model": "llama3:70b",
+      "backend": "ollama-local",
+      "backend_type": "local",
+      "status": "success",
+      "status_code": 200,
+      "latency_ms": 1234,
+      "tokens_prompt": 150,
+      "tokens_completion": 85,
+      "tokens_total": 235,
+      "stream": false,
+      "route_reason": "highest_score:0.95",
+      "retry_count": 0,
+      "fallback_chain": ""
+    },
+    {
+      "timestamp": "2024-01-15T14:32:05.678Z",
+      "level": "WARN",
+      "target": "nexus::api::completions",
+      "request_id": "550e8400-e29b-41d4-a716-446655440000",
+      "model": "gpt-4",
+      "backend": "vllm-remote",
+      "backend_type": "cloud",
+      "status": "success",
+      "status_code": 200,
+      "latency_ms": 5432,
+      "tokens_prompt": 150,
+      "tokens_completion": 85,
+      "tokens_total": 235,
+      "stream": false,
+      "route_reason": "fallback:primary_unhealthy",
+      "retry_count": 2,
+      "fallback_chain": "ollama-local,vllm-remote",
+      "error_message": "Primary backend timeout after 30s"
+    },
+    {
+      "timestamp": "2024-01-15T14:32:10.123Z",
+      "level": "ERROR",
+      "target": "nexus::api::completions",
+      "request_id": "550e8400-e29b-41d4-a716-446655440001",
+      "model": "unknown-model",
+      "backend": "none",
+      "status": "error",
+      "status_code": 404,
+      "latency_ms": 12,
+      "stream": false,
+      "retry_count": 0,
+      "fallback_chain": "",
+      "error_message": "Model 'unknown-model' not found. Available: llama3:70b, gpt-3.5-turbo"
+    }
+  ],
+  "additionalProperties": false
+}