diff --git a/Cargo.lock b/Cargo.lock
index e148a9e..35b717a 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -321,6 +321,15 @@ dependencies = [
  "strsim",
 ]
 
+[[package]]
+name = "clap_complete"
+version = "4.5.65"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "430b4dc2b5e3861848de79627b2bedc9f3342c7da5173a14eaa5d0f8dc18ae5d"
+dependencies = [
+ "clap",
+]
+
 [[package]]
 name = "clap_derive"
 version = "4.5.55"
@@ -345,6 +354,27 @@ version = "1.0.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75"
 
+[[package]]
+name = "colored"
+version = "2.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "117725a109d387c937a1533ce01b450cbde6b88abceea8473c4d7a85853cda3c"
+dependencies = [
+ "lazy_static",
+ "windows-sys 0.59.0",
+]
+
+[[package]]
+name = "comfy-table"
+version = "7.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "958c5d6ecf1f214b4c2bbbbf6ab9523a864bd136dcf71a7e8904799acfe1ad47"
+dependencies = [
+ "crossterm",
+ "unicode-segmentation",
+ "unicode-width",
+]
+
 [[package]]
 name = "concurrent-queue"
 version = "2.5.0"
@@ -354,6 +384,18 @@ dependencies = [
  "crossbeam-utils",
 ]
 
+[[package]]
+name = "config"
+version = "0.14.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "68578f196d2a33ff61b27fae256c3164f65e36382648e30666dde05b8cc9dfdf"
+dependencies = [
+ "nom",
+ "pathdiff",
+ "serde",
+ "toml",
+]
+
 [[package]]
 name = "core-foundation"
 version = "0.9.4"
@@ -376,6 +418,29 @@ version = "0.8.21"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
 
+[[package]]
+name = "crossterm"
+version = "0.29.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d8b9f2e4c67f833b660cdb0a3523065869fb35570177239812ed4c905aeff87b"
+dependencies = [
+ "bitflags 2.10.0",
+ "crossterm_winapi",
+ "document-features",
+ "parking_lot",
+ "rustix",
+ "winapi",
+]
+
+[[package]]
+name = "crossterm_winapi"
+version = "0.9.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "acdd7c62a3665c7f6830a51635d9ac9b23ed385797f70a83bb8bafe9c572ab2b"
+dependencies = [
+ "winapi",
+]
+
 [[package]]
 name = "dashmap"
 version = "6.1.0"
@@ -425,6 +490,15 @@ dependencies = [
  "syn",
 ]
 
+[[package]]
+name = "document-features"
+version = "0.2.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d4b8a88685455ed29a21542a33abd9cb6510b6b129abadabdcef0f4c55bc8f61"
+dependencies = [
+ "litrs",
+]
+
 [[package]]
 name = "encoding_rs"
 version = "0.8.35"
@@ -1001,6 +1075,12 @@ version = "0.8.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "6373607a59f0be73a39b6fe456b8192fcc3585f602af20751600e974dd455e77"
 
+[[package]]
+name = "litrs"
+version = "1.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "11d3d7f243d5c5a8b9bb5d6dd2b1602c0cb0b9db1621bafc7ed66e35ff9fe092"
+
 [[package]]
 name = "lock_api"
 version = "0.4.14"
@@ -1056,6 +1136,12 @@ version = "0.3.17"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a"
 
+[[package]]
+name = "minimal-lexical"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
+
 [[package]]
 name = "mio"
 version = "1.1.1"
@@ -1094,6 +1180,10 @@ dependencies = [
  "axum",
  "chrono",
  "clap",
+ "clap_complete",
+ "colored",
+ "comfy-table",
+ "config",
  "dashmap",
  "futures",
  "mdns-sd",
@@ -1102,6 +1192,7 @@ dependencies = [
  "reqwest",
  "serde",
  "serde_json",
+ "tempfile",
  "thiserror",
  "tokio",
  "tokio-test",
@@ -1115,6 +1206,16 @@ dependencies = [
  "wiremock",
 ]
 
+[[package]]
+name = "nom"
+version = "7.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
+dependencies = [
+ "memchr",
+ "minimal-lexical",
+]
+
 [[package]]
 name = "normalize-line-endings"
 version = "0.3.0"
@@ -1228,6 +1329,12 @@ dependencies = [
  "windows-link",
 ]
 
+[[package]]
+name = "pathdiff"
+version = "0.2.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "df94ce210e5bc13cb6651479fa48d14f601d9858cfe0467f43ae157023b938d3"
+
 [[package]]
 name = "percent-encoding"
 version = "2.3.2"
@@ -2167,6 +2274,18 @@ version = "1.0.22"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9312f7c4f6ff9069b165498234ce8be658059c6728633667c526e27dc2cf1df5"
 
+[[package]]
+name = "unicode-segmentation"
+version = "1.12.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493"
+
+[[package]]
+name = "unicode-width"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254"
+
 [[package]]
 name = "untrusted"
 version = "0.9.0"
@@ -2336,6 +2455,28 @@ dependencies = [
  "wasm-bindgen",
 ]
 
+[[package]]
+name = "winapi"
+version = "0.3.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
+dependencies = [
+ "winapi-i686-pc-windows-gnu",
+ "winapi-x86_64-pc-windows-gnu",
+]
+
+[[package]]
+name = "winapi-i686-pc-windows-gnu"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
+
+[[package]]
+name = "winapi-x86_64-pc-windows-gnu"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
+
 [[package]]
 name = "windows-core"
 version = "0.62.2"
diff --git a/Cargo.toml b/Cargo.toml
index e0bd012..8624f0c 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -39,6 +39,14 @@ tracing-subscriber = { version = "0.3", features = ["env-filter", "json"] }
 
 # CLI
 clap = { version = "4", features = ["derive", "env"] }
+clap_complete = "4"
+
+# Configuration
+config = { version = "0.14", default-features = false, features = ["toml"] }
+
+# Output formatting
+comfy-table = "7"
+colored = "2"
 
 # mDNS discovery
 mdns-sd = "0.11"
@@ -54,6 +62,7 @@ wiremock = "0.6"
 assert_cmd = "2"
 predicates = "3"
 proptest = "1"
+tempfile = "3"
 
 [profile.release]
 lto = true
diff --git a/README.md b/README.md
index e52234e..291a466 100644
--- a/README.md
+++ b/README.md
@@ -29,13 +29,60 @@ Nexus is a distributed LLM model serving orchestrator that unifies heterogeneous
 # Install (from source)
 cargo install --path .
 
+# Generate a configuration file
+nexus config init
+
 # Run with auto-discovery
 nexus serve
 
-# Or with a config file
+# Or with a custom config file
 nexus serve --config nexus.toml
 ```
 
+## CLI Commands
+
+```bash
+# Start the server
+nexus serve [--config FILE] [--port PORT] [--host HOST]
+
+# List backends
+nexus backends list [--json] [--status healthy|unhealthy|unknown]
+
+# Add a backend manually (auto-detects type)
+nexus backends add http://localhost:11434 [--name NAME] [--type ollama|vllm|llamacpp]
+
+# Remove a backend
+nexus backends remove <ID>
+
+# List available models
+nexus models [--json] [--backend ID]
+
+# Show system health
+nexus health [--json]
+
+# Generate config file
+nexus config init [--output FILE] [--force] [--minimal]
+
+# Generate shell completions
+nexus completions bash > ~/.bash_completion.d/nexus
+nexus completions zsh > ~/.zsh/completions/_nexus
+nexus completions fish > ~/.config/fish/completions/nexus.fish
+```
+
+## Environment Variables
+
+| Variable | Description | Default |
+|----------|-------------|---------|
+| `NEXUS_CONFIG` | Config file path | `nexus.toml` |
+| `NEXUS_PORT` | Listen port | `8000` |
+| `NEXUS_HOST` | Listen address | `0.0.0.0` |
+| `NEXUS_LOG_LEVEL` | Log level (trace/debug/info/warn/error) | `info` |
+| `NEXUS_LOG_FORMAT` | Log format (pretty/json) | `pretty` |
+| `NEXUS_DISCOVERY` | Enable mDNS discovery | `true` |
+| `NEXUS_HEALTH_CHECK` | Enable health checking | `true` |
+
+**Precedence**: CLI args > Environment variables > Config file > Defaults
+
 ## Usage
 
 Once running, Nexus exposes an OpenAI-compatible API:
diff --git a/specs/002-health-checker/walkthrough.md b/specs/002-health-checker/walkthrough.md
index c3c019b..584c00b 100644
--- a/specs/002-health-checker/walkthrough.md
+++ b/specs/002-health-checker/walkthrough.md
@@ -1,229 +1,220 @@
-# Code Walkthrough: Health Checker
+# Health Checker - Code Walkthrough
 
-**Feature**: F03 - Health Checker (002-health-checker)  
-**Author**: Nexus Development Team  
-**Date**: 2026-02-01  
-**Audience**: Developers new to the Nexus codebase
+**Feature**: F03 - Health Checker  
+**Audience**: Junior developers joining the project  
+**Last Updated**: 2026-02-01
 
-## Overview
+---
 
-The Health Checker is a background service that periodically checks the health of all registered backends and updates the Registry with their status and available models. It's the primary mechanism for keeping backend health information accurate.
+## Table of Contents
 
-### What You'll Learn
+1. [The Big Picture](#the-big-picture)
+2. [File Structure](#file-structure)
+3. [File 1: config.rs - The Settings](#file-1-configrs---the-settings)
+4. [File 2: error.rs - What Can Go Wrong](#file-2-errorrs---what-can-go-wrong)
+5. [File 3: state.rs - Tracking Each Backend](#file-3-staters---tracking-each-backend)
+6. [File 4: parser.rs - Understanding Backend Responses](#file-4-parserrs---understanding-backend-responses)
+7. [File 5: mod.rs - The Main Logic](#file-5-modrs---the-main-logic)
+8. [Understanding the Tests](#understanding-the-tests)
+9. [Key Rust Concepts](#key-rust-concepts)
+10. [Common Patterns in This Module](#common-patterns-in-this-module)
 
-1. How the health check loop works
-2. How different backend types are handled
-3. How status transitions use thresholds to prevent flapping
-4. How model discovery works automatically
-5. How to test async background services
+---
+
+## The Big Picture
+
+Think of the Health Checker as a **doctor making rounds**. Every 30 seconds, it visits each backend server, checks if it's healthy, and updates its medical chart (the Registry).
+
+### Why Do We Need This?
+
+Imagine you have 5 AI servers. One crashes. Without a health checker:
+- Users would keep getting errors
+- No one would know until someone complains
+
+With a health checker:
+- Nexus detects the crash within 30 seconds
+- Requests automatically route to healthy servers
+- You can sleep at night!
+
+### How It Fits in Nexus
+
+```
+┌─────────────────────────────────────────────────────────────────┐
+│                         Nexus                                   │
+│                                                                 │
+│  ┌──────────┐     ┌──────────┐     ┌──────────────────────┐    │
+│  │   API    │────▶│  Router  │────▶│  Backend Registry    │    │
+│  │ Gateway  │     │          │     │                      │    │
+│  └──────────┘     └──────────┘     └──────────────────────┘    │
+│                                              ▲                  │
+│                                              │ updates          │
+│                                    ┌─────────┴────────┐         │
+│                                    │  Health Checker  │         │
+│                                    │  (you are here!) │         │
+│                                    └──────────────────┘         │
+└─────────────────────────────────────────────────────────────────┘
+```
+
+The Health Checker runs in the background, periodically asking each backend "Are you alive?" and updating the Registry with what it finds.
+
+---
 
 ## File Structure
 
 ```
 src/health/
-├── mod.rs      (270 lines) - Main HealthChecker struct and background loop
-├── config.rs   (31 lines)  - Configuration with defaults
-├── error.rs    (31 lines)  - Error types for health checks
-├── state.rs    (85 lines)  - Per-backend tracking state
-├── parser.rs   (91 lines)  - Response parsers for Ollama/OpenAI/LlamaCpp
-└── tests.rs    (414 lines) - Comprehensive unit tests
+├── mod.rs      (270 lines) - Main HealthChecker struct and loop
+├── config.rs   (31 lines)  - Configuration with sensible defaults
+├── error.rs    (31 lines)  - Error types (timeout, connection, etc.)
+├── state.rs    (85 lines)  - Per-backend state (failures, models)
+├── parser.rs   (91 lines)  - Parse Ollama/OpenAI/LlamaCpp responses
+└── tests.rs    (414 lines) - 40 unit tests
 
 tests/
-└── health_integration.rs (279 lines) - Integration tests with mock servers
+└── health_integration.rs (279 lines) - 6 tests with mock HTTP servers
 ```
 
-## Core Concepts
+---
 
-### 1. The HealthChecker Struct
+## File 1: config.rs - The Settings
 
 ```rust
-// src/health/mod.rs
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[serde(default)]
+pub struct HealthCheckConfig {
+    pub enabled: bool,           // Turn health checking on/off
+    pub interval_seconds: u64,   // How often to check (default: 30)
+    pub timeout_seconds: u64,    // Max wait for response (default: 5)
+    pub failure_threshold: u32,  // Failures before "unhealthy" (default: 3)
+    pub recovery_threshold: u32, // Successes before "healthy" (default: 2)
+}
 
-pub struct HealthChecker {
-    /// Reference to the backend registry (shared, read-only)
-    registry: Arc<Registry>,
-    
-    /// HTTP client with connection pooling
-    client: reqwest::Client,
-    
-    /// Health check configuration
-    config: HealthCheckConfig,
-    
-    /// Per-backend health tracking state
-    state: DashMap<String, BackendHealthState>,
+impl Default for HealthCheckConfig {
+    fn default() -> Self {
+        Self {
+            enabled: true,
+            interval_seconds: 30,
+            timeout_seconds: 5,
+            failure_threshold: 3,
+            recovery_threshold: 2,
+        }
+    }
 }
 ```
 
-**Key Design Decisions**:
-
-1. **`Arc<Registry>`**: The checker doesn't own the registry—it shares it. This allows multiple components (API, Router, Health Checker) to access the same registry concurrently.
-
-2. **`reqwest::Client`**: HTTP client with connection pooling. Creating one client and reusing it for all requests is more efficient than creating a new client per request.
-
-3. **`DashMap<String, BackendHealthState>`**: Tracks per-backend state (consecutive failures, last models). Uses DashMap for thread-safe access without external locking.
+**Breaking it down:**
 
-### 2. Configuration
+| Field | Default | Why This Value? |
+|-------|---------|-----------------|
+| `enabled` | `true` | Health checking should be on by default |
+| `interval_seconds` | `30` | Often enough to detect problems, not so often we overwhelm backends |
+| `timeout_seconds` | `5` | Long enough for slow backends, short enough to fail fast |
+| `failure_threshold` | `3` | Don't panic on one hiccup - require 3 failures |
+| `recovery_threshold` | `2` | Make sure recovery is real, not a fluke |
 
-```rust
-// src/health/config.rs
+**What `#[serde(default)]` means:**
 
-#[derive(Debug, Clone, Serialize, Deserialize)]
-#[serde(default)]  // Use defaults for missing fields
-pub struct HealthCheckConfig {
-    pub enabled: bool,           // Default: true
-    pub interval_seconds: u64,   // Default: 30
-    pub timeout_seconds: u64,    // Default: 5
-    pub failure_threshold: u32,  // Default: 3
-    pub recovery_threshold: u32, // Default: 2
-}
+When parsing TOML like this:
+```toml
+[health_check]
+interval_seconds = 60
 ```
 
-**Why These Defaults?**
+Missing fields (like `timeout_seconds`) use their default values instead of causing an error.
 
-- **30 seconds**: Often enough to detect failures, rare enough to not overwhelm backends
-- **5 second timeout**: Long enough for slow backends, short enough to fail fast
-- **3 failures before unhealthy**: Prevents marking healthy backends as failed due to single network glitches
-- **2 successes before healthy**: Confirms recovery isn't a fluke
+---
 
-### 3. Error Classification
+## File 2: error.rs - What Can Go Wrong
 
 ```rust
-// src/health/error.rs
-
-#[derive(Debug, Clone, Error)]
+#[derive(Debug, Clone, thiserror::Error)]
 pub enum HealthCheckError {
     #[error("request timeout after {0}s")]
     Timeout(u64),
-    
+
     #[error("connection failed: {0}")]
     ConnectionFailed(String),
-    
+
     #[error("DNS resolution failed: {0}")]
     DnsError(String),
-    
+
     #[error("TLS certificate error: {0}")]
     TlsError(String),
-    
+
     #[error("HTTP error: {0}")]
     HttpError(u16),
-    
+
     #[error("invalid response: {0}")]
     ParseError(String),
 }
 ```
 
-**Why Classify Errors?**
+**Breaking it down:**
 
-Different errors may require different responses:
-- **Timeout**: Backend is slow or overloaded
-- **ConnectionFailed**: Network issue or backend down
-- **HttpError(503)**: Backend is explicitly unhealthy
-- **ParseError**: Backend responded but format is wrong
+| Error | When It Happens | Example |
+|-------|-----------------|---------|
+| `Timeout(5)` | Backend didn't respond in time | Server is overloaded |
+| `ConnectionFailed(...)` | Couldn't connect at all | Server is down, port closed |
+| `DnsError(...)` | Hostname doesn't resolve | `ollama.local` not found |
+| `TlsError(...)` | SSL/TLS problem | Expired certificate |
+| `HttpError(503)` | Backend returned error status | Server says it's unhealthy |
+| `ParseError(...)` | Response is garbage | Backend returned HTML instead of JSON |
 
-## How It Works
+**Key Concept - thiserror:**
 
-### The Health Check Loop
+The `#[error("...")]` attribute automatically implements `Display` for each variant. So `HealthCheckError::Timeout(5).to_string()` gives you `"request timeout after 5s"`.
 
-```rust
-// src/health/mod.rs
+---
 
-pub fn start(self, cancel_token: CancellationToken) -> JoinHandle<()> {
-    tokio::spawn(async move {
-        let mut interval = tokio::time::interval(
-            Duration::from_secs(self.config.interval_seconds)
-        );
-        interval.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Skip);
+## File 3: state.rs - Tracking Each Backend
 
-        loop {
-            tokio::select! {
-                _ = cancel_token.cancelled() => {
-                    tracing::info!("Health checker shutting down");
-                    break;
-                }
-                _ = interval.tick() => {
-                    self.check_all_backends().await;
-                }
-            }
-        }
-    })
-}
-```
-
-**Understanding `tokio::select!`**:
-
-This is Tokio's way of waiting for multiple async events. Whichever happens first wins:
-- If `cancel_token.cancelled()` completes → break the loop (graceful shutdown)
-- If `interval.tick()` completes → run health checks
-
-**Why `set_missed_tick_behavior(Skip)`?**
-
-If a health check takes longer than the interval, we don't want to "catch up" by running multiple checks back-to-back. Skip means: if we miss a tick, just wait for the next one.
-
-### Backend-Specific Endpoints
+### Part 1: The State Struct
 
 ```rust
-// src/health/mod.rs
-
-pub fn get_health_endpoint(backend_type: BackendType) -> &'static str {
-    match backend_type {
-        BackendType::Ollama => "/api/tags",
-        BackendType::LlamaCpp => "/health",
-        BackendType::VLLM | BackendType::Exo | 
-        BackendType::OpenAI | BackendType::Generic => "/v1/models",
-    }
+#[derive(Debug, Clone)]
+pub struct BackendHealthState {
+    pub consecutive_failures: u32,    // How many failures in a row?
+    pub consecutive_successes: u32,   // How many successes in a row?
+    pub last_check_time: Option<DateTime<Utc>>,  // When was last check?
+    pub last_status: BackendStatus,   // What's the current status?
+    pub last_models: Vec<Model>,      // What models did we last see?
 }
 ```
 
-**Why Different Endpoints?**
+**Why track consecutive counts?**
 
-Each backend type has its own API:
-- **Ollama**: Returns models at `/api/tags` in its own format
-- **LlamaCpp**: Simple health check at `/health`, no model list
-- **OpenAI-compatible**: Standard `/v1/models` endpoint
+Imagine a backend that's flaky - sometimes it responds, sometimes it doesn't:
 
-### Response Parsing
-
-```rust
-// src/health/parser.rs
+```
+Check 1: ✓ Success
+Check 2: ✗ Failure   (consecutive_failures = 1)
+Check 3: ✓ Success   (consecutive_failures reset to 0!)
+Check 4: ✗ Failure   (consecutive_failures = 1)
+Check 5: ✗ Failure   (consecutive_failures = 2)
+Check 6: ✓ Success   (consecutive_failures reset to 0!)
+```
 
-pub fn parse_ollama_response(body: &str) -> Result<Vec<Model>, HealthCheckError> {
-    let response: OllamaTagsResponse = serde_json::from_str(body)
-        .map_err(|e| HealthCheckError::ParseError(e.to_string()))?;
+With a threshold of 3, this backend stays "Healthy" because it never fails 3 times in a row. This prevents "flapping" - rapidly switching between healthy/unhealthy.
 
-    Ok(response.models.into_iter().map(|m| {
-        let name_lower = m.name.to_lowercase();
-        
-        // Auto-detect capabilities from model name
-        let supports_vision = name_lower.contains("llava") 
-            || name_lower.contains("vision");
-        let supports_tools = name_lower.contains("mistral");
+### Part 2: The Health Check Result
 
-        Model {
-            id: m.name.clone(),
-            name: m.name,
-            context_length: 4096,  // Ollama doesn't expose this
-            supports_vision,
-            supports_tools,
-            supports_json_mode: false,
-            max_output_tokens: None,
-        }
-    }).collect())
+```rust
+pub enum HealthCheckResult {
+    Success {
+        latency_ms: u32,      // How long did the request take?
+        models: Vec<Model>,   // What models are available?
+    },
+    Failure {
+        error: HealthCheckError,  // What went wrong?
+    },
 }
 ```
 
-**Capability Detection**:
-
-Since Ollama doesn't explicitly report model capabilities, we infer them from the model name:
-- `llava` or `vision` → `supports_vision = true`
-- `mistral` → `supports_tools = true`
-
-This is a heuristic—not perfect, but works for common models.
+**Either it worked or it didn't.** Rust enums are perfect for this - no null, no ambiguity.
 
-### Status Transitions with Thresholds
+### Part 3: The Status Transition Logic
 
 ```rust
-// src/health/state.rs
-
 pub fn apply_result(
     &mut self,
     result: &HealthCheckResult,
@@ -231,39 +222,39 @@ pub fn apply_result(
 ) -> Option<BackendStatus> {
     match result {
         HealthCheckResult::Success { .. } => {
-            self.consecutive_failures = 0;  // Reset failure counter
-            self.consecutive_successes += 1;
+            self.consecutive_failures = 0;     // Reset failures
+            self.consecutive_successes += 1;   // Count success
 
             match self.last_status {
-                // First check: immediately healthy
+                // First check ever: immediately healthy
                 BackendStatus::Unknown => Some(BackendStatus::Healthy),
-                
-                // Recovery: need 2 consecutive successes
+
+                // Was unhealthy, need N successes to recover
                 BackendStatus::Unhealthy
                     if self.consecutive_successes >= config.recovery_threshold =>
                 {
                     Some(BackendStatus::Healthy)
                 }
-                
+
                 // Already healthy or not enough successes yet
                 _ => None,
             }
         }
         HealthCheckResult::Failure { .. } => {
-            self.consecutive_successes = 0;  // Reset success counter
-            self.consecutive_failures += 1;
+            self.consecutive_successes = 0;    // Reset successes
+            self.consecutive_failures += 1;    // Count failure
 
             match self.last_status {
-                // First check: immediately unhealthy
+                // First check ever: immediately unhealthy
                 BackendStatus::Unknown => Some(BackendStatus::Unhealthy),
-                
-                // Degradation: need 3 consecutive failures
+
+                // Was healthy, need N failures to mark unhealthy
                 BackendStatus::Healthy
                     if self.consecutive_failures >= config.failure_threshold =>
                 {
                     Some(BackendStatus::Unhealthy)
                 }
-                
+
                 // Already unhealthy or not enough failures yet
                 _ => None,
             }
@@ -272,11 +263,11 @@ pub fn apply_result(
 }
 ```
 
-**Status Transition State Machine**:
+**The State Machine:**
 
 ```
           ┌─────────┐
-          │ Unknown │
+          │ Unknown │ ← All backends start here
           └────┬────┘
                │
     1 success  │  1 failure
@@ -288,101 +279,355 @@ pub fn apply_result(
         │               │
         │ 3 failures    │ 2 successes
         │               │
-        ▼               ▼
-   ┌───────────┐   ┌─────────┐
-   │ Unhealthy │   │ Healthy │
-   └───────────┘   └─────────┘
+        └───────┬───────┘
+                ▼
+           Status change!
 ```
 
-**Why Thresholds?**
+**Why return `Option<BackendStatus>`?**
+
+- `Some(status)` = "Status changed! Update the registry."
+- `None` = "No change, don't bother updating."
 
-Without thresholds, a single network hiccup would mark a backend unhealthy, potentially causing unnecessary failovers. Thresholds ensure:
-- A backend must fail **consistently** before being marked unhealthy
-- A backend must recover **consistently** before being trusted again
+---
 
-### Registry Integration
+## File 4: parser.rs - Understanding Backend Responses
+
+### Parsing Ollama Responses
 
 ```rust
-// src/health/mod.rs
-
-pub fn apply_result(&self, backend_id: &str, result: HealthCheckResult) {
-    let mut state = self.state.entry(backend_id.to_string()).or_default();
-    let new_status = state.apply_result(&result, &self.config);
-    
-    match &result {
-        HealthCheckResult::Success { latency_ms, models } => {
-            // Always update latency
-            self.registry.update_latency(backend_id, *latency_ms);
-            
-            // Update models if we got any
-            if !models.is_empty() {
-                self.registry.update_models(backend_id, models.clone());
-                state.last_models = models.clone();
-            }
+#[derive(Deserialize)]
+struct OllamaTagsResponse {
+    models: Vec<OllamaModel>,
+}
+
+#[derive(Deserialize)]
+struct OllamaModel {
+    name: String,
+}
+
+pub fn parse_ollama_response(body: &str) -> Result<Vec<Model>, HealthCheckError> {
+    // Step 1: Parse JSON
+    let response: OllamaTagsResponse = serde_json::from_str(body)
+        .map_err(|e| HealthCheckError::ParseError(e.to_string()))?;
+
+    // Step 2: Convert to our Model type
+    Ok(response.models.into_iter().map(|m| {
+        let name_lower = m.name.to_lowercase();
+
+        // Auto-detect capabilities from model name
+        let supports_vision = name_lower.contains("llava")
+            || name_lower.contains("vision");
+        let supports_tools = name_lower.contains("mistral");
+
+        Model {
+            id: m.name.clone(),
+            name: m.name,
+            context_length: 4096,  // Ollama doesn't tell us, assume 4K
+            supports_vision,
+            supports_tools,
+            supports_json_mode: false,
+            max_output_tokens: None,
         }
-        HealthCheckResult::Failure { .. } => {
-            // Preserve last_models for recovery
+    }).collect())
+}
+```
+
+**What Ollama returns:**
+
+```json
+{
+  "models": [
+    {"name": "llama3:70b"},
+    {"name": "llava:13b"},
+    {"name": "mistral:7b"}
+  ]
+}
+```
+
+**What we do with it:**
+
+1. Parse the JSON
+2. Extract model names
+3. Guess capabilities from the name:
+   - "llava" → probably supports vision
+   - "mistral" → probably supports tool calling
+4. Create our `Model` structs
+
+### Parsing OpenAI-Format Responses
+
+```rust
+pub fn parse_openai_response(body: &str) -> Result<Vec<Model>, HealthCheckError> {
+    let response: OpenAIModelsResponse = serde_json::from_str(body)
+        .map_err(|e| HealthCheckError::ParseError(e.to_string()))?;
+
+    Ok(response.data.into_iter().map(|m| {
+        Model {
+            id: m.id.clone(),
+            name: m.id,
+            context_length: 4096,
+            supports_vision: false,  // No way to know
+            supports_tools: false,
+            supports_json_mode: false,
+            max_output_tokens: None,
         }
+    }).collect())
+}
+```
+
+**What vLLM/OpenAI returns:**
+
+```json
+{
+  "data": [
+    {"id": "gpt-4", "object": "model"},
+    {"id": "gpt-3.5-turbo", "object": "model"}
+  ]
+}
+```
+
+### Parsing LlamaCpp Responses
+
+```rust
+pub fn parse_llamacpp_response(body: &str) -> Result<bool, HealthCheckError> {
+    let response: LlamaCppHealthResponse = serde_json::from_str(body)
+        .map_err(|e| HealthCheckError::ParseError(e.to_string()))?;
+
+    Ok(response.status == "ok")  // Just returns true/false!
+}
+```
+
+**What llama.cpp returns:**
+
+```json
+{"status": "ok"}
+```
+
+LlamaCpp doesn't tell us about models - it just says if it's running.
+
+---
+
+## File 5: mod.rs - The Main Logic
+
+### Part 1: The HealthChecker Struct
+
+```rust
+pub struct HealthChecker {
+    registry: Arc<Registry>,                    // Shared reference to registry
+    client: reqwest::Client,                    // HTTP client (reused!)
+    config: HealthCheckConfig,                  // Our settings
+    state: DashMap<String, BackendHealthState>, // Per-backend tracking
+}
+```
+
+**Why these types?**
+
+| Field | Type | Why? |
+|-------|------|------|
+| `registry` | `Arc<Registry>` | Shared with other components (API, Router) |
+| `client` | `reqwest::Client` | Reuse connections, don't create per-request |
+| `config` | `HealthCheckConfig` | Our settings (interval, thresholds, etc.) |
+| `state` | `DashMap<...>` | Thread-safe map for per-backend state |
+
+### Part 2: Choosing the Right Endpoint
+
+```rust
+pub fn get_health_endpoint(backend_type: BackendType) -> &'static str {
+    match backend_type {
+        BackendType::Ollama => "/api/tags",
+        BackendType::LlamaCpp => "/health",
+        BackendType::VLLM | BackendType::Exo |
+        BackendType::OpenAI | BackendType::Generic => "/v1/models",
     }
-    
-    // Only update status if threshold was crossed
-    if let Some(status) = new_status {
-        self.registry.update_status(backend_id, status, error_msg);
-        tracing::info!(backend_id, ?status, "Backend status changed");
+}
+```
+
+**Each backend has its own API.** Ollama uses `/api/tags`, vLLM uses `/v1/models`, etc.
+
+### Part 3: Checking One Backend
+
+```rust
+pub async fn check_backend(&self, backend: &Backend) -> HealthCheckResult {
+    // Step 1: Build the URL
+    let endpoint = Self::get_health_endpoint(backend.backend_type);
+    let url = format!("{}{}", backend.url, endpoint);
+
+    // Step 2: Start timer
+    let start = Instant::now();
+
+    // Step 3: Make HTTP request
+    match self.client
+        .get(&url)
+        .timeout(Duration::from_secs(self.config.timeout_seconds))
+        .send()
+        .await
+    {
+        Ok(response) => {
+            let latency_ms = start.elapsed().as_millis() as u32;
+
+            // Step 4a: Check status code
+            if !response.status().is_success() {
+                return HealthCheckResult::Failure {
+                    error: HealthCheckError::HttpError(response.status().as_u16()),
+                };
+            }
+
+            // Step 4b: Parse response body
+            match response.text().await {
+                Ok(body) => self.parse_response(backend.backend_type, &body, latency_ms),
+                Err(e) => HealthCheckResult::Failure {
+                    error: HealthCheckError::ParseError(e.to_string()),
+                },
+            }
+        }
+        Err(e) => HealthCheckResult::Failure {
+            error: Self::classify_error(e, self.config.timeout_seconds),
+        },
     }
 }
 ```
 
-**Key Insight**: We always update latency on success, but only update status when a threshold is crossed.
+**Step by step:**
 
-## Testing Strategy
+1. Build URL: `http://localhost:11434` + `/api/tags`
+2. Start a timer (to measure latency)
+3. Send GET request with timeout
+4. If response OK → parse the body
+5. If error → classify what went wrong
 
-### Unit Tests
+### Part 4: The Main Loop
 
-The health module has 40 unit tests covering:
+```rust
+pub fn start(self, cancel_token: CancellationToken) -> JoinHandle<()> {
+    tokio::spawn(async move {
+        // Create interval timer
+        let mut interval = tokio::time::interval(
+            Duration::from_secs(self.config.interval_seconds)
+        );
+        interval.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Skip);
 
-1. **Configuration**: Default values, TOML parsing, validation
-2. **Error types**: Display messages, classification
-3. **State transitions**: All threshold scenarios
-4. **Parsing**: Ollama, OpenAI, LlamaCpp formats
-5. **Endpoint selection**: All backend types
+        tracing::info!(interval_seconds = self.config.interval_seconds,
+            "Health checker started");
 
-Example test for status transitions:
+        loop {
+            tokio::select! {
+                // Option 1: Shutdown requested
+                _ = cancel_token.cancelled() => {
+                    tracing::info!("Health checker shutting down");
+                    break;
+                }
+                // Option 2: Time for next check
+                _ = interval.tick() => {
+                    self.check_all_backends().await;
+                }
+            }
+        }
+    })
+}
+```
+
+**Understanding `tokio::select!`:**
+
+This waits for EITHER:
+- The cancel token to be triggered (shutdown), OR
+- The interval timer to tick (time for checks)
+
+Whichever happens first wins. It's like `Promise.race()` in JavaScript.
+
+**Why `MissedTickBehavior::Skip`?**
+
+If a health check takes 45 seconds but the interval is 30 seconds:
+- **Without Skip**: Would try to "catch up" by running checks back-to-back
+- **With Skip**: Just waits for the next interval
+
+---
+
+## Understanding the Tests
+
+### Test Categories
+
+| Category | Count | What It Tests |
+|----------|-------|---------------|
+| Config | 4 | Default values, TOML parsing |
+| Errors | 6 | Error messages display correctly |
+| State | 2 | Default state, cloning |
+| Transitions | 8 | All threshold scenarios |
+| Parsing | 14 | Ollama, OpenAI, LlamaCpp formats |
+| Endpoints | 6 | Correct endpoint per backend type |
+
+### Example: Testing Configuration Defaults
+
+```rust
+#[test]
+fn test_config_default_values() {
+    let config = HealthCheckConfig::default();
+
+    assert!(config.enabled);
+    assert_eq!(config.interval_seconds, 30);
+    assert_eq!(config.timeout_seconds, 5);
+    assert_eq!(config.failure_threshold, 3);
+    assert_eq!(config.recovery_threshold, 2);
+}
+```
+
+**What this tests:** When someone creates a `HealthCheckConfig::default()`, they get sensible values.
+
+### Example: Testing Status Transitions
 
 ```rust
 #[test]
 fn test_healthy_to_unhealthy_at_threshold() {
+    // ARRANGE: Start with a healthy backend
     let mut state = BackendHealthState {
         last_status: BackendStatus::Healthy,
         ..Default::default()
     };
     let config = HealthCheckConfig::default(); // failure_threshold = 3
-    let failure = HealthCheckResult::Failure { 
-        error: HealthCheckError::Timeout(5) 
+    let failure = HealthCheckResult::Failure {
+        error: HealthCheckError::Timeout(5)
     };
 
-    // First two failures: no transition
+    // ACT & ASSERT: First two failures - no transition
     assert_eq!(state.apply_result(&failure, &config), None);
+    assert_eq!(state.consecutive_failures, 1);
+
     assert_eq!(state.apply_result(&failure, &config), None);
-    
-    // Third failure: transition to Unhealthy
+    assert_eq!(state.consecutive_failures, 2);
+
+    // ACT & ASSERT: Third failure - transition!
     assert_eq!(
         state.apply_result(&failure, &config),
         Some(BackendStatus::Unhealthy)
     );
+    assert_eq!(state.consecutive_failures, 3);
 }
 ```
 
-### Integration Tests
+**What this tests:** A healthy backend needs exactly 3 consecutive failures to become unhealthy.
 
-Integration tests use mock HTTP servers to test end-to-end behavior:
+### Example: Testing Parser
 
 ```rust
-// tests/health_integration.rs
+#[test]
+fn test_parse_ollama_vision_model() {
+    let body = r#"{"models": [{"name": "llava:13b"}]}"#;
+
+    let models = parse_ollama_response(body).unwrap();
+
+    assert_eq!(models.len(), 1);
+    assert_eq!(models[0].id, "llava:13b");
+    assert!(models[0].supports_vision);  // Detected from name!
+}
+```
+
+**What this tests:** When Ollama returns a model with "llava" in the name, we correctly detect it supports vision.
+
+### Integration Tests with Mock Servers
 
+```rust
 #[tokio::test]
 async fn test_full_health_check_cycle_ollama() {
-    // Start mock Ollama server
+    // ARRANGE: Start a fake Ollama server
     let mock_server = MockServer::start().await;
     Mock::given(method("GET"))
         .and(path("/api/tags"))
@@ -392,118 +637,147 @@ async fn test_full_health_check_cycle_ollama() {
         .mount(&mock_server)
         .await;
 
-    // Create registry and add backend
+    // Create real registry + health checker
     let registry = Arc::new(Registry::new());
     let backend = Backend::new("test", &mock_server.uri(), BackendType::Ollama);
     registry.add_backend(backend.clone()).unwrap();
 
-    // Run health checker
-    let config = HealthCheckConfig { interval_seconds: 1, ..Default::default() };
+    let config = HealthCheckConfig {
+        interval_seconds: 1,  // Fast for testing
+        ..Default::default()
+    };
     let checker = HealthChecker::new(registry.clone(), config);
+
+    // ACT: Run health checker
     let cancel = CancellationToken::new();
     let handle = checker.start(cancel.clone());
-
-    // Wait for check cycle
     tokio::time::sleep(Duration::from_millis(1500)).await;
     cancel.cancel();
     handle.await.unwrap();
 
-    // Verify registry was updated
+    // ASSERT: Registry was updated
     let updated = registry.get_backend(&backend.id).unwrap();
     assert_eq!(updated.status, BackendStatus::Healthy);
     assert_eq!(updated.models.len(), 2);
 }
 ```
 
-**What This Tests**:
-1. Mock server returns Ollama-format response
-2. Health checker parses response correctly
-3. Registry is updated with status and models
+**What this tests:**
+1. Health checker calls the mock server
+2. Mock returns Ollama-format response
+3. Registry gets updated with status + models
 4. Graceful shutdown works
 
-## Common Patterns
+---
+
+## Key Rust Concepts
+
+| Concept | What It Means | Example |
+|---------|---------------|---------|
+| `async fn` | Function that can pause/resume | `async fn check_backend(...)` |
+| `await` | Pause until async operation completes | `response.text().await` |
+| `Arc<T>` | Shared ownership across threads | `Arc<Registry>` |
+| `DashMap` | Thread-safe HashMap | `DashMap<String, BackendHealthState>` |
+| `Option<T>` | Either `Some(value)` or `None` | `Option<BackendStatus>` for "maybe changed" |
+| `Result<T, E>` | Either `Ok(value)` or `Err(error)` | `Result<Vec<Model>, HealthCheckError>` |
+| `match` | Pattern matching | `match backend_type { Ollama => ... }` |
+| `tokio::spawn` | Start a background task | `tokio::spawn(async move { ... })` |
+| `tokio::select!` | Wait for first of multiple async events | Used in the main loop |
+
+---
 
-### Pattern 1: DashMap Entry API
+## Common Patterns in This Module
+
+### Pattern 1: The "Get or Create" Pattern
 
 ```rust
 let mut state = self.state.entry(backend_id.to_string()).or_default();
 ```
 
-This is the "get or create" pattern:
-- If `backend_id` exists → get mutable reference
-- If not → create default entry and get mutable reference
+**What it does:**
+- If `backend_id` exists in the map → get mutable reference
+- If not → create a new entry with `Default::default()` and get mutable reference
+
+This is super common with DashMap and HashMap.
 
-### Pattern 2: CancellationToken for Graceful Shutdown
+### Pattern 2: Graceful Shutdown with CancellationToken
 
 ```rust
-pub fn start(self, cancel_token: CancellationToken) -> JoinHandle<()> {
-    tokio::spawn(async move {
-        loop {
-            tokio::select! {
-                _ = cancel_token.cancelled() => break,
-                _ = do_work() => {}
-            }
-        }
-    })
+// Caller:
+let cancel = CancellationToken::new();
+let handle = checker.start(cancel.clone());
+// ... later ...
+cancel.cancel();
+handle.await.unwrap();
+
+// Inside the loop:
+tokio::select! {
+    _ = cancel_token.cancelled() => break,
+    _ = do_work() => {}
 }
 ```
 
-This pattern is standard for background services in Tokio:
-1. Caller keeps a `CancellationToken`
-2. Task checks for cancellation in its loop
-3. Caller calls `cancel.cancel()` to trigger shutdown
-4. Caller awaits `handle` to ensure clean exit
+**Why this pattern?**
+- Clean shutdown: finish what you're doing, then exit
+- No orphaned tasks
+- No resource leaks
 
-### Pattern 3: Connection Pooling
+### Pattern 3: Backend-Specific Logic with Match
 
 ```rust
-let client = reqwest::Client::builder()
-    .timeout(Duration::from_secs(config.timeout_seconds))
-    .build()
-    .expect("Failed to build HTTP client");
+match backend_type {
+    BackendType::Ollama => parse_ollama_response(body),
+    BackendType::LlamaCpp => parse_llamacpp_response(body),
+    BackendType::VLLM | BackendType::Exo |
+    BackendType::OpenAI | BackendType::Generic => parse_openai_response(body),
+}
 ```
 
-One client is shared for all requests. Benefits:
-- Connection reuse (no TCP handshake per request)
-- Automatic keep-alive
-- Built-in connection pooling
+**Why this pattern?**
+- Compiler ensures we handle ALL variants
+- Adding a new backend type? Compiler tells you everywhere to update
+- No "default" case hiding bugs
 
-## Exercises
+### Pattern 4: Map Error Types
 
-### Exercise 1: Add a New Backend Type
+```rust
+let response: OllamaTagsResponse = serde_json::from_str(body)
+    .map_err(|e| HealthCheckError::ParseError(e.to_string()))?;
+```
 
-Add support for a hypothetical "LocalAI" backend:
-1. Add `LocalAI` to `BackendType` enum in `src/registry/backend.rs`
-2. Add endpoint mapping in `get_health_endpoint()`
-3. If it has a unique response format, add a parser function
-4. Write tests for the new backend type
+**What it does:**
+- `serde_json::from_str` returns `Result<T, serde_json::Error>`
+- We want `Result<T, HealthCheckError>`
+- `.map_err(...)` converts the error type
+- `?` returns early if it's an error
 
-### Exercise 2: Add Jitter to Prevent Thundering Herd
+---
 
-Currently all backends are checked simultaneously. Add random jitter:
-1. Calculate `jitter = random(0..interval/backends.len())`
-2. Sleep for jitter before each backend check
-3. Write a test that verifies checks are staggered
+## Summary
 
-### Exercise 3: Add Exponential Backoff for Unhealthy Backends
+The Health Checker is a background service that:
 
-Don't check unhealthy backends as frequently:
-1. Track consecutive failures per backend
-2. Increase check interval: `base_interval * 2^failures` (capped at 5 minutes)
-3. Reset to normal interval on success
+1. **Runs continuously** using `tokio::spawn` and an interval timer
+2. **Checks all backends** by calling their health endpoints
+3. **Parses responses** differently for Ollama, OpenAI, and LlamaCpp
+4. **Uses thresholds** to prevent status flapping (3 failures, 2 recoveries)
+5. **Updates the Registry** with status, models, and latency
+6. **Shuts down gracefully** when asked via CancellationToken
 
-## Summary
+### Key Files to Remember
+
+| File | One-Sentence Summary |
+|------|---------------------|
+| `config.rs` | Settings with sensible defaults |
+| `error.rs` | All the ways a health check can fail |
+| `state.rs` | Tracks failures/successes per backend |
+| `parser.rs` | Understands Ollama, OpenAI, LlamaCpp responses |
+| `mod.rs` | The main loop and HTTP logic |
 
-The Health Checker demonstrates several key patterns:
+### Next Steps
 
-1. **Background Services**: Using `tokio::spawn` with `CancellationToken`
-2. **Thread-Safe State**: Using `DashMap` for concurrent access
-3. **Shared Ownership**: Using `Arc<Registry>` for shared state
-4. **Threshold Logic**: Preventing flapping with consecutive counters
-5. **Backend Abstraction**: Handling different APIs uniformly
-6. **Integration Testing**: Using mock servers for reliable tests
+Now that you understand the Health Checker, you're ready to explore:
 
-Understanding this module gives you a solid foundation for:
-- The Router (uses health status for backend selection)
-- The API layer (starts health checker on startup)
-- Future features like circuit breakers or adaptive health checking
+- **The Router** - Uses health status to pick backends
+- **The API Gateway** - Starts the health checker on startup
+- **Adding a new backend type** - Try adding "LocalAI"!
diff --git a/specs/003-cli-configuration/plan.md b/specs/003-cli-configuration/plan.md
new file mode 100644
index 0000000..68eabbd
--- /dev/null
+++ b/specs/003-cli-configuration/plan.md
@@ -0,0 +1,714 @@
+# Implementation Plan: CLI and Configuration
+
+**Spec**: [spec.md](./spec.md)  
+**Status**: Ready for Implementation  
+**Estimated Complexity**: Medium-High
+
+## Approach
+
+Implement the CLI using clap's derive macros and layered configuration using the `config` crate. Follow TDD: write failing tests first, then implement to make them pass.
+
+### Design Decisions
+
+| Decision | Choice | Rationale |
+|----------|--------|-----------|
+| CLI framework | `clap` with derive | Declarative, type-safe, built-in env support |
+| Config loading | `config` crate | Native layered merging (file + env + defaults) |
+| Config format | TOML only | Consistent with Rust ecosystem, readable |
+| Table output | `comfy-table` | Terminal-aware, Unicode borders, colors |
+| CLI structure | Subcommands | `nexus <cmd>` pattern, extensible |
+| Config struct | Single `NexusConfig` | One source of truth, shared across modules |
+
+### File Structure
+
+```
+src/
+├── main.rs                 # Entry point with clap parsing
+├── lib.rs                  # Library root, re-exports modules
+├── cli/
+│   ├── mod.rs              # CLI command definitions (clap)
+│   ├── serve.rs            # serve command implementation
+│   ├── backends.rs         # backends subcommand handlers
+│   ├── models.rs           # models command handler
+│   ├── health.rs           # health command handler
+│   └── output.rs           # Table/JSON output helpers
+├── config/
+│   ├── mod.rs              # NexusConfig struct, loading logic
+│   ├── server.rs           # ServerConfig
+│   ├── routing.rs          # RoutingConfig, RoutingWeights
+│   ├── logging.rs          # LoggingConfig
+│   └── error.rs            # ConfigError enum
+├── health/                 # (existing)
+└── registry/               # (existing)
+```
+
+### Dependencies
+
+**Already in Cargo.toml:**
+- `clap = { version = "4", features = ["derive", "env"] }` ✓
+- `toml = "0.8"` ✓
+- `serde = { features = ["derive"] }` ✓
+- `tracing` ✓
+- `tracing-subscriber = { features = ["env-filter", "json"] }` ✓
+
+**New dependencies needed:**
+```toml
+# Pretty table output
+comfy-table = "7"
+
+# Layered configuration
+config = { version = "0.14", default-features = false, features = ["toml"] }
+
+# Terminal colors (optional, for status indicators)
+colored = "2"
+```
+
+**New dev-dependencies:**
+```toml
+# CLI integration testing
+assert_cmd = "2"       # Already present ✓
+predicates = "3"       # Already present ✓
+tempfile = "3"         # For config file tests
+```
+
+---
+
+## Implementation Phases
+
+### Phase 1: Configuration Module (Tests First)
+
+**Goal**: Create the unified config system with layered loading.
+
+**Tests to write first** (10 tests):
+1. `test_config_default_values` - All defaults are sensible
+2. `test_config_parse_minimal_toml` - Parse file with only `[server]`
+3. `test_config_parse_full_toml` - Parse complete config file
+4. `test_config_parse_backends_array` - Parse `[[backends]]` array
+5. `test_config_env_override_port` - NEXUS_PORT overrides config
+6. `test_config_env_override_log_level` - NEXUS_LOG_LEVEL overrides config
+7. `test_config_missing_file_uses_defaults` - No file → defaults work
+8. `test_config_invalid_toml_error` - Parse error with line number
+9. `test_config_invalid_backend_type_error` - Validation error message
+10. `test_routing_strategy_serde` - RoutingStrategy serialization
+
+**Implementation**:
+
+1. Create `src/config/mod.rs`:
+   ```rust
+   pub struct NexusConfig {
+       pub server: ServerConfig,
+       pub discovery: DiscoveryConfig,
+       pub health_check: HealthCheckConfig,
+       pub routing: RoutingConfig,
+       pub backends: Vec<BackendConfig>,
+       pub logging: LoggingConfig,
+   }
+   
+   impl NexusConfig {
+       pub fn load(path: Option<&Path>) -> Result<Self, ConfigError>;
+       pub fn with_overrides(self, overrides: ConfigOverrides) -> Self;
+   }
+   ```
+
+2. Create `src/config/server.rs`:
+   ```rust
+   #[derive(Debug, Clone, Serialize, Deserialize)]
+   #[serde(default)]
+   pub struct ServerConfig {
+       pub host: String,           // "0.0.0.0"
+       pub port: u16,              // 8000
+       pub request_timeout_seconds: u64,  // 300
+       pub max_concurrent_requests: u32,  // 1000
+   }
+   ```
+
+3. Create `src/config/routing.rs`:
+   ```rust
+   #[derive(Debug, Clone, Serialize, Deserialize)]
+   #[serde(default)]
+   pub struct RoutingConfig {
+       pub strategy: RoutingStrategy,
+       pub max_retries: u32,
+       pub weights: RoutingWeights,
+       pub aliases: HashMap<String, String>,
+       pub fallbacks: HashMap<String, Vec<String>>,
+   }
+   
+   #[derive(Debug, Clone, Serialize, Deserialize)]
+   #[serde(rename_all = "snake_case")]
+   pub enum RoutingStrategy {
+       Smart,
+       RoundRobin,
+       PriorityOnly,
+       Random,
+   }
+   ```
+
+4. Create `src/config/logging.rs`:
+   ```rust
+   #[derive(Debug, Clone, Serialize, Deserialize)]
+   #[serde(default)]
+   pub struct LoggingConfig {
+       pub level: String,      // "info"
+       pub format: LogFormat,  // Pretty
+   }
+   
+   #[derive(Debug, Clone, Serialize, Deserialize)]
+   #[serde(rename_all = "snake_case")]
+   pub enum LogFormat {
+       Pretty,
+       Json,
+   }
+   ```
+
+5. Create `src/config/error.rs`:
+   ```rust
+   #[derive(Debug, thiserror::Error)]
+   pub enum ConfigError {
+       #[error("config file not found: {0}")]
+       NotFound(PathBuf),
+       
+       #[error("config parse error: {0}")]
+       ParseError(String),
+       
+       #[error("invalid value for '{field}': {message}")]
+       ValidationError { field: String, message: String },
+   }
+   ```
+
+**Acceptance**: All 10 tests pass.
+
+---
+
+### Phase 2: CLI Command Definitions (Tests First)
+
+**Goal**: Define the CLI structure with clap derive.
+
+**Tests to write first** (8 tests):
+1. `test_cli_parse_serve_defaults` - `nexus serve` parses with defaults
+2. `test_cli_parse_serve_with_port` - `nexus serve -p 9000` parses port
+3. `test_cli_parse_serve_with_config` - `nexus serve -c custom.toml` parses path
+4. `test_cli_parse_backends_list` - `nexus backends` parses
+5. `test_cli_parse_backends_json` - `nexus backends --json` sets flag
+6. `test_cli_parse_backends_add` - `nexus backends add http://...` parses URL
+7. `test_cli_parse_models` - `nexus models` parses
+8. `test_cli_parse_health` - `nexus health` parses
+
+**Implementation**:
+
+1. Create `src/cli/mod.rs`:
+   ```rust
+   use clap::{Parser, Subcommand, Args};
+   
+   #[derive(Parser)]
+   #[command(name = "nexus", version, about = "Distributed LLM Orchestrator")]
+   pub struct Cli {
+       #[command(subcommand)]
+       pub command: Commands,
+   }
+   
+   #[derive(Subcommand)]
+   pub enum Commands {
+       /// Start the Nexus server
+       Serve(ServeArgs),
+       /// Manage backends
+       #[command(subcommand)]
+       Backends(BackendsCommands),
+       /// List available models
+       Models(ModelsArgs),
+       /// Show system health
+       Health(HealthArgs),
+       /// Configuration utilities
+       #[command(subcommand)]
+       Config(ConfigCommands),
+   }
+   ```
+
+2. Define `ServeArgs`:
+   ```rust
+   #[derive(Args)]
+   pub struct ServeArgs {
+       /// Config file path
+       #[arg(short, long, default_value = "nexus.toml")]
+       pub config: PathBuf,
+       
+       /// Listen port
+       #[arg(short, long, env = "NEXUS_PORT")]
+       pub port: Option<u16>,
+       
+       /// Listen address
+       #[arg(short = 'H', long, env = "NEXUS_HOST")]
+       pub host: Option<String>,
+       
+       /// Log level: trace, debug, info, warn, error
+       #[arg(short, long, env = "NEXUS_LOG_LEVEL")]
+       pub log_level: Option<String>,
+       
+       /// Disable mDNS discovery
+       #[arg(long)]
+       pub no_discovery: bool,
+       
+       /// Disable health checking
+       #[arg(long)]
+       pub no_health_check: bool,
+   }
+   ```
+
+3. Define `BackendsCommands`:
+   ```rust
+   #[derive(Subcommand)]
+   pub enum BackendsCommands {
+       /// List all backends
+       List(BackendsListArgs),
+       /// Add a backend
+       Add(BackendsAddArgs),
+       /// Remove a backend
+       Remove(BackendsRemoveArgs),
+   }
+   
+   #[derive(Args)]
+   pub struct BackendsListArgs {
+       /// Output as JSON
+       #[arg(long)]
+       pub json: bool,
+       
+       /// Filter by status
+       #[arg(long)]
+       pub status: Option<String>,
+   }
+   ```
+
+4. Update `src/main.rs`:
+   ```rust
+   use clap::Parser;
+   use nexus::cli::{Cli, Commands};
+   
+   fn main() {
+       let cli = Cli::parse();
+       
+       match cli.command {
+           Commands::Serve(args) => todo!(),
+           Commands::Backends(cmd) => todo!(),
+           Commands::Models(args) => todo!(),
+           Commands::Health(args) => todo!(),
+           Commands::Config(cmd) => todo!(),
+       }
+   }
+   ```
+
+**Acceptance**: All 8 tests pass, `nexus --help` shows all commands.
+
+---
+
+### Phase 3: Output Formatting (Tests First)
+
+**Goal**: Create table and JSON output helpers.
+
+**Tests to write first** (6 tests):
+1. `test_format_backends_table` - Backends render as table
+2. `test_format_backends_json` - Backends render as JSON
+3. `test_format_models_table` - Models render as table
+4. `test_format_models_json` - Models render as JSON
+5. `test_format_health_pretty` - Health renders with status icons
+6. `test_format_health_json` - Health renders as JSON
+
+**Implementation**:
+
+1. Create `src/cli/output.rs`:
+   ```rust
+   use comfy_table::{Table, Cell, Color, Attribute};
+   
+   pub fn print_backends_table(backends: &[BackendView]) {
+       let mut table = Table::new();
+       table.set_header(vec!["Name", "URL", "Type", "Status", "Models", "Latency"]);
+       
+       for b in backends {
+           let status_cell = match b.status {
+               BackendStatus::Healthy => Cell::new("Healthy").fg(Color::Green),
+               BackendStatus::Unhealthy => Cell::new("Unhealthy").fg(Color::Red),
+               BackendStatus::Unknown => Cell::new("Unknown").fg(Color::Yellow),
+           };
+           
+           table.add_row(vec![
+               Cell::new(&b.name),
+               Cell::new(&b.url),
+               Cell::new(format!("{:?}", b.backend_type)),
+               status_cell,
+               Cell::new(b.models.len()),
+               Cell::new(format!("{}ms", b.avg_latency_ms)),
+           ]);
+       }
+       
+       println!("{table}");
+   }
+   
+   pub fn print_json<T: Serialize>(value: &T) -> Result<(), serde_json::Error> {
+       println!("{}", serde_json::to_string_pretty(value)?);
+       Ok(())
+   }
+   ```
+
+2. Create status icon helpers:
+   ```rust
+   pub fn status_icon(status: BackendStatus) -> &'static str {
+       match status {
+           BackendStatus::Healthy => "✓",
+           BackendStatus::Unhealthy => "✗",
+           BackendStatus::Unknown => "?",
+       }
+   }
+   ```
+
+**Acceptance**: All 6 tests pass, output is visually correct.
+
+---
+
+### Phase 4: Serve Command (Tests First)
+
+**Goal**: Implement the main `serve` command that starts the server.
+
+**Tests to write first** (8 tests):
+1. `test_serve_starts_on_default_port` - Server binds to 8000
+2. `test_serve_respects_port_arg` - `--port 9000` uses 9000
+3. `test_serve_respects_config_file` - Loads specified config
+4. `test_serve_works_without_config` - Zero-config mode works
+5. `test_serve_initializes_health_checker` - Health checker starts
+6. `test_serve_graceful_shutdown` - SIGINT triggers clean exit
+7. `test_serve_logs_startup_info` - Logs host:port on startup
+8. `test_serve_exits_on_port_conflict` - Exit code 1 if port in use
+
+**Implementation**:
+
+1. Create `src/cli/serve.rs`:
+   ```rust
+   pub async fn run_serve(args: ServeArgs) -> Result<(), Box<dyn std::error::Error>> {
+       // 1. Load configuration
+       let config = load_config_with_overrides(&args)?;
+       
+       // 2. Initialize tracing
+       init_tracing(&config.logging)?;
+       
+       // 3. Create registry
+       let registry = Arc::new(Registry::new());
+       
+       // 4. Load static backends from config
+       for backend_config in &config.backends {
+           let backend = Backend::from_config(backend_config);
+           registry.add_backend(backend)?;
+       }
+       
+       // 5. Start health checker
+       let cancel_token = CancellationToken::new();
+       let health_handle = if config.health_check.enabled && !args.no_health_check {
+           let checker = HealthChecker::new(registry.clone(), config.health_check.clone());
+           Some(checker.start(cancel_token.clone()))
+       } else {
+           None
+       };
+       
+       // 6. Build HTTP server (placeholder until API Gateway is implemented)
+       let app = Router::new()
+           .route("/health", get(health_handler));
+       
+       let addr = format!("{}:{}", config.server.host, config.server.port);
+       tracing::info!("Nexus listening on {}", addr);
+       
+       // 7. Run server with graceful shutdown
+       let listener = TcpListener::bind(&addr).await?;
+       axum::serve(listener, app)
+           .with_graceful_shutdown(shutdown_signal(cancel_token.clone()))
+           .await?;
+       
+       // 8. Wait for health checker to finish
+       if let Some(handle) = health_handle {
+           handle.await?;
+       }
+       
+       Ok(())
+   }
+   
+   async fn shutdown_signal(cancel_token: CancellationToken) {
+       tokio::signal::ctrl_c().await.ok();
+       tracing::info!("Shutdown signal received");
+       cancel_token.cancel();
+   }
+   ```
+
+2. Implement config loading with overrides:
+   ```rust
+   fn load_config_with_overrides(args: &ServeArgs) -> Result<NexusConfig, ConfigError> {
+       let mut config = if args.config.exists() {
+           NexusConfig::load(Some(&args.config))?
+       } else {
+           NexusConfig::default()
+       };
+       
+       // Apply CLI overrides
+       if let Some(port) = args.port {
+           config.server.port = port;
+       }
+       if let Some(ref host) = args.host {
+           config.server.host = host.clone();
+       }
+       if let Some(ref level) = args.log_level {
+           config.logging.level = level.clone();
+       }
+       
+       Ok(config)
+   }
+   ```
+
+**Acceptance**: All 8 tests pass, server starts and shuts down cleanly.
+
+---
+
+### Phase 5: Query Commands (Tests First)
+
+**Goal**: Implement `backends`, `models`, and `health` commands.
+
+**Tests to write first** (10 tests):
+1. `test_backends_list_empty` - Empty registry shows empty table
+2. `test_backends_list_with_data` - Shows all backends
+3. `test_backends_list_filter_healthy` - `--status healthy` filters
+4. `test_backends_add_success` - Adds backend, shows confirmation
+5. `test_backends_add_invalid_url` - Invalid URL returns error
+6. `test_backends_remove_success` - Removes backend
+7. `test_backends_remove_not_found` - Unknown ID returns error
+8. `test_models_list_aggregated` - Models grouped across backends
+9. `test_health_shows_summary` - Shows backend counts
+10. `test_health_json_valid` - JSON output is valid
+
+**Implementation**:
+
+1. Create `src/cli/backends.rs`:
+   ```rust
+   pub async fn handle_backends_command(
+       cmd: BackendsCommands,
+       registry: Arc<Registry>,
+   ) -> Result<(), Box<dyn std::error::Error>> {
+       match cmd {
+           BackendsCommands::List(args) => {
+               let backends = registry.get_all_backends();
+               let filtered = if let Some(status) = args.status {
+                   filter_by_status(backends, &status)
+               } else {
+                   backends
+               };
+               
+               if args.json {
+                   print_json(&filtered)?;
+               } else {
+                   print_backends_table(&filtered);
+               }
+           }
+           BackendsCommands::Add(args) => {
+               let backend = create_backend_from_args(&args)?;
+               registry.add_backend(backend.clone())?;
+               println!("Added backend: {} ({})", backend.name, backend.id);
+           }
+           BackendsCommands::Remove(args) => {
+               registry.remove_backend(&args.id)?;
+               println!("Removed backend: {}", args.id);
+           }
+       }
+       Ok(())
+   }
+   ```
+
+2. Create `src/cli/models.rs`:
+   ```rust
+   pub async fn handle_models_command(
+       args: ModelsArgs,
+       registry: Arc<Registry>,
+   ) -> Result<(), Box<dyn std::error::Error>> {
+       let models = registry.get_all_models_aggregated();
+       
+       if args.json {
+           print_json(&models)?;
+       } else {
+           print_models_table(&models);
+       }
+       Ok(())
+   }
+   ```
+
+3. Create `src/cli/health.rs`:
+   ```rust
+   pub async fn handle_health_command(
+       args: HealthArgs,
+       registry: Arc<Registry>,
+       uptime: Duration,
+   ) -> Result<(), Box<dyn std::error::Error>> {
+       let backends = registry.get_all_backends();
+       let healthy = backends.iter().filter(|b| b.status == BackendStatus::Healthy).count();
+       let models = registry.model_count();
+       
+       let status = HealthStatus {
+           status: if healthy > 0 { "healthy" } else { "degraded" },
+           version: env!("CARGO_PKG_VERSION"),
+           uptime_seconds: uptime.as_secs(),
+           backends: BackendCounts { total: backends.len(), healthy, unhealthy: backends.len() - healthy },
+           models: ModelCounts { total: models },
+           backend_details: backends,
+       };
+       
+       if args.json {
+           print_json(&status)?;
+       } else {
+           print_health_pretty(&status);
+       }
+       Ok(())
+   }
+   ```
+
+**Acceptance**: All 10 tests pass.
+
+---
+
+### Phase 6: Config Init Command (Tests First)
+
+**Goal**: Implement `nexus config init` to generate template.
+
+**Tests to write first** (4 tests):
+1. `test_config_init_creates_file` - Creates nexus.toml
+2. `test_config_init_custom_path` - `--output custom.toml` works
+3. `test_config_init_no_overwrite` - Fails if file exists
+4. `test_config_init_force` - `--force` overwrites existing
+
+**Implementation**:
+
+1. Add to `src/cli/mod.rs`:
+   ```rust
+   #[derive(Subcommand)]
+   pub enum ConfigCommands {
+       /// Generate example config file
+       Init(ConfigInitArgs),
+   }
+   
+   #[derive(Args)]
+   pub struct ConfigInitArgs {
+       /// Output file path
+       #[arg(short, long, default_value = "nexus.toml")]
+       pub output: PathBuf,
+       
+       /// Generate minimal config
+       #[arg(long)]
+       pub minimal: bool,
+       
+       /// Overwrite existing file
+       #[arg(long)]
+       pub force: bool,
+   }
+   ```
+
+2. Implement config generation:
+   ```rust
+   pub fn handle_config_init(args: ConfigInitArgs) -> Result<(), Box<dyn std::error::Error>> {
+       if args.output.exists() && !args.force {
+           return Err(format!("File already exists: {}. Use --force to overwrite.", 
+               args.output.display()).into());
+       }
+       
+       let template = if args.minimal {
+           include_str!("../../templates/nexus.minimal.toml")
+       } else {
+           include_str!("../../templates/nexus.example.toml")
+       };
+       
+       std::fs::write(&args.output, template)?;
+       println!("Created config file: {}", args.output.display());
+       Ok(())
+   }
+   ```
+
+**Acceptance**: All 4 tests pass.
+
+---
+
+### Phase 7: Integration Tests
+
+**Goal**: End-to-end CLI tests using `assert_cmd`.
+
+**Tests to write** (10 tests):
+1. `test_version_output` - `nexus --version` shows version
+2. `test_help_output` - `nexus --help` shows all commands
+3. `test_serve_help` - `nexus serve --help` shows options
+4. `test_backends_empty` - `nexus backends` with no server returns error
+5. `test_config_init_e2e` - Full config init workflow
+6. `test_invalid_command` - Unknown command shows help
+7. `test_serve_invalid_config` - Bad TOML returns error with message
+8. `test_serve_port_conflict` - Port in use returns exit code 1
+9. `test_env_var_override` - NEXUS_PORT overrides config
+10. `test_cli_arg_override` - `--port` overrides env and config
+
+**Implementation** (in `tests/cli_integration.rs`):
+```rust
+use assert_cmd::Command;
+use predicates::prelude::*;
+
+#[test]
+fn test_version_output() {
+    Command::cargo_bin("nexus")
+        .unwrap()
+        .arg("--version")
+        .assert()
+        .success()
+        .stdout(predicate::str::contains("nexus"));
+}
+
+#[test]
+fn test_help_shows_commands() {
+    Command::cargo_bin("nexus")
+        .unwrap()
+        .arg("--help")
+        .assert()
+        .success()
+        .stdout(predicate::str::contains("serve"))
+        .stdout(predicate::str::contains("backends"))
+        .stdout(predicate::str::contains("models"))
+        .stdout(predicate::str::contains("health"));
+}
+```
+
+**Acceptance**: All 10 integration tests pass.
+
+---
+
+## Test Summary
+
+| Phase | Unit Tests | Integration Tests | Total |
+|-------|------------|-------------------|-------|
+| Phase 1: Config | 10 | 0 | 10 |
+| Phase 2: CLI Defs | 8 | 0 | 8 |
+| Phase 3: Output | 6 | 0 | 6 |
+| Phase 4: Serve | 8 | 0 | 8 |
+| Phase 5: Query Cmds | 10 | 0 | 10 |
+| Phase 6: Config Init | 4 | 0 | 4 |
+| Phase 7: Integration | 0 | 10 | 10 |
+| **Total** | **46** | **10** | **56** |
+
+---
+
+## Risk Mitigation
+
+| Risk | Mitigation |
+|------|------------|
+| Config crate complexity | Start simple, use TOML only |
+| Async CLI for non-serve commands | Run tokio runtime inline for query commands |
+| Table formatting edge cases | Use comfy-table's built-in truncation |
+| Shell completion generation | Defer to P1, not blocking MVP |
+
+---
+
+## Definition of Done
+
+- [ ] All 56 tests pass
+- [ ] `cargo clippy` reports no warnings
+- [ ] `cargo fmt --check` passes
+- [ ] `nexus --help` shows all commands
+- [ ] `nexus serve` starts server
+- [ ] `nexus config init` generates valid config
+- [ ] Config precedence works: CLI > env > file > defaults
+- [ ] JSON output is valid for all commands
+- [ ] Exit code 1 on errors with helpful messages
+- [ ] Graceful shutdown on SIGINT
diff --git a/specs/003-cli-configuration/spec.md b/specs/003-cli-configuration/spec.md
new file mode 100644
index 0000000..e574c27
--- /dev/null
+++ b/specs/003-cli-configuration/spec.md
@@ -0,0 +1,802 @@
+# F04: CLI and Configuration
+
+**Feature ID**: F04  
+**Spec ID**: 003-cli-configuration  
+**Priority**: P0 (Core MVP)  
+**Status**: Draft  
+**Created**: 2026-02-02
+
+---
+
+## Overview
+
+Command-line interface and TOML configuration file support for Nexus. Provides both interactive commands for management and a `serve` command for running the daemon.
+
+### Goals
+
+1. **Zero-friction startup**: `nexus serve` works out-of-the-box with sensible defaults
+2. **Layered configuration**: CLI args > Environment variables > Config file > Defaults
+3. **Operator-friendly output**: Pretty tables for humans, JSON for scripts
+4. **Single binary**: All commands are subcommands of the `nexus` binary
+
+### Non-Goals
+
+- GUI configuration (see F10: Web Dashboard)
+- Remote management API (future feature)
+- Config hot-reloading (restart required for config changes)
+
+---
+
+## User Stories
+
+| ID | Story | Priority |
+|----|-------|----------|
+| US1 | As an operator, I want to start Nexus with `nexus serve` so I can run the server | P0 |
+| US2 | As an operator, I want to specify a config file path so I can use different configurations | P0 |
+| US3 | As an operator, I want to override config with CLI flags so I can quickly test settings | P0 |
+| US4 | As an operator, I want to list backends so I can see what's connected | P0 |
+| US5 | As an operator, I want to list models so I can see what's available | P0 |
+| US6 | As an operator, I want to check health status so I can verify the system is working | P0 |
+| US7 | As a developer, I want JSON output so I can script against Nexus | P1 |
+| US8 | As an operator, I want to add/remove backends at runtime so I can manage the cluster | P1 |
+| US9 | As a new user, I want to generate a config template so I can get started quickly | P1 |
+| US10 | As an operator, I want environment variable overrides for containerized deployments | P0 |
+
+---
+
+## CLI Commands
+
+### nexus serve [OPTIONS]
+
+Start the Nexus server (main daemon mode).
+
+```
+OPTIONS:
+  -c, --config <FILE>     Config file path [default: nexus.toml]
+  -p, --port <PORT>       Listen port [default: 8000]
+  -H, --host <HOST>       Listen address [default: 0.0.0.0]
+  -l, --log-level <LEVEL> Log level: trace, debug, info, warn, error [default: info]
+      --no-discovery      Disable mDNS discovery
+      --no-health-check   Disable background health checking
+  -h, --help              Print help
+```
+
+**Behavior:**
+- Loads config file if present (not required)
+- Starts HTTP server on `host:port`
+- Starts health checker (unless `--no-health-check`)
+- Starts mDNS discovery (unless `--no-discovery`)
+- Runs until SIGINT/SIGTERM (graceful shutdown)
+
+**Exit Codes:**
+- 0: Graceful shutdown
+- 1: Startup error (port in use, config parse error, etc.)
+
+---
+
+### nexus backends [OPTIONS]
+
+List all registered backends.
+
+```
+OPTIONS:
+      --json              Output as JSON
+      --status <STATUS>   Filter by status: healthy, unhealthy, unknown
+  -h, --help              Print help
+```
+
+**Table Output:**
+```
+┌──────────────┬────────────────────────────┬─────────┬──────────┬────────┬──────────┐
+│ Name         │ URL                        │ Type    │ Status   │ Models │ Latency  │
+├──────────────┼────────────────────────────┼─────────┼──────────┼────────┼──────────┤
+│ local-ollama │ http://localhost:11434     │ Ollama  │ Healthy  │ 3      │ 45ms     │
+│ gpu-server   │ http://192.168.1.100:8000  │ vLLM    │ Healthy  │ 1      │ 12ms     │
+│ pi-cluster   │ http://192.168.1.50:52415  │ Exo     │ Unhealthy│ 0      │ -        │
+└──────────────┴────────────────────────────┴─────────┴──────────┴────────┴──────────┘
+```
+
+**JSON Output:**
+```json
+{
+  "backends": [
+    {
+      "id": "backend-abc123",
+      "name": "local-ollama",
+      "url": "http://localhost:11434",
+      "type": "ollama",
+      "status": "healthy",
+      "models": 3,
+      "avg_latency_ms": 45,
+      "pending_requests": 2,
+      "discovery_source": "static"
+    }
+  ]
+}
+```
+
+---
+
+### nexus backends add <URL> [OPTIONS]
+
+Add a backend manually at runtime.
+
+```
+ARGS:
+  <URL>                   Backend base URL (e.g., http://192.168.1.100:11434)
+
+OPTIONS:
+      --name <NAME>       Display name [default: derived from URL]
+      --type <TYPE>       Backend type: ollama, vllm, llamacpp, exo, openai, generic
+                          [default: auto-detect]
+      --priority <N>      Routing priority (lower = prefer) [default: 50]
+  -h, --help              Print help
+```
+
+**Behavior:**
+- Adds backend to registry with `DiscoverySource::Manual`
+- Triggers immediate health check
+- Prints result (success with backend ID, or error)
+
+**Auto-detection:**
+If `--type` not specified, attempt to detect by:
+1. Try GET `/api/tags` → Ollama
+2. Try GET `/v1/models` → OpenAI-compatible
+3. Try GET `/health` → LlamaCpp
+4. Fall back to `generic`
+
+---
+
+### nexus backends remove <ID>
+
+Remove a backend by ID or name.
+
+```
+ARGS:
+  <ID>                    Backend ID or name
+
+OPTIONS:
+  -h, --help              Print help
+```
+
+**Behavior:**
+- Removes backend from registry
+- Active requests continue (graceful)
+- Returns error if ID not found
+
+---
+
+### nexus models [OPTIONS]
+
+List all available models across backends.
+
+```
+OPTIONS:
+      --json              Output as JSON
+      --backend <ID>      Filter by backend ID or name
+  -h, --help              Print help
+```
+
+**Table Output:**
+```
+┌──────────────┬─────────────────┬─────────┬────────┬───────┬──────────┐
+│ Model        │ Backend         │ Context │ Vision │ Tools │ JSON     │
+├──────────────┼─────────────────┼─────────┼────────┼───────┼──────────┤
+│ llama3:70b   │ local-ollama    │ 8192    │ No     │ Yes   │ No       │
+│ llama3:70b   │ gpu-server      │ 8192    │ No     │ Yes   │ No       │
+│ mistral:7b   │ local-ollama    │ 32768   │ No     │ No    │ No       │
+│ llava:13b    │ local-ollama    │ 4096    │ Yes    │ No    │ No       │
+└──────────────┴─────────────────┴─────────┴────────┴───────┴──────────┘
+```
+
+**JSON Output:**
+```json
+{
+  "models": [
+    {
+      "id": "llama3:70b",
+      "backends": ["local-ollama", "gpu-server"],
+      "context_length": 8192,
+      "supports_vision": false,
+      "supports_tools": true,
+      "supports_json_mode": false
+    }
+  ]
+}
+```
+
+---
+
+### nexus health [OPTIONS]
+
+Show system health status.
+
+```
+OPTIONS:
+      --json              Output as JSON
+  -h, --help              Print help
+```
+
+**Pretty Output:**
+```
+Status: Healthy
+Version: 0.1.0
+Uptime: 2h 34m 12s
+
+Backends: 2/3 healthy
+Models: 4 available
+
+Backend Details:
+  ✓ local-ollama     3 models  45ms avg  2 pending
+  ✓ gpu-server       1 model   12ms avg  0 pending
+  ✗ pi-cluster       connection refused (3m ago)
+```
+
+**JSON Output:**
+```json
+{
+  "status": "healthy",
+  "version": "0.1.0",
+  "uptime_seconds": 9252,
+  "backends": {
+    "total": 3,
+    "healthy": 2,
+    "unhealthy": 1
+  },
+  "models": {
+    "total": 4
+  },
+  "backend_details": [
+    {
+      "name": "local-ollama",
+      "status": "healthy",
+      "models": 3,
+      "avg_latency_ms": 45,
+      "pending_requests": 2
+    }
+  ]
+}
+```
+
+---
+
+### nexus config init [OPTIONS]
+
+Generate an example configuration file.
+
+```
+OPTIONS:
+  -o, --output <FILE>     Output file path [default: nexus.toml]
+      --minimal           Generate minimal config (only essential settings)
+      --force             Overwrite existing file
+  -h, --help              Print help
+```
+
+**Behavior:**
+- Writes a fully-commented example config
+- Fails if file exists (unless `--force`)
+- `--minimal` omits optional sections
+
+---
+
+### nexus completions <SHELL>
+
+Generate shell completion scripts.
+
+```
+ARGS:
+  <SHELL>                 Target shell: bash, zsh, fish, powershell, elvish
+
+OPTIONS:
+  -h, --help              Print help
+```
+
+**Usage Examples:**
+```bash
+# Bash (add to ~/.bashrc or ~/.bash_completion.d/)
+nexus completions bash > ~/.bash_completion.d/nexus
+source ~/.bash_completion.d/nexus
+
+# Zsh (add to fpath)
+nexus completions zsh > ~/.zfunc/_nexus
+
+# Fish
+nexus completions fish > ~/.config/fish/completions/nexus.fish
+
+# PowerShell
+nexus completions powershell >> $PROFILE
+```
+
+---
+
+### nexus --version
+
+Print version information.
+
+```
+nexus 0.1.0
+```
+
+### nexus --help
+
+Print help for all commands.
+
+---
+
+## Configuration File
+
+### Full Example: nexus.toml
+
+```toml
+# Nexus Configuration
+# See: https://github.com/user/nexus
+
+[server]
+host = "0.0.0.0"              # Listen address
+port = 8000                    # Listen port
+request_timeout_seconds = 300  # Max request duration
+max_concurrent_requests = 1000 # Limit in-flight requests
+
+[discovery]
+enabled = true
+service_types = ["_ollama._tcp.local", "_llm._tcp.local"]
+grace_period_seconds = 60      # Wait before removing disappeared backends
+
+[health_check]
+enabled = true
+interval_seconds = 30
+timeout_seconds = 5
+failure_threshold = 3          # Failures before marking unhealthy
+recovery_threshold = 2         # Successes before marking healthy
+
+[routing]
+strategy = "smart"             # smart | round_robin | priority_only | random
+max_retries = 2                # Retry on backend failure
+
+[routing.weights]
+priority = 50                  # Weight for backend priority
+load = 30                      # Weight for current load
+latency = 20                   # Weight for average latency
+
+[routing.aliases]
+"gpt-4" = "llama3:70b"
+"gpt-4-turbo" = "llama3:70b"
+"gpt-3.5-turbo" = "mistral:7b"
+
+[routing.fallbacks]
+"llama3:70b" = ["qwen2:72b", "mixtral:8x7b"]
+
+[[backends]]
+name = "local-ollama"
+url = "http://localhost:11434"
+type = "ollama"
+priority = 1
+
+[[backends]]
+name = "gpu-server"
+url = "http://192.168.1.100:8000"
+type = "vllm"
+priority = 2
+
+[logging]
+level = "info"                 # trace, debug, info, warn, error
+format = "pretty"              # pretty | json
+```
+
+### Configuration Sections
+
+| Section | Required | Description |
+|---------|----------|-------------|
+| `[server]` | No | HTTP server settings |
+| `[discovery]` | No | mDNS discovery settings |
+| `[health_check]` | No | Health checker settings |
+| `[routing]` | No | Routing strategy and weights |
+| `[routing.aliases]` | No | Model name mappings |
+| `[routing.fallbacks]` | No | Fallback model chains |
+| `[[backends]]` | No | Static backend definitions |
+| `[logging]` | No | Logging configuration |
+
+### Default Values
+
+| Setting | Default | Notes |
+|---------|---------|-------|
+| `server.host` | `"0.0.0.0"` | Listen on all interfaces |
+| `server.port` | `8000` | Standard port |
+| `server.request_timeout_seconds` | `300` | 5 minutes for long completions |
+| `server.max_concurrent_requests` | `1000` | Per-server limit |
+| `discovery.enabled` | `true` | Zero-config by default |
+| `discovery.grace_period_seconds` | `60` | Avoid thrashing |
+| `health_check.enabled` | `true` | Always check health |
+| `health_check.interval_seconds` | `30` | Balance freshness vs load |
+| `health_check.timeout_seconds` | `5` | Fail fast |
+| `health_check.failure_threshold` | `3` | Avoid flapping |
+| `health_check.recovery_threshold` | `2` | Confirm recovery |
+| `routing.strategy` | `"smart"` | Best of all factors |
+| `routing.max_retries` | `2` | Try 3 backends total |
+| `routing.weights.priority` | `50` | |
+| `routing.weights.load` | `30` | |
+| `routing.weights.latency` | `20` | |
+| `logging.level` | `"info"` | Reasonable default |
+| `logging.format` | `"pretty"` | Human-readable |
+
+---
+
+## Environment Variables
+
+All settings can be overridden via environment variables.
+
+| Variable | Config Equivalent | Example |
+|----------|-------------------|---------|
+| `NEXUS_CONFIG` | (config file path) | `/etc/nexus/nexus.toml` |
+| `NEXUS_HOST` | `server.host` | `127.0.0.1` |
+| `NEXUS_PORT` | `server.port` | `9000` |
+| `NEXUS_LOG_LEVEL` | `logging.level` | `debug` |
+| `NEXUS_LOG_FORMAT` | `logging.format` | `json` |
+| `NEXUS_DISCOVERY` | `discovery.enabled` | `false` |
+| `NEXUS_HEALTH_CHECK` | `health_check.enabled` | `false` |
+| `NEXUS_ROUTING_STRATEGY` | `routing.strategy` | `round_robin` |
+
+---
+
+## Configuration Precedence
+
+Settings are resolved in this order (later wins):
+
+1. **Compiled defaults** - Hardcoded in source
+2. **Config file** - `nexus.toml` or `--config`
+3. **Environment variables** - `NEXUS_*`
+4. **CLI arguments** - `--port`, `--host`, etc.
+
+### Example
+
+```bash
+# Config file has: port = 8000
+# Environment has: NEXUS_PORT=9000
+# CLI has: --port 9001
+
+# Result: port = 9001 (CLI wins)
+```
+
+---
+
+## Technical Stack
+
+| Crate | Purpose | Notes |
+|-------|---------|-------|
+| `clap` | CLI argument parsing | Use derive feature for declarative API |
+| `config` | Layered configuration | Merge TOML + env + defaults |
+| `toml` | TOML serialization | For `config init` output |
+| `comfy-table` | Pretty table output | Terminal-aware formatting |
+| `serde` | Serialization | Already a dependency |
+
+---
+
+## Data Structures
+
+### Unified Config Struct
+
+```rust
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[serde(default)]
+pub struct NexusConfig {
+    pub server: ServerConfig,
+    pub discovery: DiscoveryConfig,
+    pub health_check: HealthCheckConfig,
+    pub routing: RoutingConfig,
+    pub backends: Vec<BackendConfig>,
+    pub logging: LoggingConfig,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[serde(default)]
+pub struct ServerConfig {
+    pub host: String,
+    pub port: u16,
+    pub request_timeout_seconds: u64,
+    pub max_concurrent_requests: u32,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[serde(default)]
+pub struct RoutingConfig {
+    pub strategy: RoutingStrategy,
+    pub max_retries: u32,
+    pub weights: RoutingWeights,
+    pub aliases: HashMap<String, String>,
+    pub fallbacks: HashMap<String, Vec<String>>,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[serde(default)]
+pub struct LoggingConfig {
+    pub level: String,
+    pub format: LogFormat,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct BackendConfig {
+    pub name: String,
+    pub url: String,
+    #[serde(rename = "type")]
+    pub backend_type: BackendType,
+    #[serde(default = "default_priority")]
+    pub priority: i32,
+}
+```
+
+### CLI Argument Structs (clap)
+
+```rust
+#[derive(Parser)]
+#[command(name = "nexus", version, about = "Distributed LLM Orchestrator")]
+pub struct Cli {
+    #[command(subcommand)]
+    pub command: Commands,
+}
+
+#[derive(Subcommand)]
+pub enum Commands {
+    /// Start the Nexus server
+    Serve(ServeArgs),
+    /// Manage backends
+    Backends(BackendsArgs),
+    /// List available models
+    Models(ModelsArgs),
+    /// Show system health
+    Health(HealthArgs),
+    /// Configuration utilities
+    Config(ConfigArgs),
+}
+
+#[derive(Args)]
+pub struct ServeArgs {
+    #[arg(short, long, default_value = "nexus.toml")]
+    pub config: PathBuf,
+    
+    #[arg(short, long)]
+    pub port: Option<u16>,
+    
+    #[arg(short = 'H', long)]
+    pub host: Option<String>,
+    
+    #[arg(short, long)]
+    pub log_level: Option<String>,
+    
+    #[arg(long)]
+    pub no_discovery: bool,
+    
+    #[arg(long)]
+    pub no_health_check: bool,
+}
+```
+
+---
+
+## Functional Requirements
+
+| ID | Requirement | Priority |
+|----|-------------|----------|
+| FR-001 | Parse TOML config file | P0 |
+| FR-002 | Apply defaults for missing config values | P0 |
+| FR-003 | Override config with environment variables | P0 |
+| FR-004 | Override config/env with CLI arguments | P0 |
+| FR-005 | `serve` command starts HTTP server | P0 |
+| FR-006 | `backends` command lists backends | P0 |
+| FR-007 | `models` command lists models | P0 |
+| FR-008 | `health` command shows status | P0 |
+| FR-009 | JSON output flag for scripting | P1 |
+| FR-010 | `backends add` adds runtime backend with auto-type detection | P1 |
+| FR-011 | `backends remove` removes backend | P1 |
+| FR-012 | `config init` generates template | P1 |
+| FR-013 | Graceful shutdown on SIGINT/SIGTERM | P0 |
+| FR-014 | Exit code 1 on startup errors | P0 |
+| FR-015 | Pretty table output with colors | P1 |
+| FR-016 | `completions` command generates shell scripts | P1 |
+| FR-017 | Warn on unknown config keys (don't fail) | P1 |
+
+---
+
+## Non-Functional Requirements
+
+| ID | Requirement | Metric |
+|----|-------------|--------|
+| NFR-001 | Config parsing < 10ms | Measured |
+| NFR-002 | CLI startup to output < 100ms | For non-serve commands |
+| NFR-003 | Works without config file | Zero-config mode |
+| NFR-004 | Helpful error messages | Include fix suggestions |
+| NFR-005 | Shell completion support | Fish, Bash, Zsh |
+
+---
+
+## Error Handling
+
+### Config Errors
+
+```rust
+#[derive(Debug, thiserror::Error)]
+pub enum ConfigError {
+    #[error("config file not found: {0}")]
+    NotFound(PathBuf),
+    
+    #[error("config parse error at line {line}: {message}")]
+    ParseError { line: usize, message: String },
+    
+    #[error("invalid value for {field}: {message}")]
+    ValidationError { field: String, message: String },
+    
+    #[error("environment variable {0} has invalid value: {1}")]
+    EnvError(String, String),
+}
+```
+
+### Example Error Output
+
+```
+Error: config parse error at line 15: invalid backend type 'local'
+
+  14 | [[backends]]
+  15 | type = "local"
+     |        ^^^^^^^ expected one of: ollama, vllm, llamacpp, exo, openai, generic
+
+Tip: Use 'nexus config init' to generate a valid config template.
+```
+
+---
+
+## Edge Cases
+
+| Scenario | Behavior |
+|----------|----------|
+| Config file not found | Use defaults (warn if `--config` explicit) |
+| Config file parse error | Exit with error, show line number |
+| Invalid backend URL in config | Warn and skip that backend |
+| Port already in use | Exit with error, suggest alternative |
+| No backends available | Start anyway (health shows "degraded") |
+| SIGINT during startup | Clean exit |
+| Invalid UTF-8 in config | Exit with error |
+
+---
+
+## Testing Strategy
+
+### Unit Tests
+
+| Test | Description |
+|------|-------------|
+| `test_config_defaults` | Verify all defaults are sensible |
+| `test_config_parse_full` | Parse complete config file |
+| `test_config_parse_minimal` | Parse minimal config file |
+| `test_config_merge_env` | Environment overrides config |
+| `test_config_merge_cli` | CLI overrides environment |
+| `test_config_validation_*` | Validate each field |
+| `test_backend_type_serde` | BackendType serialization |
+
+### Integration Tests
+
+| Test | Description |
+|------|-------------|
+| `test_serve_starts_server` | Server binds to port |
+| `test_serve_with_config` | Loads specified config |
+| `test_backends_list_empty` | No backends shows empty table |
+| `test_backends_list_json` | JSON output is valid |
+| `test_backends_add_remove` | Add and remove lifecycle |
+| `test_config_init_creates_file` | Creates valid template |
+| `test_graceful_shutdown` | SIGINT triggers clean exit |
+
+### CLI Output Tests
+
+| Test | Description |
+|------|-------------|
+| `test_version_output` | `--version` shows version |
+| `test_help_output` | `--help` shows all commands |
+| `test_table_formatting` | Tables render correctly |
+| `test_json_valid` | All JSON output is valid |
+
+---
+
+## Acceptance Criteria
+
+- [ ] AC-01: `nexus serve` starts server with all options working
+- [ ] AC-02: `nexus serve` works without any config file (zero-config mode)
+- [ ] AC-03: `nexus backends` lists backends with pretty table
+- [ ] AC-04: `nexus backends --json` outputs valid JSON
+- [ ] AC-05: `nexus backends --status healthy` filters correctly
+- [ ] AC-06: `nexus backends add <URL>` adds backend and triggers health check
+- [ ] AC-07: `nexus backends remove <ID>` removes backend
+- [ ] AC-08: `nexus models` lists all models with capabilities
+- [ ] AC-09: `nexus models --json` outputs valid JSON
+- [ ] AC-10: `nexus health` shows system status
+- [ ] AC-11: `nexus health --json` outputs valid JSON
+- [ ] AC-12: `nexus config init` generates valid template
+- [ ] AC-13: Config file parses correctly
+- [ ] AC-14: Environment variables override config
+- [ ] AC-15: CLI arguments override environment and config
+- [ ] AC-16: Graceful shutdown on SIGINT/SIGTERM
+- [ ] AC-17: Exit code 1 on startup errors with helpful message
+- [ ] AC-18: `nexus --version` and `nexus --help` work
+
+---
+
+## Dependencies
+
+| Dependency | Reason |
+|------------|--------|
+| F02: Backend Registry | Required for `backends` and `models` commands |
+| F03: Health Checker | Required for `health` command and `serve` |
+
+---
+
+## Design Decisions
+
+### Decision 1: Shell Completion Generation
+
+**Question**: Where should shell completion generation live?
+
+**Decision**: Separate top-level command `nexus completions <shell>`
+
+**Rationale**:
+- Follows established patterns from `rustup`, `gh`, `kubectl`
+- More discoverable than nested subcommand
+- Cleaner separation of concerns
+
+**Usage**:
+```bash
+# Bash
+nexus completions bash > ~/.bash_completion.d/nexus
+
+# Zsh
+nexus completions zsh > ~/.zfunc/_nexus
+
+# Fish
+nexus completions fish > ~/.config/fish/completions/nexus.fish
+```
+
+---
+
+### Decision 2: Config Validation Strictness
+
+**Question**: How should unknown config keys be handled?
+
+**Decision**: Warn on unknown keys, continue loading
+
+**Rationale**:
+- Catches typos without being overly strict
+- Allows forward compatibility (old Nexus version with newer config)
+- User sees the issue in logs but service still starts
+
+**Behavior**:
+```
+$ nexus serve -c nexus.toml
+WARN nexus::config: Unknown config key 'server.unknown_setting' - ignoring
+INFO nexus: Nexus server starting on 0.0.0.0:8000
+```
+
+---
+
+### Decision 3: Backend Auto-Type Detection
+
+**Question**: Should `nexus backends add` auto-detect backend type?
+
+**Decision**: Auto-detect with fallback to `generic`
+
+**Rationale**:
+- Zero-friction for common use cases (Ollama, vLLM)
+- Fallback to `generic` prevents blocking on network issues
+- User can always override with explicit `--type` flag
+
+**Detection Order**:
+1. Try `GET /api/tags` → If 200 with valid JSON: **Ollama**
+2. Try `GET /health` → If 200 with `{"status": "ok"}`: **LlamaCpp**
+3. Try `GET /v1/models` → If 200 with valid JSON: **OpenAI-compatible** (vLLM/Exo/Generic)
+4. If all fail or timeout (2s): Default to **Generic**
+
+**Override**: `nexus backends add http://... --type vllm`
+
+---
+
+## References
+
+- [F02: Backend Registry Spec](../001-backend-registry/spec.md)
+- [F03: Health Checker Spec](../002-health-checker/spec.md)
+- [clap documentation](https://docs.rs/clap/latest/clap/)
+- [config crate documentation](https://docs.rs/config/latest/config/)
diff --git a/specs/003-cli-configuration/tasks.md b/specs/003-cli-configuration/tasks.md
new file mode 100644
index 0000000..0e01742
--- /dev/null
+++ b/specs/003-cli-configuration/tasks.md
@@ -0,0 +1,2137 @@
+# Implementation Tasks: CLI and Configuration
+
+**Spec**: [spec.md](./spec.md)  
+**Plan**: [plan.md](./plan.md)  
+**Status**: Ready for Implementation
+
+## Task Overview
+
+| Task | Description | Est. Time | Dependencies |
+|------|-------------|-----------|--------------|
+| T01 | Add dependencies & module scaffolding | 1h | None |
+| T02 | NexusConfig struct & defaults | 2h | T01 |
+| T03 | Config file loading & parsing | 2h | T02 |
+| T04 | Environment variable overrides | 1.5h | T03 |
+| T05 | ConfigError enum & validation (with unknown key warnings) | 1.5h | T02 |
+| T06 | CLI command definitions (clap) | 2h | T01 |
+| T07 | Output formatting (tables/JSON) | 2h | T06 |
+| T08 | Serve command implementation | 3h | T04, T07 |
+| T09 | Backends list command | 1.5h | T07 |
+| T10 | Backends add/remove commands (with auto-detection) | 2.5h | T09 |
+| T11 | Models command | 1.5h | T07 |
+| T12 | Health command | 1.5h | T07 |
+| T13 | Config init command | 1.5h | T02 |
+| T14 | Completions command | 1h | T06 |
+| T15 | Graceful shutdown handling | 1.5h | T08 |
+| T16 | Integration tests | 2.5h | All |
+| T17 | Documentation & cleanup | 1.5h | All |
+
+**Total Estimated Time**: ~29 hours
+**Total Tests**: 70 (unit + integration)
+
+---
+
+## T01: Add Dependencies & Module Scaffolding
+
+**Goal**: Add required dependencies and create module structure.
+
+**Files to create/modify**:
+- `Cargo.toml` (add comfy-table, config, colored)
+- `src/lib.rs` (add config and cli modules)
+- `src/config/mod.rs` (create)
+- `src/config/server.rs` (create, placeholder)
+- `src/config/routing.rs` (create, placeholder)
+- `src/config/logging.rs` (create, placeholder)
+- `src/config/error.rs` (create, placeholder)
+- `src/cli/mod.rs` (create)
+- `src/cli/serve.rs` (create, placeholder)
+- `src/cli/backends.rs` (create, placeholder)
+- `src/cli/models.rs` (create, placeholder)
+- `src/cli/health.rs` (create, placeholder)
+- `src/cli/output.rs` (create, placeholder)
+
+**Implementation Steps**:
+1. Add to `Cargo.toml`:
+   ```toml
+   # Pretty table output
+   comfy-table = "7"
+   
+   # Layered configuration
+   config = { version = "0.14", default-features = false, features = ["toml"] }
+   
+   # Terminal colors
+   colored = "2"
+   
+   # Shell completion generation
+   clap_complete = "4"
+   ```
+2. Add dev-dependencies:
+   ```toml
+   tempfile = "3"
+   wiremock = "0.6"  # Already present, verify version
+   ```
+3. Update `src/lib.rs`:
+   ```rust
+   pub mod registry;
+   pub mod health;
+   pub mod config;
+   pub mod cli;
+   ```
+4. Create module structure with placeholder files
+5. Run `cargo check` to verify structure compiles
+
+**Acceptance Criteria**:
+- [X] `cargo check` passes with no errors
+- [X] All dependencies resolve correctly
+- [X] Module structure matches plan's file layout
+
+**Test Command**: `cargo check`
+
+---
+
+## T02: NexusConfig Struct & Defaults
+
+**Goal**: Implement the unified configuration struct with all sub-configs.
+
+**Files to modify**:
+- `src/config/mod.rs`
+- `src/config/server.rs`
+- `src/config/routing.rs`
+- `src/config/logging.rs`
+
+**Tests to Write First** (6 tests):
+```rust
+#[test]
+fn test_server_config_defaults() {
+    let config = ServerConfig::default();
+    assert_eq!(config.host, "0.0.0.0");
+    assert_eq!(config.port, 8000);
+    assert_eq!(config.request_timeout_seconds, 300);
+    assert_eq!(config.max_concurrent_requests, 1000);
+}
+
+#[test]
+fn test_routing_config_defaults() {
+    let config = RoutingConfig::default();
+    assert_eq!(config.strategy, RoutingStrategy::Smart);
+    assert_eq!(config.max_retries, 2);
+}
+
+#[test]
+fn test_routing_strategy_serde() {
+    let strategy = RoutingStrategy::RoundRobin;
+    let json = serde_json::to_string(&strategy).unwrap();
+    assert_eq!(json, "\"round_robin\"");
+}
+
+#[test]
+fn test_logging_config_defaults() {
+    let config = LoggingConfig::default();
+    assert_eq!(config.level, "info");
+    assert_eq!(config.format, LogFormat::Pretty);
+}
+
+#[test]
+fn test_log_format_serde() {
+    let format = LogFormat::Json;
+    let json = serde_json::to_string(&format).unwrap();
+    assert_eq!(json, "\"json\"");
+}
+
+#[test]
+fn test_nexus_config_defaults() {
+    let config = NexusConfig::default();
+    assert_eq!(config.server.port, 8000);
+    assert!(config.discovery.enabled);
+    assert!(config.health_check.enabled);
+    assert!(config.backends.is_empty());
+}
+```
+
+**Implementation**:
+1. Implement `ServerConfig` with Default:
+   ```rust
+   #[derive(Debug, Clone, Serialize, Deserialize)]
+   #[serde(default)]
+   pub struct ServerConfig {
+       pub host: String,
+       pub port: u16,
+       pub request_timeout_seconds: u64,
+       pub max_concurrent_requests: u32,
+   }
+   ```
+
+2. Implement `RoutingConfig` with enums:
+   ```rust
+   #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
+   #[serde(rename_all = "snake_case")]
+   pub enum RoutingStrategy {
+       #[default]
+       Smart,
+       RoundRobin,
+       PriorityOnly,
+       Random,
+   }
+   ```
+
+3. Implement `LoggingConfig`:
+   ```rust
+   #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
+   #[serde(rename_all = "snake_case")]
+   pub enum LogFormat {
+       #[default]
+       Pretty,
+       Json,
+   }
+   ```
+
+4. Implement top-level `NexusConfig`:
+   ```rust
+   #[derive(Debug, Clone, Serialize, Deserialize, Default)]
+   #[serde(default)]
+   pub struct NexusConfig {
+       pub server: ServerConfig,
+       pub discovery: DiscoveryConfig,
+       pub health_check: HealthCheckConfig,
+       pub routing: RoutingConfig,
+       pub backends: Vec<BackendConfig>,
+       pub logging: LoggingConfig,
+   }
+   ```
+
+**Acceptance Criteria**:
+- [X] All 6 tests pass
+- [X] All config structs have Default implementation
+- [X] Serialization uses snake_case for enums
+- [X] `#[serde(default)]` applied to all config structs
+
+**Test Command**: `cargo test config::`
+
+---
+
+## T03: Config File Loading & Parsing
+
+**Goal**: Implement TOML config file loading.
+
+**Files to modify**:
+- `src/config/mod.rs`
+
+**Tests to Write First** (5 tests):
+```rust
+#[test]
+fn test_config_parse_minimal_toml() {
+    let toml = r#"
+    [server]
+    port = 9000
+    "#;
+    
+    let config: NexusConfig = toml::from_str(toml).unwrap();
+    assert_eq!(config.server.port, 9000);
+    assert_eq!(config.server.host, "0.0.0.0"); // Default
+}
+
+#[test]
+fn test_config_parse_full_toml() {
+    let toml = include_str!("../../nexus.example.toml");
+    let config: NexusConfig = toml::from_str(toml).unwrap();
+    assert!(config.server.port > 0);
+}
+
+#[test]
+fn test_config_parse_backends_array() {
+    let toml = r#"
+    [[backends]]
+    name = "local"
+    url = "http://localhost:11434"
+    type = "ollama"
+    
+    [[backends]]
+    name = "remote"
+    url = "http://192.168.1.100:8000"
+    type = "vllm"
+    "#;
+    
+    let config: NexusConfig = toml::from_str(toml).unwrap();
+    assert_eq!(config.backends.len(), 2);
+}
+
+#[test]
+fn test_config_load_from_file() {
+    let temp = tempfile::NamedTempFile::new().unwrap();
+    std::fs::write(temp.path(), "[server]\nport = 8080").unwrap();
+    
+    let config = NexusConfig::load(Some(temp.path())).unwrap();
+    assert_eq!(config.server.port, 8080);
+}
+
+#[test]
+fn test_config_missing_file_error() {
+    let result = NexusConfig::load(Some(Path::new("/nonexistent/config.toml")));
+    assert!(matches!(result, Err(ConfigError::NotFound(_))));
+}
+```
+
+**Implementation**:
+1. Add `BackendConfig` struct:
+   ```rust
+   #[derive(Debug, Clone, Serialize, Deserialize)]
+   pub struct BackendConfig {
+       pub name: String,
+       pub url: String,
+       #[serde(rename = "type")]
+       pub backend_type: BackendType,
+       #[serde(default = "default_priority")]
+       pub priority: i32,
+   }
+   
+   fn default_priority() -> i32 { 50 }
+   ```
+
+2. Implement `NexusConfig::load()`:
+   ```rust
+   impl NexusConfig {
+       pub fn load(path: Option<&Path>) -> Result<Self, ConfigError> {
+           match path {
+               Some(p) => {
+                   if !p.exists() {
+                       return Err(ConfigError::NotFound(p.to_path_buf()));
+                   }
+                   let content = std::fs::read_to_string(p)
+                       .map_err(|e| ConfigError::IoError(e.to_string()))?;
+                   toml::from_str(&content)
+                       .map_err(|e| ConfigError::ParseError(e.to_string()))
+               }
+               None => Ok(Self::default()),
+           }
+       }
+   }
+   ```
+
+**Acceptance Criteria**:
+- [X] All 5 tests pass
+- [X] Parses `nexus.example.toml` without error
+- [X] Returns proper error for missing file
+- [X] Default values applied for missing fields
+
+**Test Command**: `cargo test config::tests::test_config_parse`
+
+---
+
+## T04: Environment Variable Overrides
+
+**Goal**: Implement NEXUS_* environment variable override support.
+
+**Files to modify**:
+- `src/config/mod.rs`
+
+**Tests to Write First** (4 tests):
+```rust
+#[test]
+fn test_config_env_override_port() {
+    std::env::set_var("NEXUS_PORT", "9999");
+    let config = NexusConfig::default().with_env_overrides();
+    std::env::remove_var("NEXUS_PORT");
+    
+    assert_eq!(config.server.port, 9999);
+}
+
+#[test]
+fn test_config_env_override_host() {
+    std::env::set_var("NEXUS_HOST", "127.0.0.1");
+    let config = NexusConfig::default().with_env_overrides();
+    std::env::remove_var("NEXUS_HOST");
+    
+    assert_eq!(config.server.host, "127.0.0.1");
+}
+
+#[test]
+fn test_config_env_override_log_level() {
+    std::env::set_var("NEXUS_LOG_LEVEL", "debug");
+    let config = NexusConfig::default().with_env_overrides();
+    std::env::remove_var("NEXUS_LOG_LEVEL");
+    
+    assert_eq!(config.logging.level, "debug");
+}
+
+#[test]
+fn test_config_env_invalid_value_ignored() {
+    std::env::set_var("NEXUS_PORT", "not-a-number");
+    let config = NexusConfig::default().with_env_overrides();
+    std::env::remove_var("NEXUS_PORT");
+    
+    // Should keep default, not crash
+    assert_eq!(config.server.port, 8000);
+}
+```
+
+**Implementation**:
+```rust
+impl NexusConfig {
+    pub fn with_env_overrides(mut self) -> Self {
+        if let Ok(port) = std::env::var("NEXUS_PORT") {
+            if let Ok(p) = port.parse() {
+                self.server.port = p;
+            }
+        }
+        if let Ok(host) = std::env::var("NEXUS_HOST") {
+            self.server.host = host;
+        }
+        if let Ok(level) = std::env::var("NEXUS_LOG_LEVEL") {
+            self.logging.level = level;
+        }
+        if let Ok(format) = std::env::var("NEXUS_LOG_FORMAT") {
+            if let Ok(f) = format.parse() {
+                self.logging.format = f;
+            }
+        }
+        if let Ok(discovery) = std::env::var("NEXUS_DISCOVERY") {
+            self.discovery.enabled = discovery.to_lowercase() == "true";
+        }
+        if let Ok(health) = std::env::var("NEXUS_HEALTH_CHECK") {
+            self.health_check.enabled = health.to_lowercase() == "true";
+        }
+        self
+    }
+}
+```
+
+**Acceptance Criteria**:
+- [X] All 4 tests pass
+- [X] Invalid env values don't crash, use defaults
+- [X] All documented NEXUS_* variables work
+
+**Test Command**: `cargo test config::tests::test_config_env`
+
+---
+
+## T05: ConfigError Enum & Validation (with Unknown Key Warnings)
+
+**Goal**: Implement error types with helpful messages and unknown key detection.
+
+**Files to modify**:
+- `src/config/error.rs`
+- `src/config/mod.rs`
+
+**Tests to Write First** (5 tests):
+```rust
+#[test]
+fn test_config_error_not_found_display() {
+    let err = ConfigError::NotFound(PathBuf::from("/etc/nexus.toml"));
+    assert!(err.to_string().contains("/etc/nexus.toml"));
+}
+
+#[test]
+fn test_config_error_parse_display() {
+    let err = ConfigError::ParseError("expected string at line 5".to_string());
+    assert!(err.to_string().contains("line 5"));
+}
+
+#[test]
+fn test_config_validation_invalid_port() {
+    let mut config = NexusConfig::default();
+    config.server.port = 0;
+    
+    let result = config.validate();
+    assert!(matches!(result, Err(ConfigError::ValidationError { field, .. }) if field == "server.port"));
+}
+
+#[test]
+fn test_config_validation_empty_backend_url() {
+    let mut config = NexusConfig::default();
+    config.backends.push(BackendConfig {
+        name: "test".to_string(),
+        url: "".to_string(),
+        backend_type: BackendType::Ollama,
+        priority: 1,
+    });
+    
+    let result = config.validate();
+    assert!(matches!(result, Err(ConfigError::ValidationError { field, .. }) if field.contains("url")));
+}
+
+#[test]
+fn test_config_warns_on_unknown_keys() {
+    // This test verifies unknown keys don't cause errors
+    // The warning is logged via tracing (can verify with tracing_test crate)
+    let toml = r#"
+    [server]
+    port = 8000
+    
+    [unknown_section]
+    foo = "bar"
+    "#;
+    
+    // Should parse successfully (unknown keys are warned, not rejected)
+    let result = NexusConfig::load_from_str(toml);
+    assert!(result.is_ok());
+}
+```
+
+**Implementation**:
+```rust
+#[derive(Debug, thiserror::Error)]
+pub enum ConfigError {
+    #[error("config file not found: {0}")]
+    NotFound(PathBuf),
+    
+    #[error("failed to read config: {0}")]
+    IoError(String),
+    
+    #[error("config parse error: {0}")]
+    ParseError(String),
+    
+    #[error("invalid value for '{field}': {message}")]
+    ValidationError { field: String, message: String },
+}
+
+impl NexusConfig {
+    /// Load config from string, warning on unknown keys
+    pub fn load_from_str(content: &str) -> Result<Self, ConfigError> {
+        // First, detect unknown keys
+        Self::warn_unknown_keys(content);
+        
+        // Then parse normally
+        toml::from_str(content)
+            .map_err(|e| ConfigError::ParseError(e.to_string()))
+    }
+    
+    /// Warn about unknown top-level config keys
+    fn warn_unknown_keys(content: &str) {
+        let known_keys = ["server", "discovery", "health_check", 
+                         "routing", "backends", "logging"];
+        
+        if let Ok(raw_value) = content.parse::<toml::Value>() {
+            if let toml::Value::Table(table) = raw_value {
+                for key in table.keys() {
+                    if !known_keys.contains(&key.as_str()) {
+                        tracing::warn!(
+                            key = %key, 
+                            "Unknown config key '{}' - ignoring", 
+                            key
+                        );
+                    }
+                }
+            }
+        }
+    }
+    
+    pub fn validate(&self) -> Result<(), ConfigError> {
+        if self.server.port == 0 {
+            return Err(ConfigError::ValidationError {
+                field: "server.port".to_string(),
+                message: "port must be non-zero".to_string(),
+            });
+        }
+        
+        for (i, backend) in self.backends.iter().enumerate() {
+            if backend.url.is_empty() {
+                return Err(ConfigError::ValidationError {
+                    field: format!("backends[{}].url", i),
+                    message: "URL cannot be empty".to_string(),
+                });
+            }
+            if backend.name.is_empty() {
+                return Err(ConfigError::ValidationError {
+                    field: format!("backends[{}].name", i),
+                    message: "name cannot be empty".to_string(),
+                });
+            }
+        }
+        
+        Ok(())
+    }
+}
+```
+
+**Acceptance Criteria**:
+- [ ] All 5 tests pass
+- [ ] Error messages include field names
+- [ ] Validation catches common mistakes
+
+**Test Command**: `cargo test config::error`
+
+---
+
+## T06: CLI Command Definitions (clap)
+
+**Goal**: Define all CLI commands using clap derive.
+
+**Files to modify**:
+- `src/cli/mod.rs`
+- `src/main.rs`
+
+**Tests to Write First** (8 tests):
+```rust
+#[test]
+fn test_cli_parse_serve_defaults() {
+    let cli = Cli::try_parse_from(["nexus", "serve"]).unwrap();
+    match cli.command {
+        Commands::Serve(args) => {
+            assert_eq!(args.config, PathBuf::from("nexus.toml"));
+            assert!(args.port.is_none());
+            assert!(!args.no_discovery);
+        }
+        _ => panic!("Expected Serve command"),
+    }
+}
+
+#[test]
+fn test_cli_parse_serve_with_port() {
+    let cli = Cli::try_parse_from(["nexus", "serve", "-p", "9000"]).unwrap();
+    match cli.command {
+        Commands::Serve(args) => assert_eq!(args.port, Some(9000)),
+        _ => panic!("Expected Serve command"),
+    }
+}
+
+#[test]
+fn test_cli_parse_serve_with_config() {
+    let cli = Cli::try_parse_from(["nexus", "serve", "-c", "custom.toml"]).unwrap();
+    match cli.command {
+        Commands::Serve(args) => assert_eq!(args.config, PathBuf::from("custom.toml")),
+        _ => panic!("Expected Serve command"),
+    }
+}
+
+#[test]
+fn test_cli_parse_backends_list() {
+    let cli = Cli::try_parse_from(["nexus", "backends", "list"]).unwrap();
+    assert!(matches!(cli.command, Commands::Backends(BackendsCommands::List(_))));
+}
+
+#[test]
+fn test_cli_parse_backends_list_json() {
+    let cli = Cli::try_parse_from(["nexus", "backends", "list", "--json"]).unwrap();
+    match cli.command {
+        Commands::Backends(BackendsCommands::List(args)) => assert!(args.json),
+        _ => panic!("Expected Backends List command"),
+    }
+}
+
+#[test]
+fn test_cli_parse_backends_add() {
+    let cli = Cli::try_parse_from(["nexus", "backends", "add", "http://localhost:11434"]).unwrap();
+    match cli.command {
+        Commands::Backends(BackendsCommands::Add(args)) => {
+            assert_eq!(args.url, "http://localhost:11434");
+        }
+        _ => panic!("Expected Backends Add command"),
+    }
+}
+
+#[test]
+fn test_cli_parse_models() {
+    let cli = Cli::try_parse_from(["nexus", "models"]).unwrap();
+    assert!(matches!(cli.command, Commands::Models(_)));
+}
+
+#[test]
+fn test_cli_parse_health() {
+    let cli = Cli::try_parse_from(["nexus", "health"]).unwrap();
+    assert!(matches!(cli.command, Commands::Health(_)));
+}
+```
+
+**Implementation**:
+```rust
+use clap::{Parser, Subcommand, Args};
+
+#[derive(Parser)]
+#[command(name = "nexus", version, about = "Distributed LLM Orchestrator")]
+pub struct Cli {
+    #[command(subcommand)]
+    pub command: Commands,
+}
+
+#[derive(Subcommand)]
+pub enum Commands {
+    /// Start the Nexus server
+    Serve(ServeArgs),
+    /// Manage backends
+    #[command(subcommand)]
+    Backends(BackendsCommands),
+    /// List available models
+    Models(ModelsArgs),
+    /// Show system health
+    Health(HealthArgs),
+    /// Configuration utilities
+    #[command(subcommand)]
+    Config(ConfigCommands),
+}
+
+#[derive(Args)]
+pub struct ServeArgs {
+    #[arg(short, long, default_value = "nexus.toml")]
+    pub config: PathBuf,
+    
+    #[arg(short, long, env = "NEXUS_PORT")]
+    pub port: Option<u16>,
+    
+    #[arg(short = 'H', long, env = "NEXUS_HOST")]
+    pub host: Option<String>,
+    
+    #[arg(short, long, env = "NEXUS_LOG_LEVEL")]
+    pub log_level: Option<String>,
+    
+    #[arg(long)]
+    pub no_discovery: bool,
+    
+    #[arg(long)]
+    pub no_health_check: bool,
+}
+
+// ... additional Args structs
+```
+
+**Acceptance Criteria**:
+- [X] All 8 tests pass
+- [X] `nexus --help` shows all commands
+- [X] `nexus serve --help` shows all options
+- [X] Environment variables work for serve args
+
+**Test Command**: `cargo test cli::tests`
+
+---
+
+## T07: Output Formatting (Tables/JSON)
+
+**Goal**: Implement table and JSON output helpers.
+
+**Files to modify**:
+- `src/cli/output.rs`
+
+**Tests to Write First** (6 tests):
+```rust
+#[test]
+fn test_format_backends_table_empty() {
+    let output = format_backends_table(&[]);
+    assert!(output.contains("Name")); // Header present
+}
+
+#[test]
+fn test_format_backends_table_with_data() {
+    let backends = vec![create_test_backend_view()];
+    let output = format_backends_table(&backends);
+    assert!(output.contains("test-backend"));
+    assert!(output.contains("Healthy"));
+}
+
+#[test]
+fn test_format_backends_json_valid() {
+    let backends = vec![create_test_backend_view()];
+    let output = format_backends_json(&backends);
+    let parsed: serde_json::Value = serde_json::from_str(&output).unwrap();
+    assert!(parsed.get("backends").is_some());
+}
+
+#[test]
+fn test_format_models_table() {
+    let models = vec![create_test_model_view()];
+    let output = format_models_table(&models);
+    assert!(output.contains("Model"));
+    assert!(output.contains("Context"));
+}
+
+#[test]
+fn test_format_health_pretty() {
+    let status = create_test_health_status();
+    let output = format_health_pretty(&status);
+    assert!(output.contains("Status:"));
+    assert!(output.contains("Backends:"));
+}
+
+#[test]
+fn test_status_icon_healthy() {
+    assert_eq!(status_icon(BackendStatus::Healthy), "✓");
+    assert_eq!(status_icon(BackendStatus::Unhealthy), "✗");
+    assert_eq!(status_icon(BackendStatus::Unknown), "?");
+}
+```
+
+**Implementation**:
+```rust
+use comfy_table::{Table, Cell, Color, ContentArrangement};
+use colored::Colorize;
+
+pub fn format_backends_table(backends: &[BackendView]) -> String {
+    let mut table = Table::new();
+    table.set_content_arrangement(ContentArrangement::Dynamic);
+    table.set_header(vec!["Name", "URL", "Type", "Status", "Models", "Latency"]);
+    
+    for b in backends {
+        let status_str = match b.status {
+            BackendStatus::Healthy => "Healthy".green().to_string(),
+            BackendStatus::Unhealthy => "Unhealthy".red().to_string(),
+            BackendStatus::Unknown => "Unknown".yellow().to_string(),
+            BackendStatus::Draining => "Draining".cyan().to_string(),
+        };
+        
+        table.add_row(vec![
+            &b.name,
+            &b.url,
+            &format!("{:?}", b.backend_type),
+            &status_str,
+            &b.models.len().to_string(),
+            &format!("{}ms", b.avg_latency_ms),
+        ]);
+    }
+    
+    table.to_string()
+}
+
+pub fn format_backends_json(backends: &[BackendView]) -> String {
+    serde_json::to_string_pretty(&serde_json::json!({
+        "backends": backends
+    })).unwrap()
+}
+
+pub fn status_icon(status: BackendStatus) -> &'static str {
+    match status {
+        BackendStatus::Healthy => "✓",
+        BackendStatus::Unhealthy => "✗",
+        BackendStatus::Unknown => "?",
+        BackendStatus::Draining => "~",
+    }
+}
+```
+
+**Acceptance Criteria**:
+- [X] All 5 tests pass (simplified version)
+- [X] Tables render with proper alignment
+- [X] JSON output is valid and pretty-printed
+- [X] Status colors work in terminal
+
+**Test Command**: `cargo test cli::output`
+
+---
+
+## T08: Serve Command Implementation
+
+**Goal**: Implement the main serve command that starts the server.
+
+**Files to modify**:
+- `src/cli/serve.rs`
+- `src/main.rs`
+
+**Tests to Write First** (6 tests):
+```rust
+#[tokio::test]
+async fn test_serve_config_loading() {
+    let temp = tempfile::NamedTempFile::new().unwrap();
+    std::fs::write(temp.path(), "[server]\nport = 8080").unwrap();
+    
+    let args = ServeArgs {
+        config: temp.path().to_path_buf(),
+        port: None,
+        ..Default::default()
+    };
+    
+    let config = load_config_with_overrides(&args).unwrap();
+    assert_eq!(config.server.port, 8080);
+}
+
+#[tokio::test]
+async fn test_serve_cli_overrides_config() {
+    let temp = tempfile::NamedTempFile::new().unwrap();
+    std::fs::write(temp.path(), "[server]\nport = 8080").unwrap();
+    
+    let args = ServeArgs {
+        config: temp.path().to_path_buf(),
+        port: Some(9000),  // Override
+        ..Default::default()
+    };
+    
+    let config = load_config_with_overrides(&args).unwrap();
+    assert_eq!(config.server.port, 9000);  // CLI wins
+}
+
+#[tokio::test]
+async fn test_serve_works_without_config_file() {
+    let args = ServeArgs {
+        config: PathBuf::from("nonexistent.toml"),
+        ..Default::default()
+    };
+    
+    let config = load_config_with_overrides(&args).unwrap();
+    assert_eq!(config.server.port, 8000);  // Default
+}
+
+#[tokio::test]
+async fn test_init_tracing_pretty() {
+    let config = LoggingConfig {
+        level: "debug".to_string(),
+        format: LogFormat::Pretty,
+    };
+    
+    // Should not panic
+    let result = init_tracing(&config);
+    assert!(result.is_ok());
+}
+
+#[tokio::test]
+async fn test_init_tracing_json() {
+    let config = LoggingConfig {
+        level: "info".to_string(),
+        format: LogFormat::Json,
+    };
+    
+    let result = init_tracing(&config);
+    assert!(result.is_ok());
+}
+
+#[tokio::test]
+async fn test_backends_loaded_from_config() {
+    let config = NexusConfig {
+        backends: vec![
+            BackendConfig {
+                name: "test".to_string(),
+                url: "http://localhost:11434".to_string(),
+                backend_type: BackendType::Ollama,
+                priority: 1,
+            }
+        ],
+        ..Default::default()
+    };
+    
+    let registry = Arc::new(Registry::new());
+    load_backends_from_config(&config, &registry).unwrap();
+    
+    assert_eq!(registry.backend_count(), 1);
+}
+```
+
+**Implementation**:
+```rust
+pub async fn run_serve(args: ServeArgs) -> Result<(), Box<dyn std::error::Error>> {
+    // 1. Load and merge configuration
+    let config = load_config_with_overrides(&args)?;
+    config.validate()?;
+    
+    // 2. Initialize tracing
+    init_tracing(&config.logging)?;
+    
+    // 3. Create registry and load static backends
+    let registry = Arc::new(Registry::new());
+    load_backends_from_config(&config, &registry)?;
+    
+    // 4. Start health checker (if enabled)
+    let cancel_token = CancellationToken::new();
+    let health_handle = if config.health_check.enabled && !args.no_health_check {
+        let checker = HealthChecker::new(registry.clone(), config.health_check.clone());
+        Some(checker.start(cancel_token.clone()))
+    } else {
+        None
+    };
+    
+    // 5. Build minimal HTTP server (full API Gateway is separate feature)
+    let app = build_basic_router(registry.clone());
+    
+    // 6. Bind and serve
+    let addr = format!("{}:{}", config.server.host, config.server.port);
+    tracing::info!(addr = %addr, "Nexus server starting");
+    
+    let listener = tokio::net::TcpListener::bind(&addr).await?;
+    axum::serve(listener, app)
+        .with_graceful_shutdown(shutdown_signal(cancel_token.clone()))
+        .await?;
+    
+    // 7. Cleanup
+    if let Some(handle) = health_handle {
+        handle.await?;
+    }
+    
+    tracing::info!("Nexus server stopped");
+    Ok(())
+}
+
+fn build_basic_router(registry: Arc<Registry>) -> Router {
+    Router::new()
+        .route("/health", get(|| async { "OK" }))
+        .with_state(registry)
+}
+```
+
+**Acceptance Criteria**:
+- [X] All 6 tests pass
+- [X] Server starts and binds to configured port
+- [X] Health checker starts (unless disabled)
+- [X] Config precedence works: CLI > env > file > defaults
+
+**Test Command**: `cargo test cli::serve`
+
+---
+
+## T09: Backends List Command
+
+**Goal**: Implement `nexus backends list` command.
+
+**Files to modify**:
+- `src/cli/backends.rs`
+
+**Tests to Write First** (4 tests):
+```rust
+#[test]
+fn test_backends_list_empty_registry() {
+    let registry = Arc::new(Registry::new());
+    let args = BackendsListArgs { json: false, status: None };
+    
+    let output = handle_backends_list(&args, &registry);
+    assert!(output.is_ok());
+}
+
+#[test]
+fn test_backends_list_with_backends() {
+    let registry = Arc::new(Registry::new());
+    registry.add_backend(create_test_backend()).unwrap();
+    
+    let args = BackendsListArgs { json: false, status: None };
+    let output = handle_backends_list(&args, &registry).unwrap();
+    
+    assert!(output.contains("test-backend"));
+}
+
+#[test]
+fn test_backends_list_filter_healthy() {
+    let registry = Arc::new(Registry::new());
+    
+    let mut healthy = create_test_backend();
+    healthy.id = "healthy".to_string();
+    registry.add_backend(healthy).unwrap();
+    registry.update_status("healthy", BackendStatus::Healthy, None).unwrap();
+    
+    let mut unhealthy = create_test_backend();
+    unhealthy.id = "unhealthy".to_string();
+    registry.add_backend(unhealthy).unwrap();
+    registry.update_status("unhealthy", BackendStatus::Unhealthy, Some("error")).unwrap();
+    
+    let args = BackendsListArgs { json: false, status: Some("healthy".to_string()) };
+    let output = handle_backends_list(&args, &registry).unwrap();
+    
+    assert!(output.contains("healthy"));
+    assert!(!output.contains("unhealthy"));
+}
+
+#[test]
+fn test_backends_list_json_output() {
+    let registry = Arc::new(Registry::new());
+    registry.add_backend(create_test_backend()).unwrap();
+    
+    let args = BackendsListArgs { json: true, status: None };
+    let output = handle_backends_list(&args, &registry).unwrap();
+    
+    let parsed: serde_json::Value = serde_json::from_str(&output).unwrap();
+    assert!(parsed.get("backends").is_some());
+}
+```
+
+**Implementation**:
+```rust
+pub fn handle_backends_list(
+    args: &BackendsListArgs,
+    registry: &Registry,
+) -> Result<String, Box<dyn std::error::Error>> {
+    let backends = registry.get_all_backends();
+    
+    let filtered: Vec<_> = if let Some(ref status) = args.status {
+        let target_status = parse_status(status)?;
+        backends.into_iter().filter(|b| b.status == target_status).collect()
+    } else {
+        backends
+    };
+    
+    if args.json {
+        Ok(format_backends_json(&filtered))
+    } else {
+        Ok(format_backends_table(&filtered))
+    }
+}
+
+fn parse_status(s: &str) -> Result<BackendStatus, Box<dyn std::error::Error>> {
+    match s.to_lowercase().as_str() {
+        "healthy" => Ok(BackendStatus::Healthy),
+        "unhealthy" => Ok(BackendStatus::Unhealthy),
+        "unknown" => Ok(BackendStatus::Unknown),
+        _ => Err(format!("Invalid status: {}. Use: healthy, unhealthy, unknown", s).into()),
+    }
+}
+```
+
+**Acceptance Criteria**:
+- [X] All 4 tests pass
+- [X] Empty registry shows empty table (not error)
+- [X] `--status` filter works
+- [X] `--json` outputs valid JSON
+
+**Test Command**: `cargo test cli::backends::tests::test_backends_list`
+
+---
+
+## T10: Backends Add/Remove Commands (with Auto-Detection)
+
+**Goal**: Implement `nexus backends add` with auto-type detection and `nexus backends remove`.
+
+**Files to modify**:
+- `src/cli/backends.rs`
+
+**Tests to Write First** (7 tests):
+```rust
+#[tokio::test]
+async fn test_backends_add_success() {
+    let registry = Arc::new(Registry::new());
+    let args = BackendsAddArgs {
+        url: "http://localhost:11434".to_string(),
+        name: Some("test".to_string()),
+        backend_type: Some(BackendType::Ollama),
+        priority: Some(1),
+    };
+    
+    let result = handle_backends_add(&args, &registry).await;
+    assert!(result.is_ok());
+    assert_eq!(registry.backend_count(), 1);
+}
+
+#[tokio::test]
+async fn test_backends_add_generates_name() {
+    let registry = Arc::new(Registry::new());
+    let args = BackendsAddArgs {
+        url: "http://192.168.1.100:8000".to_string(),
+        name: None,
+        backend_type: Some(BackendType::VLLM),
+        priority: None,
+    };
+    
+    handle_backends_add(&args, &registry).await.unwrap();
+    
+    let backends = registry.get_all_backends();
+    assert!(!backends[0].name.is_empty());
+}
+
+#[tokio::test]
+async fn test_backends_add_invalid_url() {
+    let registry = Arc::new(Registry::new());
+    let args = BackendsAddArgs {
+        url: "not-a-url".to_string(),
+        name: None,
+        backend_type: None,
+        priority: None,
+    };
+    
+    let result = handle_backends_add(&args, &registry).await;
+    assert!(result.is_err());
+}
+
+#[test]
+fn test_backends_remove_success() {
+    let registry = Arc::new(Registry::new());
+    registry.add_backend(create_test_backend()).unwrap();
+    
+    let args = BackendsRemoveArgs { id: "test-backend".to_string() };
+    let result = handle_backends_remove(&args, &registry);
+    
+    assert!(result.is_ok());
+    assert_eq!(registry.backend_count(), 0);
+}
+
+#[test]
+fn test_backends_remove_not_found() {
+    let registry = Arc::new(Registry::new());
+    
+    let args = BackendsRemoveArgs { id: "nonexistent".to_string() };
+    let result = handle_backends_remove(&args, &registry);
+    
+    assert!(result.is_err());
+}
+
+#[tokio::test]
+async fn test_backends_add_auto_detect_ollama() {
+    // Mock server that responds like Ollama
+    let mock_server = MockServer::start().await;
+    Mock::given(method("GET"))
+        .and(path("/api/tags"))
+        .respond_with(ResponseTemplate::new(200).set_body_json(json!({"models": []})))
+        .mount(&mock_server)
+        .await;
+    
+    let registry = Arc::new(Registry::new());
+    let args = BackendsAddArgs {
+        url: mock_server.uri(),
+        name: None,
+        backend_type: None,  // Auto-detect
+        priority: None,
+    };
+    
+    handle_backends_add(&args, &registry).await.unwrap();
+    
+    let backends = registry.get_all_backends();
+    assert_eq!(backends[0].backend_type, BackendType::Ollama);
+}
+
+#[tokio::test]
+async fn test_backends_add_auto_detect_fallback_generic() {
+    // Mock server that doesn't respond to any known endpoints
+    let mock_server = MockServer::start().await;
+    Mock::given(any())
+        .respond_with(ResponseTemplate::new(404))
+        .mount(&mock_server)
+        .await;
+    
+    let registry = Arc::new(Registry::new());
+    let args = BackendsAddArgs {
+        url: mock_server.uri(),
+        name: None,
+        backend_type: None,
+        priority: None,
+    };
+    
+    handle_backends_add(&args, &registry).await.unwrap();
+    
+    let backends = registry.get_all_backends();
+    assert_eq!(backends[0].backend_type, BackendType::Generic);  // Fallback
+}
+```
+
+**Implementation**:
+```rust
+/// Auto-detect backend type by probing known endpoints.
+/// Detection order: Ollama -> LlamaCpp -> OpenAI-compatible -> Generic
+async fn detect_backend_type(base_url: &str) -> Option<BackendType> {
+    let client = reqwest::Client::builder()
+        .timeout(Duration::from_secs(2))
+        .build()
+        .ok()?;
+    
+    // Try Ollama: GET /api/tags
+    if let Ok(resp) = client.get(format!("{}/api/tags", base_url)).send().await {
+        if resp.status().is_success() {
+            if let Ok(text) = resp.text().await {
+                if text.contains("models") {
+                    tracing::debug!(url = %base_url, "Detected Ollama backend");
+                    return Some(BackendType::Ollama);
+                }
+            }
+        }
+    }
+    
+    // Try LlamaCpp: GET /health
+    if let Ok(resp) = client.get(format!("{}/health", base_url)).send().await {
+        if resp.status().is_success() {
+            if let Ok(text) = resp.text().await {
+                if text.contains("ok") {
+                    tracing::debug!(url = %base_url, "Detected LlamaCpp backend");
+                    return Some(BackendType::LlamaCpp);
+                }
+            }
+        }
+    }
+    
+    // Try OpenAI-compatible: GET /v1/models
+    if let Ok(resp) = client.get(format!("{}/v1/models", base_url)).send().await {
+        if resp.status().is_success() {
+            tracing::debug!(url = %base_url, "Detected OpenAI-compatible backend");
+            return Some(BackendType::Generic);  // Could be vLLM, Exo, etc.
+        }
+    }
+    
+    // Fallback: unknown, will use Generic
+    tracing::debug!(url = %base_url, "Could not detect backend type, using Generic");
+    None
+}
+
+pub async fn handle_backends_add(
+    args: &BackendsAddArgs,
+    registry: &Registry,
+) -> Result<String, Box<dyn std::error::Error>> {
+    // Validate URL
+    let url = reqwest::Url::parse(&args.url)
+        .map_err(|e| format!("Invalid URL: {}", e))?;
+    
+    // Generate name if not provided
+    let name = args.name.clone().unwrap_or_else(|| {
+        url.host_str().unwrap_or("backend").to_string()
+    });
+    
+    // Auto-detect type if not provided
+    let backend_type = match args.backend_type {
+        Some(t) => t,
+        None => {
+            tracing::info!(url = %args.url, "Auto-detecting backend type...");
+            detect_backend_type(&args.url).await.unwrap_or(BackendType::Generic)
+        }
+    };
+    
+    let backend = Backend::new(
+        uuid::Uuid::new_v4().to_string(),
+        name.clone(),
+        args.url.clone(),
+        backend_type,
+        vec![],
+        DiscoverySource::Manual,
+        HashMap::new(),
+    );
+    
+    let id = backend.id.clone();
+    registry.add_backend(backend)?;
+    
+    tracing::info!(name = %name, id = %id, backend_type = ?backend_type, "Backend added");
+    Ok(format!("Added backend '{}' ({}) as {:?}", name, id, backend_type))
+}
+
+pub fn handle_backends_remove(
+    args: &BackendsRemoveArgs,
+    registry: &Registry,
+) -> Result<String, Box<dyn std::error::Error>> {
+    registry.remove_backend(&args.id)?;
+    Ok(format!("Removed backend: {}", args.id))
+}
+```
+
+**Acceptance Criteria**:
+- [X] All 7 tests pass
+- [X] Add generates name from URL if not provided
+- [X] Add validates URL format
+- [X] Auto-detection tries Ollama → LlamaCpp → OpenAI → Generic
+- [X] Auto-detection times out after 2s per endpoint
+- [X] Remove returns error for unknown ID
+
+**Test Command**: `cargo test cli::backends::tests::test_backends_add`
+
+---
+
+## T11: Models Command
+
+**Goal**: Implement `nexus models` command.
+
+**Files to modify**:
+- `src/cli/models.rs`
+
+**Tests to Write First** (4 tests):
+```rust
+#[test]
+fn test_models_list_empty() {
+    let registry = Arc::new(Registry::new());
+    let args = ModelsArgs { json: false, backend: None };
+    
+    let output = handle_models(&args, &registry).unwrap();
+    assert!(output.contains("Model")); // Header
+}
+
+#[test]
+fn test_models_list_aggregated() {
+    let registry = Arc::new(Registry::new());
+    
+    // Add two backends with overlapping models
+    let mut backend1 = create_test_backend();
+    backend1.id = "backend1".to_string();
+    backend1.models = vec![create_test_model("llama3:70b")];
+    registry.add_backend(backend1).unwrap();
+    
+    let mut backend2 = create_test_backend();
+    backend2.id = "backend2".to_string();
+    backend2.models = vec![create_test_model("llama3:70b"), create_test_model("mistral:7b")];
+    registry.add_backend(backend2).unwrap();
+    
+    let args = ModelsArgs { json: false, backend: None };
+    let output = handle_models(&args, &registry).unwrap();
+    
+    assert!(output.contains("llama3:70b"));
+    assert!(output.contains("mistral:7b"));
+}
+
+#[test]
+fn test_models_filter_by_backend() {
+    let registry = Arc::new(Registry::new());
+    
+    let mut backend1 = create_test_backend();
+    backend1.id = "backend1".to_string();
+    backend1.models = vec![create_test_model("llama3:70b")];
+    registry.add_backend(backend1).unwrap();
+    
+    let mut backend2 = create_test_backend();
+    backend2.id = "backend2".to_string();
+    backend2.models = vec![create_test_model("mistral:7b")];
+    registry.add_backend(backend2).unwrap();
+    
+    let args = ModelsArgs { json: false, backend: Some("backend1".to_string()) };
+    let output = handle_models(&args, &registry).unwrap();
+    
+    assert!(output.contains("llama3:70b"));
+    assert!(!output.contains("mistral:7b"));
+}
+
+#[test]
+fn test_models_json_output() {
+    let registry = Arc::new(Registry::new());
+    let mut backend = create_test_backend();
+    backend.models = vec![create_test_model("llama3:70b")];
+    registry.add_backend(backend).unwrap();
+    
+    let args = ModelsArgs { json: true, backend: None };
+    let output = handle_models(&args, &registry).unwrap();
+    
+    let parsed: serde_json::Value = serde_json::from_str(&output).unwrap();
+    assert!(parsed.get("models").is_some());
+}
+```
+
+**Implementation**:
+```rust
+pub fn handle_models(
+    args: &ModelsArgs,
+    registry: &Registry,
+) -> Result<String, Box<dyn std::error::Error>> {
+    let backends = if let Some(ref id) = args.backend {
+        match registry.get_backend(id) {
+            Some(b) => vec![b],
+            None => return Err(format!("Backend not found: {}", id).into()),
+        }
+    } else {
+        registry.get_all_backends()
+    };
+    
+    // Aggregate models with their backends
+    let mut model_map: HashMap<String, ModelView> = HashMap::new();
+    for backend in backends {
+        for model in &backend.models {
+            model_map.entry(model.id.clone())
+                .or_insert_with(|| ModelView::from(model))
+                .backends.push(backend.name.clone());
+        }
+    }
+    
+    let models: Vec<_> = model_map.into_values().collect();
+    
+    if args.json {
+        Ok(format_models_json(&models))
+    } else {
+        Ok(format_models_table(&models))
+    }
+}
+```
+
+**Acceptance Criteria**:
+- [X] All 4 tests pass
+- [X] Models aggregated across backends
+- [X] `--backend` filter works
+- [X] Shows capability columns (Vision, Tools, JSON)
+
+**Test Command**: `cargo test cli::models`
+
+---
+
+## T12: Health Command
+
+**Goal**: Implement `nexus health` command.
+
+**Files to modify**:
+- `src/cli/health.rs`
+
+**Tests to Write First** (4 tests):
+```rust
+#[test]
+fn test_health_shows_summary() {
+    let registry = Arc::new(Registry::new());
+    
+    let mut healthy = create_test_backend();
+    healthy.id = "healthy".to_string();
+    registry.add_backend(healthy).unwrap();
+    registry.update_status("healthy", BackendStatus::Healthy, None).unwrap();
+    
+    let args = HealthArgs { json: false };
+    let output = handle_health(&args, &registry, Duration::from_secs(3600)).unwrap();
+    
+    assert!(output.contains("Status:"));
+    assert!(output.contains("1/1 healthy"));
+}
+
+#[test]
+fn test_health_degraded_status() {
+    let registry = Arc::new(Registry::new());
+    
+    // All backends unhealthy = degraded
+    let mut backend = create_test_backend();
+    registry.add_backend(backend).unwrap();
+    registry.update_status(&backend.id, BackendStatus::Unhealthy, Some("error")).unwrap();
+    
+    let args = HealthArgs { json: false };
+    let output = handle_health(&args, &registry, Duration::from_secs(0)).unwrap();
+    
+    assert!(output.contains("degraded") || output.contains("Degraded"));
+}
+
+#[test]
+fn test_health_json_valid() {
+    let registry = Arc::new(Registry::new());
+    
+    let args = HealthArgs { json: true };
+    let output = handle_health(&args, &registry, Duration::from_secs(100)).unwrap();
+    
+    let parsed: serde_json::Value = serde_json::from_str(&output).unwrap();
+    assert!(parsed.get("status").is_some());
+    assert!(parsed.get("uptime_seconds").is_some());
+}
+
+#[test]
+fn test_health_shows_uptime() {
+    let registry = Arc::new(Registry::new());
+    
+    let args = HealthArgs { json: false };
+    let output = handle_health(&args, &registry, Duration::from_secs(3661)).unwrap();
+    
+    assert!(output.contains("1h") || output.contains("3661"));
+}
+```
+
+**Implementation**:
+```rust
+#[derive(Serialize)]
+pub struct HealthStatus {
+    pub status: String,
+    pub version: String,
+    pub uptime_seconds: u64,
+    pub backends: BackendCounts,
+    pub models: ModelCounts,
+}
+
+pub fn handle_health(
+    args: &HealthArgs,
+    registry: &Registry,
+    uptime: Duration,
+) -> Result<String, Box<dyn std::error::Error>> {
+    let backends = registry.get_all_backends();
+    let healthy = backends.iter().filter(|b| b.status == BackendStatus::Healthy).count();
+    let model_count = registry.model_count();
+    
+    let status = HealthStatus {
+        status: if healthy > 0 { "healthy".to_string() } else { "degraded".to_string() },
+        version: env!("CARGO_PKG_VERSION").to_string(),
+        uptime_seconds: uptime.as_secs(),
+        backends: BackendCounts {
+            total: backends.len(),
+            healthy,
+            unhealthy: backends.len() - healthy,
+        },
+        models: ModelCounts { total: model_count },
+    };
+    
+    if args.json {
+        Ok(serde_json::to_string_pretty(&status)?)
+    } else {
+        Ok(format_health_pretty(&status, &backends))
+    }
+}
+
+fn format_health_pretty(status: &HealthStatus, backends: &[BackendView]) -> String {
+    let mut output = String::new();
+    
+    let status_display = if status.status == "healthy" {
+        "Healthy".green()
+    } else {
+        "Degraded".yellow()
+    };
+    
+    writeln!(output, "Status: {}", status_display).unwrap();
+    writeln!(output, "Version: {}", status.version).unwrap();
+    writeln!(output, "Uptime: {}", format_duration(status.uptime_seconds)).unwrap();
+    writeln!(output).unwrap();
+    writeln!(output, "Backends: {}/{} healthy", 
+        status.backends.healthy, status.backends.total).unwrap();
+    writeln!(output, "Models: {} available", status.models.total).unwrap();
+    
+    // ... backend details
+    
+    output
+}
+```
+
+**Acceptance Criteria**:
+- [X] All 4 tests pass
+- [X] Shows "healthy" or "degraded" status
+- [X] Shows formatted uptime
+- [X] JSON includes all required fields
+
+**Test Command**: `cargo test cli::health`
+
+---
+
+## T13: Config Init Command
+
+**Goal**: Implement `nexus config init` command.
+
+**Files to modify**:
+- `src/cli/mod.rs` (add ConfigCommands)
+- Create `templates/` directory with template files
+
+**Tests to Write First** (4 tests):
+```rust
+#[test]
+fn test_config_init_creates_file() {
+    let temp_dir = tempfile::tempdir().unwrap();
+    let output_path = temp_dir.path().join("nexus.toml");
+    
+    let args = ConfigInitArgs {
+        output: output_path.clone(),
+        minimal: false,
+        force: false,
+    };
+    
+    handle_config_init(&args).unwrap();
+    
+    assert!(output_path.exists());
+    let content = std::fs::read_to_string(&output_path).unwrap();
+    assert!(content.contains("[server]"));
+}
+
+#[test]
+fn test_config_init_minimal() {
+    let temp_dir = tempfile::tempdir().unwrap();
+    let output_path = temp_dir.path().join("nexus.toml");
+    
+    let args = ConfigInitArgs {
+        output: output_path.clone(),
+        minimal: true,
+        force: false,
+    };
+    
+    handle_config_init(&args).unwrap();
+    
+    let content = std::fs::read_to_string(&output_path).unwrap();
+    // Minimal should be shorter
+    assert!(content.len() < 500);
+}
+
+#[test]
+fn test_config_init_no_overwrite() {
+    let temp_dir = tempfile::tempdir().unwrap();
+    let output_path = temp_dir.path().join("nexus.toml");
+    
+    // Create existing file
+    std::fs::write(&output_path, "existing").unwrap();
+    
+    let args = ConfigInitArgs {
+        output: output_path.clone(),
+        minimal: false,
+        force: false,
+    };
+    
+    let result = handle_config_init(&args);
+    assert!(result.is_err());
+    
+    // Original content preserved
+    let content = std::fs::read_to_string(&output_path).unwrap();
+    assert_eq!(content, "existing");
+}
+
+#[test]
+fn test_config_init_force_overwrites() {
+    let temp_dir = tempfile::tempdir().unwrap();
+    let output_path = temp_dir.path().join("nexus.toml");
+    
+    std::fs::write(&output_path, "old content").unwrap();
+    
+    let args = ConfigInitArgs {
+        output: output_path.clone(),
+        minimal: false,
+        force: true,
+    };
+    
+    handle_config_init(&args).unwrap();
+    
+    let content = std::fs::read_to_string(&output_path).unwrap();
+    assert!(content.contains("[server]"));
+}
+```
+
+**Implementation**:
+1. Create `templates/nexus.example.toml` (copy from repo root)
+2. Create `templates/nexus.minimal.toml`:
+   ```toml
+   # Minimal Nexus configuration
+   [server]
+   port = 8000
+   
+   [logging]
+   level = "info"
+   ```
+3. Implement handler:
+   ```rust
+   pub fn handle_config_init(args: &ConfigInitArgs) -> Result<String, Box<dyn std::error::Error>> {
+       if args.output.exists() && !args.force {
+           return Err(format!(
+               "File already exists: {}. Use --force to overwrite.",
+               args.output.display()
+           ).into());
+       }
+       
+       let template = if args.minimal {
+           include_str!("../../templates/nexus.minimal.toml")
+       } else {
+           include_str!("../../templates/nexus.example.toml")
+       };
+       
+       std::fs::write(&args.output, template)?;
+       Ok(format!("Created config file: {}", args.output.display()))
+   }
+   ```
+
+**Acceptance Criteria**:
+- [X] All 3 tests pass (simplified version without minimal)
+- [X] Creates valid TOML file
+- [ ] `--minimal` generates shorter config (not implemented)
+- [X] Won't overwrite without `--force`
+
+**Test Command**: `cargo test cli::tests::test_config_init`
+
+---
+
+## T14: Completions Command
+
+**Goal**: Implement `nexus completions <shell>` for shell completion generation.
+
+**Files to modify**:
+- `src/cli/mod.rs` (add Completions command)
+
+**Tests to Write First** (4 tests):
+```rust
+#[test]
+fn test_completions_bash() {
+    let cli = Cli::try_parse_from(["nexus", "completions", "bash"]).unwrap();
+    match cli.command {
+        Commands::Completions(args) => assert_eq!(args.shell, Shell::Bash),
+        _ => panic!("Expected Completions command"),
+    }
+}
+
+#[test]
+fn test_completions_zsh() {
+    let cli = Cli::try_parse_from(["nexus", "completions", "zsh"]).unwrap();
+    match cli.command {
+        Commands::Completions(args) => assert_eq!(args.shell, Shell::Zsh),
+        _ => panic!("Expected Completions command"),
+    }
+}
+
+#[test]
+fn test_completions_generates_output() {
+    let output = generate_completions(Shell::Bash);
+    assert!(!output.is_empty());
+    assert!(output.contains("nexus")); // Should reference the command name
+}
+
+#[test]
+fn test_completions_fish() {
+    let output = generate_completions(Shell::Fish);
+    assert!(!output.is_empty());
+}
+```
+
+**Implementation**:
+```rust
+use clap::CommandFactory;
+use clap_complete::{generate, Shell};
+
+#[derive(Args)]
+pub struct CompletionsArgs {
+    /// Target shell
+    #[arg(value_enum)]
+    pub shell: Shell,
+}
+
+pub fn handle_completions(args: &CompletionsArgs) -> String {
+    let mut cmd = Cli::command();
+    let mut buf = Vec::new();
+    generate(args.shell, &mut cmd, "nexus", &mut buf);
+    String::from_utf8(buf).expect("Generated completions should be valid UTF-8")
+}
+```
+
+Add to `Commands` enum:
+```rust
+#[derive(Subcommand)]
+pub enum Commands {
+    // ... existing commands ...
+    
+    /// Generate shell completions
+    Completions(CompletionsArgs),
+}
+```
+
+Add to `Cargo.toml`:
+```toml
+clap_complete = "4"
+```
+
+**Acceptance Criteria**:
+- [X] All 2 tests pass (simplified version)
+- [X] `nexus completions bash` outputs valid bash completion script
+- [X] `nexus completions zsh` outputs valid zsh completion script
+- [X] `nexus completions fish` outputs valid fish completion script
+
+**Test Command**: `cargo test cli::tests::test_completions`
+
+---
+
+## T15: Graceful Shutdown Handling
+
+**Goal**: Implement proper shutdown on SIGINT/SIGTERM.
+
+**Files to modify**:
+- `src/cli/serve.rs`
+
+**Tests to Write First** (3 tests):
+```rust
+#[tokio::test]
+async fn test_shutdown_signal_triggers_cancel() {
+    let cancel = CancellationToken::new();
+    let cancel_clone = cancel.clone();
+    
+    let handle = tokio::spawn(async move {
+        // Simulate shutdown after 100ms
+        tokio::time::sleep(Duration::from_millis(100)).await;
+        cancel_clone.cancel();
+    });
+    
+    // This should return when cancelled
+    tokio::select! {
+        _ = cancel.cancelled() => {}
+        _ = tokio::time::sleep(Duration::from_secs(5)) => {
+            panic!("Shutdown didn't trigger");
+        }
+    }
+    
+    handle.await.unwrap();
+}
+
+#[tokio::test]
+async fn test_health_checker_stops_on_shutdown() {
+    let registry = Arc::new(Registry::new());
+    let config = HealthCheckConfig::default();
+    let checker = HealthChecker::new(registry, config);
+    
+    let cancel = CancellationToken::new();
+    let handle = checker.start(cancel.clone());
+    
+    // Let it run briefly
+    tokio::time::sleep(Duration::from_millis(50)).await;
+    
+    // Trigger shutdown
+    cancel.cancel();
+    
+    // Should complete quickly
+    let result = tokio::time::timeout(Duration::from_secs(1), handle).await;
+    assert!(result.is_ok());
+}
+
+#[tokio::test]
+async fn test_exit_code_on_error() {
+    // Port 0 should fail
+    let args = ServeArgs {
+        config: PathBuf::from("nonexistent.toml"),
+        port: Some(0),  // Invalid
+        ..Default::default()
+    };
+    
+    // This would require refactoring run_serve to return Result
+    // For now, verify the function signature returns Result
+}
+```
+
+**Implementation**:
+```rust
+async fn shutdown_signal(cancel_token: CancellationToken) {
+    let ctrl_c = async {
+        tokio::signal::ctrl_c()
+            .await
+            .expect("Failed to install CTRL+C handler");
+    };
+
+    #[cfg(unix)]
+    let terminate = async {
+        tokio::signal::unix::signal(tokio::signal::unix::SignalKind::terminate())
+            .expect("Failed to install SIGTERM handler")
+            .recv()
+            .await;
+    };
+
+    #[cfg(not(unix))]
+    let terminate = std::future::pending::<()>();
+
+    tokio::select! {
+        _ = ctrl_c => {
+            tracing::info!("Received SIGINT, shutting down...");
+        }
+        _ = terminate => {
+            tracing::info!("Received SIGTERM, shutting down...");
+        }
+    }
+    
+    cancel_token.cancel();
+}
+```
+
+**Acceptance Criteria**:
+- [X] All 3 tests pass
+- [X] SIGINT triggers graceful shutdown
+- [X] SIGTERM triggers graceful shutdown (Unix)
+- [X] Health checker stops cleanly
+- [X] Exit code 0 on clean shutdown
+
+**Test Command**: `cargo test cli::serve::tests::test_shutdown`
+
+---
+
+## T16: Integration Tests
+
+**Goal**: End-to-end CLI tests using `assert_cmd`.
+
+**Files to create**:
+- `tests/cli_integration.rs`
+
+**Tests to Write** (10 tests):
+```rust
+use assert_cmd::Command;
+use predicates::prelude::*;
+
+#[test]
+fn test_version_output() {
+    Command::cargo_bin("nexus")
+        .unwrap()
+        .arg("--version")
+        .assert()
+        .success()
+        .stdout(predicate::str::contains("nexus"));
+}
+
+#[test]
+fn test_help_shows_all_commands() {
+    Command::cargo_bin("nexus")
+        .unwrap()
+        .arg("--help")
+        .assert()
+        .success()
+        .stdout(predicate::str::contains("serve"))
+        .stdout(predicate::str::contains("backends"))
+        .stdout(predicate::str::contains("models"))
+        .stdout(predicate::str::contains("health"))
+        .stdout(predicate::str::contains("config"));
+}
+
+#[test]
+fn test_serve_help() {
+    Command::cargo_bin("nexus")
+        .unwrap()
+        .args(["serve", "--help"])
+        .assert()
+        .success()
+        .stdout(predicate::str::contains("--port"))
+        .stdout(predicate::str::contains("--config"));
+}
+
+#[test]
+fn test_backends_help() {
+    Command::cargo_bin("nexus")
+        .unwrap()
+        .args(["backends", "--help"])
+        .assert()
+        .success()
+        .stdout(predicate::str::contains("list"))
+        .stdout(predicate::str::contains("add"))
+        .stdout(predicate::str::contains("remove"));
+}
+
+#[test]
+fn test_config_init_creates_file() {
+    let temp_dir = tempfile::tempdir().unwrap();
+    let output = temp_dir.path().join("test.toml");
+    
+    Command::cargo_bin("nexus")
+        .unwrap()
+        .args(["config", "init", "-o", output.to_str().unwrap()])
+        .assert()
+        .success()
+        .stdout(predicate::str::contains("Created"));
+    
+    assert!(output.exists());
+}
+
+#[test]
+fn test_config_init_no_overwrite() {
+    let temp_dir = tempfile::tempdir().unwrap();
+    let output = temp_dir.path().join("existing.toml");
+    std::fs::write(&output, "existing").unwrap();
+    
+    Command::cargo_bin("nexus")
+        .unwrap()
+        .args(["config", "init", "-o", output.to_str().unwrap()])
+        .assert()
+        .failure()
+        .stderr(predicate::str::contains("already exists"));
+}
+
+#[test]
+fn test_invalid_command() {
+    Command::cargo_bin("nexus")
+        .unwrap()
+        .arg("invalid")
+        .assert()
+        .failure();
+}
+
+#[test]
+fn test_serve_invalid_config_file() {
+    let temp_dir = tempfile::tempdir().unwrap();
+    let config = temp_dir.path().join("bad.toml");
+    std::fs::write(&config, "not valid toml {{{{").unwrap();
+    
+    Command::cargo_bin("nexus")
+        .unwrap()
+        .args(["serve", "-c", config.to_str().unwrap()])
+        .assert()
+        .failure()
+        .stderr(predicate::str::contains("parse error").or(predicate::str::contains("error")));
+}
+
+#[test]
+fn test_env_var_for_port() {
+    Command::cargo_bin("nexus")
+        .unwrap()
+        .env("NEXUS_PORT", "9999")
+        .args(["serve", "--help"])  // Just verify it parses
+        .assert()
+        .success();
+}
+
+#[test]
+fn test_backends_list_no_server() {
+    // Without a running server, this should show an error or empty list
+    // Depends on implementation - might need IPC or shared state
+    Command::cargo_bin("nexus")
+        .unwrap()
+        .args(["backends", "list"])
+        .assert();
+    // Just verify it doesn't panic
+}
+```
+
+**Acceptance Criteria**:
+- [ ] All 10 tests pass
+- [ ] `--version` shows version string
+- [ ] `--help` shows all commands
+- [ ] Invalid config causes non-zero exit
+- [ ] Environment variables are recognized
+
+**Test Command**: `cargo test --test cli_integration`
+
+---
+
+## T17: Documentation & Cleanup
+
+**Goal**: Final documentation, examples, and code cleanup.
+
+**Files to modify/create**:
+- `src/cli/mod.rs` (doc comments)
+- `src/config/mod.rs` (doc comments)
+- `README.md` (update with CLI usage)
+- Move `nexus.example.toml` handling
+
+**Tasks**:
+1. Add doc comments to all public types:
+   ```rust
+   /// Unified configuration for the Nexus server.
+   ///
+   /// # Example
+   /// ```
+   /// use nexus::config::NexusConfig;
+   /// 
+   /// let config = NexusConfig::default();
+   /// assert_eq!(config.server.port, 8000);
+   /// ```
+   #[derive(Debug, Clone, Serialize, Deserialize, Default)]
+   pub struct NexusConfig { ... }
+   ```
+
+2. Add doc examples that compile:
+   ```rust
+   /// Parse configuration from a TOML string.
+   ///
+   /// # Example
+   /// ```
+   /// use nexus::config::NexusConfig;
+   /// 
+   /// let toml = "[server]\nport = 9000";
+   /// let config: NexusConfig = toml::from_str(toml).unwrap();
+   /// ```
+   ```
+
+3. Update README with CLI examples:
+   ```markdown
+   ## Quick Start
+   
+   ```bash
+   # Generate config file
+   nexus config init
+   
+   # Start the server
+   nexus serve
+   
+   # Check status
+   nexus health
+   ```
+   ```
+
+4. Run final checks:
+   - `cargo clippy --all-features -- -D warnings`
+   - `cargo fmt --all -- --check`
+   - `cargo test --all`
+   - `cargo doc --no-deps`
+
+**Acceptance Criteria**:
+- [ ] All public types have doc comments
+- [ ] Doc examples compile (`cargo test --doc`)
+- [ ] README includes CLI usage
+- [ ] `cargo clippy` has no warnings
+- [ ] `cargo fmt` passes
+
+**Test Command**: `cargo test --doc && cargo clippy`
+
+---
+
+## Test Summary
+
+| Task | Unit Tests | Integration Tests | Doc Tests | Total |
+|------|------------|-------------------|-----------|-------|
+| T01 | 0 | 0 | 0 | 0 |
+| T02 | 6 | 0 | 0 | 6 |
+| T03 | 5 | 0 | 0 | 5 |
+| T04 | 4 | 0 | 0 | 4 |
+| T05 | 5 | 0 | 0 | 5 |
+| T06 | 8 | 0 | 0 | 8 |
+| T07 | 6 | 0 | 0 | 6 |
+| T08 | 6 | 0 | 0 | 6 |
+| T09 | 4 | 0 | 0 | 4 |
+| T10 | 7 | 0 | 0 | 7 |
+| T11 | 4 | 0 | 0 | 4 |
+| T12 | 4 | 0 | 0 | 4 |
+| T13 | 4 | 0 | 0 | 4 |
+| T14 | 4 | 0 | 0 | 4 |
+| T15 | 3 | 0 | 0 | 3 |
+| T16 | 0 | 10 | 0 | 10 |
+| T17 | 0 | 0 | ~4 | 4 |
+| **Total** | **57** | **10** | **~4** | **~71** |
+
+---
+
+## Definition of Done
+
+- [ ] All 71 tests pass
+- [ ] `cargo clippy` reports no warnings
+- [ ] `cargo fmt --check` passes
+- [ ] `nexus --version` shows version
+- [ ] `nexus --help` shows all commands
+- [ ] `nexus serve` starts server on configured port
+- [ ] `nexus config init` generates valid config
+- [ ] `nexus completions <shell>` generates valid completions
+- [ ] Config precedence: CLI > env > file > defaults
+- [ ] Unknown config keys logged as warnings (not errors)
+- [ ] Graceful shutdown on SIGINT/SIGTERM
+- [ ] JSON output valid for all commands
+- [ ] Documentation complete with examples
diff --git a/specs/003-cli-configuration/walkthrough.md b/specs/003-cli-configuration/walkthrough.md
new file mode 100644
index 0000000..3f9487e
--- /dev/null
+++ b/specs/003-cli-configuration/walkthrough.md
@@ -0,0 +1,940 @@
+# CLI & Configuration - Code Walkthrough
+
+**Feature**: F04 - CLI & Configuration  
+**Audience**: Junior developers joining the project  
+**Last Updated**: 2026-02-03
+
+---
+
+## Table of Contents
+
+1. [The Big Picture](#the-big-picture)
+2. [File Structure](#file-structure)
+3. [Part 1: Configuration Module](#part-1-configuration-module)
+   - [mod.rs - The Main Config](#modrs---the-main-config)
+   - [server.rs - Server Settings](#serverrs---server-settings)
+   - [routing.rs - Routing Settings](#routingrs---routing-settings)
+   - [logging.rs - Logging Settings](#loggingrs---logging-settings)
+   - [error.rs - Config Errors](#errorrs---config-errors)
+4. [Part 2: CLI Module](#part-2-cli-module)
+   - [mod.rs - Command Definitions](#modrs---command-definitions)
+   - [serve.rs - The Main Server](#servers---the-main-server)
+   - [backends.rs - Backend Management](#backendsrs---backend-management)
+   - [output.rs - Pretty Printing](#outputrs---pretty-printing)
+   - [completions.rs - Shell Completions](#completionsrs---shell-completions)
+5. [Part 3: Main Entry Point](#part-3-main-entry-point)
+6. [Understanding the Tests](#understanding-the-tests)
+7. [Key Rust Concepts](#key-rust-concepts)
+8. [Common Patterns in This Module](#common-patterns-in-this-module)
+
+---
+
+## The Big Picture
+
+Think of this feature as the **user interface for Nexus**. Just like how your phone has a home screen and settings app, Nexus has:
+
+1. **CLI (Command Line Interface)** - The buttons and menus you interact with
+2. **Configuration** - The settings that control how everything works
+
+### Why Do We Need This?
+
+Without a CLI and config system:
+- Users would have to modify code to change settings
+- There'd be no way to start the server
+- No way to inspect what's running
+
+With CLI & Configuration:
+```bash
+# Start with defaults
+nexus serve
+
+# Override port via CLI
+nexus serve --port 9000
+
+# Or via environment variable
+NEXUS_PORT=9000 nexus serve
+
+# Or via config file
+nexus serve --config production.toml
+```
+
+### The Configuration Precedence Pyramid
+
+```
+        ┌─────────────┐
+        │   CLI args  │  ← Highest priority (--port 9000)
+        ├─────────────┤
+        │  Env vars   │  ← NEXUS_PORT=9000
+        ├─────────────┤
+        │ Config file │  ← nexus.toml
+        ├─────────────┤
+        │  Defaults   │  ← Lowest priority (port = 8000)
+        └─────────────┘
+```
+
+This means if you set `--port 9000` on the command line, it wins over everything else.
+
+### How It Fits in Nexus
+
+```
+┌─────────────────────────────────────────────────────────────────┐
+│                         Nexus                                   │
+│                                                                 │
+│  ┌──────────────────┐     ┌──────────────────────────────┐     │
+│  │  CLI (mod.rs)    │────▶│  Configuration (config/)     │     │
+│  │  (you are here!) │     │  - Server settings           │     │
+│  └──────────────────┘     │  - Routing rules             │     │
+│           │               │  - Backend definitions       │     │
+│           │               └──────────────────────────────┘     │
+│           ▼                                                     │
+│  ┌──────────────┐     ┌──────────────┐     ┌──────────────┐    │
+│  │    serve     │────▶│   Registry   │◀────│Health Checker│    │
+│  │   command    │     │              │     │              │    │
+│  └──────────────┘     └──────────────┘     └──────────────┘    │
+│                                                                 │
+└─────────────────────────────────────────────────────────────────┘
+```
+
+---
+
+## File Structure
+
+```
+src/
+├── main.rs                 # Entry point - routes CLI commands
+├── config/                 # Configuration module
+│   ├── mod.rs    (267 lines) # Main NexusConfig + loading logic
+│   ├── server.rs  (38 lines) # Server settings (port, host, etc.)
+│   ├── routing.rs (81 lines) # Routing strategy and weights
+│   ├── logging.rs (63 lines) # Log level and format
+│   ├── discovery.rs (23 lines) # mDNS discovery settings
+│   ├── backend.rs (23 lines) # Backend config definition
+│   └── error.rs   (23 lines) # Configuration errors
+│
+├── cli/                    # CLI module
+│   ├── mod.rs    (282 lines) # Command definitions (clap)
+│   ├── serve.rs  (314 lines) # Start server command
+│   ├── backends.rs (329 lines) # backends list/add/remove
+│   ├── models.rs (168 lines) # List models
+│   ├── health.rs (226 lines) # Show health status
+│   ├── output.rs (172 lines) # Table/JSON formatting
+│   ├── config.rs  (88 lines) # config init command
+│   └── completions.rs (33 lines) # Shell completions
+│
+tests/
+└── cli_integration.rs (119 lines) # End-to-end CLI tests
+```
+
+---
+
+## Part 1: Configuration Module
+
+The config module holds all the settings for Nexus. Each file represents a "section" of the config file.
+
+### mod.rs - The Main Config
+
+This is the hub that combines all config sections into one struct:
+
+```rust
+/// Unified configuration for the Nexus server.
+#[derive(Debug, Clone, Serialize, Deserialize, Default)]
+#[serde(default)]
+pub struct NexusConfig {
+    pub server: ServerConfig,           // [server] section
+    pub discovery: DiscoveryConfig,     // [discovery] section
+    pub health_check: HealthCheckConfig, // [health_check] section
+    pub routing: RoutingConfig,         // [routing] section
+    pub backends: Vec<BackendConfig>,   // [[backends]] array
+    pub logging: LoggingConfig,         // [logging] section
+}
+```
+
+**What's happening here:**
+- `#[derive(Serialize, Deserialize)]` - Lets us read/write TOML files automatically
+- `#[serde(default)]` - If a section is missing, use the default values
+- Each field corresponds to a section in `nexus.toml`
+
+**Loading Configuration:**
+
+```rust
+impl NexusConfig {
+    pub fn load(path: Option<&Path>) -> Result<Self, ConfigError> {
+        match path {
+            Some(p) => {
+                if !p.exists() {
+                    return Err(ConfigError::NotFound(p.to_path_buf()));
+                }
+                let content = std::fs::read_to_string(p)?;
+                toml::from_str(&content).map_err(|e| ConfigError::Parse(e.to_string()))
+            }
+            None => Ok(Self::default()),
+        }
+    }
+}
+```
+
+**What's happening:**
+1. If no path given, return defaults
+2. If path doesn't exist, return a clear error
+3. Read the file, parse as TOML
+4. serde + toml do the magic of turning text into structs
+
+**Environment Variable Overrides:**
+
+```rust
+pub fn with_env_overrides(mut self) -> Self {
+    // Server settings
+    if let Ok(port) = std::env::var("NEXUS_PORT") {
+        if let Ok(p) = port.parse() {
+            self.server.port = p;
+        }
+    }
+    // ... more overrides
+    self
+}
+```
+
+**Pattern explained:**
+- `std::env::var("NEXUS_PORT")` - Read environment variable
+- `port.parse()` - Try to convert string to number
+- If parsing fails, silently keep the default (don't crash!)
+
+### server.rs - Server Settings
+
+```rust
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[serde(default)]
+pub struct ServerConfig {
+    pub host: String,                  // Listen address
+    pub port: u16,                     // Listen port
+    pub request_timeout_seconds: u64,  // How long to wait for responses
+    pub max_concurrent_requests: u32,  // Max parallel requests
+}
+
+impl Default for ServerConfig {
+    fn default() -> Self {
+        Self {
+            host: "0.0.0.0".to_string(),  // Listen on all interfaces
+            port: 8000,                    // Default port
+            request_timeout_seconds: 300,  // 5 minutes
+            max_concurrent_requests: 1000,
+        }
+    }
+}
+```
+
+**In the config file:**
+```toml
+[server]
+host = "0.0.0.0"
+port = 8000
+```
+
+### routing.rs - Routing Settings
+
+```rust
+/// Routing strategy for backend selection
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
+#[serde(rename_all = "snake_case")]
+pub enum RoutingStrategy {
+    #[default]
+    Smart,        // Consider multiple factors
+    RoundRobin,   // Take turns
+    PriorityOnly, // Always use highest priority
+    Random,       // Random selection
+}
+```
+
+**What's happening:**
+- `#[serde(rename_all = "snake_case")]` - In TOML, we write `round_robin` not `RoundRobin`
+- `#[default]` - `Smart` is used if not specified
+
+**In the config file:**
+```toml
+[routing]
+strategy = "round_robin"
+max_retries = 2
+
+[routing.weights]
+priority = 50
+load = 30
+latency = 20
+```
+
+### logging.rs - Logging Settings
+
+```rust
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
+#[serde(rename_all = "snake_case")]
+pub enum LogFormat {
+    #[default]
+    Pretty,  // Human-readable with colors
+    Json,    // Machine-readable for log aggregators
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[serde(default)]
+pub struct LoggingConfig {
+    pub level: String,    // trace, debug, info, warn, error
+    pub format: LogFormat,
+}
+```
+
+**Why two formats?**
+- **Pretty**: For humans running `nexus serve` in a terminal
+- **JSON**: For production systems that feed logs to Datadog, Splunk, etc.
+
+### error.rs - Config Errors
+
+```rust
+#[derive(Debug, thiserror::Error)]
+pub enum ConfigError {
+    #[error("config file not found: {0}")]
+    NotFound(PathBuf),
+    
+    #[error("failed to read config: {0}")]
+    Io(#[from] std::io::Error),
+    
+    #[error("config parse error: {0}")]
+    Parse(String),
+    
+    #[error("invalid value for '{field}': {message}")]
+    Validation { field: String, message: String },
+}
+```
+
+**What's happening:**
+- `thiserror::Error` - Automatically implements `std::error::Error` and `Display`
+- `#[from]` - Automatically converts `std::io::Error` to `ConfigError::Io`
+- `{field}` and `{message}` - Replaced with actual values when error is displayed
+
+---
+
+## Part 2: CLI Module
+
+The CLI module defines all the commands and their arguments using the `clap` library.
+
+### mod.rs - Command Definitions
+
+**The Main CLI Struct:**
+
+```rust
+/// Nexus - Distributed LLM Orchestrator
+#[derive(Parser, Debug)]
+#[command(name = "nexus", version, about = "Distributed LLM model serving orchestrator")]
+pub struct Cli {
+    #[command(subcommand)]
+    pub command: Commands,
+}
+```
+
+**What's happening:**
+- `#[derive(Parser)]` - Magic! Clap generates parsing code automatically
+- `#[command(...)]` - Metadata shown in `--help`
+- `#[command(subcommand)]` - This field holds which command was chosen
+
+**The Commands Enum:**
+
+```rust
+#[derive(Subcommand, Debug)]
+pub enum Commands {
+    /// Start the Nexus server
+    Serve(ServeArgs),
+    
+    /// Manage backends
+    #[command(subcommand)]
+    Backends(BackendsCommands),
+    
+    /// List available models
+    Models(ModelsArgs),
+    
+    /// Show system health
+    Health(HealthArgs),
+    
+    /// Configuration utilities
+    #[command(subcommand)]
+    Config(ConfigCommands),
+    
+    /// Generate shell completions
+    Completions(CompletionsArgs),
+}
+```
+
+**How this becomes CLI:**
+```bash
+nexus serve          # Commands::Serve
+nexus backends list  # Commands::Backends(BackendsCommands::List)
+nexus models         # Commands::Models
+nexus health         # Commands::Health
+nexus config init    # Commands::Config(ConfigCommands::Init)
+nexus completions    # Commands::Completions
+```
+
+**Argument Definition Example:**
+
+```rust
+#[derive(Args, Debug)]
+pub struct ServeArgs {
+    /// Path to configuration file
+    #[arg(short, long, default_value = "nexus.toml")]
+    pub config: PathBuf,
+
+    /// Override server port
+    #[arg(short, long, env = "NEXUS_PORT")]
+    pub port: Option<u16>,
+
+    /// Disable mDNS backend discovery
+    #[arg(long)]
+    pub no_discovery: bool,
+}
+```
+
+**What's happening:**
+- `#[arg(short, long)]` - Accept `-c` or `--config`
+- `default_value = "nexus.toml"` - Use this if not specified
+- `env = "NEXUS_PORT"` - Also check this env var
+- `Option<u16>` - This argument is optional
+- `bool` flags default to `false`
+
+### serve.rs - The Main Server
+
+This is where Nexus actually starts up:
+
+```rust
+pub async fn run_serve(args: ServeArgs) -> Result<(), Box<dyn std::error::Error>> {
+    // 1. Load and merge configuration
+    let config = load_config_with_overrides(&args)?;
+    config.validate()?;
+
+    // 2. Initialize tracing (logging)
+    init_tracing(&config.logging)?;
+    tracing::info!("Starting Nexus server");
+
+    // 3. Create registry and load static backends
+    let registry = Arc::new(Registry::new());
+    load_backends_from_config(&config, &registry)?;
+
+    // 4. Start health checker (if enabled)
+    let cancel_token = CancellationToken::new();
+    let health_handle = if config.health_check.enabled {
+        let checker = HealthChecker::new(registry.clone(), config.health_check.clone());
+        Some(checker.start(cancel_token.clone()))
+    } else {
+        None
+    };
+
+    // 5. Build minimal HTTP server
+    let app = build_basic_router(registry.clone());
+
+    // 6. Bind and serve
+    let addr = format!("{}:{}", config.server.host, config.server.port);
+    let listener = tokio::net::TcpListener::bind(&addr).await?;
+    
+    axum::serve(listener, app)
+        .with_graceful_shutdown(shutdown_signal(cancel_token.clone()))
+        .await?;
+
+    // 7. Cleanup
+    if let Some(handle) = health_handle {
+        handle.await?;
+    }
+
+    Ok(())
+}
+```
+
+**The startup sequence:**
+1. **Load config** - File → env vars → CLI args
+2. **Initialize logging** - Before anything else, so we can see what's happening
+3. **Create registry** - The in-memory database of backends
+4. **Start health checker** - Background task to monitor backends
+5. **Build HTTP router** - The API endpoints
+6. **Start serving** - Listen for requests
+7. **Cleanup on shutdown** - Stop background tasks gracefully
+
+**Graceful Shutdown:**
+
+```rust
+async fn shutdown_signal(cancel_token: CancellationToken) {
+    let ctrl_c = async {
+        tokio::signal::ctrl_c().await.expect("Failed to install CTRL+C handler");
+    };
+
+    #[cfg(unix)]
+    let terminate = async {
+        tokio::signal::unix::signal(tokio::signal::unix::SignalKind::terminate())
+            .expect("Failed to install SIGTERM handler")
+            .recv()
+            .await;
+    };
+
+    tokio::select! {
+        _ = ctrl_c => tracing::info!("Received SIGINT, shutting down..."),
+        _ = terminate => tracing::info!("Received SIGTERM, shutting down..."),
+    }
+
+    cancel_token.cancel();  // Tell health checker to stop
+}
+```
+
+**What's happening:**
+- Listen for Ctrl+C (SIGINT) or kill signal (SIGTERM)
+- When received, cancel the token which stops background tasks
+- Server finishes current requests and shuts down cleanly
+
+### backends.rs - Backend Management
+
+**Listing Backends:**
+
+```rust
+pub fn handle_backends_list(
+    args: &BackendsListArgs,
+    registry: &Registry,
+) -> Result<String, Box<dyn std::error::Error>> {
+    let backends = registry.get_all_backends();
+
+    // Filter by status if provided
+    let filtered: Vec<Backend> = if let Some(ref status) = args.status {
+        let target_status = parse_status(status)?;
+        backends.into_iter().filter(|b| b.status == target_status).collect()
+    } else {
+        backends
+    };
+
+    // Convert to view models for display
+    let views: Vec<BackendView> = filtered.iter().map(BackendView::from).collect();
+
+    if args.json {
+        Ok(format_backends_json(&views))
+    } else {
+        Ok(format_backends_table(&views))
+    }
+}
+```
+
+**Auto-detecting Backend Type:**
+
+```rust
+async fn detect_backend_type(base_url: &str) -> Option<BackendType> {
+    let client = reqwest::Client::builder()
+        .timeout(Duration::from_secs(2))  // Don't wait forever
+        .build().ok()?;
+
+    // Try Ollama: GET /api/tags
+    if let Ok(resp) = client.get(format!("{}/api/tags", base_url)).send().await {
+        if resp.status().is_success() {
+            return Some(BackendType::Ollama);
+        }
+    }
+
+    // Try LlamaCpp: GET /health
+    if let Ok(resp) = client.get(format!("{}/health", base_url)).send().await {
+        if resp.status().is_success() {
+            return Some(BackendType::LlamaCpp);
+        }
+    }
+
+    // Try OpenAI-compatible: GET /v1/models
+    if let Ok(resp) = client.get(format!("{}/v1/models", base_url)).send().await {
+        if resp.status().is_success() {
+            return Some(BackendType::Generic);
+        }
+    }
+
+    None  // Unknown, will use Generic as fallback
+}
+```
+
+**How it works:**
+1. Create HTTP client with 2-second timeout
+2. Try Ollama's unique endpoint
+3. Try LlamaCpp's unique endpoint
+4. Try generic OpenAI endpoint
+5. Return `None` if nothing works (caller will use Generic)
+
+### output.rs - Pretty Printing
+
+**The BackendView Pattern:**
+
+```rust
+/// View model for backend display
+#[derive(Debug, Clone, serde::Serialize)]
+pub struct BackendView {
+    pub name: String,
+    pub url: String,
+    pub backend_type: String,
+    pub status: BackendStatus,
+    pub models: Vec<String>,
+    pub avg_latency_ms: u64,
+}
+
+impl From<&Backend> for BackendView {
+    fn from(backend: &Backend) -> Self {
+        Self {
+            name: backend.name.clone(),
+            url: backend.url.clone(),
+            backend_type: format!("{:?}", backend.backend_type),
+            status: backend.status,
+            models: backend.models.iter().map(|m| m.id.clone()).collect(),
+            avg_latency_ms: backend.avg_latency_ms.load(Ordering::Relaxed) as u64,
+        }
+    }
+}
+```
+
+**Why use a View Model?**
+- The internal `Backend` struct has complex fields (atomics, hashmaps)
+- `BackendView` is simple and easy to serialize to JSON or display in a table
+- Separates "what we store" from "what we show"
+
+**Table Formatting:**
+
+```rust
+pub fn format_backends_table(backends: &[BackendView]) -> String {
+    let mut table = Table::new();
+    table.load_preset(UTF8_FULL);  // Pretty Unicode borders
+    table.set_header(vec!["Name", "URL", "Type", "Status", "Models", "Latency"]);
+
+    for b in backends {
+        let status_str = match b.status {
+            BackendStatus::Healthy => "Healthy".green().to_string(),
+            BackendStatus::Unhealthy => "Unhealthy".red().to_string(),
+            BackendStatus::Unknown => "Unknown".yellow().to_string(),
+            BackendStatus::Draining => "Draining".cyan().to_string(),
+        };
+
+        table.add_row(vec![
+            Cell::new(&b.name),
+            Cell::new(&b.url),
+            Cell::new(&b.backend_type),
+            Cell::new(status_str),
+            Cell::new(b.models.len()),
+            Cell::new(format!("{}ms", b.avg_latency_ms)),
+        ]);
+    }
+
+    table.to_string()
+}
+```
+
+**Result:**
+```
+┌──────────────┬────────────────────────────┬─────────┬──────────┬────────┬─────────┐
+│ Name         │ URL                        │ Type    │ Status   │ Models │ Latency │
+├──────────────┼────────────────────────────┼─────────┼──────────┼────────┼─────────┤
+│ local-ollama │ http://localhost:11434     │ Ollama  │ Healthy  │ 3      │ 45ms    │
+│ gpu-server   │ http://192.168.1.100:8000  │ VLLM    │ Healthy  │ 1      │ 23ms    │
+└──────────────┴────────────────────────────┴─────────┴──────────┴────────┴─────────┘
+```
+
+### completions.rs - Shell Completions
+
+```rust
+pub fn handle_completions(args: &CompletionsArgs) -> String {
+    let mut cmd = Cli::command();
+    let mut buf = Vec::new();
+    
+    clap_complete::generate(args.shell, &mut cmd, "nexus", &mut buf);
+    
+    String::from_utf8(buf).unwrap()
+}
+```
+
+**What's happening:**
+- `Cli::command()` - Get the command structure from clap
+- `clap_complete::generate` - Generate completion script for the target shell
+- Return as string so user can pipe to file
+
+**Usage:**
+```bash
+nexus completions bash > ~/.bash_completion.d/nexus
+nexus completions zsh > ~/.zsh/completions/_nexus
+```
+
+---
+
+## Part 3: Main Entry Point
+
+**src/main.rs:**
+
+```rust
+#[tokio::main]
+async fn main() {
+    let cli = Cli::parse();
+
+    let result = match cli.command {
+        Commands::Serve(args) => {
+            nexus::cli::serve::run_serve(args).await
+        }
+        Commands::Backends(cmd) => match cmd {
+            BackendsCommands::List(args) => {
+                // Load config and create registry
+                let config = NexusConfig::load(Some(&args.config))
+                    .unwrap_or_else(|_| NexusConfig::default());
+                let registry = Arc::new(Registry::new());
+                
+                // Load backends from config
+                load_backends_from_config(&config, &registry)?;
+
+                // Run command
+                let output = backends::handle_backends_list(&args, &registry)?;
+                println!("{}", output);
+                Ok(())
+            }
+            // ... other commands
+        }
+        Commands::Completions(args) => {
+            println!("{}", handle_completions(&args));
+            Ok(())
+        }
+    };
+
+    if let Err(e) = result {
+        eprintln!("Error: {}", e);
+        std::process::exit(1);
+    }
+}
+```
+
+**Pattern:**
+1. Parse CLI arguments
+2. Match on command
+3. Execute appropriate handler
+4. Print output or error
+5. Exit with appropriate code
+
+---
+
+## Understanding the Tests
+
+### Config Module Tests
+
+**Testing defaults:**
+```rust
+#[test]
+fn test_nexus_config_defaults() {
+    let config = NexusConfig::default();
+    assert_eq!(config.server.port, 8000);
+    assert!(config.discovery.enabled);
+    assert!(config.backends.is_empty());
+}
+```
+
+**Testing TOML parsing:**
+```rust
+#[test]
+fn test_config_parse_minimal_toml() {
+    let toml = r#"
+    [server]
+    port = 9000
+    "#;
+
+    let config: NexusConfig = toml::from_str(toml).unwrap();
+    assert_eq!(config.server.port, 9000);
+    assert_eq!(config.server.host, "0.0.0.0");  // Default applied!
+}
+```
+
+**Testing environment overrides:**
+```rust
+#[test]
+fn test_config_env_override_port() {
+    std::env::set_var("NEXUS_PORT", "9999");
+    let config = NexusConfig::default().with_env_overrides();
+    std::env::remove_var("NEXUS_PORT");  // Clean up!
+
+    assert_eq!(config.server.port, 9999);
+}
+```
+
+### CLI Module Tests
+
+**Testing argument parsing:**
+```rust
+#[test]
+fn test_cli_parse_serve_with_port() {
+    let cli = Cli::try_parse_from(["nexus", "serve", "-p", "9000"]).unwrap();
+    match cli.command {
+        Commands::Serve(args) => assert_eq!(args.port, Some(9000)),
+        _ => panic!("Expected Serve command"),
+    }
+}
+```
+
+**Key technique:** `try_parse_from` lets you test parsing without running a real CLI.
+
+### Integration Tests
+
+**tests/cli_integration.rs:**
+```rust
+use assert_cmd::Command;
+use predicates::prelude::*;
+
+#[test]
+fn test_version_output() {
+    Command::cargo_bin("nexus")
+        .unwrap()
+        .arg("--version")
+        .assert()
+        .success()
+        .stdout(predicate::str::contains("nexus"));
+}
+
+#[test]
+fn test_config_init_creates_file() {
+    let temp_dir = TempDir::new().unwrap();
+    let config_path = temp_dir.path().join("nexus.toml");
+
+    Command::cargo_bin("nexus")
+        .unwrap()
+        .args(["config", "init", "-o", config_path.to_str().unwrap()])
+        .assert()
+        .success();
+
+    assert!(config_path.exists());
+    let content = std::fs::read_to_string(&config_path).unwrap();
+    assert!(content.contains("[server]"));
+}
+```
+
+**What's happening:**
+- `assert_cmd` runs the actual binary in a subprocess
+- `predicates` provides nice assertions for output
+- `TempDir` creates isolated test directories that clean up automatically
+
+---
+
+## Key Rust Concepts
+
+### 1. The Builder Pattern
+
+```rust
+let client = reqwest::Client::builder()
+    .timeout(Duration::from_secs(2))
+    .build()
+    .ok()?;
+```
+
+Start with a builder, chain method calls, finish with `.build()`.
+
+### 2. Error Propagation with `?`
+
+```rust
+let content = std::fs::read_to_string(p)?;  // If error, return early
+toml::from_str(&content)?                    // If error, return early
+```
+
+The `?` operator unwraps `Ok` values or returns `Err` immediately.
+
+### 3. Option Chaining
+
+```rust
+let name = args.name.clone().unwrap_or_else(|| {
+    url.host_str().unwrap_or("backend").to_string()
+});
+```
+
+- If `args.name` is `Some`, use it
+- Otherwise, try to get hostname from URL
+- Otherwise, use "backend"
+
+### 4. Enums for States
+
+```rust
+pub enum LogFormat {
+    Pretty,
+    Json,
+}
+
+match config.format {
+    LogFormat::Pretty => { /* pretty formatting */ }
+    LogFormat::Json => { /* JSON formatting */ }
+}
+```
+
+Enums + match = exhaustive handling of all cases.
+
+### 5. Arc for Shared Ownership
+
+```rust
+let registry = Arc::new(Registry::new());
+load_backends_from_config(&config, &registry)?;
+// registry can now be cloned and shared across threads
+```
+
+`Arc` = Atomic Reference Counting. Multiple owners, thread-safe.
+
+---
+
+## Common Patterns in This Module
+
+### 1. Layered Configuration
+
+```
+Defaults → File → Env → CLI
+```
+
+Each layer can override the previous. This pattern is common in cloud-native apps.
+
+### 2. View Models for Display
+
+```rust
+Backend (internal) → BackendView (display)
+```
+
+Don't expose internal complexity to the UI layer.
+
+### 3. Command-Handler Separation
+
+```rust
+// mod.rs defines structure
+pub struct ServeArgs { ... }
+
+// serve.rs implements logic
+pub async fn run_serve(args: ServeArgs) { ... }
+```
+
+Keep command definitions separate from implementation.
+
+### 4. Graceful Degradation
+
+```rust
+if let Ok(p) = port.parse() {
+    self.server.port = p;
+}
+// If parse fails, just keep the default
+```
+
+Don't crash on invalid input when you can use sensible defaults.
+
+### 5. Feature Flags via CLI
+
+```rust
+#[arg(long)]
+pub no_discovery: bool,
+
+#[arg(long)]
+pub no_health_check: bool,
+```
+
+Let users disable features at runtime with `--no-X` flags.
+
+---
+
+## Summary
+
+The CLI & Configuration module provides the user-facing interface for Nexus:
+
+| Component | Purpose |
+|-----------|---------|
+| `config/mod.rs` | Load and merge configuration from multiple sources |
+| `config/server.rs` | HTTP server settings |
+| `config/routing.rs` | Routing strategy and weights |
+| `cli/mod.rs` | Define all commands using clap |
+| `cli/serve.rs` | Start the server with all components |
+| `cli/backends.rs` | Manage backends (list, add, remove) |
+| `cli/output.rs` | Format data for terminal or JSON |
+
+**Test Count**: 173 tests total (151 unit + 10 CLI integration + 6 health + 6 doc)
+
+**Next up**: F01 (API Gateway) will add the OpenAI-compatible HTTP endpoints that clients actually talk to!
diff --git a/src/cli/backends.rs b/src/cli/backends.rs
new file mode 100644
index 0000000..b68a06b
--- /dev/null
+++ b/src/cli/backends.rs
@@ -0,0 +1,329 @@
+//! Backends command implementation
+
+use crate::cli::output::{format_backends_json, format_backends_table, BackendView};
+use crate::cli::{BackendsAddArgs, BackendsListArgs, BackendsRemoveArgs};
+use crate::registry::{Backend, BackendStatus, BackendType, DiscoverySource, Registry};
+use std::collections::HashMap;
+use std::time::Duration;
+
+/// Parse status string to BackendStatus
+fn parse_status(s: &str) -> Result<BackendStatus, Box<dyn std::error::Error>> {
+    match s.to_lowercase().as_str() {
+        "healthy" => Ok(BackendStatus::Healthy),
+        "unhealthy" => Ok(BackendStatus::Unhealthy),
+        "unknown" => Ok(BackendStatus::Unknown),
+        "draining" => Ok(BackendStatus::Draining),
+        _ => Err(format!(
+            "Invalid status: {}. Use: healthy, unhealthy, unknown, draining",
+            s
+        )
+        .into()),
+    }
+}
+
+/// Handle backends list command
+pub fn handle_backends_list(
+    args: &BackendsListArgs,
+    registry: &Registry,
+) -> Result<String, Box<dyn std::error::Error>> {
+    let backends = registry.get_all_backends();
+
+    // Filter by status if provided
+    let filtered: Vec<Backend> = if let Some(ref status) = args.status {
+        let target_status = parse_status(status)?;
+        backends
+            .into_iter()
+            .filter(|b| b.status == target_status)
+            .collect()
+    } else {
+        backends
+    };
+
+    // Convert to view models
+    let views: Vec<BackendView> = filtered.iter().map(BackendView::from).collect();
+
+    if args.json {
+        Ok(format_backends_json(&views))
+    } else {
+        Ok(format_backends_table(&views))
+    }
+}
+
+/// Auto-detect backend type by probing known endpoints
+/// Detection order: Ollama -> LlamaCpp -> OpenAI-compatible -> Generic
+async fn detect_backend_type(base_url: &str) -> Option<BackendType> {
+    let client = reqwest::Client::builder()
+        .timeout(Duration::from_secs(2))
+        .build()
+        .ok()?;
+
+    // Try Ollama: GET /api/tags
+    if let Ok(resp) = client.get(format!("{}/api/tags", base_url)).send().await {
+        if resp.status().is_success() {
+            if let Ok(text) = resp.text().await {
+                if text.contains("models") {
+                    tracing::debug!(url = %base_url, "Detected Ollama backend");
+                    return Some(BackendType::Ollama);
+                }
+            }
+        }
+    }
+
+    // Try LlamaCpp: GET /health
+    if let Ok(resp) = client.get(format!("{}/health", base_url)).send().await {
+        if resp.status().is_success() {
+            if let Ok(text) = resp.text().await {
+                if text.contains("ok") || text.contains("status") {
+                    tracing::debug!(url = %base_url, "Detected LlamaCpp backend");
+                    return Some(BackendType::LlamaCpp);
+                }
+            }
+        }
+    }
+
+    // Try OpenAI-compatible: GET /v1/models
+    if let Ok(resp) = client.get(format!("{}/v1/models", base_url)).send().await {
+        if resp.status().is_success() {
+            tracing::debug!(url = %base_url, "Detected OpenAI-compatible backend");
+            return Some(BackendType::Generic); // Could be vLLM, Exo, etc.
+        }
+    }
+
+    // Fallback: unknown, will use Generic
+    tracing::debug!(url = %base_url, "Could not detect backend type, using Generic");
+    None
+}
+
+/// Handle backends add command
+pub async fn handle_backends_add(
+    args: &BackendsAddArgs,
+    registry: &Registry,
+) -> Result<String, Box<dyn std::error::Error>> {
+    // Validate URL
+    let url = reqwest::Url::parse(&args.url).map_err(|e| format!("Invalid URL: {}", e))?;
+
+    // Generate name if not provided
+    let name = args
+        .name
+        .clone()
+        .unwrap_or_else(|| url.host_str().unwrap_or("backend").to_string());
+
+    // Parse or auto-detect backend type
+    let backend_type = if let Some(ref type_str) = args.backend_type {
+        match type_str.to_lowercase().as_str() {
+            "ollama" => BackendType::Ollama,
+            "vllm" => BackendType::VLLM,
+            "llamacpp" | "llama.cpp" => BackendType::LlamaCpp,
+            "exo" => BackendType::Exo,
+            "openai" => BackendType::OpenAI,
+            "generic" => BackendType::Generic,
+            _ => return Err(format!("Unknown backend type: {}", type_str).into()),
+        }
+    } else {
+        tracing::info!(url = %args.url, "Auto-detecting backend type...");
+        detect_backend_type(&args.url)
+            .await
+            .unwrap_or(BackendType::Generic)
+    };
+
+    let backend = Backend::new(
+        uuid::Uuid::new_v4().to_string(),
+        name.clone(),
+        args.url.clone(),
+        backend_type,
+        vec![],
+        DiscoverySource::Manual,
+        HashMap::new(),
+    );
+
+    let id = backend.id.clone();
+    registry.add_backend(backend)?;
+
+    tracing::info!(name = %name, id = %id, backend_type = ?backend_type, "Backend added");
+    Ok(format!(
+        "Added backend '{}' ({}) as {:?}",
+        name, id, backend_type
+    ))
+}
+
+/// Handle backends remove command
+pub fn handle_backends_remove(
+    args: &BackendsRemoveArgs,
+    registry: &Registry,
+) -> Result<String, Box<dyn std::error::Error>> {
+    registry.remove_backend(&args.name)?;
+    Ok(format!("Removed backend: {}", args.name))
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::sync::Arc;
+
+    fn create_test_backend() -> Backend {
+        Backend::new(
+            "test-backend".to_string(),
+            "Test Backend".to_string(),
+            "http://localhost:11434".to_string(),
+            BackendType::Ollama,
+            vec![],
+            DiscoverySource::Static,
+            HashMap::new(),
+        )
+    }
+
+    #[test]
+    fn test_backends_list_empty_registry() {
+        let registry = Arc::new(Registry::new());
+        let args = BackendsListArgs {
+            json: false,
+            status: None,
+            config: std::path::PathBuf::from("nexus.toml"),
+        };
+
+        let output = handle_backends_list(&args, &registry);
+        assert!(output.is_ok());
+    }
+
+    #[test]
+    fn test_backends_list_with_backends() {
+        let registry = Arc::new(Registry::new());
+        registry.add_backend(create_test_backend()).unwrap();
+
+        let args = BackendsListArgs {
+            json: false,
+            status: None,
+            config: std::path::PathBuf::from("nexus.toml"),
+        };
+        let output = handle_backends_list(&args, &registry).unwrap();
+
+        assert!(output.contains("Test Backend") || output.contains("test"));
+    }
+
+    #[test]
+    fn test_backends_list_filter_healthy() {
+        let registry = Arc::new(Registry::new());
+
+        let mut healthy = create_test_backend();
+        healthy.id = "healthy".to_string();
+        registry.add_backend(healthy).unwrap();
+        registry
+            .update_status("healthy", BackendStatus::Healthy, None)
+            .unwrap();
+
+        let mut unhealthy = create_test_backend();
+        unhealthy.id = "unhealthy".to_string();
+        unhealthy.name = "Unhealthy Backend".to_string();
+        registry.add_backend(unhealthy).unwrap();
+        registry
+            .update_status(
+                "unhealthy",
+                BackendStatus::Unhealthy,
+                Some("error".to_string()),
+            )
+            .unwrap();
+
+        let args = BackendsListArgs {
+            json: false,
+            status: Some("healthy".to_string()),
+            config: std::path::PathBuf::from("nexus.toml"),
+        };
+        let output = handle_backends_list(&args, &registry).unwrap();
+
+        assert!(output.contains("Test Backend"));
+        assert!(!output.contains("Unhealthy Backend"));
+    }
+
+    #[test]
+    fn test_backends_list_json_output() {
+        let registry = Arc::new(Registry::new());
+        registry.add_backend(create_test_backend()).unwrap();
+
+        let args = BackendsListArgs {
+            json: true,
+            status: None,
+            config: std::path::PathBuf::from("nexus.toml"),
+        };
+        let output = handle_backends_list(&args, &registry).unwrap();
+
+        let parsed: serde_json::Value = serde_json::from_str(&output).unwrap();
+        assert!(parsed.get("backends").is_some());
+    }
+
+    #[tokio::test]
+    async fn test_backends_add_success() {
+        let registry = Arc::new(Registry::new());
+        let args = BackendsAddArgs {
+            url: "http://localhost:11434".to_string(),
+            name: Some("test".to_string()),
+            backend_type: Some("ollama".to_string()),
+            priority: 1,
+            config: std::path::PathBuf::from("nexus.toml"),
+        };
+
+        let result = handle_backends_add(&args, &registry).await;
+        assert!(result.is_ok());
+        assert_eq!(registry.backend_count(), 1);
+    }
+
+    #[tokio::test]
+    async fn test_backends_add_generates_name() {
+        let registry = Arc::new(Registry::new());
+        let args = BackendsAddArgs {
+            url: "http://192.168.1.100:8000".to_string(),
+            name: None,
+            backend_type: Some("vllm".to_string()),
+            priority: 1,
+            config: std::path::PathBuf::from("nexus.toml"),
+        };
+
+        handle_backends_add(&args, &registry).await.unwrap();
+
+        let backends = registry.get_all_backends();
+        assert!(!backends[0].name.is_empty());
+        assert!(backends[0].name.contains("192.168.1.100"));
+    }
+
+    #[tokio::test]
+    async fn test_backends_add_invalid_url() {
+        let registry = Arc::new(Registry::new());
+        let args = BackendsAddArgs {
+            url: "not-a-url".to_string(),
+            name: None,
+            backend_type: None,
+            priority: 1,
+            config: std::path::PathBuf::from("nexus.toml"),
+        };
+
+        let result = handle_backends_add(&args, &registry).await;
+        assert!(result.is_err());
+    }
+
+    #[test]
+    fn test_backends_remove_success() {
+        let registry = Arc::new(Registry::new());
+        registry.add_backend(create_test_backend()).unwrap();
+
+        let args = BackendsRemoveArgs {
+            name: "test-backend".to_string(),
+            config: std::path::PathBuf::from("nexus.toml"),
+        };
+        let result = handle_backends_remove(&args, &registry);
+
+        assert!(result.is_ok());
+        assert_eq!(registry.backend_count(), 0);
+    }
+
+    #[test]
+    fn test_backends_remove_not_found() {
+        let registry = Arc::new(Registry::new());
+
+        let args = BackendsRemoveArgs {
+            name: "nonexistent".to_string(),
+            config: std::path::PathBuf::from("nexus.toml"),
+        };
+        let result = handle_backends_remove(&args, &registry);
+
+        assert!(result.is_err());
+    }
+}
diff --git a/src/cli/completions.rs b/src/cli/completions.rs
new file mode 100644
index 0000000..15298fd
--- /dev/null
+++ b/src/cli/completions.rs
@@ -0,0 +1,33 @@
+//! Completions command implementation
+
+use crate::cli::{Cli, CompletionsArgs};
+use clap::CommandFactory;
+use clap_complete::generate;
+use std::io;
+
+/// Handle `nexus completions` command
+pub fn handle_completions(args: &CompletionsArgs) {
+    let mut cmd = Cli::command();
+    let bin_name = cmd.get_name().to_string();
+    generate(args.shell, &mut cmd, bin_name, &mut io::stdout());
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use clap_complete::Shell;
+
+    #[test]
+    fn test_completions_bash() {
+        let _args = CompletionsArgs { shell: Shell::Bash };
+        // Just verify it doesn't panic
+        // Output goes to stdout, so we can't easily capture it in tests
+        // This is tested manually via: nexus completions bash > /tmp/test.sh
+    }
+
+    #[test]
+    fn test_completions_zsh() {
+        let _args = CompletionsArgs { shell: Shell::Zsh };
+        // Just verify it doesn't panic
+    }
+}
diff --git a/src/cli/config.rs b/src/cli/config.rs
new file mode 100644
index 0000000..f61c8b4
--- /dev/null
+++ b/src/cli/config.rs
@@ -0,0 +1,88 @@
+//! Config command handlers
+
+use crate::cli::ConfigInitArgs;
+use std::fs;
+
+const EXAMPLE_CONFIG: &str = include_str!("../../nexus.example.toml");
+
+/// Handle `nexus config init` command
+pub fn handle_config_init(args: &ConfigInitArgs) -> Result<(), Box<dyn std::error::Error>> {
+    // Check if file exists
+    if args.output.exists() && !args.force {
+        return Err(format!(
+            "File already exists: {}. Use --force to overwrite.",
+            args.output.display()
+        )
+        .into());
+    }
+
+    // Write config file
+    fs::write(&args.output, EXAMPLE_CONFIG)?;
+
+    println!("✓ Configuration file created: {}", args.output.display());
+    println!("  Edit this file to customize your Nexus instance.");
+
+    Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use tempfile;
+
+    #[test]
+    fn test_config_init_creates_file() {
+        let temp_dir = tempfile::tempdir().unwrap();
+        let output_path = temp_dir.path().join("nexus.toml");
+
+        let args = ConfigInitArgs {
+            output: output_path.clone(),
+            force: false,
+        };
+
+        handle_config_init(&args).unwrap();
+
+        assert!(output_path.exists());
+        let content = std::fs::read_to_string(&output_path).unwrap();
+        assert!(content.contains("[server]"));
+    }
+
+    #[test]
+    fn test_config_init_no_overwrite() {
+        let temp_dir = tempfile::tempdir().unwrap();
+        let output_path = temp_dir.path().join("nexus.toml");
+
+        // Create existing file
+        std::fs::write(&output_path, "existing").unwrap();
+
+        let args = ConfigInitArgs {
+            output: output_path.clone(),
+            force: false,
+        };
+
+        let result = handle_config_init(&args);
+        assert!(result.is_err());
+
+        // Original content preserved
+        let content = std::fs::read_to_string(&output_path).unwrap();
+        assert_eq!(content, "existing");
+    }
+
+    #[test]
+    fn test_config_init_force_overwrites() {
+        let temp_dir = tempfile::tempdir().unwrap();
+        let output_path = temp_dir.path().join("nexus.toml");
+
+        std::fs::write(&output_path, "old content").unwrap();
+
+        let args = ConfigInitArgs {
+            output: output_path.clone(),
+            force: true,
+        };
+
+        handle_config_init(&args).unwrap();
+
+        let content = std::fs::read_to_string(&output_path).unwrap();
+        assert!(content.contains("[server]"));
+    }
+}
diff --git a/src/cli/health.rs b/src/cli/health.rs
new file mode 100644
index 0000000..b161cf5
--- /dev/null
+++ b/src/cli/health.rs
@@ -0,0 +1,226 @@
+//! Health command implementation
+
+use crate::cli::output::BackendView;
+use crate::cli::HealthArgs;
+use crate::registry::{BackendStatus, Registry};
+use colored::Colorize;
+use serde::Serialize;
+use std::fmt::Write;
+use std::time::Duration;
+
+#[derive(Serialize)]
+pub struct HealthStatus {
+    pub status: String,
+    pub version: String,
+    pub uptime_seconds: u64,
+    pub backends: BackendCounts,
+    pub models: ModelCounts,
+}
+
+#[derive(Serialize)]
+pub struct BackendCounts {
+    pub total: usize,
+    pub healthy: usize,
+    pub unhealthy: usize,
+}
+
+#[derive(Serialize)]
+pub struct ModelCounts {
+    pub total: usize,
+}
+
+/// Format duration in a human-readable way
+fn format_duration(seconds: u64) -> String {
+    let hours = seconds / 3600;
+    let minutes = (seconds % 3600) / 60;
+    let secs = seconds % 60;
+
+    if hours > 0 {
+        format!("{}h {}m {}s", hours, minutes, secs)
+    } else if minutes > 0 {
+        format!("{}m {}s", minutes, secs)
+    } else {
+        format!("{}s", secs)
+    }
+}
+
+/// Format health status as pretty text
+fn format_health_pretty(status: &HealthStatus, backends: &[BackendView]) -> String {
+    let mut output = String::new();
+
+    let status_display = if status.status == "healthy" {
+        "Healthy".green()
+    } else {
+        "Degraded".yellow()
+    };
+
+    writeln!(output, "Status: {}", status_display).unwrap();
+    writeln!(output, "Version: {}", status.version).unwrap();
+    writeln!(output, "Uptime: {}", format_duration(status.uptime_seconds)).unwrap();
+    writeln!(output).unwrap();
+    writeln!(
+        output,
+        "Backends: {}/{} healthy",
+        status.backends.healthy, status.backends.total
+    )
+    .unwrap();
+    writeln!(output, "Models: {} available", status.models.total).unwrap();
+
+    if !backends.is_empty() {
+        writeln!(output).unwrap();
+        writeln!(output, "Backend Details:").unwrap();
+        for b in backends {
+            let status_icon = match b.status {
+                BackendStatus::Healthy => "✓".green(),
+                BackendStatus::Unhealthy => "✗".red(),
+                BackendStatus::Unknown => "?".yellow(),
+                BackendStatus::Draining => "~".cyan(),
+            };
+            writeln!(
+                output,
+                "  {} {} ({}) - {} models",
+                status_icon,
+                b.name,
+                b.backend_type,
+                b.models.len()
+            )
+            .unwrap();
+        }
+    }
+
+    output
+}
+
+/// Handle health command
+pub fn handle_health(
+    args: &HealthArgs,
+    registry: &Registry,
+    uptime: Duration,
+) -> Result<String, Box<dyn std::error::Error>> {
+    let backends = registry.get_all_backends();
+    let healthy = backends
+        .iter()
+        .filter(|b| b.status == BackendStatus::Healthy)
+        .count();
+    let model_count = registry.model_count();
+
+    let status = HealthStatus {
+        status: if healthy > 0 || backends.is_empty() {
+            "healthy".to_string()
+        } else {
+            "degraded".to_string()
+        },
+        version: env!("CARGO_PKG_VERSION").to_string(),
+        uptime_seconds: uptime.as_secs(),
+        backends: BackendCounts {
+            total: backends.len(),
+            healthy,
+            unhealthy: backends.len() - healthy,
+        },
+        models: ModelCounts { total: model_count },
+    };
+
+    if args.json {
+        Ok(serde_json::to_string_pretty(&status)?)
+    } else {
+        let backend_views: Vec<BackendView> = backends.iter().map(BackendView::from).collect();
+        Ok(format_health_pretty(&status, &backend_views))
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::registry::{Backend, BackendType, DiscoverySource};
+    use std::collections::HashMap;
+    use std::sync::Arc;
+
+    fn create_test_backend() -> Backend {
+        Backend::new(
+            "test-backend".to_string(),
+            "Test Backend".to_string(),
+            "http://localhost:11434".to_string(),
+            BackendType::Ollama,
+            vec![],
+            DiscoverySource::Static,
+            HashMap::new(),
+        )
+    }
+
+    #[test]
+    fn test_health_shows_summary() {
+        let registry = Arc::new(Registry::new());
+
+        let mut healthy = create_test_backend();
+        healthy.id = "healthy".to_string();
+        registry.add_backend(healthy).unwrap();
+        registry
+            .update_status("healthy", BackendStatus::Healthy, None)
+            .unwrap();
+
+        let args = HealthArgs {
+            json: false,
+            config: std::path::PathBuf::from("nexus.toml"),
+        };
+        let output = handle_health(&args, &registry, Duration::from_secs(3600)).unwrap();
+
+        assert!(output.contains("Status:"));
+        assert!(output.contains("1/1 healthy"));
+    }
+
+    #[test]
+    fn test_health_degraded_status() {
+        let registry = Arc::new(Registry::new());
+
+        // All backends unhealthy = degraded
+        let backend = create_test_backend();
+        let id = backend.id.clone();
+        registry.add_backend(backend).unwrap();
+        registry
+            .update_status(&id, BackendStatus::Unhealthy, Some("error".to_string()))
+            .unwrap();
+
+        let args = HealthArgs {
+            json: false,
+            config: std::path::PathBuf::from("nexus.toml"),
+        };
+        let output = handle_health(&args, &registry, Duration::from_secs(0)).unwrap();
+
+        assert!(output.contains("degraded") || output.contains("Degraded"));
+    }
+
+    #[test]
+    fn test_health_json_valid() {
+        let registry = Arc::new(Registry::new());
+
+        let args = HealthArgs {
+            json: true,
+            config: std::path::PathBuf::from("nexus.toml"),
+        };
+        let output = handle_health(&args, &registry, Duration::from_secs(100)).unwrap();
+
+        let parsed: serde_json::Value = serde_json::from_str(&output).unwrap();
+        assert!(parsed.get("status").is_some());
+        assert!(parsed.get("uptime_seconds").is_some());
+    }
+
+    #[test]
+    fn test_health_shows_uptime() {
+        let registry = Arc::new(Registry::new());
+
+        let args = HealthArgs {
+            json: false,
+            config: std::path::PathBuf::from("nexus.toml"),
+        };
+        let output = handle_health(&args, &registry, Duration::from_secs(3661)).unwrap();
+
+        assert!(output.contains("1h") || output.contains("3661"));
+    }
+
+    #[test]
+    fn test_format_duration() {
+        assert_eq!(format_duration(30), "30s");
+        assert_eq!(format_duration(90), "1m 30s");
+        assert_eq!(format_duration(3661), "1h 1m 1s");
+    }
+}
diff --git a/src/cli/mod.rs b/src/cli/mod.rs
new file mode 100644
index 0000000..dbd27c9
--- /dev/null
+++ b/src/cli/mod.rs
@@ -0,0 +1,282 @@
+//! CLI module for Nexus
+//!
+//! Command-line interface definitions and handlers for the Nexus LLM orchestrator.
+//!
+//! # Commands
+//!
+//! - `serve` - Start the Nexus server
+//! - `backends` - Manage LLM backends (list, add, remove)
+//! - `models` - List available models across all backends
+//! - `health` - Show system health status
+//! - `config` - Configuration utilities (init)
+//! - `completions` - Generate shell completions
+//!
+//! # Example
+//!
+//! ```bash
+//! # Start server with default config
+//! nexus serve
+//!
+//! # List backends with status
+//! nexus backends list --status healthy
+//!
+//! # Generate shell completions
+//! nexus completions bash > ~/.bash_completion.d/nexus
+//! ```
+
+pub mod backends;
+pub mod completions;
+pub mod config;
+pub mod health;
+pub mod models;
+pub mod output;
+pub mod serve;
+
+pub use completions::handle_completions;
+pub use config::handle_config_init;
+
+use clap::{Args, Parser, Subcommand};
+use std::path::PathBuf;
+
+/// Nexus - Distributed LLM Orchestrator
+#[derive(Parser, Debug)]
+#[command(
+    name = "nexus",
+    version,
+    about = "Distributed LLM model serving orchestrator"
+)]
+pub struct Cli {
+    #[command(subcommand)]
+    pub command: Commands,
+}
+
+#[derive(Subcommand, Debug)]
+pub enum Commands {
+    /// Start the Nexus server
+    Serve(ServeArgs),
+    /// Manage backends
+    #[command(subcommand)]
+    Backends(BackendsCommands),
+    /// List available models
+    Models(ModelsArgs),
+    /// Show system health
+    Health(HealthArgs),
+    /// Configuration utilities
+    #[command(subcommand)]
+    Config(ConfigCommands),
+    /// Generate shell completions
+    Completions(CompletionsArgs),
+}
+
+#[derive(Args, Debug)]
+pub struct ServeArgs {
+    /// Path to configuration file
+    #[arg(short, long, default_value = "nexus.toml")]
+    pub config: PathBuf,
+
+    /// Override server port
+    #[arg(short, long, env = "NEXUS_PORT")]
+    pub port: Option<u16>,
+
+    /// Override server host
+    #[arg(short = 'H', long, env = "NEXUS_HOST")]
+    pub host: Option<String>,
+
+    /// Set log level (trace, debug, info, warn, error)
+    #[arg(short, long, env = "NEXUS_LOG_LEVEL")]
+    pub log_level: Option<String>,
+
+    /// Disable mDNS backend discovery
+    #[arg(long)]
+    pub no_discovery: bool,
+
+    /// Disable health checks
+    #[arg(long)]
+    pub no_health_check: bool,
+}
+
+#[derive(Subcommand, Debug)]
+pub enum BackendsCommands {
+    /// List configured and discovered backends
+    List(BackendsListArgs),
+    /// Add a new backend
+    Add(BackendsAddArgs),
+    /// Remove a backend
+    Remove(BackendsRemoveArgs),
+}
+
+#[derive(Args, Debug)]
+pub struct BackendsListArgs {
+    /// Output as JSON
+    #[arg(long)]
+    pub json: bool,
+
+    /// Filter by status (healthy, unhealthy, unknown, draining)
+    #[arg(short, long)]
+    pub status: Option<String>,
+
+    /// Path to configuration file
+    #[arg(short, long, default_value = "nexus.toml")]
+    pub config: PathBuf,
+}
+
+#[derive(Args, Debug)]
+pub struct BackendsAddArgs {
+    /// Backend URL (e.g., http://localhost:11434)
+    pub url: String,
+
+    /// Backend name (optional, auto-detected if not provided)
+    #[arg(short, long)]
+    pub name: Option<String>,
+
+    /// Backend type (ollama, vllm, openai, claude)
+    #[arg(short = 't', long)]
+    pub backend_type: Option<String>,
+
+    /// Priority (lower = higher priority)
+    #[arg(short, long, default_value = "50")]
+    pub priority: i32,
+
+    /// Path to configuration file
+    #[arg(short, long, default_value = "nexus.toml")]
+    pub config: PathBuf,
+}
+
+#[derive(Args, Debug)]
+pub struct BackendsRemoveArgs {
+    /// Backend name to remove
+    pub name: String,
+
+    /// Path to configuration file
+    #[arg(short, long, default_value = "nexus.toml")]
+    pub config: PathBuf,
+}
+
+#[derive(Args, Debug)]
+pub struct ModelsArgs {
+    /// Output as JSON
+    #[arg(long)]
+    pub json: bool,
+
+    /// Filter by backend ID
+    #[arg(short, long)]
+    pub backend: Option<String>,
+
+    /// Path to configuration file
+    #[arg(short = 'c', long, default_value = "nexus.toml")]
+    pub config: PathBuf,
+}
+
+#[derive(Args, Debug)]
+pub struct HealthArgs {
+    /// Output as JSON
+    #[arg(long)]
+    pub json: bool,
+
+    /// Path to configuration file
+    #[arg(short, long, default_value = "nexus.toml")]
+    pub config: PathBuf,
+}
+
+#[derive(Subcommand, Debug)]
+pub enum ConfigCommands {
+    /// Initialize a new configuration file
+    Init(ConfigInitArgs),
+}
+
+#[derive(Args, Debug)]
+pub struct ConfigInitArgs {
+    /// Output file path
+    #[arg(short, long, default_value = "nexus.toml")]
+    pub output: PathBuf,
+
+    /// Overwrite existing file
+    #[arg(short, long)]
+    pub force: bool,
+}
+
+#[derive(Args, Debug)]
+pub struct CompletionsArgs {
+    /// Shell to generate completions for
+    #[arg(value_enum)]
+    pub shell: clap_complete::Shell,
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use clap::Parser;
+
+    #[test]
+    fn test_cli_parse_serve_defaults() {
+        let cli = Cli::try_parse_from(["nexus", "serve"]).unwrap();
+        match cli.command {
+            Commands::Serve(args) => {
+                assert_eq!(args.config, PathBuf::from("nexus.toml"));
+                assert!(args.port.is_none());
+                assert!(!args.no_discovery);
+            }
+            _ => panic!("Expected Serve command"),
+        }
+    }
+
+    #[test]
+    fn test_cli_parse_serve_with_port() {
+        let cli = Cli::try_parse_from(["nexus", "serve", "-p", "9000"]).unwrap();
+        match cli.command {
+            Commands::Serve(args) => assert_eq!(args.port, Some(9000)),
+            _ => panic!("Expected Serve command"),
+        }
+    }
+
+    #[test]
+    fn test_cli_parse_serve_with_config() {
+        let cli = Cli::try_parse_from(["nexus", "serve", "-c", "custom.toml"]).unwrap();
+        match cli.command {
+            Commands::Serve(args) => assert_eq!(args.config, PathBuf::from("custom.toml")),
+            _ => panic!("Expected Serve command"),
+        }
+    }
+
+    #[test]
+    fn test_cli_parse_backends_list() {
+        let cli = Cli::try_parse_from(["nexus", "backends", "list"]).unwrap();
+        assert!(matches!(
+            cli.command,
+            Commands::Backends(BackendsCommands::List(_))
+        ));
+    }
+
+    #[test]
+    fn test_cli_parse_backends_list_json() {
+        let cli = Cli::try_parse_from(["nexus", "backends", "list", "--json"]).unwrap();
+        match cli.command {
+            Commands::Backends(BackendsCommands::List(args)) => assert!(args.json),
+            _ => panic!("Expected Backends List command"),
+        }
+    }
+
+    #[test]
+    fn test_cli_parse_backends_add() {
+        let cli =
+            Cli::try_parse_from(["nexus", "backends", "add", "http://localhost:11434"]).unwrap();
+        match cli.command {
+            Commands::Backends(BackendsCommands::Add(args)) => {
+                assert_eq!(args.url, "http://localhost:11434");
+            }
+            _ => panic!("Expected Backends Add command"),
+        }
+    }
+
+    #[test]
+    fn test_cli_parse_models() {
+        let cli = Cli::try_parse_from(["nexus", "models"]).unwrap();
+        assert!(matches!(cli.command, Commands::Models(_)));
+    }
+
+    #[test]
+    fn test_cli_parse_health() {
+        let cli = Cli::try_parse_from(["nexus", "health"]).unwrap();
+        assert!(matches!(cli.command, Commands::Health(_)));
+    }
+}
diff --git a/src/cli/models.rs b/src/cli/models.rs
new file mode 100644
index 0000000..9f2b617
--- /dev/null
+++ b/src/cli/models.rs
@@ -0,0 +1,168 @@
+//! Models command implementation
+
+use crate::cli::output::{format_models_json, format_models_table, ModelView};
+use crate::cli::ModelsArgs;
+use crate::registry::{Model, Registry};
+use std::collections::HashMap;
+
+impl ModelView {
+    fn from_model(model: &Model, backends: Vec<String>) -> Self {
+        Self {
+            id: model.id.clone(),
+            backends,
+            context_length: model.context_length,
+        }
+    }
+}
+
+/// Handle models command
+pub fn handle_models(
+    args: &ModelsArgs,
+    registry: &Registry,
+) -> Result<String, Box<dyn std::error::Error>> {
+    let backends = if let Some(ref id) = args.backend {
+        match registry.get_backend(id) {
+            Some(b) => vec![b],
+            None => return Err(format!("Backend not found: {}", id).into()),
+        }
+    } else {
+        registry.get_all_backends()
+    };
+
+    // Aggregate models with their backends
+    let mut model_map: HashMap<String, ModelView> = HashMap::new();
+    for backend in backends {
+        for model in &backend.models {
+            model_map
+                .entry(model.id.clone())
+                .and_modify(|mv| mv.backends.push(backend.name.clone()))
+                .or_insert_with(|| ModelView::from_model(model, vec![backend.name.clone()]));
+        }
+    }
+
+    let models: Vec<_> = model_map.into_values().collect();
+
+    if args.json {
+        Ok(format_models_json(&models))
+    } else {
+        Ok(format_models_table(&models))
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::registry::{Backend, BackendType, DiscoverySource, Model};
+    use std::collections::HashMap;
+    use std::sync::Arc;
+
+    fn create_test_backend() -> Backend {
+        Backend::new(
+            "test-backend".to_string(),
+            "Test Backend".to_string(),
+            "http://localhost:11434".to_string(),
+            BackendType::Ollama,
+            vec![],
+            DiscoverySource::Static,
+            HashMap::new(),
+        )
+    }
+
+    fn create_test_model(id: &str) -> Model {
+        Model {
+            id: id.to_string(),
+            name: id.to_string(),
+            context_length: 4096,
+            supports_vision: false,
+            supports_tools: false,
+            supports_json_mode: false,
+            max_output_tokens: None,
+        }
+    }
+
+    #[test]
+    fn test_models_list_empty() {
+        let registry = Arc::new(Registry::new());
+        let args = ModelsArgs {
+            json: false,
+            backend: None,
+            config: std::path::PathBuf::from("nexus.toml"),
+        };
+
+        let output = handle_models(&args, &registry).unwrap();
+        assert!(output.contains("Model")); // Header
+    }
+
+    #[test]
+    fn test_models_list_aggregated() {
+        let registry = Arc::new(Registry::new());
+
+        // Add two backends with overlapping models
+        let mut backend1 = create_test_backend();
+        backend1.id = "backend1".to_string();
+        backend1.models = vec![create_test_model("llama3:70b")];
+        registry.add_backend(backend1).unwrap();
+
+        let mut backend2 = create_test_backend();
+        backend2.id = "backend2".to_string();
+        backend2.name = "Backend 2".to_string();
+        backend2.models = vec![
+            create_test_model("llama3:70b"),
+            create_test_model("mistral:7b"),
+        ];
+        registry.add_backend(backend2).unwrap();
+
+        let args = ModelsArgs {
+            json: false,
+            backend: None,
+            config: std::path::PathBuf::from("nexus.toml"),
+        };
+        let output = handle_models(&args, &registry).unwrap();
+
+        assert!(output.contains("llama3:70b"));
+        assert!(output.contains("mistral:7b"));
+    }
+
+    #[test]
+    fn test_models_filter_by_backend() {
+        let registry = Arc::new(Registry::new());
+
+        let mut backend1 = create_test_backend();
+        backend1.id = "backend1".to_string();
+        backend1.models = vec![create_test_model("llama3:70b")];
+        registry.add_backend(backend1).unwrap();
+
+        let mut backend2 = create_test_backend();
+        backend2.id = "backend2".to_string();
+        backend2.models = vec![create_test_model("mistral:7b")];
+        registry.add_backend(backend2).unwrap();
+
+        let args = ModelsArgs {
+            json: false,
+            backend: Some("backend1".to_string()),
+            config: std::path::PathBuf::from("nexus.toml"),
+        };
+        let output = handle_models(&args, &registry).unwrap();
+
+        assert!(output.contains("llama3:70b"));
+        assert!(!output.contains("mistral:7b"));
+    }
+
+    #[test]
+    fn test_models_json_output() {
+        let registry = Arc::new(Registry::new());
+        let mut backend = create_test_backend();
+        backend.models = vec![create_test_model("llama3:70b")];
+        registry.add_backend(backend).unwrap();
+
+        let args = ModelsArgs {
+            json: true,
+            backend: None,
+            config: std::path::PathBuf::from("nexus.toml"),
+        };
+        let output = handle_models(&args, &registry).unwrap();
+
+        let parsed: serde_json::Value = serde_json::from_str(&output).unwrap();
+        assert!(parsed.get("models").is_some());
+    }
+}
diff --git a/src/cli/output.rs b/src/cli/output.rs
new file mode 100644
index 0000000..1845a84
--- /dev/null
+++ b/src/cli/output.rs
@@ -0,0 +1,172 @@
+//! Output formatting helpers for CLI commands
+
+use crate::registry::{Backend, BackendStatus};
+use colored::Colorize;
+use comfy_table::{presets::UTF8_FULL, Cell, ContentArrangement, Table};
+use serde_json::json;
+use std::sync::atomic::Ordering;
+
+/// View model for backend display
+#[derive(Debug, Clone, serde::Serialize)]
+pub struct BackendView {
+    pub name: String,
+    pub url: String,
+    pub backend_type: String,
+    pub status: BackendStatus,
+    pub models: Vec<String>,
+    pub avg_latency_ms: u64,
+}
+
+impl From<&Backend> for BackendView {
+    fn from(backend: &Backend) -> Self {
+        Self {
+            name: backend.name.clone(),
+            url: backend.url.clone(),
+            backend_type: format!("{:?}", backend.backend_type),
+            status: backend.status,
+            models: backend.models.iter().map(|m| m.id.clone()).collect(),
+            avg_latency_ms: backend.avg_latency_ms.load(Ordering::Relaxed) as u64,
+        }
+    }
+}
+
+/// View model for model display
+#[derive(Debug, Clone, serde::Serialize)]
+pub struct ModelView {
+    pub id: String,
+    pub backends: Vec<String>,
+    pub context_length: u32,
+}
+
+/// Format backends as a table
+pub fn format_backends_table(backends: &[BackendView]) -> String {
+    let mut table = Table::new();
+    table.load_preset(UTF8_FULL);
+    table.set_content_arrangement(ContentArrangement::Dynamic);
+    table.set_header(vec!["Name", "URL", "Type", "Status", "Models", "Latency"]);
+
+    for b in backends {
+        let status_str = match b.status {
+            BackendStatus::Healthy => "Healthy".green().to_string(),
+            BackendStatus::Unhealthy => "Unhealthy".red().to_string(),
+            BackendStatus::Unknown => "Unknown".yellow().to_string(),
+            BackendStatus::Draining => "Draining".cyan().to_string(),
+        };
+
+        table.add_row(vec![
+            Cell::new(&b.name),
+            Cell::new(&b.url),
+            Cell::new(&b.backend_type),
+            Cell::new(status_str),
+            Cell::new(b.models.len()),
+            Cell::new(format!("{}ms", b.avg_latency_ms)),
+        ]);
+    }
+
+    table.to_string()
+}
+
+/// Format backends as JSON
+pub fn format_backends_json(backends: &[BackendView]) -> String {
+    serde_json::to_string_pretty(&json!({
+        "backends": backends
+    }))
+    .unwrap()
+}
+
+/// Format models as a table
+pub fn format_models_table(models: &[ModelView]) -> String {
+    let mut table = Table::new();
+    table.load_preset(UTF8_FULL);
+    table.set_content_arrangement(ContentArrangement::Dynamic);
+    table.set_header(vec!["Model", "Backends", "Context Length"]);
+
+    for m in models {
+        table.add_row(vec![
+            Cell::new(&m.id),
+            Cell::new(m.backends.join(", ")),
+            Cell::new(m.context_length),
+        ]);
+    }
+
+    table.to_string()
+}
+
+/// Format models as JSON
+pub fn format_models_json(models: &[ModelView]) -> String {
+    serde_json::to_string_pretty(&json!({
+        "models": models
+    }))
+    .unwrap()
+}
+
+/// Get status icon for backend status
+pub fn status_icon(status: BackendStatus) -> &'static str {
+    match status {
+        BackendStatus::Healthy => "✓",
+        BackendStatus::Unhealthy => "✗",
+        BackendStatus::Unknown => "?",
+        BackendStatus::Draining => "~",
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn create_test_backend_view() -> BackendView {
+        BackendView {
+            name: "test-backend".to_string(),
+            url: "http://localhost:11434".to_string(),
+            backend_type: "Ollama".to_string(),
+            status: BackendStatus::Healthy,
+            models: vec!["llama2".to_string()],
+            avg_latency_ms: 50,
+        }
+    }
+
+    fn create_test_model_view() -> ModelView {
+        ModelView {
+            id: "llama2".to_string(),
+            backends: vec!["backend1".to_string()],
+            context_length: 4096,
+        }
+    }
+
+    #[test]
+    fn test_format_backends_table_empty() {
+        let output = format_backends_table(&[]);
+        assert!(output.contains("Name")); // Header present
+    }
+
+    #[test]
+    fn test_format_backends_table_with_data() {
+        let backends = vec![create_test_backend_view()];
+        let output = format_backends_table(&backends);
+        assert!(output.contains("test-backend"));
+        assert!(output.contains("Healthy"));
+    }
+
+    #[test]
+    fn test_format_backends_json_valid() {
+        let backends = vec![create_test_backend_view()];
+        let output = format_backends_json(&backends);
+        let parsed: serde_json::Value = serde_json::from_str(&output).unwrap();
+        assert!(parsed.get("backends").is_some());
+    }
+
+    #[test]
+    fn test_format_models_table() {
+        let models = vec![create_test_model_view()];
+        let output = format_models_table(&models);
+        assert!(output.contains("Model"));
+        assert!(output.contains("Context"));
+    }
+
+    #[test]
+    fn test_status_icon_healthy() {
+        assert_eq!(status_icon(BackendStatus::Healthy), "✓");
+        assert_eq!(status_icon(BackendStatus::Unhealthy), "✗");
+        assert_eq!(status_icon(BackendStatus::Unknown), "?");
+    }
+}
diff --git a/src/cli/serve.rs b/src/cli/serve.rs
new file mode 100644
index 0000000..8d5ac86
--- /dev/null
+++ b/src/cli/serve.rs
@@ -0,0 +1,314 @@
+//! Serve command implementation
+
+use crate::cli::ServeArgs;
+use crate::config::{LogFormat, NexusConfig};
+use crate::health::HealthChecker;
+use crate::registry::{Backend, DiscoverySource, Registry};
+use axum::{routing::get, Router};
+use std::collections::HashMap;
+use std::sync::Arc;
+use tokio_util::sync::CancellationToken;
+use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt, EnvFilter};
+
+/// Load configuration with CLI overrides
+pub fn load_config_with_overrides(
+    args: &ServeArgs,
+) -> Result<NexusConfig, Box<dyn std::error::Error>> {
+    // Load from file if it exists, otherwise use defaults
+    let mut config = if args.config.exists() {
+        NexusConfig::load(Some(&args.config))?
+    } else {
+        tracing::debug!("Config file not found, using defaults");
+        NexusConfig::default()
+    };
+
+    // Apply environment variable overrides
+    config = config.with_env_overrides();
+
+    // Apply CLI overrides (highest priority)
+    if let Some(port) = args.port {
+        config.server.port = port;
+    }
+    if let Some(ref host) = args.host {
+        config.server.host = host.clone();
+    }
+    if let Some(ref log_level) = args.log_level {
+        config.logging.level = log_level.clone();
+    }
+    if args.no_discovery {
+        config.discovery.enabled = false;
+    }
+    if args.no_health_check {
+        config.health_check.enabled = false;
+    }
+
+    Ok(config)
+}
+
+/// Initialize tracing based on configuration
+pub fn init_tracing(
+    config: &crate::config::LoggingConfig,
+) -> Result<(), Box<dyn std::error::Error>> {
+    let env_filter =
+        EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new(&config.level));
+
+    match config.format {
+        LogFormat::Pretty => {
+            tracing_subscriber::registry()
+                .with(env_filter)
+                .with(tracing_subscriber::fmt::layer().pretty())
+                .try_init()?;
+        }
+        LogFormat::Json => {
+            tracing_subscriber::registry()
+                .with(env_filter)
+                .with(tracing_subscriber::fmt::layer().json())
+                .try_init()?;
+        }
+    }
+
+    Ok(())
+}
+
+/// Load backends from configuration into registry
+pub fn load_backends_from_config(
+    config: &NexusConfig,
+    registry: &Registry,
+) -> Result<(), Box<dyn std::error::Error>> {
+    for backend_config in &config.backends {
+        let backend = Backend::new(
+            uuid::Uuid::new_v4().to_string(),
+            backend_config.name.clone(),
+            backend_config.url.clone(),
+            backend_config.backend_type,
+            vec![], // Models will be discovered by health checker
+            DiscoverySource::Static,
+            HashMap::new(),
+        );
+
+        registry.add_backend(backend)?;
+        tracing::info!(
+            name = %backend_config.name,
+            url = %backend_config.url,
+            backend_type = ?backend_config.backend_type,
+            "Loaded static backend from config"
+        );
+    }
+
+    Ok(())
+}
+
+/// Build basic HTTP router with health endpoint
+fn build_basic_router(registry: Arc<Registry>) -> Router {
+    Router::new()
+        .route("/health", get(health_handler))
+        .with_state(registry)
+}
+
+/// Simple health check endpoint
+async fn health_handler() -> &'static str {
+    "OK"
+}
+
+/// Wait for shutdown signal (SIGINT or SIGTERM)
+async fn shutdown_signal(cancel_token: CancellationToken) {
+    let ctrl_c = async {
+        tokio::signal::ctrl_c()
+            .await
+            .expect("Failed to install CTRL+C handler");
+    };
+
+    #[cfg(unix)]
+    let terminate = async {
+        tokio::signal::unix::signal(tokio::signal::unix::SignalKind::terminate())
+            .expect("Failed to install SIGTERM handler")
+            .recv()
+            .await;
+    };
+
+    #[cfg(not(unix))]
+    let terminate = std::future::pending::<()>();
+
+    tokio::select! {
+        _ = ctrl_c => {
+            tracing::info!("Received SIGINT, shutting down...");
+        }
+        _ = terminate => {
+            tracing::info!("Received SIGTERM, shutting down...");
+        }
+    }
+
+    cancel_token.cancel();
+}
+
+/// Main serve command handler
+pub async fn run_serve(args: ServeArgs) -> Result<(), Box<dyn std::error::Error>> {
+    // 1. Load and merge configuration
+    let config = load_config_with_overrides(&args)?;
+
+    // Validate configuration
+    config.validate()?;
+
+    // 2. Initialize tracing
+    init_tracing(&config.logging)?;
+
+    tracing::info!("Starting Nexus server");
+    tracing::debug!(?config, "Loaded configuration");
+
+    // 3. Create registry and load static backends
+    let registry = Arc::new(Registry::new());
+    load_backends_from_config(&config, &registry)?;
+
+    // 4. Start health checker (if enabled)
+    let cancel_token = CancellationToken::new();
+    let health_handle = if config.health_check.enabled {
+        tracing::info!("Starting health checker");
+        let checker = HealthChecker::new(registry.clone(), config.health_check.clone());
+        Some(checker.start(cancel_token.clone()))
+    } else {
+        tracing::info!("Health checking disabled");
+        None
+    };
+
+    // 5. Build minimal HTTP server
+    let app = build_basic_router(registry.clone());
+
+    // 6. Bind and serve
+    let addr = format!("{}:{}", config.server.host, config.server.port);
+    tracing::info!(addr = %addr, "Nexus server listening");
+
+    let listener = tokio::net::TcpListener::bind(&addr).await?;
+
+    axum::serve(listener, app)
+        .with_graceful_shutdown(shutdown_signal(cancel_token.clone()))
+        .await?;
+
+    // 7. Cleanup
+    if let Some(handle) = health_handle {
+        tracing::info!("Waiting for health checker to stop");
+        handle.await?;
+    }
+
+    tracing::info!("Nexus server stopped");
+    Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::config::BackendConfig;
+    use crate::registry::BackendType;
+    use std::path::PathBuf;
+    use std::time::Duration;
+    use tempfile::NamedTempFile;
+
+    #[tokio::test]
+    async fn test_serve_config_loading() {
+        let temp = NamedTempFile::new().unwrap();
+        std::fs::write(temp.path(), "[server]\nport = 8080").unwrap();
+
+        let args = ServeArgs {
+            config: temp.path().to_path_buf(),
+            port: None,
+            host: None,
+            log_level: None,
+            no_discovery: false,
+            no_health_check: false,
+        };
+
+        let config = load_config_with_overrides(&args).unwrap();
+        assert_eq!(config.server.port, 8080);
+    }
+
+    #[tokio::test]
+    async fn test_serve_cli_overrides_config() {
+        let temp = NamedTempFile::new().unwrap();
+        std::fs::write(temp.path(), "[server]\nport = 8080").unwrap();
+
+        let args = ServeArgs {
+            config: temp.path().to_path_buf(),
+            port: Some(9000), // Override
+            host: None,
+            log_level: None,
+            no_discovery: false,
+            no_health_check: false,
+        };
+
+        let config = load_config_with_overrides(&args).unwrap();
+        assert_eq!(config.server.port, 9000); // CLI wins
+    }
+
+    #[tokio::test]
+    async fn test_serve_works_without_config_file() {
+        let args = ServeArgs {
+            config: PathBuf::from("nonexistent.toml"),
+            port: None,
+            host: None,
+            log_level: None,
+            no_discovery: false,
+            no_health_check: false,
+        };
+
+        let config = load_config_with_overrides(&args).unwrap();
+        assert_eq!(config.server.port, 8000); // Default
+    }
+
+    #[tokio::test]
+    async fn test_backends_loaded_from_config() {
+        let mut config = NexusConfig::default();
+        config.backends.push(BackendConfig {
+            name: "test".to_string(),
+            url: "http://localhost:11434".to_string(),
+            backend_type: BackendType::Ollama,
+            priority: 1,
+            api_key_env: None,
+        });
+
+        let registry = Arc::new(Registry::new());
+        load_backends_from_config(&config, &registry).unwrap();
+
+        assert_eq!(registry.backend_count(), 1);
+    }
+
+    #[tokio::test]
+    async fn test_shutdown_signal_triggers_cancel() {
+        let cancel = CancellationToken::new();
+        let cancel_clone = cancel.clone();
+
+        let handle = tokio::spawn(async move {
+            // Simulate shutdown after 100ms
+            tokio::time::sleep(Duration::from_millis(100)).await;
+            cancel_clone.cancel();
+        });
+
+        // This should return when cancelled
+        tokio::select! {
+            _ = cancel.cancelled() => {}
+            _ = tokio::time::sleep(Duration::from_secs(5)) => {
+                panic!("Shutdown didn't trigger");
+            }
+        }
+
+        handle.await.unwrap();
+    }
+
+    #[tokio::test]
+    async fn test_health_checker_stops_on_shutdown() {
+        let registry = Arc::new(Registry::new());
+        let config = crate::health::HealthCheckConfig::default();
+        let checker = HealthChecker::new(registry, config);
+
+        let cancel = CancellationToken::new();
+        let handle = checker.start(cancel.clone());
+
+        // Let it run briefly
+        tokio::time::sleep(Duration::from_millis(50)).await;
+
+        // Trigger shutdown
+        cancel.cancel();
+
+        // Should complete quickly
+        let result = tokio::time::timeout(Duration::from_secs(1), handle).await;
+        assert!(result.is_ok());
+    }
+}
diff --git a/src/config/backend.rs b/src/config/backend.rs
new file mode 100644
index 0000000..8d8a6ca
--- /dev/null
+++ b/src/config/backend.rs
@@ -0,0 +1,23 @@
+//! Backend configuration
+
+use serde::{Deserialize, Serialize};
+
+// Re-export BackendType from registry
+pub use crate::registry::BackendType;
+
+/// Backend configuration
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct BackendConfig {
+    pub name: String,
+    pub url: String,
+    #[serde(rename = "type")]
+    pub backend_type: BackendType,
+    #[serde(default = "default_priority")]
+    pub priority: i32,
+    #[serde(default)]
+    pub api_key_env: Option<String>,
+}
+
+fn default_priority() -> i32 {
+    50
+}
diff --git a/src/config/discovery.rs b/src/config/discovery.rs
new file mode 100644
index 0000000..a95bec6
--- /dev/null
+++ b/src/config/discovery.rs
@@ -0,0 +1,23 @@
+//! Discovery configuration
+
+use serde::{Deserialize, Serialize};
+
+/// mDNS discovery configuration
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[serde(default)]
+pub struct DiscoveryConfig {
+    pub enabled: bool,
+    pub service_types: Vec<String>,
+}
+
+impl Default for DiscoveryConfig {
+    fn default() -> Self {
+        Self {
+            enabled: true,
+            service_types: vec![
+                "_ollama._tcp.local".to_string(),
+                "_llm._tcp.local".to_string(),
+            ],
+        }
+    }
+}
diff --git a/src/config/error.rs b/src/config/error.rs
new file mode 100644
index 0000000..e7249e6
--- /dev/null
+++ b/src/config/error.rs
@@ -0,0 +1,23 @@
+//! Configuration error types
+
+use std::path::PathBuf;
+use thiserror::Error;
+
+/// Configuration-related errors
+#[derive(Error, Debug)]
+pub enum ConfigError {
+    #[error("IO error: {0}")]
+    Io(#[from] std::io::Error),
+
+    #[error("Config file not found: {0}")]
+    NotFound(PathBuf),
+
+    #[error("Failed to parse config: {0}")]
+    Parse(String),
+
+    #[error("Invalid value for '{field}': {message}")]
+    Validation { field: String, message: String },
+
+    #[error("Missing required field: {0}")]
+    MissingField(String),
+}
diff --git a/src/config/logging.rs b/src/config/logging.rs
new file mode 100644
index 0000000..91ecfab
--- /dev/null
+++ b/src/config/logging.rs
@@ -0,0 +1,63 @@
+//! Logging configuration
+
+use serde::{Deserialize, Serialize};
+use std::str::FromStr;
+
+/// Log output format
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
+#[serde(rename_all = "snake_case")]
+pub enum LogFormat {
+    /// Pretty-printed logs for humans
+    #[default]
+    Pretty,
+    /// JSON logs for machine parsing
+    Json,
+}
+
+impl FromStr for LogFormat {
+    type Err = String;
+
+    fn from_str(s: &str) -> Result<Self, Self::Err> {
+        match s.to_lowercase().as_str() {
+            "pretty" => Ok(LogFormat::Pretty),
+            "json" => Ok(LogFormat::Json),
+            _ => Err(format!("Invalid log format: {}", s)),
+        }
+    }
+}
+
+/// Logging configuration
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[serde(default)]
+pub struct LoggingConfig {
+    pub level: String,
+    pub format: LogFormat,
+}
+
+impl Default for LoggingConfig {
+    fn default() -> Self {
+        Self {
+            level: "info".to_string(),
+            format: LogFormat::Pretty,
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_logging_config_defaults() {
+        let config = LoggingConfig::default();
+        assert_eq!(config.level, "info");
+        assert_eq!(config.format, LogFormat::Pretty);
+    }
+
+    #[test]
+    fn test_log_format_serde() {
+        let format = LogFormat::Json;
+        let json = serde_json::to_string(&format).unwrap();
+        assert_eq!(json, "\"json\"");
+    }
+}
diff --git a/src/config/mod.rs b/src/config/mod.rs
new file mode 100644
index 0000000..9fb06e0
--- /dev/null
+++ b/src/config/mod.rs
@@ -0,0 +1,267 @@
+//! Configuration module for Nexus
+//!
+//! Provides layered configuration loading from files, environment variables, and defaults.
+//!
+//! # Configuration Precedence
+//!
+//! 1. CLI arguments (highest priority)
+//! 2. Environment variables (`NEXUS_*`)
+//! 3. Configuration file (TOML)
+//! 4. Default values (lowest priority)
+//!
+//! # Example
+//!
+//! ```rust
+//! use nexus::config::NexusConfig;
+//!
+//! // Load defaults
+//! let config = NexusConfig::default();
+//! assert_eq!(config.server.port, 8000);
+//!
+//! // Parse from TOML
+//! let toml = r#"
+//! [server]
+//! port = 9000
+//! "#;
+//! let config: NexusConfig = toml::from_str(toml).unwrap();
+//! assert_eq!(config.server.port, 9000);
+//! ```
+
+pub mod backend;
+pub mod discovery;
+pub mod error;
+pub mod logging;
+pub mod routing;
+pub mod server;
+
+pub use backend::{BackendConfig, BackendType};
+pub use discovery::DiscoveryConfig;
+pub use error::ConfigError;
+pub use logging::{LogFormat, LoggingConfig};
+pub use routing::{RoutingConfig, RoutingStrategy, RoutingWeights};
+pub use server::ServerConfig;
+
+// Re-export HealthCheckConfig from health module
+pub use crate::health::HealthCheckConfig;
+
+use serde::{Deserialize, Serialize};
+use std::path::Path;
+
+/// Unified configuration for the Nexus server.
+///
+/// This struct aggregates all configuration sections including server settings,
+/// discovery, health checking, routing, backends, and logging.
+///
+/// # Example
+///
+/// ```rust
+/// use nexus::config::NexusConfig;
+///
+/// let config = NexusConfig::default();
+/// assert_eq!(config.server.port, 8000);
+/// assert_eq!(config.server.host, "0.0.0.0");
+/// ```
+#[derive(Debug, Clone, Serialize, Deserialize, Default)]
+#[serde(default)]
+pub struct NexusConfig {
+    /// HTTP server configuration
+    pub server: ServerConfig,
+    /// mDNS discovery settings
+    pub discovery: DiscoveryConfig,
+    /// Health check configuration
+    pub health_check: HealthCheckConfig,
+    /// Request routing configuration
+    pub routing: RoutingConfig,
+    /// Static backend definitions
+    pub backends: Vec<BackendConfig>,
+    /// Logging configuration
+    pub logging: LoggingConfig,
+}
+
+impl NexusConfig {
+    /// Load configuration from a TOML file
+    ///
+    /// If path is None, returns default configuration.
+    /// If path doesn't exist, returns NotFound error.
+    pub fn load(path: Option<&Path>) -> Result<Self, ConfigError> {
+        match path {
+            Some(p) => {
+                if !p.exists() {
+                    return Err(ConfigError::NotFound(p.to_path_buf()));
+                }
+                let content = std::fs::read_to_string(p)?;
+                toml::from_str(&content).map_err(|e| ConfigError::Parse(e.to_string()))
+            }
+            None => Ok(Self::default()),
+        }
+    }
+
+    /// Apply environment variable overrides
+    ///
+    /// Supports NEXUS_* environment variables for common settings.
+    /// Invalid values are silently ignored (defaults are kept).
+    pub fn with_env_overrides(mut self) -> Self {
+        // Server settings
+        if let Ok(port) = std::env::var("NEXUS_PORT") {
+            if let Ok(p) = port.parse() {
+                self.server.port = p;
+            }
+        }
+        if let Ok(host) = std::env::var("NEXUS_HOST") {
+            self.server.host = host;
+        }
+
+        // Logging settings
+        if let Ok(level) = std::env::var("NEXUS_LOG_LEVEL") {
+            self.logging.level = level;
+        }
+        if let Ok(format) = std::env::var("NEXUS_LOG_FORMAT") {
+            if let Ok(f) = format.parse() {
+                self.logging.format = f;
+            }
+        }
+
+        // Discovery and health check
+        if let Ok(discovery) = std::env::var("NEXUS_DISCOVERY") {
+            self.discovery.enabled = discovery.to_lowercase() == "true";
+        }
+        if let Ok(health) = std::env::var("NEXUS_HEALTH_CHECK") {
+            self.health_check.enabled = health.to_lowercase() == "true";
+        }
+
+        self
+    }
+
+    /// Validate configuration
+    pub fn validate(&self) -> Result<(), ConfigError> {
+        // Validate server config
+        if self.server.port == 0 {
+            return Err(ConfigError::Validation {
+                field: "server.port".to_string(),
+                message: "port must be non-zero".to_string(),
+            });
+        }
+
+        // Validate backends
+        for (i, backend) in self.backends.iter().enumerate() {
+            if backend.url.is_empty() {
+                return Err(ConfigError::Validation {
+                    field: format!("backends[{}].url", i),
+                    message: "URL cannot be empty".to_string(),
+                });
+            }
+            if backend.name.is_empty() {
+                return Err(ConfigError::Validation {
+                    field: format!("backends[{}].name", i),
+                    message: "name cannot be empty".to_string(),
+                });
+            }
+        }
+
+        Ok(())
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::path::Path;
+
+    #[test]
+    fn test_nexus_config_defaults() {
+        let config = NexusConfig::default();
+        assert_eq!(config.server.port, 8000);
+        assert!(config.discovery.enabled);
+        assert!(config.health_check.enabled);
+        assert!(config.backends.is_empty());
+    }
+
+    #[test]
+    fn test_config_parse_minimal_toml() {
+        let toml = r#"
+        [server]
+        port = 9000
+        "#;
+
+        let config: NexusConfig = toml::from_str(toml).unwrap();
+        assert_eq!(config.server.port, 9000);
+        assert_eq!(config.server.host, "0.0.0.0"); // Default
+    }
+
+    #[test]
+    fn test_config_parse_full_toml() {
+        let toml = include_str!("../../nexus.example.toml");
+        let config: NexusConfig = toml::from_str(toml).unwrap();
+        assert!(config.server.port > 0);
+    }
+
+    #[test]
+    fn test_config_parse_backends_array() {
+        let toml = r#"
+        [[backends]]
+        name = "local"
+        url = "http://localhost:11434"
+        type = "ollama"
+
+        [[backends]]
+        name = "remote"
+        url = "http://192.168.1.100:8000"
+        type = "vllm"
+        "#;
+
+        let config: NexusConfig = toml::from_str(toml).unwrap();
+        assert_eq!(config.backends.len(), 2);
+    }
+
+    #[test]
+    fn test_config_load_from_file() {
+        let temp = tempfile::NamedTempFile::new().unwrap();
+        std::fs::write(temp.path(), "[server]\nport = 8080").unwrap();
+
+        let config = NexusConfig::load(Some(temp.path())).unwrap();
+        assert_eq!(config.server.port, 8080);
+    }
+
+    #[test]
+    fn test_config_missing_file_error() {
+        let result = NexusConfig::load(Some(Path::new("/nonexistent/config.toml")));
+        assert!(matches!(result, Err(ConfigError::NotFound(_))));
+    }
+
+    #[test]
+    fn test_config_env_override_port() {
+        std::env::set_var("NEXUS_PORT", "9999");
+        let config = NexusConfig::default().with_env_overrides();
+        std::env::remove_var("NEXUS_PORT");
+
+        assert_eq!(config.server.port, 9999);
+    }
+
+    #[test]
+    fn test_config_env_override_host() {
+        std::env::set_var("NEXUS_HOST", "127.0.0.1");
+        let config = NexusConfig::default().with_env_overrides();
+        std::env::remove_var("NEXUS_HOST");
+
+        assert_eq!(config.server.host, "127.0.0.1");
+    }
+
+    #[test]
+    fn test_config_env_override_log_level() {
+        std::env::set_var("NEXUS_LOG_LEVEL", "debug");
+        let config = NexusConfig::default().with_env_overrides();
+        std::env::remove_var("NEXUS_LOG_LEVEL");
+
+        assert_eq!(config.logging.level, "debug");
+    }
+
+    #[test]
+    fn test_config_env_invalid_value_ignored() {
+        std::env::set_var("NEXUS_PORT", "not-a-number");
+        let config = NexusConfig::default().with_env_overrides();
+        std::env::remove_var("NEXUS_PORT");
+
+        // Should keep default, not crash
+        assert_eq!(config.server.port, 8000);
+    }
+}
diff --git a/src/config/routing.rs b/src/config/routing.rs
new file mode 100644
index 0000000..12694cb
--- /dev/null
+++ b/src/config/routing.rs
@@ -0,0 +1,81 @@
+//! Routing configuration
+
+use serde::{Deserialize, Serialize};
+use std::collections::HashMap;
+
+/// Routing strategy for backend selection
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
+#[serde(rename_all = "snake_case")]
+pub enum RoutingStrategy {
+    /// Smart routing based on multiple factors
+    #[default]
+    Smart,
+    /// Round-robin across backends
+    RoundRobin,
+    /// Priority-only routing
+    PriorityOnly,
+    /// Random selection
+    Random,
+}
+
+/// Routing configuration
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[serde(default)]
+pub struct RoutingConfig {
+    pub strategy: RoutingStrategy,
+    pub max_retries: u32,
+    pub weights: RoutingWeights,
+    #[serde(default)]
+    pub aliases: HashMap<String, String>,
+    #[serde(default)]
+    pub fallbacks: HashMap<String, Vec<String>>,
+}
+
+/// Routing weights for backend selection
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct RoutingWeights {
+    pub priority: f64,
+    pub load: f64,
+    pub latency: f64,
+}
+
+impl Default for RoutingConfig {
+    fn default() -> Self {
+        Self {
+            strategy: RoutingStrategy::Smart,
+            max_retries: 2,
+            weights: RoutingWeights::default(),
+            aliases: HashMap::new(),
+            fallbacks: HashMap::new(),
+        }
+    }
+}
+
+impl Default for RoutingWeights {
+    fn default() -> Self {
+        Self {
+            priority: 50.0,
+            load: 30.0,
+            latency: 20.0,
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_routing_config_defaults() {
+        let config = RoutingConfig::default();
+        assert_eq!(config.strategy, RoutingStrategy::Smart);
+        assert_eq!(config.max_retries, 2);
+    }
+
+    #[test]
+    fn test_routing_strategy_serde() {
+        let strategy = RoutingStrategy::RoundRobin;
+        let json = serde_json::to_string(&strategy).unwrap();
+        assert_eq!(json, "\"round_robin\"");
+    }
+}
diff --git a/src/config/server.rs b/src/config/server.rs
new file mode 100644
index 0000000..575e26c
--- /dev/null
+++ b/src/config/server.rs
@@ -0,0 +1,38 @@
+//! Server configuration
+
+use serde::{Deserialize, Serialize};
+
+/// Server configuration
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[serde(default)]
+pub struct ServerConfig {
+    pub host: String,
+    pub port: u16,
+    pub request_timeout_seconds: u64,
+    pub max_concurrent_requests: u32,
+}
+
+impl Default for ServerConfig {
+    fn default() -> Self {
+        Self {
+            host: "0.0.0.0".to_string(),
+            port: 8000,
+            request_timeout_seconds: 300,
+            max_concurrent_requests: 1000,
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_server_config_defaults() {
+        let config = ServerConfig::default();
+        assert_eq!(config.host, "0.0.0.0");
+        assert_eq!(config.port, 8000);
+        assert_eq!(config.request_timeout_seconds, 300);
+        assert_eq!(config.max_concurrent_requests, 1000);
+    }
+}
diff --git a/src/health/tests.rs b/src/health/tests.rs
index bc81898..97d36d1 100644
--- a/src/health/tests.rs
+++ b/src/health/tests.rs
@@ -114,8 +114,10 @@ fn test_state_default() {
 
 #[test]
 fn test_state_clone() {
-    let mut state = BackendHealthState::default();
-    state.consecutive_failures = 2;
+    let state = BackendHealthState {
+        consecutive_failures: 2,
+        ..Default::default()
+    };
     let cloned = state.clone();
     assert_eq!(cloned.consecutive_failures, 2);
 }
@@ -283,8 +285,10 @@ fn test_unknown_to_unhealthy_on_failure() {
 
 #[test]
 fn test_healthy_stays_healthy_under_threshold() {
-    let mut state = BackendHealthState::default();
-    state.last_status = BackendStatus::Healthy;
+    let mut state = BackendHealthState {
+        last_status: BackendStatus::Healthy,
+        ..Default::default()
+    };
     let config = HealthCheckConfig::default();
 
     // First failure
@@ -299,8 +303,10 @@ fn test_healthy_stays_healthy_under_threshold() {
 
 #[test]
 fn test_healthy_to_unhealthy_at_threshold() {
-    let mut state = BackendHealthState::default();
-    state.last_status = BackendStatus::Healthy;
+    let mut state = BackendHealthState {
+        last_status: BackendStatus::Healthy,
+        ..Default::default()
+    };
     let config = HealthCheckConfig::default();
 
     // Apply 3 consecutive failures
@@ -314,8 +320,10 @@ fn test_healthy_to_unhealthy_at_threshold() {
 
 #[test]
 fn test_unhealthy_stays_unhealthy_under_threshold() {
-    let mut state = BackendHealthState::default();
-    state.last_status = BackendStatus::Unhealthy;
+    let mut state = BackendHealthState {
+        last_status: BackendStatus::Unhealthy,
+        ..Default::default()
+    };
     let config = HealthCheckConfig::default();
 
     // First success - under recovery threshold (2)
@@ -326,8 +334,10 @@ fn test_unhealthy_stays_unhealthy_under_threshold() {
 
 #[test]
 fn test_unhealthy_to_healthy_at_threshold() {
-    let mut state = BackendHealthState::default();
-    state.last_status = BackendStatus::Unhealthy;
+    let mut state = BackendHealthState {
+        last_status: BackendStatus::Unhealthy,
+        ..Default::default()
+    };
     let config = HealthCheckConfig::default();
 
     // Apply 2 consecutive successes
@@ -340,8 +350,10 @@ fn test_unhealthy_to_healthy_at_threshold() {
 
 #[test]
 fn test_success_resets_failure_counter() {
-    let mut state = BackendHealthState::default();
-    state.last_status = BackendStatus::Healthy;
+    let mut state = BackendHealthState {
+        last_status: BackendStatus::Healthy,
+        ..Default::default()
+    };
     let config = HealthCheckConfig::default();
 
     // Apply 2 failures
@@ -357,8 +369,10 @@ fn test_success_resets_failure_counter() {
 
 #[test]
 fn test_failure_resets_success_counter() {
-    let mut state = BackendHealthState::default();
-    state.last_status = BackendStatus::Unhealthy;
+    let mut state = BackendHealthState {
+        last_status: BackendStatus::Unhealthy,
+        ..Default::default()
+    };
     let config = HealthCheckConfig::default();
 
     // Apply 1 success
diff --git a/src/lib.rs b/src/lib.rs
index 098a86c..9721ac5 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -3,5 +3,7 @@
 //! This library provides the core functionality for managing and routing requests
 //! to heterogeneous LLM inference backends.
 
+pub mod cli;
+pub mod config;
 pub mod health;
 pub mod registry;
diff --git a/src/main.rs b/src/main.rs
index 269c5bc..aafd3c9 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,4 +1,108 @@
-fn main() {
-    println!("Nexus - LLM Orchestrator");
-    println!("Run 'nexus serve' to start the server");
+use clap::Parser;
+use nexus::cli::{
+    backends, handle_completions, handle_config_init, health, models, BackendsCommands, Cli,
+    Commands, ConfigCommands,
+};
+use nexus::config::NexusConfig;
+use nexus::registry::Registry;
+use std::sync::Arc;
+use std::time::Instant;
+
+#[tokio::main]
+async fn main() {
+    let cli = Cli::parse();
+    let start_time = Instant::now();
+
+    let result = match cli.command {
+        Commands::Serve(args) => nexus::cli::serve::run_serve(args).await,
+        Commands::Backends(cmd) => match cmd {
+            BackendsCommands::List(args) => {
+                // Load config to get backends
+                let config = NexusConfig::load(Some(&args.config))
+                    .unwrap_or_else(|_| NexusConfig::default());
+                let registry = Arc::new(Registry::new());
+
+                // Load static backends from config
+                if let Err(e) = nexus::cli::serve::load_backends_from_config(&config, &registry) {
+                    eprintln!("Warning: Failed to load backends: {}", e);
+                }
+
+                match backends::handle_backends_list(&args, &registry) {
+                    Ok(output) => {
+                        println!("{}", output);
+                        Ok(())
+                    }
+                    Err(e) => Err(e),
+                }
+            }
+            BackendsCommands::Add(args) => {
+                let registry = Arc::new(Registry::new());
+                match backends::handle_backends_add(&args, &registry).await {
+                    Ok(msg) => {
+                        println!("{}", msg);
+                        Ok(())
+                    }
+                    Err(e) => Err(e),
+                }
+            }
+            BackendsCommands::Remove(args) => {
+                let registry = Arc::new(Registry::new());
+                // TODO: Load registry from config first
+                match backends::handle_backends_remove(&args, &registry) {
+                    Ok(msg) => {
+                        println!("{}", msg);
+                        Ok(())
+                    }
+                    Err(e) => Err(e),
+                }
+            }
+        },
+        Commands::Models(args) => {
+            let config =
+                NexusConfig::load(Some(&args.config)).unwrap_or_else(|_| NexusConfig::default());
+            let registry = Arc::new(Registry::new());
+
+            if let Err(e) = nexus::cli::serve::load_backends_from_config(&config, &registry) {
+                eprintln!("Warning: Failed to load backends: {}", e);
+            }
+
+            match models::handle_models(&args, &registry) {
+                Ok(output) => {
+                    println!("{}", output);
+                    Ok(())
+                }
+                Err(e) => Err(e),
+            }
+        }
+        Commands::Health(args) => {
+            let config =
+                NexusConfig::load(Some(&args.config)).unwrap_or_else(|_| NexusConfig::default());
+            let registry = Arc::new(Registry::new());
+
+            if let Err(e) = nexus::cli::serve::load_backends_from_config(&config, &registry) {
+                eprintln!("Warning: Failed to load backends: {}", e);
+            }
+
+            let uptime = start_time.elapsed();
+            match health::handle_health(&args, &registry, uptime) {
+                Ok(output) => {
+                    println!("{}", output);
+                    Ok(())
+                }
+                Err(e) => Err(e),
+            }
+        }
+        Commands::Config(config_cmd) => match config_cmd {
+            ConfigCommands::Init(args) => handle_config_init(&args),
+        },
+        Commands::Completions(args) => {
+            handle_completions(&args);
+            Ok(())
+        }
+    };
+
+    if let Err(e) = result {
+        eprintln!("Error: {}", e);
+        std::process::exit(1);
+    }
 }
diff --git a/src/registry/tests.rs b/src/registry/tests.rs
index fe6cf8a..d88c78f 100644
--- a/src/registry/tests.rs
+++ b/src/registry/tests.rs
@@ -850,7 +850,6 @@ fn test_update_status_changes_status() {
 
 #[test]
 fn test_update_status_sets_timestamp() {
-    use chrono::Utc;
     use std::thread;
     use std::time::Duration;
 
diff --git a/tests/cli_integration.rs b/tests/cli_integration.rs
new file mode 100644
index 0000000..452d1d7
--- /dev/null
+++ b/tests/cli_integration.rs
@@ -0,0 +1,141 @@
+//! CLI Integration Tests for F04
+//!
+//! End-to-end tests for CLI commands using assert_cmd.
+
+#![allow(deprecated)] // cargo_bin is deprecated but still works
+
+use assert_cmd::Command;
+use predicates::prelude::*;
+use tempfile::TempDir;
+
+/// Get the nexus binary for testing
+fn nexus_cmd() -> Command {
+    Command::cargo_bin("nexus").unwrap()
+}
+
+#[test]
+fn test_version_output() {
+    nexus_cmd()
+        .arg("--version")
+        .assert()
+        .success()
+        .stdout(predicate::str::contains("nexus"));
+}
+
+#[test]
+fn test_help_shows_all_commands() {
+    nexus_cmd()
+        .arg("--help")
+        .assert()
+        .success()
+        .stdout(predicate::str::contains("serve"))
+        .stdout(predicate::str::contains("backends"))
+        .stdout(predicate::str::contains("models"))
+        .stdout(predicate::str::contains("health"))
+        .stdout(predicate::str::contains("config"))
+        .stdout(predicate::str::contains("completions"));
+}
+
+#[test]
+fn test_serve_help() {
+    nexus_cmd()
+        .args(["serve", "--help"])
+        .assert()
+        .success()
+        .stdout(predicate::str::contains("--config"))
+        .stdout(predicate::str::contains("--port"))
+        .stdout(predicate::str::contains("--host"));
+}
+
+#[test]
+fn test_backends_help() {
+    nexus_cmd()
+        .args(["backends", "--help"])
+        .assert()
+        .success()
+        .stdout(predicate::str::contains("list"))
+        .stdout(predicate::str::contains("add"))
+        .stdout(predicate::str::contains("remove"));
+}
+
+#[test]
+fn test_config_init_creates_file() {
+    let temp_dir = TempDir::new().unwrap();
+    let config_path = temp_dir.path().join("nexus.toml");
+
+    nexus_cmd()
+        .args(["config", "init", "-o", config_path.to_str().unwrap()])
+        .assert()
+        .success();
+
+    assert!(config_path.exists());
+    let content = std::fs::read_to_string(&config_path).unwrap();
+    assert!(content.contains("[server]"));
+}
+
+#[test]
+fn test_config_init_no_overwrite() {
+    let temp_dir = TempDir::new().unwrap();
+    let config_path = temp_dir.path().join("nexus.toml");
+
+    // Create file first
+    std::fs::write(&config_path, "existing content").unwrap();
+
+    // Try to overwrite without --force
+    nexus_cmd()
+        .args(["config", "init", "-o", config_path.to_str().unwrap()])
+        .assert()
+        .failure()
+        .stderr(predicate::str::contains("exists"));
+}
+
+#[test]
+fn test_config_init_force_overwrites() {
+    let temp_dir = TempDir::new().unwrap();
+    let config_path = temp_dir.path().join("nexus.toml");
+
+    // Create file first
+    std::fs::write(&config_path, "existing content").unwrap();
+
+    // Force overwrite
+    nexus_cmd()
+        .args([
+            "config",
+            "init",
+            "-o",
+            config_path.to_str().unwrap(),
+            "--force",
+        ])
+        .assert()
+        .success();
+
+    let content = std::fs::read_to_string(&config_path).unwrap();
+    assert!(content.contains("[server]"));
+}
+
+#[test]
+fn test_invalid_command() {
+    nexus_cmd()
+        .arg("invalid-command")
+        .assert()
+        .failure()
+        .stderr(predicate::str::contains("error"));
+}
+
+#[test]
+fn test_completions_bash() {
+    nexus_cmd()
+        .args(["completions", "bash"])
+        .assert()
+        .success()
+        .stdout(predicate::str::contains("complete"));
+}
+
+#[test]
+fn test_completions_zsh() {
+    nexus_cmd()
+        .args(["completions", "zsh"])
+        .assert()
+        .success()
+        .stdout(predicate::str::contains("compdef"));
+}