Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions examples/vercel_ai_webshop/.dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Dependencies
node_modules/
.pnpm-store/
agl/venv/

# IDE and editor files
.idea/
.vscode/
*.swp
*.swo

# Environment files (contain secrets)
.env
.env.local
.env.*.local

# Git
.git/
.gitignore

# Documentation (except README)
*.md
!README.md

# Test files
*.test.ts
*.spec.ts
__tests__/

# Logs
logs/
*.log

# Misc
.DS_Store
Thumbs.db
34 changes: 34 additions & 0 deletions examples/vercel_ai_webshop/.env.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Docker Compose Environment Variables
# Copy this file to .env and fill in your values:
# cp .env.example .env

# =============================================================================
# LLM Configuration
# =============================================================================
#
# Dev Mode (make dev):
# - CPU-only, uses OPENAI_API_BASE for LLM inference
# - Set OPENAI_API_KEY and optionally OPENAI_API_BASE
# - If OPENAI_API_BASE is not set, uses OpenAI's default endpoint
#
# GPU Mode (make train):
# - Uses VERL which internally manages vLLM
# - VERL publishes the LLM endpoint to the Store
# - Runners automatically discover the endpoint (no config needed)
# - OPENAI_API_KEY and OPENAI_API_BASE are ignored in GPU mode
#
# =============================================================================

# OpenAI API key (required for dev mode, ignored in GPU mode)
OPENAI_API_KEY=sk-xxx

# Custom OpenAI-compatible endpoint (optional for dev mode)
# Examples:
# - OpenAI: leave empty (uses default)
# - Local vLLM: http://localhost:8000/v1
# - Azure OpenAI: https://your-resource.openai.azure.com
# OPENAI_API_BASE=

# Model ID for dev mode (ignored in GPU mode - VERL config controls model)
# Default: gpt-4o-mini
WEBSHOP_MODEL=gpt-4o-mini
63 changes: 63 additions & 0 deletions examples/vercel_ai_webshop/.env.local.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# =============================================================================
# UI Configuration (for debugging/testing)
# =============================================================================

# WebShop Server Configuration
# URL of the Python Flask WebShop server
WEBSHOP_URL=http://localhost:3000

# =============================================================================
# Model Serving Configuration
# =============================================================================
#
# There are two ways to configure model serving:
#
# OPTION 1: VERL-Integrated (Recommended for Training)
# -----------------------------------------------------
# When running training with VERL (python agl/run_training.py qwen):
# - VERL automatically starts vLLM with the Qwen model
# - VERL starts the LLM Proxy and publishes it to the Store
# - The headless runner fetches the LLM endpoint from the Store
# - Leave OPENAI_API_BASE unset; the runner discovers it automatically
#
# OPTION 2: Manual/External Endpoint
# -----------------------------------
# For development, debugging, or using external APIs:
# - Set OPENAI_API_BASE to your endpoint (OpenAI, local vLLM, etc.)
# - Set WEBSHOP_MODEL to the model ID
# - This is also useful for UI-only testing without the training loop
#
# API key for OpenAI or compatible service (use 'dummy' for local vLLM)
OPENAI_API_KEY=sk-xxx

# Base URL for OpenAI-compatible endpoint (vLLM, LLMProxy, etc.)
# Leave unset when using VERL-integrated training (auto-discovered from Store)
# Set explicitly for manual/external endpoint usage
# OPENAI_API_BASE=http://localhost:8000/v1

# Model ID to use for the WebShop agent
# When using VERL-integrated training, this is overridden by the Store resource
WEBSHOP_MODEL=gpt-4o-mini

# =============================================================================
# Headless Runner Configuration (for training)
# These variables are ONLY used by scripts/headless-runner.ts
# The UI operates in standalone demo mode and ignores these settings.
# =============================================================================

# Agent Lightning Store URL (required for training)
AGENT_LIGHTNING_STORE_URL=http://localhost:4747

# OTLP endpoint for OpenTelemetry traces
# If unset, auto-derived from AGENT_LIGHTNING_STORE_URL with /v1/traces appended
# AGENT_LIGHTNING_OTLP_ENDPOINT=http://localhost:4747/v1/traces

# Mode for rollouts: train, val, or test
AGENT_LIGHTNING_MODE=train

# Service name for OpenTelemetry traces
AGENT_LIGHTNING_SERVICE_NAME=webshop-headless-runner

# Debugging
# Enable OpenTelemetry diagnostics logging
# OTEL_DIAG_LOG_LEVEL=DEBUG
8 changes: 8 additions & 0 deletions examples/vercel_ai_webshop/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
node_modules/
.venv/
.next/
*.tsbuildinfo
server/webshop/

# Log files from make dev/train
logs/
58 changes: 58 additions & 0 deletions examples/vercel_ai_webshop/Dockerfile.agl
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# Agent Lightning Training Coordinator
# Starts the Store server and coordinates training with external runners
#
# Build context should be the repository root:
# docker build -f examples/vercel_ai_webshop/Dockerfile.agl -t agl-server .
#
# For GPU training:
# docker build -f examples/vercel_ai_webshop/Dockerfile.agl --build-arg INSTALL_GPU=true -t agl-server-gpu .
FROM python:3.11-slim

# Build-time switch: "true" additionally installs the VERL extras for GPU training.
ARG INSTALL_GPU=false

WORKDIR /app

# System dependencies: git (pip VCS/source installs), curl (HEALTHCHECK probe).
# update + install combined in one layer so the apt cache is never stale;
# list cleanup in the same layer keeps it out of the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
    curl \
    git \
    && rm -rf /var/lib/apt/lists/*

# Copy the entire agentlightning package from repo root
# README.md is required by hatchling for package metadata
COPY pyproject.toml uv.lock README.md ./
COPY agentlightning/ ./agentlightning/

# Copy the example's agl directory
COPY examples/vercel_ai_webshop/agl/ ./agl/

# Install pip tooling and the package.
# GPU mode tries the VERL extras first and deliberately falls back to the base
# install if they cannot be resolved — but warns loudly so a degraded GPU image
# is visible in the build log instead of surfacing as a confusing runtime error.
RUN pip install --no-cache-dir --upgrade pip wheel setuptools && \
    if [ "$INSTALL_GPU" = "true" ]; then \
        pip install --no-cache-dir -e ".[verl]" || \
        { echo "WARNING: [verl] extras failed to install; falling back to base (CPU-only) package" >&2; \
          pip install --no-cache-dir -e .; }; \
    else \
        pip install --no-cache-dir -e .; \
    fi && \
    pip install --no-cache-dir -r agl/requirements.txt

# Runtime configuration: unbuffered Python logs plus the Store server bind
# address/port, grouped in a single ENV instruction.
ENV PYTHONUNBUFFERED=1 \
    AGENT_LIGHTNING_STORE_HOST=0.0.0.0 \
    AGENT_LIGHTNING_STORE_PORT=4747

# Documentation only (does not publish): the Store server listens on 4747.
EXPOSE 4747

# Probe the Store server's health endpoint.
# -f fails on HTTP errors, -sS keeps output quiet but still reports failures.
HEALTHCHECK --interval=30s --timeout=10s --start-period=30s --retries=3 \
    CMD curl -fsS http://localhost:4747/health || exit 1

# Default command runs dev mode
# Override with command args for different modes:
#   --dev : CPU-only dev mode (default)
#   fast  : Fast training (requires GPU)
#   qwen  : Full Qwen training (requires GPU)
# NOTE(review): image runs as root; consider a non-root USER once the training
# process's filesystem-write requirements are confirmed.
WORKDIR /app/agl
CMD ["python", "run_training.py", "--dev"]
24 changes: 24 additions & 0 deletions examples/vercel_ai_webshop/Dockerfile.runner
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Headless Runner Image
# Headless Runner Image
FROM node:20-slim

WORKDIR /app

# pnpm for dependency management; procps for healthchecks/debugging (ps).
# update + install in one layer, with apt list cleanup in the same layer.
RUN npm install -g pnpm && \
    apt-get update && apt-get install -y --no-install-recommends procps && \
    rm -rf /var/lib/apt/lists/*

# Copy manifests before source so the dependency layer is cached until
# package.json / pnpm-lock.yaml change.
COPY package.json pnpm-lock.yaml* ./

# Prefer the reproducible frozen-lockfile install; deliberately fall back when
# no usable lockfile is in the build context, but warn loudly so the
# non-reproducible install is visible in the build log.
RUN pnpm install --frozen-lockfile || \
    { echo "WARNING: frozen-lockfile install failed; falling back to non-reproducible 'pnpm install'" >&2; \
      pnpm install; }

# Copy source code after dependencies so source edits do not bust the install layer.
COPY tsconfig.json ./
COPY src/ ./src/
COPY scripts/ ./scripts/

# Default WebShop server address (the compose service name); override at run time.
ENV WEBSHOP_URL=http://webshop:3000

# Default Command (Headless Runner).
# Shell wrapper is required for ${WORKER_ID:-runner-1} expansion; `exec` makes
# pnpm replace the shell as PID 1 so SIGTERM from `docker stop` reaches it.
CMD ["sh", "-c", "exec pnpm headless -- --worker-id ${WORKER_ID:-runner-1}"]
Loading
Loading