diff --git a/kiloclaw/src/durable-objects/kiloclaw-app.test.ts b/kiloclaw/src/durable-objects/kiloclaw-app.test.ts index 060c65ffe..e01795a2d 100644 --- a/kiloclaw/src/durable-objects/kiloclaw-app.test.ts +++ b/kiloclaw/src/durable-objects/kiloclaw-app.test.ts @@ -128,7 +128,9 @@ describe('ensureApp', () => { expect(appsClient.createApp).toHaveBeenCalledWith( { apiToken: 'test-token' }, result.appName, - 'test-org' + 'test-org', + 'user-1', + 'kiloclaw_user_id' ); expect(appsClient.allocateIP).toHaveBeenCalledTimes(2); expect(appsClient.allocateIP).toHaveBeenCalledWith('test-token', result.appName, 'v6'); diff --git a/kiloclaw/src/durable-objects/kiloclaw-app.ts b/kiloclaw/src/durable-objects/kiloclaw-app.ts index 53436609d..9db23a5cf 100644 --- a/kiloclaw/src/durable-objects/kiloclaw-app.ts +++ b/kiloclaw/src/durable-objects/kiloclaw-app.ts @@ -20,6 +20,7 @@ import type { KiloClawEnv } from '../types'; import * as apps from '../fly/apps'; import { setAppSecret } from '../fly/secrets'; import { generateEnvKey } from '../utils/env-encryption'; +import { METADATA_KEY_USER_ID } from './kiloclaw-instance'; // -- Persisted state schema -- @@ -114,7 +115,7 @@ export class KiloClawApp extends DurableObject { if (!this.ipv4Allocated || !this.ipv6Allocated) { const existing = await apps.getApp({ apiToken }, appName); if (!existing) { - await apps.createApp({ apiToken }, appName, orgSlug); + await apps.createApp({ apiToken }, appName, orgSlug, userId, METADATA_KEY_USER_ID); console.log('[AppDO] Created Fly App:', appName); } } diff --git a/kiloclaw/src/fly/apps.test.ts b/kiloclaw/src/fly/apps.test.ts index eecd5d8dc..0f6fc4101 100644 --- a/kiloclaw/src/fly/apps.test.ts +++ b/kiloclaw/src/fly/apps.test.ts @@ -1,5 +1,12 @@ import { describe, it, expect, vi, afterEach } from 'vitest'; -import { appNameFromUserId, createApp, getApp, deleteApp, allocateIP } from './apps'; +import { + appNameFromUserId, + createApp, + getApp, + deleteApp, + allocateIP, + AppNameCollisionError, +} from './apps'; import { FlyApiError } from './client'; // ============================================================================ @@ -70,6 +77,8 @@ describe('appNameFromUserId', () => { const TOKEN = 'test-token'; const CONFIG = { apiToken: TOKEN }; +const USER_ID = 'user-123'; +const METADATA_KEY = 'kiloclaw_user_id'; function mockFetch(status: number, body: unknown): void { vi.stubGlobal( @@ -87,6 +96,21 @@ function mockFetchText(status: number, body: string): void { vi.stubGlobal('fetch', vi.fn().mockResolvedValue(new Response(body, { status }))); } +/** + * Mock fetch to return different responses on sequential calls. + * Each entry is [status, body] — body can be string or object (JSON-serialized). + */ +function mockFetchSequence(responses: Array<[number, unknown]>): void { + const fn = vi.fn(); + for (const [status, body] of responses) { + const responseBody = typeof body === 'string' ? body : JSON.stringify(body); + const headers: HeadersInit = + typeof body === 'string' ? {} : { 'Content-Type': 'application/json' }; + fn.mockResolvedValueOnce(new Response(responseBody, { status, headers })); + } + vi.stubGlobal('fetch', fn); +} + afterEach(() => { vi.restoreAllMocks(); }); @@ -95,7 +119,7 @@ describe('createApp', () => { it('returns FlyApp on 201', async () => { mockFetch(201, { id: 'app-123', created_at: 1234567890 }); - const result = await createApp(CONFIG, 'acct-test', 'test-org'); + const result = await createApp(CONFIG, 'acct-test', 'test-org', USER_ID, METADATA_KEY); expect(result).toEqual({ id: 'app-123', created_at: 1234567890 }); @@ -109,16 +133,135 @@ describe('createApp', () => { it('passes network param matching app name for isolation', async () => { mockFetch(201, { id: 'app-123', created_at: 0 }); - await createApp(CONFIG, 'acct-abc123', 'org'); + await createApp(CONFIG, 'acct-abc123', 'org', USER_ID, METADATA_KEY); const sentBody = JSON.parse((fetch as ReturnType).mock.calls[0][1].body); expect(sentBody.network).toBe('acct-abc123'); }); - it('treats 409 as success (app already exists)', async () => { - mockFetchText(409, 'app already exists'); + it('treats 409 as success when app has no machines (empty app)', async () => { + // First call: 409 from createApp. Second call: empty machines list. + mockFetchSequence([ + [409, 'app already exists'], + [200, []], + ]); + + const result = await createApp(CONFIG, 'acct-dup', 'org', USER_ID, METADATA_KEY); + + expect(result).toEqual({ id: 'acct-dup', created_at: 0 }); + }); + + it('treats 409 as success when machines belong to same user (retry)', async () => { + mockFetchSequence([ + [409, 'app already exists'], + [ + 200, + [ + { + id: 'machine-1', + name: 'test', + state: 'started', + region: 'iad', + instance_id: 'inst-1', + created_at: '', + updated_at: '', + config: { image: 'test', metadata: { [METADATA_KEY]: USER_ID } }, + }, + ], + ], + ]); + + const result = await createApp(CONFIG, 'acct-dup', 'org', USER_ID, METADATA_KEY); + + expect(result).toEqual({ id: 'acct-dup', created_at: 0 }); + }); + + it('throws AppNameCollisionError on 409 when machines belong to different user', async () => { + mockFetchSequence([ + [409, 'app already exists'], + [ + 200, + [ + { + id: 'machine-1', + name: 'test', + state: 'started', + region: 'iad', + instance_id: 'inst-1', + created_at: '', + updated_at: '', + config: { image: 'test', metadata: { [METADATA_KEY]: 'other-user-456' } }, + }, + ], + ], + ]); + + await expect(createApp(CONFIG, 'acct-collision', 'org', USER_ID, METADATA_KEY)).rejects.toThrow( + AppNameCollisionError + ); + + try { + // Reset mock for second call + mockFetchSequence([ + [409, 'app already exists'], + [ + 200, + [ + { + id: 'machine-1', + name: 'test', + state: 'started', + region: 'iad', + instance_id: 'inst-1', + created_at: '', + updated_at: '', + config: { image: 'test', metadata: { [METADATA_KEY]: 'other-user-456' } }, + }, + ], + ], + ]); + await createApp(CONFIG, 'acct-collision', 'org', USER_ID, METADATA_KEY); + expect.fail('should have thrown'); + } catch (err) { + expect(err).toBeInstanceOf(AppNameCollisionError); + expect((err as AppNameCollisionError).appName).toBe('acct-collision'); + expect((err as AppNameCollisionError).requestingUserId).toBe(USER_ID); + } + }); + + it('fails open on 409 when machine listing returns an error', async () => { + // If we can't list machines, treat as normal 409 (same behavior as before) + mockFetchSequence([ + [409, 'app already exists'], + [500, 'internal error'], + ]); + + const result = await createApp(CONFIG, 'acct-dup', 'org', USER_ID, METADATA_KEY); + + expect(result).toEqual({ id: 'acct-dup', created_at: 0 }); + }); - const result = await createApp(CONFIG, 'acct-dup', 'org'); + it('treats 409 as success when machines have no metadata (legacy machines)', async () => { + mockFetchSequence([ + [409, 'app already exists'], + [ + 200, + [ + { + id: 'machine-1', + name: 'test', + state: 'started', + region: 'iad', + instance_id: 'inst-1', + created_at: '', + updated_at: '', + config: { image: 'test' }, + }, + ], + ], + ]); + + const result = await createApp(CONFIG, 'acct-dup', 'org', USER_ID, METADATA_KEY); expect(result).toEqual({ id: 'acct-dup', created_at: 0 }); }); @@ -127,7 +270,7 @@ describe('createApp', () => { mockFetchText(422, 'app name taken'); try { - await createApp(CONFIG, 'acct-bad', 'org'); + await createApp(CONFIG, 'acct-bad', 'org', USER_ID, METADATA_KEY); expect.fail('should have thrown'); } catch (err) { expect(err).toBeInstanceOf(FlyApiError); diff --git a/kiloclaw/src/fly/apps.ts b/kiloclaw/src/fly/apps.ts index 6af3aca8d..9a4705e35 100644 --- a/kiloclaw/src/fly/apps.ts +++ b/kiloclaw/src/fly/apps.ts @@ -9,9 +9,30 @@ */ import { FlyApiError } from './client'; +import type { FlyMachine } from './types'; const FLY_API_BASE = 'https://api.machines.dev'; +/** + * Error thrown when a Fly app name collision is detected. + * Two different userIds produced the same truncated SHA-256 hash, + * resulting in the same Fly app name. This is a tenant isolation breach + * at the network layer — machines from different users would share + * a private network namespace. + */ +export class AppNameCollisionError extends Error { + constructor( + readonly appName: string, + readonly requestingUserId: string + ) { + super( + `Fly app name collision detected: app "${appName}" exists and belongs to a different user. ` + + `Requesting userId: "${requestingUserId}". This indicates a SHA-256 hash truncation collision.` + ); + this.name = 'AppNameCollisionError'; + } +} + // -- App name derivation -- /** @@ -74,22 +95,76 @@ async function assertOk(resp: Response, context: string): Promise { * * Each per-user app gets `network: appName` so machines in different * user apps cannot reach each other over Fly's internal `.internal` DNS. + * + * On 409 (app already exists), verifies ownership by listing machines + * and checking that any existing machines belong to the requesting userId. + * Throws AppNameCollisionError if the app belongs to a different user — + * this indicates a SHA-256 hash truncation collision (see PC-1 finding). + * + * @param userId - The userId requesting this app. Used to verify ownership on 409. + * @param userIdMetadataKey - The metadata key used to tag machines with userId. */ export async function createApp( config: FlyAppConfig, appName: string, - orgSlug: string + orgSlug: string, + userId: string, + userIdMetadataKey: string ): Promise { const resp = await apiFetch(config.apiToken, '/v1/apps', { method: 'POST', body: JSON.stringify({ app_name: appName, org_slug: orgSlug, network: appName }), }); - // 409 = app already exists (race with alarm retry or Fly API eventual consistency) - if (resp.status === 409) return { id: appName, created_at: 0 }; + + if (resp.status === 409) { + // App already exists — could be a retry (same user) or a hash collision (different user). + // Verify ownership by listing machines and checking their userId metadata. + await verifyAppOwnership(config.apiToken, appName, userId, userIdMetadataKey); + return { id: appName, created_at: 0 }; + } + await assertOk(resp, 'createApp'); return resp.json(); } +/** + * Verify that an existing Fly app belongs to the expected userId. + * + * Lists machines in the app and checks the userId metadata tag. + * If the app has machines belonging to a different user, throws + * AppNameCollisionError. If the app is empty (no machines yet) + * or all machines match the expected userId, returns normally. + */ +async function verifyAppOwnership( + apiToken: string, + appName: string, + expectedUserId: string, + userIdMetadataKey: string +): Promise { + const machinesResp = await apiFetch(apiToken, `/v1/apps/${encodeURIComponent(appName)}/machines`); + + // If we can't list machines (e.g. app in weird state), fail open — + // this is the same behavior as before the collision check was added. + if (!machinesResp.ok) return; + + const machines: FlyMachine[] = await machinesResp.json(); + + // No machines = app exists but is empty (e.g. previous retry created the app + // but crashed before creating a machine). Safe to proceed. + if (machines.length === 0) return; + + // Check if any machine belongs to a different user. + const foreignMachine = machines.find( + m => + m.config?.metadata?.[userIdMetadataKey] !== undefined && + m.config.metadata[userIdMetadataKey] !== expectedUserId + ); + + if (foreignMachine) { + throw new AppNameCollisionError(appName, expectedUserId); + } +} + /** * Check if a Fly App exists. * GET /v1/apps/{app_name} — returns the app or null if 404.