diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0ebb016749..45d7fadba0 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -150,6 +150,7 @@ Breaking changes in this release:
 - The `botframework-webchat` package now uses CSS modules for styling purposes, in PR [#5666](https://github.com/microsoft/BotFramework-WebChat/pull/5666), in PR [#5677](https://github.com/microsoft/BotFramework-WebChat/pull/5677) by [@OEvgeny](https://github.com/OEvgeny)
 - πŸ‘·πŸ» Added `npm run build-browser` script for building test harness package only, in PR [#5667](https://github.com/microsoft/BotFramework-WebChat/pull/5667), by [@compulim](https://github.com/compulim)
 - Added pull-based capabilities system for dynamically discovering adapter capabilities at runtime, in PR [#5679](https://github.com/microsoft/BotFramework-WebChat/pull/5679), by [@pranavjoshi001](https://github.com/pranavjoshi001)
+- Added Speech-to-Speech (S2S) support for real-time voice conversations, in PR [#5654](https://github.com/microsoft/BotFramework-WebChat/pull/5654), by [@pranavjoshi001](https://github.com/pranavjoshi001)

 ### Changed

diff --git a/__tests__/assets/esm/speechToSpeech/mockAudioPlayback.js b/__tests__/assets/esm/speechToSpeech/mockAudioPlayback.js
new file mode 100644
index 0000000000..f7bba9cbf5
--- /dev/null
+++ b/__tests__/assets/esm/speechToSpeech/mockAudioPlayback.js
@@ -0,0 +1,23 @@
+/* global AudioContext */
+
+/**
+ * Mocks AudioContext.createBuffer to return buffers with minimum duration.
+ *
+ */
+export function setupMockAudioPlayback() {
+  const originalCreateBuffer = AudioContext.prototype.createBuffer;
+
+  AudioContext.prototype.createBuffer = function (numberOfChannels, length, sampleRate) {
+    // Ensure minimum duration of 0.5 seconds for testing
+    const minSamples = Math.floor(sampleRate * 0.5);
+    const actualLength = Math.max(length, minSamples);
+
+    return originalCreateBuffer.call(this, numberOfChannels, actualLength, sampleRate);
+  };
+
+  return {
+    restore: () => {
+      AudioContext.prototype.createBuffer = originalCreateBuffer;
+    }
+  };
+}
diff --git a/__tests__/assets/esm/speechToSpeech/mockMediaDevices.js b/__tests__/assets/esm/speechToSpeech/mockMediaDevices.js
new file mode 100644
index 0000000000..fb69332445
--- /dev/null
+++ b/__tests__/assets/esm/speechToSpeech/mockMediaDevices.js
@@ -0,0 +1,87 @@
+/* global clearInterval, MessageChannel, navigator, setInterval, URL, window */
+
+/**
+ * Mocks browser audio APIs for speechToSpeech testing.
+ * + * - Intercepts AudioContext.audioWorklet.addModule() to prevent blob execution + * - Mocks AudioWorkletNode for the 'audio-recorder' processor + * - Mocks navigator.mediaDevices.getUserMedia() to return a test audio stream + */ +export function setupMockMediaDevices() { + const OriginalAudioContext = window.AudioContext; + + // Intercept AudioContext to mock audioWorklet.addModule + window.AudioContext = function (options) { + const ctx = new OriginalAudioContext(options); + + ctx.audioWorklet.addModule = url => { + if (url.startsWith('blob:')) { + URL.revokeObjectURL(url); + } + return Promise.resolve(); + }; + + return ctx; + }; + + Object.setPrototypeOf(window.AudioContext, OriginalAudioContext); + window.AudioContext.prototype = OriginalAudioContext.prototype; + + // Mock AudioWorkletNode - uses GainNode as base so source.connect() works + window.AudioWorkletNode = function (context, name, options) { + const node = context.createGain(); + const channel = new MessageChannel(); + let recording = false; + let intervalId = null; + + node.port = channel.port1; + + // port1 is exposed as worklet.port to the real code + // Real code sends to port1 β†’ received by port2.onmessage (commands) + // Mock sends from port2 β†’ received by port1.onmessage (audio chunks) + channel.port2.onmessage = ({ data }) => { + if (data.command === 'START') { + recording = true; + const bufferSize = options?.processorOptions?.bufferSize || 2400; + + // Send chunks at ~100ms intervals while recording + // Use port2.postMessage so port1.onmessage (set by real code) receives it + intervalId = setInterval(() => { + if (recording) { + channel.port2.postMessage({ eventType: 'audio', audioData: new Float32Array(bufferSize) }); + } + }, 100); + } else if (data.command === 'STOP') { + recording = false; + if (intervalId) { + clearInterval(intervalId); + intervalId = null; + } + } + }; + + return node; + }; + + // Mock getUserMedia with oscillator-based test stream + navigator.mediaDevices.getUserMedia = constraints => { + const sampleRate = constraints?.audio?.sampleRate || 24000; + const ctx = new OriginalAudioContext({ sampleRate }); + const oscillator = ctx.createOscillator(); + const destination = ctx.createMediaStreamDestination(); + + oscillator.connect(destination); + oscillator.start(); + + destination.stream.getTracks().forEach(track => { + const originalStop = track.stop.bind(track); + track.stop = () => { + oscillator.stop(); + ctx.close(); + originalStop(); + }; + }); + + return Promise.resolve(destination.stream); + }; +} diff --git a/__tests__/html2/speechToSpeech/barge.in.html b/__tests__/html2/speechToSpeech/barge.in.html new file mode 100644 index 0000000000..d12f20c51f --- /dev/null +++ b/__tests__/html2/speechToSpeech/barge.in.html @@ -0,0 +1,197 @@ + + + + + + + + + + + + + +
+ + + + + diff --git a/__tests__/html2/speechToSpeech/basic.sendbox.with.mic.html b/__tests__/html2/speechToSpeech/basic.sendbox.with.mic.html new file mode 100644 index 0000000000..56b2608a7f --- /dev/null +++ b/__tests__/html2/speechToSpeech/basic.sendbox.with.mic.html @@ -0,0 +1,80 @@ + + + + + + + + + + + + + +
+ + + diff --git a/__tests__/html2/speechToSpeech/basic.sendbox.with.mic.html.snap-1.png b/__tests__/html2/speechToSpeech/basic.sendbox.with.mic.html.snap-1.png new file mode 100644 index 0000000000..de8e85d3d3 Binary files /dev/null and b/__tests__/html2/speechToSpeech/basic.sendbox.with.mic.html.snap-1.png differ diff --git a/__tests__/html2/speechToSpeech/csp.recording.html b/__tests__/html2/speechToSpeech/csp.recording.html new file mode 100644 index 0000000000..eda24c8721 --- /dev/null +++ b/__tests__/html2/speechToSpeech/csp.recording.html @@ -0,0 +1,138 @@ + + + + + + + + + + + + + + + +
+ + + + diff --git a/__tests__/html2/speechToSpeech/dtmf.input.html b/__tests__/html2/speechToSpeech/dtmf.input.html new file mode 100644 index 0000000000..124a7605be --- /dev/null +++ b/__tests__/html2/speechToSpeech/dtmf.input.html @@ -0,0 +1,196 @@ + + + + + + + + + + + + + +
+ + + + + diff --git a/__tests__/html2/speechToSpeech/dtmf.input.html.snap-1.png b/__tests__/html2/speechToSpeech/dtmf.input.html.snap-1.png new file mode 100644 index 0000000000..c29b07065a Binary files /dev/null and b/__tests__/html2/speechToSpeech/dtmf.input.html.snap-1.png differ diff --git a/__tests__/html2/speechToSpeech/dtmf.input.html.snap-2.png b/__tests__/html2/speechToSpeech/dtmf.input.html.snap-2.png new file mode 100644 index 0000000000..be9f100b54 Binary files /dev/null and b/__tests__/html2/speechToSpeech/dtmf.input.html.snap-2.png differ diff --git a/__tests__/html2/speechToSpeech/happy.path.html b/__tests__/html2/speechToSpeech/happy.path.html new file mode 100644 index 0000000000..e326982a9c --- /dev/null +++ b/__tests__/html2/speechToSpeech/happy.path.html @@ -0,0 +1,175 @@ + + + + + + + + + + + + + +
+ + + + \ No newline at end of file diff --git a/__tests__/html2/speechToSpeech/happy.path.html.snap-1.png b/__tests__/html2/speechToSpeech/happy.path.html.snap-1.png new file mode 100644 index 0000000000..f9e16e326a Binary files /dev/null and b/__tests__/html2/speechToSpeech/happy.path.html.snap-1.png differ diff --git a/__tests__/html2/speechToSpeech/happy.path.html.snap-2.png b/__tests__/html2/speechToSpeech/happy.path.html.snap-2.png new file mode 100644 index 0000000000..d3a3724d22 Binary files /dev/null and b/__tests__/html2/speechToSpeech/happy.path.html.snap-2.png differ diff --git a/__tests__/html2/speechToSpeech/multiple.turns.html b/__tests__/html2/speechToSpeech/multiple.turns.html new file mode 100644 index 0000000000..7a5ccc5971 --- /dev/null +++ b/__tests__/html2/speechToSpeech/multiple.turns.html @@ -0,0 +1,331 @@ + + + + + + + + + + + + + +
+ + + + + diff --git a/__tests__/html2/speechToSpeech/multiple.turns.html.snap-1.png b/__tests__/html2/speechToSpeech/multiple.turns.html.snap-1.png new file mode 100644 index 0000000000..65531ebe9c Binary files /dev/null and b/__tests__/html2/speechToSpeech/multiple.turns.html.snap-1.png differ diff --git a/__tests__/html2/speechToSpeech/outgoing.audio.interval.html b/__tests__/html2/speechToSpeech/outgoing.audio.interval.html new file mode 100644 index 0000000000..d2167aba2a --- /dev/null +++ b/__tests__/html2/speechToSpeech/outgoing.audio.interval.html @@ -0,0 +1,140 @@ + + + + + + + + + + + + + +
+ + + + + diff --git a/packages/api/src/StyleOptions.ts b/packages/api/src/StyleOptions.ts index 875a02022f..109ae1e23a 100644 --- a/packages/api/src/StyleOptions.ts +++ b/packages/api/src/StyleOptions.ts @@ -998,6 +998,15 @@ type StyleOptions = { * New in 4.19.0. */ disableFileUpload?: boolean; + /** + * Controls microphone button visibility in Fluent theme send box. + * + * - `'auto'` - Show microphone button if the chat adapter supports voice (has voiceConfiguration capability) + * - `'hide'` - Do not show microphone button regardless of adapter capabilities + * + * @default 'auto' + */ + showMicrophoneButton?: 'auto' | 'hide'; }; // StrictStyleOptions is only used internally in Web Chat and for simplifying our code: diff --git a/packages/api/src/boot/hook.ts b/packages/api/src/boot/hook.ts index cd0cbff82b..dda4464d10 100644 --- a/packages/api/src/boot/hook.ts +++ b/packages/api/src/boot/hook.ts @@ -61,7 +61,9 @@ export { useSetNotification, useShouldSpeakIncomingActivity, useStartDictate, + useStartVoice, useStopDictate, + useStopVoice, useStyleOptions, useSubmitSendBox, useSuggestedActions, @@ -74,5 +76,6 @@ export { useUIState, useUserID, useUsername, - useVoiceSelector + useVoiceSelector, + useVoiceState } from '../hooks/index'; diff --git a/packages/api/src/boot/internal.ts b/packages/api/src/boot/internal.ts index 1bb222c21f..1deec63da8 100644 --- a/packages/api/src/boot/internal.ts +++ b/packages/api/src/boot/internal.ts @@ -1,4 +1,6 @@ export { default as LowPriorityDecoratorComposer } from '../decorator/internal/LowPriorityDecoratorComposer'; +export { default as usePostVoiceActivity } from '../hooks/internal/usePostVoiceActivity'; export { default as useSetDictateState } from '../hooks/internal/useSetDictateState'; +export { default as useShouldShowMicrophoneButton } from '../hooks/internal/useShouldShowMicrophoneButton'; export { LegacyActivityContextProvider, type LegacyActivityContextType } from '../legacy/LegacyActivityBridgeContext'; export { default as StyleOptionsComposer } from '../providers/StyleOptions/StyleOptionsComposer'; diff --git a/packages/api/src/decorator/ActivityBorder/ActivityBorderDecorator.tsx b/packages/api/src/decorator/ActivityBorder/ActivityBorderDecorator.tsx index e8a27830e5..15b100540c 100644 --- a/packages/api/src/decorator/ActivityBorder/ActivityBorderDecorator.tsx +++ b/packages/api/src/decorator/ActivityBorder/ActivityBorderDecorator.tsx @@ -1,4 +1,9 @@ -import { getActivityLivestreamingMetadata, type WebChatActivity } from 'botframework-webchat-core'; +import { + getActivityLivestreamingMetadata, + getVoiceActivityRole, + isVoiceActivity, + type WebChatActivity +} from 'botframework-webchat-core'; import React, { memo, useMemo, type ReactNode } from 'react'; import { @@ -25,7 +30,12 @@ function ActivityBorderDecorator({ activity, children }: ActivityBorderDecorator const { type } = getActivityLivestreamingMetadata(activity) || {}; return { - from: supportedActivityRoles.includes(activity?.from?.role) ? activity?.from?.role : undefined, + from: isVoiceActivity(activity) + ? getVoiceActivityRole(activity) + : supportedActivityRoles.includes(activity?.from?.role) + ? activity?.from?.role + : undefined, + modality: new Set(isVoiceActivity(activity) ? ['audio', 'text'] : ['text']), livestreamingState: type === 'final activity' ? 
'completing' diff --git a/packages/api/src/decorator/ActivityBorder/private/ActivityBorderDecoratorRequestContext.ts b/packages/api/src/decorator/ActivityBorder/private/ActivityBorderDecoratorRequestContext.ts index 0e7c379146..0c8e01453a 100644 --- a/packages/api/src/decorator/ActivityBorder/private/ActivityBorderDecoratorRequestContext.ts +++ b/packages/api/src/decorator/ActivityBorder/private/ActivityBorderDecoratorRequestContext.ts @@ -20,6 +20,14 @@ type ActivityBorderDecoratorRequest = Readonly<{ * - `undefined` - the sender is unknown */ from: 'bot' | 'channel' | `user` | undefined; + + /** + * Gets the modalities of the activity. + * + * - `'audio'` - the activity originated from audio/voice input + * - `'text'` - the activity originated from text input + */ + modality: Set<'audio' | 'text'>; }>; type ActivityBorderDecoratorRequestContextType = Readonly<{ @@ -30,6 +38,7 @@ const ActivityBorderDecoratorRequestContext = createContext(), livestreamingState: undefined }) }) diff --git a/packages/api/src/defaultStyleOptions.ts b/packages/api/src/defaultStyleOptions.ts index 841a5dd5bd..a77ae7d48b 100644 --- a/packages/api/src/defaultStyleOptions.ts +++ b/packages/api/src/defaultStyleOptions.ts @@ -317,7 +317,10 @@ const DEFAULT_OPTIONS: Required = { // Send box attachment bar sendBoxAttachmentBarMaxHeight: 114, - sendBoxAttachmentBarMaxThumbnail: 3 + sendBoxAttachmentBarMaxThumbnail: 3, + + // Speech-to-speech options + showMicrophoneButton: 'auto' }; export default DEFAULT_OPTIONS; diff --git a/packages/api/src/hooks/Composer.tsx b/packages/api/src/hooks/Composer.tsx index 046b24218d..3b1794d86b 100644 --- a/packages/api/src/hooks/Composer.tsx +++ b/packages/api/src/hooks/Composer.tsx @@ -30,8 +30,10 @@ import { singleToArray, startDictate, startSpeakingActivity, + startVoiceRecording, stopDictate, stopSpeakingActivity, + stopVoiceRecording, submitSendBox, type DirectLineJSBotConnection, type GlobalScopePonyfill, @@ -63,6 +65,7 @@ import ActivityTypingComposer from '../providers/ActivityTyping/ActivityTypingCo import CapabilitiesComposer from '../providers/Capabilities/CapabilitiesComposer'; import GroupActivitiesComposer from '../providers/GroupActivities/GroupActivitiesComposer'; import PonyfillComposer from '../providers/Ponyfill/PonyfillComposer'; +import { SpeechToSpeechComposer } from '../providers/SpeechToSpeech/SpeechToSpeechComposer'; import StyleOptionsComposer from '../providers/StyleOptions/StyleOptionsComposer'; import { type ActivityStatusMiddleware, type RenderActivityStatus } from '../types/ActivityStatusMiddleware'; import AttachmentForScreenReaderMiddleware from '../types/AttachmentForScreenReaderMiddleware'; @@ -81,6 +84,7 @@ import isObject from '../utils/isObject'; import mapMap from '../utils/mapMap'; import normalizeLanguage from '../utils/normalizeLanguage'; import Tracker from './internal/Tracker'; +import useVoiceHandlers from './internal/useVoiceHandlers'; import WebChatAPIContext, { type WebChatAPIContextType } from './internal/WebChatAPIContext'; import WebChatReduxContext, { useDispatch } from './internal/WebChatReduxContext'; import defaultSelectVoice from './internal/defaultSelectVoice'; @@ -297,6 +301,7 @@ const ComposerCore = ({ const [styleOptions] = useStyleOptions(); const dispatch = useDispatch(); const telemetryDimensionsRef = useRef({}); + const [voiceHandlers] = useVoiceHandlers(); const patchedDir = useMemo(() => (dir === 'ltr' || dir === 'rtl' ? 
dir : 'auto'), [dir]); const patchedGrammars = useMemo(() => grammars || [], [grammars]); @@ -367,6 +372,15 @@ const ComposerCore = ({ [dispatch] ); + const startVoice = useCallback(() => { + dispatch(startVoiceRecording()); + }, [dispatch]); + + const stopVoice = useCallback(() => { + voiceHandlers.forEach(handler => handler.stopAllAudio()); + dispatch(stopVoiceRecording()); + }, [dispatch, voiceHandlers]); + const patchedLocalizedStrings = useMemo( () => mergeStringsOverrides(getAllLocalizedStrings()[normalizeLanguage(locale)], locale, overrideLocalizedStrings), [locale, overrideLocalizedStrings] @@ -554,6 +568,8 @@ const ComposerCore = ({ scrollToEndButtonRenderer, selectVoice: patchedSelectVoice, sendTypingIndicator, + startVoice, + stopVoice, telemetryDimensionsRef, toastRenderer: patchedToastRenderer, trackDimension, @@ -583,6 +599,8 @@ const ComposerCore = ({ renderMarkdown, scrollToEndButtonRenderer, sendTypingIndicator, + startVoice, + stopVoice, telemetryDimensionsRef, trackDimension, uiState, @@ -601,7 +619,9 @@ const ComposerCore = ({ - {typeof children === 'function' ? children(context) : children} + + {typeof children === 'function' ? children(context) : children} + diff --git a/packages/api/src/hooks/index.ts b/packages/api/src/hooks/index.ts index da6e0151a4..f1fb41c3ce 100644 --- a/packages/api/src/hooks/index.ts +++ b/packages/api/src/hooks/index.ts @@ -59,7 +59,9 @@ import useSendTypingIndicator from './useSendTypingIndicator'; import useSetNotification from './useSetNotification'; import useShouldSpeakIncomingActivity from './useShouldSpeakIncomingActivity'; import useStartDictate from './useStartDictate'; +import useStartVoice from './useStartVoice'; import useStopDictate from './useStopDictate'; +import useStopVoice from './useStopVoice'; import useStyleOptions from './useStyleOptions'; import useSubmitSendBox from './useSubmitSendBox'; import useSuggestedActions from './useSuggestedActions'; @@ -72,6 +74,7 @@ import useUIState from './useUIState'; import useUserID from './useUserID'; import useUsername from './useUsername'; import useVoiceSelector from './useVoiceSelector'; +import useVoiceState from './useVoiceState'; export { useBuildRenderActivityCallback } from '@msinternal/botframework-webchat-api-middleware'; export { useSuggestedActionsHooks } from '@msinternal/botframework-webchat-redux-store'; @@ -138,7 +141,9 @@ export { useSetNotification, useShouldSpeakIncomingActivity, useStartDictate, + useStartVoice, useStopDictate, + useStopVoice, useStyleOptions, useSubmitSendBox, useSuggestedActions, @@ -150,5 +155,6 @@ export { useUIState, useUserID, useUsername, - useVoiceSelector + useVoiceSelector, + useVoiceState }; diff --git a/packages/api/src/hooks/internal/WebChatAPIContext.ts b/packages/api/src/hooks/internal/WebChatAPIContext.ts index 0981b4ad8c..a0bc434b51 100644 --- a/packages/api/src/hooks/internal/WebChatAPIContext.ts +++ b/packages/api/src/hooks/internal/WebChatAPIContext.ts @@ -71,8 +71,10 @@ export type WebChatAPIContextType = { setSendTimeout?: (timeout: number) => void; startDictate?: () => void; startSpeakingActivity?: () => void; + startVoice?: () => void; stopDictate?: () => void; stopSpeakingActivity?: () => void; + stopVoice?: () => void; submitSendBox?: (method?: string, { channelData }?: { channelData: any }) => void; telemetryDimensionsRef?: React.Ref; toastRenderer?: RenderToast; diff --git a/packages/api/src/hooks/internal/usePostVoiceActivity.ts b/packages/api/src/hooks/internal/usePostVoiceActivity.ts new file mode 100644 index 
0000000000..18f735cff5 --- /dev/null +++ b/packages/api/src/hooks/internal/usePostVoiceActivity.ts @@ -0,0 +1,18 @@ +import { postVoiceActivity, type WebChatActivity } from 'botframework-webchat-core'; +import { useCallback } from 'react'; +import { useDispatch } from './WebChatReduxContext'; + +/** + * Hook to post voice activities (fire-and-forget, no echo back). + * Use this for DTMF and other voice-related event activities. + */ +export default function usePostVoiceActivity(): (activity: WebChatActivity) => void { + const dispatch = useDispatch(); + + return useCallback( + (activity: WebChatActivity) => { + dispatch(postVoiceActivity(activity)); + }, + [dispatch] + ); +} diff --git a/packages/api/src/hooks/internal/useRegisterVoiceHandler.ts b/packages/api/src/hooks/internal/useRegisterVoiceHandler.ts new file mode 100644 index 0000000000..149bd97f2a --- /dev/null +++ b/packages/api/src/hooks/internal/useRegisterVoiceHandler.ts @@ -0,0 +1,23 @@ +import { registerVoiceHandler, unregisterVoiceHandler, type VoiceHandler } from 'botframework-webchat-core'; +import { useCallback } from 'react'; +import randomId from '../../utils/randomId'; +import { useDispatch } from './WebChatReduxContext'; + +/** + * Internal hook to register a voice handler for audio playback. + * @returns A function that registers a voice handler and returns an unregister function. + */ +export default function useRegisterVoiceHandler(): (voiceHandler: VoiceHandler) => () => void { + const dispatch = useDispatch(); + + return useCallback( + (voiceHandler: VoiceHandler) => { + const id = randomId(); + dispatch(registerVoiceHandler(id, voiceHandler)); + return () => { + dispatch(unregisterVoiceHandler(id)); + }; + }, + [dispatch] + ); +} diff --git a/packages/api/src/hooks/internal/useShouldShowMicrophoneButton.ts b/packages/api/src/hooks/internal/useShouldShowMicrophoneButton.ts new file mode 100644 index 0000000000..a428812227 --- /dev/null +++ b/packages/api/src/hooks/internal/useShouldShowMicrophoneButton.ts @@ -0,0 +1,22 @@ +import useStyleOptions from '../useStyleOptions'; +import useCapabilities from '../../providers/Capabilities/useCapabilities'; + +/** + * Internal hook to determine if the microphone button should be shown based on: + * - `showMicrophoneButton` style option ('auto' | 'hide') + * - Adapter voice capability (voiceConfiguration) + * + * - 'auto': Show if adapter has voiceConfiguration capability, hide otherwise + * - 'hide': Never show + */ +export default function useShouldShowMicrophoneButton(): boolean { + const [{ showMicrophoneButton }] = useStyleOptions(); + // If adapter has voice capability, voiceConfiguration will be defined, + const voiceConfiguration = useCapabilities(caps => caps.voiceConfiguration); + + if (showMicrophoneButton === 'hide') { + return false; + } + + return !!voiceConfiguration; +} diff --git a/packages/api/src/hooks/internal/useVoiceHandlers.ts b/packages/api/src/hooks/internal/useVoiceHandlers.ts new file mode 100644 index 0000000000..63eb4c3925 --- /dev/null +++ b/packages/api/src/hooks/internal/useVoiceHandlers.ts @@ -0,0 +1,11 @@ +import type { VoiceHandler } from 'botframework-webchat-core'; +import { useSelector } from './WebChatReduxContext'; + +/** + * Internal hook to get all registered voice handlers from Redux state. 
+ */ +export default function useVoiceHandlers(): readonly [ReadonlyMap] { + return Object.freeze([ + useSelector((state: { voice: { voiceHandlers: Map } }) => state.voice.voiceHandlers) + ]); +} diff --git a/packages/api/src/hooks/internal/useVoiceStateWritable.ts b/packages/api/src/hooks/internal/useVoiceStateWritable.ts new file mode 100644 index 0000000000..4d3a409edb --- /dev/null +++ b/packages/api/src/hooks/internal/useVoiceStateWritable.ts @@ -0,0 +1,18 @@ +import { setVoiceState, type VoiceState } from 'botframework-webchat-core'; +import { useCallback } from 'react'; +import { useDispatch, useSelector } from './WebChatReduxContext'; + +/** + * Internal hook to set the voice state. + */ +export default function useVoiceStateWritable(): readonly [VoiceState, (state: VoiceState) => void] { + const dispatch = useDispatch(); + const setter = useCallback( + (state: VoiceState) => { + dispatch(setVoiceState(state)); + }, + [dispatch] + ); + const value = useSelector(({ voice }) => voice.voiceState); + return Object.freeze([value, setter]); +} diff --git a/packages/api/src/hooks/useStartVoice.ts b/packages/api/src/hooks/useStartVoice.ts new file mode 100644 index 0000000000..952d2b9d38 --- /dev/null +++ b/packages/api/src/hooks/useStartVoice.ts @@ -0,0 +1,9 @@ +import useWebChatAPIContext from './internal/useWebChatAPIContext'; + +/** + * Hook to start voice mode (turns on microphone and enables audio synthesis). + * This starts speech-to-speech interaction. + */ +export default function useStartVoice(): () => void { + return useWebChatAPIContext().startVoice; +} diff --git a/packages/api/src/hooks/useStopVoice.ts b/packages/api/src/hooks/useStopVoice.ts new file mode 100644 index 0000000000..b7be0f5447 --- /dev/null +++ b/packages/api/src/hooks/useStopVoice.ts @@ -0,0 +1,9 @@ +import useWebChatAPIContext from './internal/useWebChatAPIContext'; + +/** + * Hook to stop voice mode (turns off microphone and stops audio playback). + * This ends speech-to-speech interaction. + */ +export default function useStopVoice(): () => void { + return useWebChatAPIContext().stopVoice; +} diff --git a/packages/api/src/hooks/useVoiceState.ts b/packages/api/src/hooks/useVoiceState.ts new file mode 100644 index 0000000000..a3898aade7 --- /dev/null +++ b/packages/api/src/hooks/useVoiceState.ts @@ -0,0 +1,15 @@ +import type { VoiceState } from 'botframework-webchat-core'; +import useVoiceStateWritable from './internal/useVoiceStateWritable'; + +/** + * Hook to get the voice state. + * The voice state represents the current state of the speech-to-speech interaction: + * - 'idle': No active speech session, microphone and audio playback are off + * - 'listening': Microphone is active + * - 'user_speaking': User is actively speaking + * - 'processing': User finished speaking, server is processing + * - 'bot_speaking': Bot is speaking (audio playback) + */ +export default function useVoiceState(): readonly [VoiceState] { + return Object.freeze([useVoiceStateWritable()[0]]); +} diff --git a/packages/api/src/localization/en-US.json b/packages/api/src/localization/en-US.json index 74bfece917..7e22ba4c18 100644 --- a/packages/api/src/localization/en-US.json +++ b/packages/api/src/localization/en-US.json @@ -6,6 +6,8 @@ "_ACCESS_KEY_FOR_MAC_ALT.comment": "These are keyboard modifier keys for screen reader on a Mac.", "ACTIVITY_BOT_SAID_ALT": "Bot $1 said:", "_ACTIVITY_BOT_SAID_ALT.comment": "This is for screen reader only. $1 is the initials for the bot (e.g. 
\"JD\").", + "ACTIVITY_STATUS_VOICE_TRANSCRIPT_AGENT_LABEL": "Agent", + "_ACTIVITY_STATUS_VOICE_TRANSCRIPT_AGENT_LABEL.comment": "Label shown before timestamp for incoming voice transcript messages from the agent.", "ACTIVITY_YOU_SAID_ALT": "You said:", "_ACTIVITY_YOU_SAID_ALT.comment": "This is for screen reader only.", "ACTIVITY_BOT_ATTACHED_ALT": "Bot attached:", @@ -131,6 +133,14 @@ "TEXT_INPUT_ALT": "Message input box", "_TEXT_INPUT_ALT.comment": "This is for screen reader for the label of the message input box.", "TEXT_INPUT_PLACEHOLDER": "Type your message", + "TEXT_INPUT_SPEECH_IDLE_PLACEHOLDER": "Start talking...", + "_TEXT_INPUT_SPEECH_IDLE_PLACEHOLDER.comment": "This is the placeholder text shown in the message input box when speech-to-speech is enabled and in idle state.", + "TEXT_INPUT_SPEECH_LISTENING_PLACEHOLDER": "Listening...", + "_TEXT_INPUT_SPEECH_LISTENING_PLACEHOLDER.comment": "This is the placeholder text shown in the message input box when speech-to-speech is enabled and actively listening to user speech.", + "TEXT_INPUT_SPEECH_PROCESSING_PLACEHOLDER": "Processing...", + "_TEXT_INPUT_SPEECH_PROCESSING_PLACEHOLDER.comment": "This is the placeholder text shown in the message input box when speech-to-speech is enabled and processing the user's speech input.", + "TEXT_INPUT_SPEECH_BOT_SPEAKING_PLACEHOLDER": "Talk to interrupt...", + "_TEXT_INPUT_SPEECH_BOT_SPEAKING_PLACEHOLDER.comment": "This is the placeholder text shown in the message input box when speech-to-speech is enabled and the bot is speaking.", "TEXT_INPUT_SEND_BUTTON_ALT": "Send", "TEXT_INPUT_SPEAK_BUTTON_ALT": "Speak", "TEXT_INPUT_UPLOAD_BUTTON_ALT": "Upload file", diff --git a/packages/api/src/providers/SpeechToSpeech/SpeechToSpeechComposer.tsx b/packages/api/src/providers/SpeechToSpeech/SpeechToSpeechComposer.tsx new file mode 100644 index 0000000000..da6ee79b7e --- /dev/null +++ b/packages/api/src/providers/SpeechToSpeech/SpeechToSpeechComposer.tsx @@ -0,0 +1,20 @@ +import React, { type ReactNode } from 'react'; +import { VoiceHandlerBridge } from './private/VoiceHandlerBridge'; +import { VoiceRecorderBridge } from './private/VoiceRecorderBridge'; + +/** + * SpeechToSpeechComposer sets up the speech-to-speech infrastructure. + * + * This component renders invisible bridge components that: + * 1. VoiceHandlerBridge - registers audio player functions with Redux + * 2. VoiceRecorderBridge - reacts to recording state and manages microphone + * + * Use the `useVoiceState`, `useStartVoice`, and `useStopVoice` hooks to access state and controls. 
+ */ +export const SpeechToSpeechComposer: React.FC<{ readonly children: ReactNode }> = ({ children }) => ( + + + + {children} + +); diff --git a/packages/api/src/providers/SpeechToSpeech/private/VoiceHandlerBridge.tsx b/packages/api/src/providers/SpeechToSpeech/private/VoiceHandlerBridge.tsx new file mode 100644 index 0000000000..de52f17999 --- /dev/null +++ b/packages/api/src/providers/SpeechToSpeech/private/VoiceHandlerBridge.tsx @@ -0,0 +1,23 @@ +import { useEffect } from 'react'; +import { useAudioPlayer } from './useAudioPlayer'; +import useRegisterVoiceHandler from '../../../hooks/internal/useRegisterVoiceHandler'; +import useShouldShowMicrophoneButton from '../../../hooks/internal/useShouldShowMicrophoneButton'; + +/** + * VoiceHandlerBridge is an invisible component that registers the audio player + * functions (queueAudio, stopAllAudio) with Redux + */ +export const VoiceHandlerBridge = () => { + const { queueAudio, stopAllAudio } = useAudioPlayer(); + const registerVoiceHandler = useRegisterVoiceHandler(); + const shouldShowMicrophoneButton = useShouldShowMicrophoneButton(); + + useEffect(() => { + if (!shouldShowMicrophoneButton) { + return; + } + return registerVoiceHandler({ queueAudio, stopAllAudio }); + }, [queueAudio, registerVoiceHandler, shouldShowMicrophoneButton, stopAllAudio]); + + return null; +}; diff --git a/packages/api/src/providers/SpeechToSpeech/private/VoiceRecorderBridge.tsx b/packages/api/src/providers/SpeechToSpeech/private/VoiceRecorderBridge.tsx new file mode 100644 index 0000000000..ea00dbe81d --- /dev/null +++ b/packages/api/src/providers/SpeechToSpeech/private/VoiceRecorderBridge.tsx @@ -0,0 +1,41 @@ +import { useEffect, useCallback } from 'react'; +import { useRecorder } from './useRecorder'; +import usePostVoiceActivity from '../../../hooks/internal/usePostVoiceActivity'; +import useVoiceState from '../../../hooks/useVoiceState'; + +/** + * VoiceRecorderBridge is an invisible component that bridges the Redux recording state + * with the actual microphone recording functionality. 
+ */ +export function VoiceRecorderBridge(): null { + const [voiceState] = useVoiceState(); + const postVoiceActivity = usePostVoiceActivity(); + + // Derive recording state from voiceState - recording is active when not idle + const recording = voiceState !== 'idle'; + + const handleAudioChunk = useCallback( + (base64: string, timestamp: string) => { + postVoiceActivity({ + name: 'media.chunk', + type: 'event', + value: { + contentType: 'audio/webm', + content: base64, + timestamp + } + } as any); + }, + [postVoiceActivity] + ); + + const { record } = useRecorder(handleAudioChunk); + + useEffect(() => { + if (recording) { + return record(); + } + }, [record, recording]); + + return null; +} diff --git a/packages/api/src/providers/SpeechToSpeech/private/useAudioPlayer.spec.tsx b/packages/api/src/providers/SpeechToSpeech/private/useAudioPlayer.spec.tsx new file mode 100644 index 0000000000..2497b0379a --- /dev/null +++ b/packages/api/src/providers/SpeechToSpeech/private/useAudioPlayer.spec.tsx @@ -0,0 +1,355 @@ +/** @jest-environment @happy-dom/jest-environment */ +/// +/// + +import { render, type RenderResult } from '@testing-library/react'; +import React, { type ComponentType } from 'react'; +import { useAudioPlayer } from './useAudioPlayer'; + +// Mock setVoiceState function +const mockSetVoiceState = jest.fn(); + +// Mock useVoiceStateWritable hook - returns [state, setVoiceState] array +jest.mock('../../../hooks/internal/useVoiceStateWritable', () => ({ + __esModule: true, + default: jest.fn(() => [undefined, mockSetVoiceState]) +})); + +jest.mock('../../Capabilities/useCapabilities', () => ({ + __esModule: true, + default: jest.fn((selector: (caps: { voiceConfiguration?: { sampleRate: number } }) => unknown) => + selector({ voiceConfiguration: { sampleRate: 24000 } }) + ) +})); + +// Mock AudioContext and related APIs +const mockAudioContext = { + close: jest.fn().mockResolvedValue(undefined), + createBuffer: jest.fn(), + createBufferSource: jest.fn(), + currentTime: 0, + destination: {}, + resume: jest.fn().mockResolvedValue(undefined), + sampleRate: 24000, + state: 'running' +}; + +const mockAudioBuffer = { + duration: 0.1, // 100ms + getChannelData: jest.fn().mockReturnValue(new Float32Array(2400)) +}; + +// Factory to create unique buffer source mocks +const createMockBufferSource = () => ({ + buffer: null as typeof mockAudioBuffer | null, + connect: jest.fn(), + disconnect: jest.fn(), + onended: null as (() => void) | null, + start: jest.fn(), + stop: jest.fn() +}); + +// Track all created buffer sources for assertions +let createdBufferSources: ReturnType[] = []; + +// Mock global AudioContext +global.AudioContext = jest.fn(() => mockAudioContext) as unknown as typeof AudioContext; +global.atob = jest.fn(str => str); // Simple mock for base64 decode + +type UseAudioPlayerReturn = ReturnType; + +describe('useAudioPlayer', () => { + let HookApp: ComponentType; + let hookData: UseAudioPlayerReturn | undefined; + // eslint-disable-next-line @typescript-eslint/no-unused-vars + let renderResult: RenderResult; + const originalAudioContext = global.AudioContext; + + beforeEach(() => { + jest.clearAllMocks(); + createdBufferSources = []; + mockAudioContext.currentTime = 0; + mockAudioContext.createBuffer.mockReturnValue(mockAudioBuffer); + mockAudioContext.createBufferSource.mockImplementation(() => { + const source = createMockBufferSource(); + createdBufferSources.push(source); + return source; + }); + + HookApp = () => { + hookData = useAudioPlayer(); + return null; + }; + }); + + 
afterEach(() => { + global.AudioContext = originalAudioContext; + }); + + describe('Initialization', () => { + test('should return queueAudio and stopAllAudio functions', () => { + render(); + + expect(typeof hookData?.queueAudio).toBe('function'); + expect(typeof hookData?.stopAllAudio).toBe('function'); + }); + + test('should create AudioContext on first queueAudio call', async () => { + render(); + + await hookData?.queueAudio('dGVzdA=='); // base64 for 'test' + + expect(AudioContext).toHaveBeenCalledWith({ sampleRate: 24000 }); + }); + + test('should reuse existing AudioContext on subsequent calls', async () => { + render(); + + await hookData?.queueAudio('dGVzdA=='); + await hookData?.queueAudio('dGVzdDI='); + + expect(AudioContext).toHaveBeenCalledTimes(1); + }); + }); + + describe('Audio playback', () => { + beforeEach(() => { + renderResult = render(); + }); + + test('should process base64 audio data correctly', async () => { + await hookData?.queueAudio('dGVzdA=='); + + expect(global.atob).toHaveBeenCalledWith('dGVzdA=='); + expect(mockAudioContext.createBuffer).toHaveBeenCalledWith(1, expect.any(Number), 24000); + expect(mockAudioContext.createBufferSource).toHaveBeenCalled(); + }); + + test('should set up audio buffer source correctly', async () => { + await hookData?.queueAudio('dGVzdA=='); + + const [source] = createdBufferSources; + expect(source.connect).toHaveBeenCalledWith(mockAudioContext.destination); + expect(source.start).toHaveBeenCalled(); + expect(source.buffer).toBe(mockAudioBuffer); + }); + + test('should resume AudioContext if needed', async () => { + await hookData?.queueAudio('dGVzdA=='); + + expect(mockAudioContext.resume).toHaveBeenCalled(); + }); + + test('should queue multiple audio chunks correctly', async () => { + mockAudioBuffer.duration = 0.1; // 100ms + + await hookData?.queueAudio('dGVzdA=='); + await hookData?.queueAudio('dGVzdDI='); + + expect(createdBufferSources).toHaveLength(2); + // First chunk starts at currentTime (0), second at 0.1 + expect(createdBufferSources[0].start).toHaveBeenCalledWith(0); + expect(createdBufferSources[1].start).toHaveBeenCalledWith(0.1); + }); + }); + + describe('Voice state management', () => { + beforeEach(() => { + renderResult = render(); + }); + + test('should set voice state to bot_speaking on first chunk', async () => { + mockAudioContext.currentTime = 0; + + await hookData?.queueAudio('dGVzdA=='); + + expect(mockSetVoiceState).toHaveBeenCalledWith('bot_speaking'); + }); + + test('should not set bot_speaking on subsequent chunks while playing', async () => { + mockAudioContext.currentTime = 0; + mockAudioBuffer.duration = 0.1; + + await hookData?.queueAudio('dGVzdA=='); // First chunk + mockSetVoiceState.mockClear(); + + await hookData?.queueAudio('dGVzdDI='); // Second chunk (while first is still playing) + + expect(mockSetVoiceState).not.toHaveBeenCalledWith('bot_speaking'); + }); + + test('should set voice state to listening when last audio ends', async () => { + await hookData?.queueAudio('dGVzdA=='); + mockSetVoiceState.mockClear(); + + // Simulate audio ended + const [source] = createdBufferSources; + if (source.onended) { + source.onended(); + } + + expect(mockSetVoiceState).toHaveBeenCalledWith('listening'); + }); + + test('should only trigger listening on the last source ended', async () => { + mockAudioBuffer.duration = 0.1; + + await hookData?.queueAudio('dGVzdA=='); + await hookData?.queueAudio('dGVzdDI='); + mockSetVoiceState.mockClear(); + + const [firstSource, lastSource] = createdBufferSources; 
+ + // Simulate first chunk ended (should not trigger listening) + if (firstSource.onended) { + firstSource.onended(); + } + + expect(mockSetVoiceState).not.toHaveBeenCalledWith('listening'); + + // Simulate last chunk ended (should trigger listening) + if (lastSource.onended) { + lastSource.onended(); + } + + expect(mockSetVoiceState).toHaveBeenCalledWith('listening'); + }); + }); + + describe('Audio cleanup', () => { + beforeEach(() => { + renderResult = render(); + }); + + test('should clean up buffer source on ended', async () => { + await hookData?.queueAudio('dGVzdA=='); + + const [source] = createdBufferSources; + // Simulate audio ended + if (source.onended) { + source.onended(); + } + + expect(source.disconnect).toHaveBeenCalled(); + expect(source.buffer).toBeNull(); + }); + + test('should stop all audio and close context', async () => { + await hookData?.queueAudio('dGVzdA=='); + + hookData?.stopAllAudio(); + + expect(mockAudioContext.close).toHaveBeenCalled(); + }); + + test('should clear lastSourceRef onended callback on stop', async () => { + await hookData?.queueAudio('dGVzdA=='); + const [source] = createdBufferSources; + const onEndedBefore = source.onended; + + expect(onEndedBefore).not.toBeNull(); + + hookData?.stopAllAudio(); + + // After stopAllAudio, the onended should be cleared + expect(source.onended).toBeNull(); + }); + }); + + describe('Error handling', () => { + beforeEach(() => { + renderResult = render(); + }); + + test('should handle invalid base64 data gracefully', async () => { + await expect(async () => { + await hookData?.queueAudio('invalid-base64!@#'); + }).not.toThrow(); + }); + + test('should handle AudioContext creation failure', async () => { + global.AudioContext = jest.fn(() => { + throw new Error('AudioContext not supported'); + }) as unknown as typeof AudioContext; + + await expect(async () => { + await hookData?.queueAudio('dGVzdA=='); + }).rejects.toThrow('AudioContext not supported'); + }); + }); + + describe('Real-world scenarios', () => { + beforeEach(() => { + renderResult = render(); + }); + + test('should handle streaming audio chunks', async () => { + mockAudioBuffer.duration = 0.05; // 50ms chunks + + // Simulate streaming 5 chunks + await Promise.all(Array.from({ length: 5 }, (_, i) => hookData?.queueAudio(`chunk${i}`))); + + expect(createdBufferSources).toHaveLength(5); + createdBufferSources.forEach(source => { + expect(source.start).toHaveBeenCalled(); + }); + // Should only call bot_speaking once (first chunk) + expect(mockSetVoiceState).toHaveBeenCalledWith('bot_speaking'); + expect(mockSetVoiceState).toHaveBeenCalledTimes(1); + }); + + test('should handle playback interruption', async () => { + await hookData?.queueAudio('dGVzdA=='); + + hookData?.stopAllAudio(); + + expect(mockAudioContext.close).toHaveBeenCalled(); + }); + + test('should handle resume after stop', async () => { + // Play, stop, then play again + await hookData?.queueAudio('dGVzdA=='); + hookData?.stopAllAudio(); + await hookData?.queueAudio('dGVzdDI='); + + expect(AudioContext).toHaveBeenCalledTimes(2); // New context after stop + }); + + test('should reset nextPlayTime after stop allowing immediate playback', async () => { + mockAudioBuffer.duration = 0.1; + + await hookData?.queueAudio('dGVzdA=='); + hookData?.stopAllAudio(); + mockSetVoiceState.mockClear(); + + await hookData?.queueAudio('dGVzdDI='); + + // Should trigger bot_speaking again since it's a fresh start + expect(mockSetVoiceState).toHaveBeenCalledWith('bot_speaking'); + }); + }); + + 
describe('Performance considerations', () => { + beforeEach(() => { + renderResult = render(); + }); + + test('should handle large audio data', async () => { + const largeBase64 = 'A'.repeat(10000); + + await expect(async () => { + await hookData?.queueAudio(largeBase64); + }).not.toThrow(); + }); + + test('should handle rapid successive calls', async () => { + // Ensure the mock "base64" data has an even length as Int16Array requires multiples of 2 bytes + await Promise.all(Array.from({ length: 100 }, (_, i) => hookData?.queueAudio(`chunk${i}`.padEnd(8, ' ')))); + + expect(createdBufferSources).toHaveLength(100); + createdBufferSources.forEach(source => { + expect(source.start).toHaveBeenCalled(); + }); + }); + }); +}); diff --git a/packages/api/src/providers/SpeechToSpeech/private/useAudioPlayer.ts b/packages/api/src/providers/SpeechToSpeech/private/useAudioPlayer.ts new file mode 100644 index 0000000000..478c447994 --- /dev/null +++ b/packages/api/src/providers/SpeechToSpeech/private/useAudioPlayer.ts @@ -0,0 +1,92 @@ +import { useRef, useCallback, useMemo } from 'react'; +import useCapabilities from '../../Capabilities/useCapabilities'; +import useVoiceStateWritable from '../../../hooks/internal/useVoiceStateWritable'; + +const DEFAULT_SAMPLE_RATE = 24000; +const INT16_SCALE = 32768; + +export function useAudioPlayer() { + const audioCtxRef = useRef(undefined); + const lastSourceRef = useRef(undefined); + const nextPlayTimeRef = useRef(0); + const voiceConfiguration = useCapabilities(caps => caps.voiceConfiguration); + const [, setVoiceState] = useVoiceStateWritable(); + + const sampleRate = voiceConfiguration?.sampleRate ?? DEFAULT_SAMPLE_RATE; + + const queueAudio = useCallback( + async (base64: string) => { + if (!audioCtxRef.current) { + audioCtxRef.current = new AudioContext({ sampleRate }); + } + const audioCtx = audioCtxRef.current; + await audioCtx.resume(); + + try { + const bytes = Uint8Array.from(atob(base64), c => c.charCodeAt(0)); + const int16Bytes = new Int16Array(bytes.buffer); + const float32Bytes = new Float32Array(int16Bytes.length); + + for (let i = 0; i < int16Bytes.length; i++) { + float32Bytes[+i] = int16Bytes.at(i) / INT16_SCALE; + } + + const buffer = audioCtx.createBuffer(1, float32Bytes.length, audioCtx.sampleRate); + buffer.getChannelData(0).set(float32Bytes); + + const src = audioCtx.createBufferSource(); + src.buffer = buffer; + src.connect(audioCtx.destination); + + // Clear previous source's onended to avoid stale callbacks + if (lastSourceRef.current) { + lastSourceRef.current.onended = null; + } + + src.onended = () => { + src.disconnect(); + src.buffer = null; + // Only the last source's onended should trigger state change to 'listening' + if (lastSourceRef.current === src) { + setVoiceState('listening'); + } + }; + + lastSourceRef.current = src; + const isFirstChunk = nextPlayTimeRef.current <= audioCtx.currentTime; + // Only dispatch bot_speaking on first chunk, we are resetting refs on stopAllAudio (bargein, mic off) + if (isFirstChunk) { + setVoiceState('bot_speaking'); + } + + nextPlayTimeRef.current = Math.max(nextPlayTimeRef.current, audioCtx.currentTime); + src.start(nextPlayTimeRef.current); + nextPlayTimeRef.current += buffer.duration; + } catch (error) { + console.warn('botframework-webchat: Error during audio playback in useAudioPlayer:', error); + } + }, + [setVoiceState, sampleRate] + ); + + const stopAllAudio = useCallback(() => { + nextPlayTimeRef.current = 0; + if (lastSourceRef.current) { + lastSourceRef.current.onended = null; + 
lastSourceRef.current = undefined; + } + if (audioCtxRef.current) { + audioCtxRef.current.close(); + audioCtxRef.current = undefined; + } + }, []); + + return useMemo( + () => + Object.freeze({ + queueAudio, + stopAllAudio + }), + [queueAudio, stopAllAudio] + ); +} diff --git a/packages/api/src/providers/SpeechToSpeech/private/useRecorder.spec.tsx b/packages/api/src/providers/SpeechToSpeech/private/useRecorder.spec.tsx new file mode 100644 index 0000000000..6bb47cfa14 --- /dev/null +++ b/packages/api/src/providers/SpeechToSpeech/private/useRecorder.spec.tsx @@ -0,0 +1,221 @@ +/** @jest-environment @happy-dom/jest-environment */ +/// + +import { act, render, waitFor, type RenderResult } from '@testing-library/react'; +import React, { type ComponentType } from 'react'; +import { useRecorder } from './useRecorder'; + +jest.mock('../../Ponyfill/usePonyfill', () => ({ __esModule: true, default: jest.fn(() => [{ Date: global.Date }]) })); +jest.mock('../../Capabilities/useCapabilities', () => ({ + __esModule: true, + default: jest.fn((selector: (caps: { voiceConfiguration?: { sampleRate: number } }) => unknown) => + selector({ voiceConfiguration: { sampleRate: 24000 } }) + ) +})); + +const mockTrack = { + stop: jest.fn() +}; + +const mockMediaStream = { + getTracks: jest.fn(() => [mockTrack]) +}; + +const mockMediaDevices = { + getUserMedia: jest.fn().mockResolvedValue(mockMediaStream) +}; + +const mockWorkletPort = { + onmessage: null as ((event: { data: unknown }) => void) | null, + postMessage: jest.fn() +}; + +const mockWorkletNode = { + connect: jest.fn(), + disconnect: jest.fn(), + port: mockWorkletPort +}; + +const mockAudioContext = { + audioWorklet: { + addModule: jest.fn().mockResolvedValue(undefined) + }, + createMediaStreamSource: jest.fn(() => ({ + connect: jest.fn() + })), + destination: {}, + resume: jest.fn().mockResolvedValue(undefined), + state: 'running' +}; + +// --- Global Mocks Setup --- + +Object.defineProperty(global.navigator, 'mediaDevices', { + value: mockMediaDevices, + writable: true +}); + +global.AudioContext = jest.fn(() => mockAudioContext) as unknown as typeof AudioContext; +global.AudioWorkletNode = jest.fn(() => mockWorkletNode) as unknown as typeof AudioWorkletNode; +global.Blob = jest.fn(parts => ({ parts, type: (parts as { type?: string }[])[1]?.type })) as unknown as typeof Blob; +global.URL.createObjectURL = jest.fn(() => 'blob:http://localhost/mock-url'); +global.URL.revokeObjectURL = jest.fn(); +global.btoa = jest.fn(str => `btoa(${str})`); + +// --- Tests --- + +describe('useRecorder', () => { + let onAudioChunk: jest.Mock; + let HookApp: ComponentType<{ onAudioChunk: (base64: string, timestamp: string) => void }>; + let hookData: ReturnType | undefined; + // eslint-disable-next-line @typescript-eslint/no-unused-vars + let renderResult: RenderResult; + + beforeEach(() => { + // Clear all mocks before each test + jest.clearAllMocks(); + onAudioChunk = jest.fn(); + hookData = undefined; + mockWorkletPort.onmessage = null; + (mockAudioContext.state as string) = 'running'; + + HookApp = ({ onAudioChunk: onChunk }) => { + hookData = useRecorder(onChunk); + return null; + }; + }); + + test('should return record function', () => { + render(); + expect(typeof hookData?.record).toBe('function'); + }); + + test('should start recording when record is called', async () => { + renderResult = render(); + + act(() => { + hookData?.record(); + }); + + await waitFor(() => { + expect(navigator.mediaDevices.getUserMedia).toHaveBeenCalledTimes(1); + }); + + 
expect(global.AudioContext).toHaveBeenCalledTimes(1); + expect(mockAudioContext.audioWorklet.addModule).toHaveBeenCalledTimes(1); + expect(global.AudioWorkletNode).toHaveBeenCalledWith(expect.anything(), 'audio-recorder', { + processorOptions: { bufferSize: 2400 } + }); + expect(mockWorkletNode.connect).toHaveBeenCalledTimes(1); + expect(mockWorkletPort.postMessage).toHaveBeenCalledWith({ command: 'START' }); + }); + + test('should stop recording when returned cleanup function is called', async () => { + renderResult = render(); + + let stopRecording: (() => void) | undefined; + // Start recording + act(() => { + stopRecording = hookData?.record(); + }); + + // Wait for async startRecording to complete + await waitFor(() => { + expect(mockWorkletPort.postMessage).toHaveBeenCalledWith({ command: 'START' }); + }); + + // Stop recording + act(() => { + stopRecording?.(); + }); + + expect(mockWorkletPort.postMessage).toHaveBeenCalledWith({ command: 'STOP' }); + expect(mockWorkletNode.disconnect).toHaveBeenCalledTimes(1); + expect(mockTrack.stop).toHaveBeenCalledTimes(1); + }); + + test('should process audio chunks sent from the worklet', async () => { + render(); + + act(() => { + hookData?.record(); + }); + + await waitFor(() => expect(mockWorkletPort.onmessage).not.toBeNull()); + + // Simulate a message from the audio worklet + const mockAudioData = new Float32Array([0.1, 0.2, -0.1]); + act(() => { + mockWorkletPort.onmessage!({ + data: { + eventType: 'audio', + audioData: mockAudioData + } + }); + }); + + await waitFor(() => expect(onAudioChunk).toHaveBeenCalledTimes(1)); + expect(global.btoa).toHaveBeenCalled(); + // Check that timestamp is passed as second argument + expect(onAudioChunk).toHaveBeenCalledWith(expect.any(String), expect.any(String)); + }); + + test('should handle suspended audio context by resuming it', async () => { + (mockAudioContext.state as string) = 'suspended'; + render(); + + act(() => { + hookData?.record(); + }); + + await waitFor(() => { + expect(mockAudioContext.resume).toHaveBeenCalledTimes(1); + }); + }); + + test('should reuse existing AudioContext on subsequent calls', async () => { + render(); + + let stopRecording: (() => void) | undefined; + act(() => { + stopRecording = hookData?.record(); + }); + + await waitFor(() => { + expect(mockWorkletPort.postMessage).toHaveBeenCalledWith({ command: 'START' }); + }); + + act(() => { + stopRecording?.(); + }); + + act(() => { + hookData?.record(); + }); + + await waitFor(() => { + expect(mockWorkletPort.postMessage).toHaveBeenCalledTimes(3); // START, STOP, START + }); + + // AudioContext should only be created once + expect(global.AudioContext).toHaveBeenCalledTimes(1); + }); + + test('should request microphone with correct audio constraints', async () => { + render(); + + act(() => { + hookData?.record(); + }); + + await waitFor(() => { + expect(navigator.mediaDevices.getUserMedia).toHaveBeenCalledWith({ + audio: { + channelCount: 1, + echoCancellation: true, + sampleRate: 24000 + } + }); + }); + }); +}); diff --git a/packages/api/src/providers/SpeechToSpeech/private/useRecorder.ts b/packages/api/src/providers/SpeechToSpeech/private/useRecorder.ts new file mode 100644 index 0000000000..05ed029003 --- /dev/null +++ b/packages/api/src/providers/SpeechToSpeech/private/useRecorder.ts @@ -0,0 +1,146 @@ +import { useRef, useCallback, useMemo } from 'react'; +import useCapabilities from '../../Capabilities/useCapabilities'; +import usePonyfill from '../../Ponyfill/usePonyfill'; + +// Minimum AudioWorkletProcessor 
definition for TypeScript recognition +// adding reference of worker does not work +declare class AudioWorkletProcessor { + buffer: number[]; + bufferSize: number; + constructor(options?: AudioWorkletNodeOptions); + process(inputs: Float32Array[][], outputs: Float32Array[][], parameters: Record): boolean; + readonly port: MessagePort; + recording: boolean; +} +declare function registerProcessor(name: string, processorCtor: typeof AudioWorkletProcessor): void; + +/** + * CSP Compliant: check __tests__/html2/speechToSpeech/csp.recording.html for CSP compliance tests. + * NOTE: This code is stringified and run in an AudioWorklet context, so it must be plain JavaScript + * without any TypeScript annotations that could be transformed by the compiler. + */ +const audioProcessorCode = `(${function () { + class AudioRecorderProcessor extends AudioWorkletProcessor { + constructor(options: AudioWorkletNodeOptions) { + super(); + this.buffer = []; + this.bufferSize = options.processorOptions.bufferSize; + this.recording = false; + + this.port.onmessage = e => { + if (e.data.command === 'START') { + this.recording = true; + } else if (e.data.command === 'STOP') { + this.recording = false; + this.buffer = []; + } + }; + } + + process(inputs: Float32Array[][]) { + if (inputs[0] && inputs[0].length && this.recording) { + this.buffer.push(...inputs[0][0]); + while (this.buffer.length >= this.bufferSize) { + const chunk = this.buffer.splice(0, this.bufferSize); + this.port.postMessage({ eventType: 'audio', audioData: new Float32Array(chunk) }); + } + } + return true; + } + } + + registerProcessor('audio-recorder', AudioRecorderProcessor); +}})()`; + +const INT16_MIN = -32768; +const INT16_MAX = 32767; +const INT16_SCALE = 32767; +const DEFAULT_SAMPLE_RATE = 24000; +const DEFAULT_CHUNK_SIZE_IN_MS = 100; +const MS_IN_SECOND = 1000; + +export function useRecorder(onAudioChunk: (base64: string, timestamp: string) => void) { + const [{ Date }] = usePonyfill(); + const audioCtxRef = useRef(undefined); + const streamRef = useRef(undefined); + const voiceConfiguration = useCapabilities(caps => caps.voiceConfiguration); + const workletRef = useRef(undefined); + + const chunkIntervalMs = voiceConfiguration?.chunkIntervalMs ?? DEFAULT_CHUNK_SIZE_IN_MS; + const sampleRate = voiceConfiguration?.sampleRate ?? DEFAULT_SAMPLE_RATE; + + const stopRecording = useCallback(() => { + if (workletRef.current) { + workletRef.current.port.postMessage({ command: 'STOP' }); + workletRef.current.disconnect(); + workletRef.current = undefined; + } + if (streamRef.current) { + streamRef.current.getTracks().forEach(track => track.stop()); + streamRef.current = undefined; + } + }, [streamRef, workletRef]); + + const initAudio = useCallback(async () => { + if (audioCtxRef.current) { + return; + } + const audioCtx = new AudioContext({ sampleRate }); + const blob = new Blob([audioProcessorCode], { + type: 'application/javascript' + }); + // eslint-disable-next-line no-restricted-properties + const url = URL.createObjectURL(blob); + await audioCtx.audioWorklet.addModule(url); + URL.revokeObjectURL(url); + // eslint-disable-next-line require-atomic-updates + audioCtxRef.current = audioCtx; + }, [audioCtxRef, sampleRate]); + + const startRecording = useCallback(async () => { + await initAudio(); + const audioCtx = audioCtxRef.current!; // audioCtx must be available after initAudio(). 
+ if (audioCtx.state === 'suspended') { + await audioCtx.resume(); + } + const stream = await navigator.mediaDevices.getUserMedia({ + audio: { + channelCount: 1, + echoCancellation: true, + sampleRate + } + }); + streamRef.current = stream; + const source = audioCtx.createMediaStreamSource(stream); + const worklet = new AudioWorkletNode(audioCtx, 'audio-recorder', { + processorOptions: { + bufferSize: (sampleRate * chunkIntervalMs) / MS_IN_SECOND + } + }); + + worklet.port.onmessage = e => { + if (e.data.eventType === 'audio') { + const timestamp = new Date().toISOString(); + const float32 = e.data.audioData; + const int16 = new Int16Array(float32.length); + for (let i = 0; i < float32.length; i++) { + int16[+i] = Math.max(INT16_MIN, Math.min(INT16_MAX, float32.at(i) * INT16_SCALE)); + } + const base64 = btoa(String.fromCharCode(...new Uint8Array(int16.buffer))); + onAudioChunk(base64, timestamp); + } + }; + + source.connect(worklet); + worklet.connect(audioCtx.destination); + worklet.port.postMessage({ command: 'START' }); + workletRef.current = worklet; + }, [audioCtxRef, chunkIntervalMs, Date, initAudio, onAudioChunk, sampleRate]); + + const record = useCallback(() => { + startRecording(); + return stopRecording; + }, [startRecording, stopRecording]); + + return useMemo(() => ({ record }), [record]); +} diff --git a/packages/bundle/src/boot/actual/hook/minimal.ts b/packages/bundle/src/boot/actual/hook/minimal.ts index 616f8e59ef..b09c196177 100644 --- a/packages/bundle/src/boot/actual/hook/minimal.ts +++ b/packages/bundle/src/boot/actual/hook/minimal.ts @@ -76,7 +76,9 @@ export { useShouldReduceMotion, useShouldSpeakIncomingActivity, useStartDictate, + useStartVoice, useStopDictate, + useStopVoice, useStyleOptions, useStyleSet, useSubmitSendBox, @@ -95,6 +97,7 @@ export { useUserID, useUsername, useVoiceSelector, + useVoiceState, useWebSpeechPonyfill, type SendBoxFocusOptions } from 'botframework-webchat-component/hook'; diff --git a/packages/bundle/src/boot/actual/internal.ts b/packages/bundle/src/boot/actual/internal.ts index b416e8208e..5949642426 100644 --- a/packages/bundle/src/boot/actual/internal.ts +++ b/packages/bundle/src/boot/actual/internal.ts @@ -1,5 +1,10 @@ // We should review exports in this file to make sure 1P = 3P. 
-export { type ActivityMiddleware, type TypingIndicatorMiddleware } from 'botframework-webchat-api'; +export { + type ActivityMiddleware, + type ActivityStatusMiddleware, + type TypingIndicatorMiddleware +} from 'botframework-webchat-api'; +export { usePostVoiceActivity, useShouldShowMicrophoneButton } from 'botframework-webchat-api/internal'; export { CodeHighlighterComposer, createIconComponent, @@ -13,4 +18,11 @@ export { useLiveRegion, type HighlightCodeFn } from 'botframework-webchat-component/internal'; -export { getOrgSchemaMessage, type DirectLineCardAction, type WebChatActivity } from 'botframework-webchat-core'; +export { + getOrgSchemaMessage, + getVoiceActivityRole, + getVoiceActivityText, + isVoiceTranscriptActivity, + type DirectLineCardAction, + type WebChatActivity +} from 'botframework-webchat-core'; diff --git a/packages/component/src/Activity/StackedLayout.tsx b/packages/component/src/Activity/StackedLayout.tsx index f387eb41ad..9e173e36fb 100644 --- a/packages/component/src/Activity/StackedLayout.tsx +++ b/packages/component/src/Activity/StackedLayout.tsx @@ -3,7 +3,14 @@ import { hooks } from 'botframework-webchat-api'; import type { RenderAttachment } from 'botframework-webchat-api'; import { ActivityBorderDecorator } from 'botframework-webchat-api/decorator'; -import { getActivityLivestreamingMetadata, getOrgSchemaMessage, type WebChatActivity } from 'botframework-webchat-core'; +import { + getActivityLivestreamingMetadata, + getOrgSchemaMessage, + getVoiceActivityRole, + getVoiceActivityText, + isVoiceActivity, + type WebChatActivity +} from 'botframework-webchat-core'; import { useStyles } from '@msinternal/botframework-webchat-styles/react'; import cx from 'classnames'; import React, { memo, useCallback, useMemo, type ReactNode } from 'react'; @@ -124,7 +131,7 @@ const StackedLayout = ({ const isMessageOrTyping = activity.type === 'message' || activity.type === 'typing'; const attachments = useMemo(() => (isMessageOrTyping && activity.attachments) || [], [activity, isMessageOrTyping]); - const fromUser = activity.from.role === 'user'; + const fromUser = activity.from.role === 'user' || getVoiceActivityRole(activity) === 'user'; const messageBackDisplayText: string = (isMessageOrTyping && activity.channelData?.messageBack?.displayText) || ''; const messageThing = useMemo(() => getOrgSchemaMessage(activity.entities), [activity]); const isCollapsible = useMemo(() => messageThing?.keywords?.includes('Collapsible'), [messageThing]); @@ -134,7 +141,9 @@ const StackedLayout = ({ ? messageBackDisplayText || activity.text : isLivestreaming && 'text' in activity ? (activity.text as string) - : ''; + : isVoiceActivity(activity) + ? getVoiceActivityText(activity) + : ''; const initials = fromUser ? userInitials : botInitials; const nubOffset = fromUser ? 
bubbleFromUserNubOffset : bubbleNubOffset; diff --git a/packages/component/src/Middleware/Activity/createCoreMiddleware.tsx b/packages/component/src/Middleware/Activity/createCoreMiddleware.tsx index a9479b8188..e6b6434cd1 100644 --- a/packages/component/src/Middleware/Activity/createCoreMiddleware.tsx +++ b/packages/component/src/Middleware/Activity/createCoreMiddleware.tsx @@ -1,43 +1,59 @@ /* eslint complexity: ["error", 21] */ import { ActivityMiddleware } from 'botframework-webchat-api'; -import { getActivityLivestreamingMetadata, getOrgSchemaMessage } from 'botframework-webchat-core'; +import { + getActivityLivestreamingMetadata, + getOrgSchemaMessage, + isVoiceTranscriptActivity +} from 'botframework-webchat-core'; import React from 'react'; import CarouselLayout from '../../Activity/CarouselLayout'; import StackedLayout from '../../Activity/StackedLayout'; +// TODO: [P4] Can we simplify these if-statement to something more readable? +function shouldFilterActivity(activity, messageThing) { + const { type } = activity; + if ( + type === 'conversationUpdate' || + (type === 'event' && !isVoiceTranscriptActivity(activity)) || + type === 'invoke' || + // Do not show content for contentless livestream interims, or finalized activity without content. + (type === 'typing' && + (getActivityLivestreamingMetadata(activity)?.type === 'contentless' || + !(activity['text'] || activity.attachments?.length > 0 || messageThing?.abstract))) || + (type === 'message' && + // Do not show postback + (activity.channelData?.postBack || + // Do not show messageBack if displayText is undefined + (activity.channelData?.messageBack && !activity.channelData.messageBack.displayText) || + // Do not show empty bubbles (no text and attachments) + !(activity.text || activity.attachments?.length || messageThing?.abstract))) + ) { + return true; + } + + return false; +} + export default function createCoreMiddleware(): ActivityMiddleware[] { return [ () => next => (...args) => { const [{ activity }] = args; + const isMessageOrTyping = activity.type === 'message' || activity.type === 'typing'; - // TODO: [P4] Can we simplify these if-statement to something more readable? - - const { type } = activity; const messageThing = getOrgSchemaMessage(activity.entities); // Filter out activities that should not visible. - if ( - type === 'conversationUpdate' || - type === 'event' || - type === 'invoke' || - // Do not show content for contentless livestream interims, or finalized activity without content. 
- (type === 'typing' && - (getActivityLivestreamingMetadata(activity)?.type === 'contentless' || - !(activity['text'] || activity.attachments?.length > 0 || messageThing?.abstract))) || - (type === 'message' && - // Do not show postback - (activity.channelData?.postBack || - // Do not show messageBack if displayText is undefined - (activity.channelData?.messageBack && !activity.channelData.messageBack.displayText) || - // Do not show empty bubbles (no text and attachments) - !(activity.text || activity.attachments?.length || messageThing?.abstract))) - ) { + if (shouldFilterActivity(activity, messageThing)) { return false; - } else if (type === 'message' || type === 'typing') { - if ((activity.attachments?.length || 0) > 1 && activity.attachmentLayout === 'carousel') { + } else if (isMessageOrTyping || isVoiceTranscriptActivity(activity)) { + if ( + isMessageOrTyping && + (activity.attachments?.length || 0) > 1 && + activity.attachmentLayout === 'carousel' + ) { // The following line is not a React functional component, it's a render function called by useCreateActivityRenderer() hook. // The function signature need to be compatible with older version of activity middleware, which was: // diff --git a/packages/component/src/TextArea/TextArea.tsx b/packages/component/src/TextArea/TextArea.tsx index 15ab98a91c..8ea830fb0b 100644 --- a/packages/component/src/TextArea/TextArea.tsx +++ b/packages/component/src/TextArea/TextArea.tsx @@ -37,6 +37,7 @@ const TextArea = forwardRef< onClick?: MouseEventHandler | undefined; onInput?: FormEventHandler | undefined; placeholder?: string | undefined; + readOnly?: boolean | undefined; startRows?: number | undefined; value?: string | undefined; }> @@ -45,7 +46,7 @@ const TextArea = forwardRef< const classNames = useStyles(styles); const isInCompositionRef = useRef(false); - const disabled = uiState === 'disabled'; + const disabled = uiState === 'disabled' || props.readOnly; const handleCompositionEnd = useCallback(() => { isInCompositionRef.current = false; diff --git a/packages/component/src/boot/hook.ts b/packages/component/src/boot/hook.ts index 91976e66fb..426948db69 100644 --- a/packages/component/src/boot/hook.ts +++ b/packages/component/src/boot/hook.ts @@ -59,7 +59,9 @@ export { useSetNotification, useShouldSpeakIncomingActivity, useStartDictate, + useStartVoice, useStopDictate, + useStopVoice, useStyleOptions, useSubmitSendBox, useSuggestedActions, @@ -72,7 +74,8 @@ export { useUIState, useUserID, useUsername, - useVoiceSelector + useVoiceSelector, + useVoiceState } from 'botframework-webchat-api/hook'; // #region Overrides diff --git a/packages/component/src/decorator/private/WebChatDecorator.tsx b/packages/component/src/decorator/private/WebChatDecorator.tsx index 33d76363ee..edce239b69 100644 --- a/packages/component/src/decorator/private/WebChatDecorator.tsx +++ b/packages/component/src/decorator/private/WebChatDecorator.tsx @@ -13,6 +13,16 @@ import BorderFlair from './BorderFlair'; import BorderLoader from './BorderLoader'; const middleware: readonly DecoratorMiddleware[] = Object.freeze([ + createActivityBorderMiddleware(function FluentBorderFlair({ request, Next, ...props }) { + if (request.modality.has('audio') && request.from === 'bot') { + return ( + + + + ); + } + return ; + }), createActivityBorderMiddleware(function BorderFlairDecorator({ request, Next, ...props }) { return ( diff --git a/packages/core/src/actions/postVoiceActivity.ts b/packages/core/src/actions/postVoiceActivity.ts new file mode 100644 index 0000000000..8ab7087012 
--- /dev/null +++ b/packages/core/src/actions/postVoiceActivity.ts @@ -0,0 +1,21 @@ +import type { WebChatActivity } from '../types/WebChatActivity'; + +const VOICE_POST_ACTIVITY = 'WEB_CHAT/VOICE_POST_ACTIVITY' as const; + +type VoicePostActivityAction = { + type: typeof VOICE_POST_ACTIVITY; + payload: { activity: WebChatActivity }; +}; + +function postVoiceActivity(activity: WebChatActivity): VoicePostActivityAction { + return { + type: VOICE_POST_ACTIVITY, + payload: { activity } + }; +} + +export default postVoiceActivity; + +export { VOICE_POST_ACTIVITY }; + +export type { VoicePostActivityAction }; diff --git a/packages/core/src/actions/registerVoiceHandler.ts b/packages/core/src/actions/registerVoiceHandler.ts new file mode 100644 index 0000000000..3c0029835a --- /dev/null +++ b/packages/core/src/actions/registerVoiceHandler.ts @@ -0,0 +1,24 @@ +const VOICE_REGISTER_HANDLER = 'WEB_CHAT/VOICE_REGISTER_HANDLER' as const; + +type VoiceHandler = { + queueAudio: (base64: string) => void; + stopAllAudio: () => void; +}; + +type VoiceRegisterHandlerAction = { + type: typeof VOICE_REGISTER_HANDLER; + payload: { id: string; voiceHandler: VoiceHandler }; +}; + +function registerVoiceHandler(id: string, voiceHandler: VoiceHandler): VoiceRegisterHandlerAction { + return { + type: VOICE_REGISTER_HANDLER, + payload: { id, voiceHandler } + }; +} + +export default registerVoiceHandler; + +export { VOICE_REGISTER_HANDLER }; + +export type { VoiceHandler, VoiceRegisterHandlerAction }; diff --git a/packages/core/src/actions/setVoiceState.ts b/packages/core/src/actions/setVoiceState.ts new file mode 100644 index 0000000000..53fc12b7c2 --- /dev/null +++ b/packages/core/src/actions/setVoiceState.ts @@ -0,0 +1,21 @@ +const VOICE_SET_STATE = 'WEB_CHAT/VOICE_SET_STATE' as const; + +type VoiceState = 'idle' | 'listening' | 'user_speaking' | 'processing' | 'bot_speaking'; + +type VoiceSetStateAction = { + type: typeof VOICE_SET_STATE; + payload: { voiceState: VoiceState }; +}; + +function setVoiceState(voiceState: VoiceState): VoiceSetStateAction { + return { + type: VOICE_SET_STATE, + payload: { voiceState } + }; +} + +export default setVoiceState; + +export { VOICE_SET_STATE }; + +export type { VoiceState, VoiceSetStateAction }; diff --git a/packages/core/src/actions/startVoiceRecording.ts b/packages/core/src/actions/startVoiceRecording.ts new file mode 100644 index 0000000000..cbf050b392 --- /dev/null +++ b/packages/core/src/actions/startVoiceRecording.ts @@ -0,0 +1,17 @@ +const VOICE_START_RECORDING = 'WEB_CHAT/VOICE_START_RECORDING' as const; + +type VoiceStartRecordingAction = { + type: typeof VOICE_START_RECORDING; +}; + +function startVoiceRecording(): VoiceStartRecordingAction { + return { + type: VOICE_START_RECORDING + }; +} + +export default startVoiceRecording; + +export { VOICE_START_RECORDING }; + +export type { VoiceStartRecordingAction }; diff --git a/packages/core/src/actions/stopVoiceRecording.ts b/packages/core/src/actions/stopVoiceRecording.ts new file mode 100644 index 0000000000..f547fe9b81 --- /dev/null +++ b/packages/core/src/actions/stopVoiceRecording.ts @@ -0,0 +1,17 @@ +const VOICE_STOP_RECORDING = 'WEB_CHAT/VOICE_STOP_RECORDING' as const; + +type VoiceStopRecordingAction = { + type: typeof VOICE_STOP_RECORDING; +}; + +function stopVoiceRecording(): VoiceStopRecordingAction { + return { + type: VOICE_STOP_RECORDING + }; +} + +export default stopVoiceRecording; + +export { VOICE_STOP_RECORDING }; + +export type { VoiceStopRecordingAction }; diff --git 
a/packages/core/src/actions/unregisterVoiceHandler.ts b/packages/core/src/actions/unregisterVoiceHandler.ts new file mode 100644 index 0000000000..59fedcc27a --- /dev/null +++ b/packages/core/src/actions/unregisterVoiceHandler.ts @@ -0,0 +1,19 @@ +const VOICE_UNREGISTER_HANDLER = 'WEB_CHAT/VOICE_UNREGISTER_HANDLER' as const; + +type VoiceUnregisterHandlerAction = { + type: typeof VOICE_UNREGISTER_HANDLER; + payload: { id: string }; +}; + +function unregisterVoiceHandler(id: string): VoiceUnregisterHandlerAction { + return { + type: VOICE_UNREGISTER_HANDLER, + payload: { id } + }; +} + +export default unregisterVoiceHandler; + +export { VOICE_UNREGISTER_HANDLER }; + +export type { VoiceUnregisterHandlerAction }; diff --git a/packages/core/src/createReducer.ts b/packages/core/src/createReducer.ts index 1e10dfc220..e20f908304 100644 --- a/packages/core/src/createReducer.ts +++ b/packages/core/src/createReducer.ts @@ -17,6 +17,7 @@ import sendTypingIndicator from './reducers/sendTypingIndicator'; import shouldSpeakIncomingActivity from './reducers/shouldSpeakIncomingActivity'; import suggestedActions from './reducers/suggestedActions'; import suggestedActionsOriginActivity from './reducers/suggestedActionsOriginActivity'; +import voiceActivity from './reducers/voiceActivity'; import type { GlobalScopePonyfill } from './types/GlobalScopePonyfill'; import type { RestrictedStoreDebugAPI } from './types/StoreDebugAPI'; @@ -41,7 +42,8 @@ export default function createReducer(ponyfill: GlobalScopePonyfill, restrictedS shouldSpeakIncomingActivity, suggestedActions, suggestedActionsOriginActivity, - typing: createTypingReducer(ponyfill) + typing: createTypingReducer(ponyfill), + voice: voiceActivity }) ); } diff --git a/packages/core/src/createSagas.ts b/packages/core/src/createSagas.ts index 916b62598d..1fe2cac6e3 100644 --- a/packages/core/src/createSagas.ts +++ b/packages/core/src/createSagas.ts @@ -24,6 +24,7 @@ import startSpeakActivityOnPostActivitySaga from './sagas/startSpeakActivityOnPo import stopDictateOnCardActionSaga from './sagas/stopDictateOnCardActionSaga'; import stopSpeakingActivityOnInputSaga from './sagas/stopSpeakingActivityOnInputSaga'; import submitSendBoxSaga from './sagas/submitSendBoxSaga'; +import postVoiceActivitySaga from './sagas/postVoiceActivitySaga'; import { type GlobalScopePonyfill } from './types/GlobalScopePonyfill'; type CreateSagasOptions = { @@ -44,6 +45,7 @@ export default function createSagas({ ponyfill }: CreateSagasOptions): Saga { yield fork(markAllAsSpokenOnStopSpeakActivitySaga); yield fork(observeActivitySaga); yield fork(postActivitySaga, ponyfill); + yield fork(postVoiceActivitySaga, ponyfill); yield fork(queueIncomingActivitySaga, ponyfill); yield fork(sendEventToPostActivitySaga); yield fork(sendFilesToPostActivitySaga); diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index e2534aa94d..27fde76d63 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -40,6 +40,18 @@ import getActivityLivestreamingMetadata from './utils/getActivityLivestreamingMe import getOrgSchemaMessage from './utils/getOrgSchemaMessage'; import onErrorResumeNext from './utils/onErrorResumeNext'; import singleToArray from './utils/singleToArray'; +import isVoiceActivity from './utils/voiceActivity/isVoiceActivity'; +import isVoiceTranscriptActivity from './utils/voiceActivity/isVoiceTranscriptActivity'; +import getVoiceActivityRole from './utils/voiceActivity/getVoiceActivityRole'; +import getVoiceActivityText from 
'./utils/voiceActivity/getVoiceActivityText'; +import startVoiceRecording from './actions/startVoiceRecording'; +import stopVoiceRecording from './actions/stopVoiceRecording'; +import setVoiceState from './actions/setVoiceState'; +import registerVoiceHandler from './actions/registerVoiceHandler'; +import unregisterVoiceHandler from './actions/unregisterVoiceHandler'; +import postVoiceActivity from './actions/postVoiceActivity'; +import type { VoiceState } from './actions/setVoiceState'; +import type { VoiceHandler } from './actions/registerVoiceHandler'; export { isForbiddenPropertyName, @@ -99,6 +111,10 @@ export { emitTypingIndicator, getActivityLivestreamingMetadata, getOrgSchemaMessage, + getVoiceActivityRole, + getVoiceActivityText, + isVoiceActivity, + isVoiceTranscriptActivity, markActivity, onErrorResumeNext, parseAction, @@ -109,6 +125,9 @@ export { parseThing, parseVoteAction, postActivity, + postVoiceActivity, + registerVoiceHandler, + unregisterVoiceHandler, sendEvent, sendFiles, sendMessage, @@ -122,11 +141,14 @@ export { setSendBoxAttachments, setSendTimeout, setSendTypingIndicator, + setVoiceState, singleToArray, startDictate, startSpeakingActivity, + startVoiceRecording, stopDictate, stopSpeakingActivity, + stopVoiceRecording, submitSendBox }; @@ -155,6 +177,8 @@ export type { OrgSchemaThing, OrgSchemaUserReview, SendBoxAttachment, + VoiceHandler, + VoiceState, WebChatActivity }; diff --git a/packages/core/src/reducers/voiceActivity.ts b/packages/core/src/reducers/voiceActivity.ts new file mode 100644 index 0000000000..d7f6953e49 --- /dev/null +++ b/packages/core/src/reducers/voiceActivity.ts @@ -0,0 +1,80 @@ +import { VOICE_REGISTER_HANDLER } from '../actions/registerVoiceHandler'; +import { VOICE_SET_STATE } from '../actions/setVoiceState'; +import { VOICE_START_RECORDING } from '../actions/startVoiceRecording'; +import { VOICE_STOP_RECORDING } from '../actions/stopVoiceRecording'; +import { VOICE_UNREGISTER_HANDLER } from '../actions/unregisterVoiceHandler'; + +import type { VoiceHandler, VoiceRegisterHandlerAction } from '../actions/registerVoiceHandler'; +import type { VoiceSetStateAction, VoiceState } from '../actions/setVoiceState'; +import type { VoiceStartRecordingAction } from '../actions/startVoiceRecording'; +import type { VoiceStopRecordingAction } from '../actions/stopVoiceRecording'; +import type { VoiceUnregisterHandlerAction } from '../actions/unregisterVoiceHandler'; + +type VoiceActivityActions = + | VoiceRegisterHandlerAction + | VoiceSetStateAction + | VoiceStartRecordingAction + | VoiceStopRecordingAction + | VoiceUnregisterHandlerAction; + +interface VoiceActivityState { + voiceState: VoiceState; + voiceHandlers: Map; +} + +const DEFAULT_STATE: VoiceActivityState = { + voiceState: 'idle', + voiceHandlers: new Map() +}; + +export default function voiceActivity( + state: VoiceActivityState = DEFAULT_STATE, + action: VoiceActivityActions +): VoiceActivityState { + switch (action.type) { + case VOICE_REGISTER_HANDLER: { + const newHandlers = new Map(state.voiceHandlers); + newHandlers.set(action.payload.id, action.payload.voiceHandler); + return { + ...state, + voiceHandlers: newHandlers + }; + } + + case VOICE_UNREGISTER_HANDLER: { + const newHandlers = new Map(state.voiceHandlers); + newHandlers.delete(action.payload.id); + return { + ...state, + voiceHandlers: newHandlers + }; + } + + case VOICE_SET_STATE: + return { + ...state, + voiceState: action.payload.voiceState + }; + + case VOICE_START_RECORDING: + if (state.voiceState !== 'idle') { + 
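// This transition is unexpected but non-fatal; warn and still move to 'listening' so recording is not blocked. +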
console.warn(`botframework-webchat: Should not transition from "${state.voiceState}" to "listening"`); + } + + return { + ...state, + voiceState: 'listening' + }; + + case VOICE_STOP_RECORDING: + return { + ...state, + voiceState: 'idle' + }; + + default: + return state; + } +} + +export type { VoiceActivityState }; diff --git a/packages/core/src/sagas/observeActivitySaga.ts b/packages/core/src/sagas/observeActivitySaga.ts index 3378c4c655..9ee290076e 100644 --- a/packages/core/src/sagas/observeActivitySaga.ts +++ b/packages/core/src/sagas/observeActivitySaga.ts @@ -1,9 +1,12 @@ -import { put } from 'redux-saga/effects'; +import { put, select } from 'redux-saga/effects'; import updateIn from 'simple-update-in'; import observeEach from './effects/observeEach'; import queueIncomingActivity from '../actions/queueIncomingActivity'; +import setVoiceState from '../actions/setVoiceState'; import whileConnected from './effects/whileConnected'; +import isVoiceActivity from '../utils/voiceActivity/isVoiceActivity'; +import isVoiceTranscriptActivity from '../utils/voiceActivity/isVoiceTranscriptActivity'; import type { DirectLineActivity } from '../types/external/DirectLineActivity'; import type { DirectLineJSBotConnection } from '../types/external/DirectLineJSBotConnection'; import type { WebChatActivity } from '../types/WebChatActivity'; @@ -75,6 +78,53 @@ function patchFromName(activity: DirectLineActivity) { function* observeActivity({ directLine, userID }: { directLine: DirectLineJSBotConnection; userID?: string }) { yield observeEach(directLine.activity$, function* observeActivity(activity: DirectLineActivity) { + // Handle voice activities separately - don't store them in Redux (except transcripts) + if (isVoiceActivity(activity) && !isVoiceTranscriptActivity(activity)) { + const { recording, voiceHandlers } = yield select(state => ({ + recording: state.voice.voiceState !== 'idle', + voiceHandlers: state.voice.voiceHandlers + })); + + // Only process voice chunks if speech-to-speech is enabled. + if (!recording) { + return; + } + + switch (activity.name) { + case 'media.chunk': { + const audioContent = activity?.value?.content; + if (audioContent) { + voiceHandlers.forEach(handler => handler.queueAudio(audioContent)); + } + break; + } + + case 'request.update': { + const state = activity?.value?.state; + + switch (state) { + case 'detected': + voiceHandlers.forEach(handler => handler.stopAllAudio()); + yield put(setVoiceState('user_speaking')); + break; + + case 'processing': + yield put(setVoiceState('processing')); + break; + + default: + break; + } + break; + } + + default: + break; + } + + return; + } + // TODO: [P2] #3953 Move the patching logic to a DirectLineJS wrapper, instead of too close to inners of Web Chat.
activity = patchNullAsUndefined(activity); activity = patchActivityWithFromRole(activity, userID); diff --git a/packages/core/src/sagas/postVoiceActivitySaga.ts b/packages/core/src/sagas/postVoiceActivitySaga.ts new file mode 100644 index 0000000000..ad7390deb0 --- /dev/null +++ b/packages/core/src/sagas/postVoiceActivitySaga.ts @@ -0,0 +1,76 @@ +import { select, takeEvery } from 'redux-saga/effects'; +import { VOICE_POST_ACTIVITY } from '../actions/postVoiceActivity'; +import languageSelector from '../selectors/language'; +import dateToLocaleISOString from '../utils/dateToLocaleISOString'; +import whileConnected from './effects/whileConnected'; +import observeOnce from './effects/observeOnce'; + +import type { DirectLineJSBotConnection } from '../types/external/DirectLineJSBotConnection'; +import type { DirectLineActivity } from '../types/external/DirectLineActivity'; +import type { GlobalScopePonyfill } from '../types/GlobalScopePonyfill'; +import type { VoicePostActivityAction } from '../actions/postVoiceActivity'; + +/** + * Saga for handling outgoing voice activities. + * + * Unlike regular postActivitySaga, this saga: + * - Does NOT wait for echo back + * - Does NOT store activity in Redux + * - Does NOT dispatch PENDING/FULFILLED/REJECTED actions + * - Fire and forget - just send to WebSocket + * + * This prevents memory leaks from storing thousands of voice chunks. + */ +function* postVoiceActivity( + directLine: DirectLineJSBotConnection, + userID: string, + username: string, + { payload: { activity } }: VoicePostActivityAction, + ponyfill: GlobalScopePonyfill +) { + const locale: string = yield select(languageSelector); + const localTimeZone = + typeof window.Intl === 'undefined' ? undefined : new Intl.DateTimeFormat().resolvedOptions().timeZone; + const now = new ponyfill.Date(); + + const outgoingActivity = { + ...activity, + channelId: 'webchat', + from: { + id: userID, + name: username, + role: 'user' + }, + locale, + localTimestamp: dateToLocaleISOString(now), + localTimezone: localTimeZone, + ...(activity.type === 'event' + ? 
{ + name: activity.name, + value: activity.value + } + : {}) + }; + + try { + yield observeOnce(directLine.postActivity(outgoingActivity as DirectLineActivity)); + } catch (error) { + console.error('botframework-webchat: Failed to post voice activity to chat adapter.', error); + } +} + +export default function* voiceActivitySaga(ponyfill: GlobalScopePonyfill) { + yield whileConnected(function* voiceActivityWhileConnected({ + directLine, + userID, + username + }: { + directLine: DirectLineJSBotConnection; + userID: string; + username: string; + }) { + yield takeEvery(VOICE_POST_ACTIVITY, function* (action: VoicePostActivityAction) { + yield* postVoiceActivity(directLine, userID, username, action, ponyfill); + }); + }); +} diff --git a/packages/core/src/utils/voiceActivity/getVoiceActivityRole.spec.ts b/packages/core/src/utils/voiceActivity/getVoiceActivityRole.spec.ts new file mode 100644 index 0000000000..e8dcba4ad7 --- /dev/null +++ b/packages/core/src/utils/voiceActivity/getVoiceActivityRole.spec.ts @@ -0,0 +1,150 @@ +import getVoiceActivityRole from './getVoiceActivityRole'; +import { WebChatActivity } from '../../types/WebChatActivity'; + +// Mock activity factory for testing +const createMockActivity = (type: string = 'event', name?: string, value?: any, valueType?: string): WebChatActivity => + ({ + type: type as any, + id: 'test-activity-id', + from: { id: 'test-user' }, + channelData: { + 'webchat:sequence-id': 1 + }, + ...(name && { name }), + ...(value && { value }), + ...(valueType && { valueType }) + }) as WebChatActivity; + +const createMockVoiceActivity = ( + name: string, + value: Record, + valueType: string = 'application/vnd.microsoft.activity.azure.directline.audio.chunk' +): WebChatActivity => createMockActivity('event', name, value, valueType); + +const createMockTranscriptActivity = ( + origin: 'user' | 'agent', + transcription: string = 'test', + valueType: string = 'application/vnd.microsoft.activity.azure.directline.audio.transcript' +): WebChatActivity => createMockActivity('event', 'media.end', { transcription, origin }, valueType); + +describe('getVoiceActivityRole', () => { + describe.each([ + ['user', 'user', 'Hello world'], + ['user', 'user', ''] + ] as const)('Voice transcript activities with origin %s', (expectedRole, origin, transcription) => { + test(`should return "${expectedRole}" for media.end with origin ${origin}${transcription ? '' : ' and empty transcription'}`, () => { + const activity = createMockTranscriptActivity(origin, transcription); + + const result = getVoiceActivityRole(activity); + + expect(result).toBe(expectedRole); + }); + }); + + describe.each([ + ['bot', 'agent', 'Hello, how can I help you?'], + ['bot', 'agent', ''] + ] as const)('Voice transcript activities with origin %s', (expectedRole, origin, transcription) => { + test(`should return "${expectedRole}" for media.end with origin ${origin}${transcription ? 
'' : ' and empty transcription'}`, () => { + const activity = createMockTranscriptActivity(origin, transcription); + + const result = getVoiceActivityRole(activity); + + expect(result).toBe(expectedRole); + }); + }); + + describe('Non-transcript voice activities', () => { + test.each([ + ['media.chunk', { content: 'base64' }, 'application/vnd.microsoft.activity.azure.directline.audio.chunk'], + ['request.update', { state: 'detected' }, 'application/vnd.microsoft.activity.azure.directline.audio.state'] + ])('should return undefined for %s', (name, value, valueType) => { + const activity = createMockVoiceActivity(name, value, valueType); + + const result = getVoiceActivityRole(activity); + + expect(result).toBeUndefined(); + }); + }); + + describe('Non-voice activities', () => { + test.each([ + ['message', 'regular message activity'], + ['typing', 'typing activity'] + ])('should return undefined for %s', type => { + const activity = createMockActivity(type); + + const result = getVoiceActivityRole(activity); + + expect(result).toBeUndefined(); + }); + + test('should return undefined for event activity without audio valueType', () => { + const activity = createMockActivity('event', 'test', { someOtherData: 'test' }, 'application/json'); + + const result = getVoiceActivityRole(activity); + + expect(result).toBeUndefined(); + }); + }); + + describe('Real-world scenarios', () => { + test('should correctly identify user transcript in conversation flow', () => { + const userActivities = [ + createMockVoiceActivity( + 'request.update', + { state: 'detected' }, + 'application/vnd.microsoft.activity.azure.directline.audio.state' + ), + createMockVoiceActivity( + 'media.chunk', + { content: 'base64' }, + 'application/vnd.microsoft.activity.azure.directline.audio.chunk' + ), + createMockTranscriptActivity('user', 'What is the weather today?') + ]; + + const roles = userActivities.map(activity => getVoiceActivityRole(activity)); + + expect(roles).toEqual([undefined, undefined, 'user']); + }); + + test('should correctly identify bot transcript in conversation flow', () => { + const botActivities = [ + createMockVoiceActivity( + 'media.chunk', + { content: 'chunk1' }, + 'application/vnd.microsoft.activity.azure.directline.audio.chunk' + ), + createMockVoiceActivity( + 'media.chunk', + { content: 'chunk2' }, + 'application/vnd.microsoft.activity.azure.directline.audio.chunk' + ), + createMockTranscriptActivity('agent', 'Today will be sunny with a high of 75 degrees.'), + createMockVoiceActivity( + 'request.update', + { state: 'processing' }, + 'application/vnd.microsoft.activity.azure.directline.audio.state' + ) + ]; + + const roles = botActivities.map(activity => getVoiceActivityRole(activity)); + + expect(roles).toEqual([undefined, undefined, 'bot', undefined]); + }); + + test('should handle mixed activity types in conversation', () => { + const mixedActivities = [ + createMockActivity('message'), + createMockTranscriptActivity('user', 'Hello'), + createMockActivity('typing'), + createMockTranscriptActivity('agent', 'Hi there!') + ]; + + const roles = mixedActivities.map(activity => getVoiceActivityRole(activity)); + + expect(roles).toEqual([undefined, 'user', undefined, 'bot']); + }); + }); +}); diff --git a/packages/core/src/utils/voiceActivity/getVoiceActivityRole.ts b/packages/core/src/utils/voiceActivity/getVoiceActivityRole.ts new file mode 100644 index 0000000000..2801514e92 --- /dev/null +++ b/packages/core/src/utils/voiceActivity/getVoiceActivityRole.ts @@ -0,0 +1,16 @@ +import { WebChatActivity 
} from '../../types/WebChatActivity'; +import isVoiceTranscriptActivity from './isVoiceTranscriptActivity'; + +const getVoiceActivityRole = (activity: WebChatActivity): 'bot' | 'user' | undefined => { + if (isVoiceTranscriptActivity(activity)) { + if (activity.value.origin === 'agent') { + return 'bot'; + } else if (activity.value.origin === 'user') { + return 'user'; + } + } + + return undefined; +}; + +export default getVoiceActivityRole; diff --git a/packages/core/src/utils/voiceActivity/getVoiceActivityText.spec.ts b/packages/core/src/utils/voiceActivity/getVoiceActivityText.spec.ts new file mode 100644 index 0000000000..85197891de --- /dev/null +++ b/packages/core/src/utils/voiceActivity/getVoiceActivityText.spec.ts @@ -0,0 +1,100 @@ +import getVoiceActivityText from './getVoiceActivityText'; +import { WebChatActivity } from '../../types/WebChatActivity'; + +// Mock activity factory for testing +const createMockActivity = (type: string = 'event', name?: string, value?: any, valueType?: string): WebChatActivity => + ({ + type: type as any, + id: 'test-activity-id', + from: { id: 'test-user' }, + channelData: { + 'webchat:sequence-id': 1 + }, + ...(name && { name }), + ...(value && { value }), + ...(valueType && { valueType }) + }) as WebChatActivity; + +const createMockTranscriptActivity = ( + transcription: string | undefined, + origin: 'user' | 'agent' = 'user', + valueType: string = 'application/vnd.microsoft.activity.azure.directline.audio.transcript' +): WebChatActivity => + createMockActivity( + 'event', + 'media.end', + transcription !== undefined ? { transcription, origin } : { origin }, + valueType + ); + +describe('getVoiceActivityText', () => { + describe('Voice transcript activities', () => { + test.each([ + ['Hello world', 'Hello world'], + ['How can I help you today?', 'How can I help you today?'], + ['', ''] + ])('should return %p for media.end with transcription %p', (expected, transcription) => { + const activity = createMockTranscriptActivity(transcription); + + const result = getVoiceActivityText(activity); + + expect(result).toBe(expected); + }); + + test('should return undefined for media.end without transcript property', () => { + const activity = createMockTranscriptActivity(undefined); + + const result = getVoiceActivityText(activity); + + expect(result).toBeUndefined(); + }); + }); + + describe('Non-transcript voice activities', () => { + test.each([['media.chunk'], ['request.update']])('should return undefined for %s activity', name => { + const activity = createMockActivity( + 'event', + name, + { content: 'base64' }, + 'application/vnd.microsoft.activity.azure.directline.audio.chunk' + ); + + const result = getVoiceActivityText(activity); + + expect(result).toBeUndefined(); + }); + }); + + describe('Non-voice activities', () => { + test.each([ + ['message', undefined, undefined, undefined], + ['event', undefined, { someData: 'test' }, undefined] + ])('should return undefined for %s', (type, name, value, valueType) => { + const activity = createMockActivity(type, name, value, valueType); + + const result = getVoiceActivityText(activity); + + expect(result).toBeUndefined(); + }); + }); + + describe('Edge cases', () => { + test('should handle transcript with whitespace only', () => { + const activity = createMockTranscriptActivity(' '); + + const result = getVoiceActivityText(activity); + + expect(result).toBe(' '); + }); + + test('should handle very long transcript', () => { + const longText = 'A'.repeat(10000); + const activity = 
createMockTranscriptActivity(longText); + + const result = getVoiceActivityText(activity); + + expect(result).toBe(longText); + expect(result?.length).toBe(10000); + }); + }); +}); diff --git a/packages/core/src/utils/voiceActivity/getVoiceActivityText.ts b/packages/core/src/utils/voiceActivity/getVoiceActivityText.ts new file mode 100644 index 0000000000..9bd60ddee0 --- /dev/null +++ b/packages/core/src/utils/voiceActivity/getVoiceActivityText.ts @@ -0,0 +1,11 @@ +import { WebChatActivity } from '../../types/WebChatActivity'; +import isVoiceTranscriptActivity from './isVoiceTranscriptActivity'; + +const getVoiceActivityText = (activity: WebChatActivity): string | undefined => { + if (isVoiceTranscriptActivity(activity)) { + return activity.value.transcription; + } + return undefined; +}; + +export default getVoiceActivityText; diff --git a/packages/core/src/utils/voiceActivity/isVoiceActivity.spec.ts b/packages/core/src/utils/voiceActivity/isVoiceActivity.spec.ts new file mode 100644 index 0000000000..5b57506669 --- /dev/null +++ b/packages/core/src/utils/voiceActivity/isVoiceActivity.spec.ts @@ -0,0 +1,202 @@ +import isVoiceActivity from './isVoiceActivity'; +import { WebChatActivity } from '../../types/WebChatActivity'; + +// Mock activity factory for testing +const createMockActivity = (type: string = 'event', name?: string, value?: any, valueType?: string): WebChatActivity => + ({ + type: type as any, + id: 'test-activity-id', + from: { id: 'test-user' }, + channelData: { + 'webchat:sequence-id': 1 + }, + ...(name && { name }), + ...(value && { value }), + ...(valueType && { valueType }) + }) as WebChatActivity; + +const createMockVoiceActivity = ( + name: string, + value: Record, + valueType: string = 'application/vnd.microsoft.activity.azure.directline.audio.chunk' +): WebChatActivity => createMockActivity('event', name, value, valueType); + +const createMockDtmfActivity = (name: string, value: Record): WebChatActivity => + createMockActivity('event', name, value, 'application/vnd.microsoft.activity.ccv2.dtmf'); + +describe('isVoiceActivity', () => { + describe('Valid voice activities', () => { + test('should return true for event activity with azure directline audio valueType', () => { + const activity = createMockVoiceActivity( + 'media.chunk', + { content: 'base64', contentType: 'audio/webm' }, + 'application/vnd.microsoft.activity.azure.directline.audio.chunk' + ); + + const result = isVoiceActivity(activity); + + expect(result).toBe(true); + }); + + test('should return true for event activity with ccv2 audio valueType', () => { + const activity = createMockVoiceActivity( + 'media.chunk', + { content: 'base64' }, + 'application/vnd.microsoft.activity.ccv2.audio.chunk' + ); + + const result = isVoiceActivity(activity); + + expect(result).toBe(true); + }); + + test('should return true for event activity with dtmf valueType', () => { + const activity = createMockDtmfActivity('media.end', { key: '1' }); + + const result = isVoiceActivity(activity); + + expect(result).toBe(true); + }); + + test('should return true for request.update with audio.state valueType', () => { + const activity = createMockVoiceActivity( + 'request.update', + { state: 'detected', message: 'Your request is identified' }, + 'application/vnd.microsoft.activity.azure.directline.audio.state' + ); + + const result = isVoiceActivity(activity); + + expect(result).toBe(true); + }); + }); + + describe('Invalid activities', () => { + const testCases = [ + { + name: 'message activity with audio valueType', + activity: () 
=> + createMockActivity( + 'message', + 'media.chunk', + { content: 'base64' }, + 'application/vnd.microsoft.activity.azure.directline.audio.chunk' + ) + }, + { + name: 'typing activity', + activity: () => createMockActivity('typing') + }, + { + name: 'event activity with non-audio valueType', + activity: () => createMockActivity('event', 'test', { data: 'test' }, 'application/json') + }, + { + name: 'event activity without valueType', + activity: () => createMockActivity('event', 'test', { someData: 'value' }) + }, + { + name: 'event activity with no value', + activity: () => + createMockActivity( + 'event', + 'test', + undefined, + 'application/vnd.microsoft.activity.azure.directline.audio.chunk' + ) + }, + { + name: 'event activity with no name', + activity: () => + createMockActivity( + 'event', + undefined, + { data: 'test' }, + 'application/vnd.microsoft.activity.azure.directline.audio.chunk' + ) + } + ]; + + test.each(testCases)('should return false for $name', ({ activity }) => { + const result = isVoiceActivity(activity()); + + expect(result).toBe(false); + }); + }); + + describe('Real-world voice activity scenarios', () => { + const voiceScenarios = [ + { + name: 'request.update with speech detected state', + eventName: 'request.update', + value: { state: 'detected', message: 'Your request is identified' }, + valueType: 'application/vnd.microsoft.activity.azure.directline.audio.state' + }, + { + name: 'request.update with processing state', + eventName: 'request.update', + value: { state: 'processing', message: 'Your request is being processed' }, + valueType: 'application/vnd.microsoft.activity.azure.directline.audio.state' + }, + { + name: 'media.end with user transcription', + eventName: 'media.end', + value: { transcription: 'My destination is bangalore', origin: 'user' }, + valueType: 'application/vnd.microsoft.activity.azure.directline.audio.transcript' + }, + { + name: 'media.chunk with server audio response', + eventName: 'media.chunk', + value: { content: 'base64chunk', contentType: 'audio/webm' }, + valueType: 'application/vnd.microsoft.activity.azure.directline.audio.chunk' + }, + { + name: 'media.end with bot transcription', + eventName: 'media.end', + value: { transcription: 'Your destination is at 1000m above sea level', origin: 'agent' }, + valueType: 'application/vnd.microsoft.activity.azure.directline.audio.transcript' + } + ]; + + test.each(voiceScenarios)('should return true for $name', ({ eventName, value, valueType }) => { + const activity = createMockVoiceActivity(eventName, value, valueType); + + const result = isVoiceActivity(activity); + + expect(result).toBe(true); + }); + }); + + describe('Real-world DTMF activity scenarios', () => { + const dtmfScenarios = [ + { + name: 'DTMF with digit 1', + eventName: 'media.end', + value: { key: '1' } + }, + { + name: 'DTMF with star key', + eventName: 'media.end', + value: { key: '*' } + }, + { + name: 'DTMF with hash key', + eventName: 'media.end', + value: { key: '#' } + }, + { + name: 'DTMF with digit 5', + eventName: 'media.end', + value: { key: '5' } + } + ]; + + test.each(dtmfScenarios)('should return true for $name', ({ eventName, value }) => { + const activity = createMockDtmfActivity(eventName, value); + + const result = isVoiceActivity(activity); + + expect(result).toBe(true); + }); + }); +}); diff --git a/packages/core/src/utils/voiceActivity/isVoiceActivity.ts b/packages/core/src/utils/voiceActivity/isVoiceActivity.ts new file mode 100644 index 0000000000..f6c86b8dcc --- /dev/null +++ 
b/packages/core/src/utils/voiceActivity/isVoiceActivity.ts @@ -0,0 +1,22 @@ +import { check, literal, looseObject, object, pipe, safeParse, string, type InferOutput } from 'valibot'; + +import { WebChatActivity } from '../../types/WebChatActivity'; + +// Activity spec proposal - https://github.com/microsoft/Agents/issues/416 +// valueType: contains 'audio' or 'dtmf' (works with any server prefix like azure.directline, ccv2, etc.) +const VoiceActivitySchema = object({ + name: string(), + type: literal('event'), + value: looseObject({}), + valueType: pipe( + string(), + check(value => value.includes('audio') || value.includes('dtmf')) + ) +}); + +const isVoiceActivity = ( + activity: WebChatActivity +): activity is WebChatActivity & InferOutput => + safeParse(VoiceActivitySchema, activity).success; + +export default isVoiceActivity; diff --git a/packages/core/src/utils/voiceActivity/isVoiceTranscriptActivity.spec.ts b/packages/core/src/utils/voiceActivity/isVoiceTranscriptActivity.spec.ts new file mode 100644 index 0000000000..d3631fd019 --- /dev/null +++ b/packages/core/src/utils/voiceActivity/isVoiceTranscriptActivity.spec.ts @@ -0,0 +1,224 @@ +import isVoiceTranscriptActivity from './isVoiceTranscriptActivity'; +import { WebChatActivity } from '../../types/WebChatActivity'; + +// Mock activity factory for testing +const createMockActivity = (type: string = 'event', name?: string, value?: any, valueType?: string): WebChatActivity => + ({ + type: type as any, + id: 'test-activity-id', + from: { id: 'test-user' }, + channelData: { + 'webchat:sequence-id': 1 + }, + ...(name && { name }), + ...(value && { value }), + ...(valueType && { valueType }) + }) as WebChatActivity; + +const createMockVoiceActivity = ( + name: string, + value: Record, + valueType: string = 'application/vnd.microsoft.activity.azure.directline.audio.chunk' +): WebChatActivity => createMockActivity('event', name, value, valueType); + +const createMockTranscriptActivity = ( + transcription: string, + origin: 'user' | 'agent', + valueType: string = 'application/vnd.microsoft.activity.azure.directline.audio.transcript' +): WebChatActivity => createMockActivity('event', 'media.end', { transcription, origin }, valueType); + +describe('isVoiceTranscriptActivity', () => { + describe('Valid transcript activities', () => { + test('should return true for media.end with user transcription', () => { + const activity = createMockTranscriptActivity('Hello world', 'user'); + + const result = isVoiceTranscriptActivity(activity); + + expect(result).toBe(true); + }); + + test('should return true for media.end with agent transcription', () => { + const activity = createMockTranscriptActivity('Hi there!', 'agent'); + + const result = isVoiceTranscriptActivity(activity); + + expect(result).toBe(true); + }); + + test('should return true for media.end with empty transcription string', () => { + const activity = createMockTranscriptActivity('', 'user'); + + const result = isVoiceTranscriptActivity(activity); + + expect(result).toBe(true); + }); + + test('should return true for ccv2 transcript valueType', () => { + const activity = createMockTranscriptActivity( + 'Test transcript', + 'user', + 'application/vnd.microsoft.activity.ccv2.audio.transcript' + ); + + const result = isVoiceTranscriptActivity(activity); + + expect(result).toBe(true); + }); + }); + + describe('Invalid activities', () => { + const testCases = [ + { + name: 'media.chunk voice activity', + activity: () => + createMockVoiceActivity( + 'media.chunk', + { content: 'base64' }, + 
'application/vnd.microsoft.activity.azure.directline.audio.chunk' + ) + }, + { + name: 'request.update voice activity', + activity: () => + createMockVoiceActivity( + 'request.update', + { state: 'detected' }, + 'application/vnd.microsoft.activity.azure.directline.audio.state' + ) + }, + { + name: 'media.end without transcription', + activity: () => + createMockActivity( + 'event', + 'media.end', + { origin: 'user' }, + 'application/vnd.microsoft.activity.azure.directline.audio.transcript' + ) + }, + { + name: 'media.end with non-string transcription', + activity: () => + createMockActivity( + 'event', + 'media.end', + { transcription: 123, origin: 'user' }, + 'application/vnd.microsoft.activity.azure.directline.audio.transcript' + ) + }, + { + name: 'media.end with null transcription', + activity: () => + createMockActivity( + 'event', + 'media.end', + { transcription: null, origin: 'user' }, + 'application/vnd.microsoft.activity.azure.directline.audio.transcript' + ) + }, + { + name: 'regular message activity', + activity: () => createMockActivity('message', 'test') + }, + { + name: 'typing activity', + activity: () => createMockActivity('typing') + }, + { + name: 'media.end with non-transcript valueType', + activity: () => + createMockActivity( + 'event', + 'media.end', + { transcription: 'test', origin: 'user' }, + 'application/vnd.microsoft.activity.azure.directline.audio.chunk' + ) + }, + { + name: 'event activity without valueType', + activity: () => createMockActivity('event', 'media.end', { transcription: 'test', origin: 'user' }) + }, + { + name: 'event activity without name', + activity: () => + createMockActivity( + 'event', + undefined, + { transcription: 'test', origin: 'user' }, + 'application/vnd.microsoft.activity.azure.directline.audio.transcript' + ) + } + ]; + + test.each(testCases)('should return false for $name', ({ activity }) => { + const result = isVoiceTranscriptActivity(activity()); + + expect(result).toBe(false); + }); + }); + + describe('Real-world scenarios', () => { + test('should identify user transcript in conversation flow', () => { + const conversationActivities = [ + createMockVoiceActivity( + 'request.update', + { state: 'detected' }, + 'application/vnd.microsoft.activity.azure.directline.audio.state' + ), + createMockVoiceActivity( + 'request.update', + { state: 'processing' }, + 'application/vnd.microsoft.activity.azure.directline.audio.state' + ), + createMockTranscriptActivity('What is the weather today?', 'user') + ]; + + const transcriptResults = conversationActivities.map(activity => isVoiceTranscriptActivity(activity)); + + expect(transcriptResults).toEqual([false, false, true]); + }); + + test('should identify agent transcript in response flow', () => { + const responseActivities = [ + createMockVoiceActivity( + 'request.update', + { state: 'response.available' }, + 'application/vnd.microsoft.activity.azure.directline.audio.state' + ), + createMockVoiceActivity( + 'media.chunk', + { content: 'chunk1' }, + 'application/vnd.microsoft.activity.azure.directline.audio.chunk' + ), + createMockVoiceActivity( + 'media.chunk', + { content: 'chunk2' }, + 'application/vnd.microsoft.activity.azure.directline.audio.chunk' + ), + createMockTranscriptActivity('Today will be sunny with a high of 75 degrees.', 'agent') + ]; + + const transcriptResults = responseActivities.map(activity => isVoiceTranscriptActivity(activity)); + + expect(transcriptResults).toEqual([false, false, false, true]); + }); + + test('should handle complete conversation with mixed 
activities', () => { + const mixedActivities = [ + createMockActivity('message', 'test'), + createMockTranscriptActivity('Hello', 'user'), + createMockVoiceActivity( + 'media.chunk', + { content: 'audio' }, + 'application/vnd.microsoft.activity.azure.directline.audio.chunk' + ), + createMockTranscriptActivity('Hi there!', 'agent'), + createMockActivity('typing') + ]; + + const transcriptResults = mixedActivities.map(activity => isVoiceTranscriptActivity(activity)); + + expect(transcriptResults).toEqual([false, true, false, true, false]); + }); + }); +}); diff --git a/packages/core/src/utils/voiceActivity/isVoiceTranscriptActivity.ts b/packages/core/src/utils/voiceActivity/isVoiceTranscriptActivity.ts new file mode 100644 index 0000000000..8b861d1783 --- /dev/null +++ b/packages/core/src/utils/voiceActivity/isVoiceTranscriptActivity.ts @@ -0,0 +1,24 @@ +import { check, literal, object, picklist, pipe, safeParse, string, type InferOutput } from 'valibot'; + +import { WebChatActivity } from '../../types/WebChatActivity'; + +// valueType: contains 'audio.transcript' (e.g., azure.directline.audio.transcript) +const VoiceTranscriptActivitySchema = object({ + name: literal('media.end'), + type: literal('event'), + value: object({ + origin: picklist(['agent', 'user']), + transcription: string() + }), + valueType: pipe( + string(), + check(value => value.includes('audio.transcript')) + ) +}); + +const isVoiceTranscriptActivity = ( + activity: WebChatActivity +): activity is WebChatActivity & InferOutput => + safeParse(VoiceTranscriptActivitySchema, activity).success; + +export default isVoiceTranscriptActivity; diff --git a/packages/fluent-theme/src/components/activity/PartGroupingDecorator.tsx b/packages/fluent-theme/src/components/activity/PartGroupingDecorator.tsx index 6957fb797d..31b4e6626d 100644 --- a/packages/fluent-theme/src/components/activity/PartGroupingDecorator.tsx +++ b/packages/fluent-theme/src/components/activity/PartGroupingDecorator.tsx @@ -1,5 +1,10 @@ import { reactNode, validateProps } from '@msinternal/botframework-webchat-react-valibot'; -import { getOrgSchemaMessage, PartGrouping, type WebChatActivity } from 'botframework-webchat/internal'; +import { + getOrgSchemaMessage, + getVoiceActivityRole, + PartGrouping, + type WebChatActivity +} from 'botframework-webchat/internal'; import cx from 'classnames'; import React, { memo, useMemo, type ReactNode } from 'react'; import { array, custom, object, optional, pipe, readonly, safeParse } from 'valibot'; @@ -38,8 +43,12 @@ function PartGroupingDecorator(props: PartGroupingDecoratorProps) { [activity, restActivities.length] ); - const isFromUser = activity?.from?.role === 'user'; - const isFromBot = activity?.from?.role === 'bot'; + // S2S-both user and bot transcript comes from server (RT-LLM) hence need to check role explicitly. + // voiceActivityRole takes precedence over from.role since S2S activities always come from 'bot' + const voiceActivityRole = activity && getVoiceActivityRole(activity); + + const isFromBot = voiceActivityRole ? voiceActivityRole === 'bot' : activity?.from?.role === 'bot'; + const isFromUser = voiceActivityRole ? voiceActivityRole === 'user' : activity?.from?.role === 'user'; return (
; + +function VoiceTranscriptActivityStatus({ activity }: VoiceTranscriptActivityStatusProps) { + const classNames = useStyles(styles); + const localize = useLocalizer(); + const { timestamp } = activity; + const role = getVoiceActivityRole(activity); + const text = getVoiceActivityText(activity); + + const agentLabel = localize('ACTIVITY_STATUS_VOICE_TRANSCRIPT_AGENT_LABEL'); + + if (!text) { + return null; + } + + return ( + + {role === 'bot' && ( + + {agentLabel} + {timestamp && {'|'}} + + )} + {timestamp && } + + ); +} + +export default memo(VoiceTranscriptActivityStatus); diff --git a/packages/fluent-theme/src/components/icon/FluentIcon.module.css b/packages/fluent-theme/src/components/icon/FluentIcon.module.css index a091a1eee6..f6f274c23a 100644 --- a/packages/fluent-theme/src/components/icon/FluentIcon.module.css +++ b/packages/fluent-theme/src/components/icon/FluentIcon.module.css @@ -50,4 +50,11 @@ --webchat__fluent-icon--mask: url('data:image/svg+xml;utf8,'); } +:global(.webchat) .icon--microphone { + --webchat__fluent-icon--mask: url('data:image/svg+xml;utf8,'); +} + +:global(.webchat) .icon--audio-playing { + --webchat__fluent-icon--mask: url('data:image/svg+xml;utf8,'); +} /* #endregion */ diff --git a/packages/fluent-theme/src/components/sendBox/MicrophoneToolbarButton.tsx b/packages/fluent-theme/src/components/sendBox/MicrophoneToolbarButton.tsx new file mode 100644 index 0000000000..946f2a5e88 --- /dev/null +++ b/packages/fluent-theme/src/components/sendBox/MicrophoneToolbarButton.tsx @@ -0,0 +1,57 @@ +import { hooks } from 'botframework-webchat'; +import cx from 'classnames'; +import React, { memo, useCallback } from 'react'; + +import { useStyles } from '../../styles'; +import testIds from '../../testIds'; +import { FluentIcon } from '../icon'; +import { ToolbarButton } from './Toolbar'; + +import styles from './Toolbar.module.css'; + +const { useVoiceState, useStartVoice, useStopVoice, useLocalizer } = hooks; + +function MicrophoneToolbarButton() { + const [voiceState] = useVoiceState(); + const classNames = useStyles(styles); + const localize = useLocalizer(); + const startVoice = useStartVoice(); + const stopVoice = useStopVoice(); + + const recording = voiceState !== 'idle'; + + const handleMicrophoneClick = useCallback(() => { + if (recording) { + stopVoice(); // Stop recognition and synthesis. + } else { + startVoice(); // If it was stopped, will start recognition. It will synthesize when the bot respond. + } + }, [recording, startVoice, stopVoice]); + + const ariaLabel = localize( + recording ? 
'SPEECH_INPUT_MICROPHONE_BUTTON_OPEN_ALT' : 'SPEECH_INPUT_MICROPHONE_BUTTON_CLOSE_ALT' + ); + + const isBotSpeaking = voiceState === 'bot_speaking'; + const isUserSpeaking = voiceState === 'user_speaking'; + + return ( + + + + ); +} + +MicrophoneToolbarButton.displayName = 'SendBox.MicrophoneToolbarButton'; + +export default memo(MicrophoneToolbarButton); diff --git a/packages/fluent-theme/src/components/sendBox/SendBox.module.css b/packages/fluent-theme/src/components/sendBox/SendBox.module.css index 473a71e55e..522205a042 100644 --- a/packages/fluent-theme/src/components/sendBox/SendBox.module.css +++ b/packages/fluent-theme/src/components/sendBox/SendBox.module.css @@ -55,7 +55,7 @@ transition: clip-path var(--webchat-durationUltraFast) var(--webchat-curveAccelerateMid); } - &:focus-within::after { + &:not(:has(textarea[readonly][aria-disabled='true'])):focus-within::after { clip-path: inset(calc(100% - var(--webchat-strokeWidthThicker)) 0 0 0); transition: clip-path var(--webchat-durationNormal) var(--webchat-curveDecelerateMid); } diff --git a/packages/fluent-theme/src/components/sendBox/SendBox.tsx b/packages/fluent-theme/src/components/sendBox/SendBox.tsx index 5b1264c6e2..05fb2bf184 100644 --- a/packages/fluent-theme/src/components/sendBox/SendBox.tsx +++ b/packages/fluent-theme/src/components/sendBox/SendBox.tsx @@ -1,9 +1,11 @@ import { Components, hooks } from 'botframework-webchat'; +import { usePostVoiceActivity, useShouldShowMicrophoneButton } from 'botframework-webchat/internal'; import cx from 'classnames'; import React, { memo, ReactNode, useCallback, + useMemo, useRef, useState, type FormEventHandler, @@ -19,12 +21,14 @@ import { SuggestedActions } from '../suggestedActions'; import { TelephoneKeypadSurrogate, useTelephoneKeypadShown, type DTMF } from '../telephoneKeypad'; import AddAttachmentButton from './AddAttachmentButton'; import ErrorMessage from './ErrorMessage'; +import useSpeechStateMessage from './private/useSpeechStateMessage'; import useSubmitError from './private/useSubmitError'; import useTranscriptNavigation from './private/useTranscriptNavigation'; import useUniqueId from './private/useUniqueId'; import styles from './SendBox.module.css'; import TelephoneKeypadToolbarButton from './TelephoneKeypadToolbarButton'; import { Toolbar, ToolbarButton, ToolbarSeparator } from './Toolbar'; +import MicrophoneToolbarButton from './MicrophoneToolbarButton'; const { useFocus, @@ -35,7 +39,8 @@ const { useSendBoxValue, useSendMessage, useStyleOptions, - useUIState + useUIState, + useVoiceState } = hooks; const { AttachmentBar, TextArea } = Components; @@ -54,23 +59,35 @@ function SendBox(props: Props) { const [localMessage, setLocalMessage] = useState(''); const [telephoneKeypadShown] = useTelephoneKeypadShown(); const [uiState] = useUIState(); + const [voiceState] = useVoiceState(); const classNames = useStyles(styles); const variantClassName = useVariantClassName(styles); const errorMessageId = useUniqueId('sendbox__error-message-id'); const inputRef = useRef(null); const localize = useLocalizer(); const makeThumbnail = useMakeThumbnail(); + const postVoiceActivity = usePostVoiceActivity(); const sendMessage = useSendMessage(); const setFocus = useFocus(); + const showMicrophoneButton = useShouldShowMicrophoneButton(); + const speechStateMessage = useSpeechStateMessage(); const message = props.isPrimary ? globalMessage : localMessage; + const recording = voiceState !== 'idle'; const setMessage = props.isPrimary ? 
setGlobalMessage : setLocalMessage; const isBlueprint = uiState === 'blueprint'; const [errorMessage, commitLatestError] = useSubmitError({ message, attachments }); const isMessageLengthExceeded = !!maxMessageLength && message.length > maxMessageLength; - const shouldShowMessageLength = - !isBlueprint && !telephoneKeypadShown && maxMessageLength && isFinite(maxMessageLength); + const shouldShowMessageLength = useMemo( + () => + !isBlueprint && + !telephoneKeypadShown && + !!maxMessageLength && + isFinite(maxMessageLength) && + !showMicrophoneButton, + [isBlueprint, telephoneKeypadShown, maxMessageLength, showMicrophoneButton] + ); const shouldShowTelephoneKeypad = !isBlueprint && telephoneKeypadShown; useRegisterFocusSendBox( @@ -156,9 +173,21 @@ function SendBox(props: Props) { ); const handleTelephoneKeypadButtonClick = useCallback( - // TODO: We need more official way of sending DTMF. - (dtmf: DTMF) => sendMessage(`/DTMFKey ${dtmf}`), - [sendMessage] + (dtmf: DTMF) => { + if (recording) { + postVoiceActivity({ + name: 'media.end', + type: 'event', + value: { + key: dtmf + } + } as any); + } else { + // TODO: We need more official way of sending DTMF. + sendMessage(`/DTMFKey ${dtmf}`); + } + }, + [postVoiceActivity, recording, sendMessage] ); const handleTranscriptNavigation = useTranscriptNavigation(); @@ -193,7 +222,10 @@ function SendBox(props: Props) { hidden={shouldShowTelephoneKeypad} onClick={handleClick} onInput={handleMessageChange} - placeholder={props.placeholder ?? localize('TEXT_INPUT_PLACEHOLDER')} + placeholder={ + props.placeholder ?? (showMicrophoneButton ? speechStateMessage : localize('TEXT_INPUT_PLACEHOLDER')) + } + readOnly={showMicrophoneButton} ref={inputRef} value={message} /> @@ -226,14 +258,18 @@ function SendBox(props: Props) { {!hideTelephoneKeypadButton && } {!disableFileUpload && } - - - + {showMicrophoneButton ? ( + + ) : ( + + + + )}
{!disableFileUpload && } diff --git a/packages/fluent-theme/src/components/sendBox/Toolbar.module.css b/packages/fluent-theme/src/components/sendBox/Toolbar.module.css index c60a842bb9..1ffae08533 100644 --- a/packages/fluent-theme/src/components/sendBox/Toolbar.module.css +++ b/packages/fluent-theme/src/components/sendBox/Toolbar.module.css @@ -46,6 +46,54 @@ color: var(--webchat-colorNeutralForegroundDisabled); cursor: not-allowed; } + + &.sendbox__toolbar-button--active { + --webchat__toolbar-button--pulse-start-size: 30px; + --webchat__toolbar-button--pulse-end-size: 58px; + --webchat__toolbar-button--pulse-opacity: 0.5; + --webchat__toolbar-button--background-gradient-opacity: 0.15; + --webchat__toolbar-button--gradient-color-1: var(--webchat-colorBrandForeground1, #0078d4); + --webchat__toolbar-button--gradient-color-2: #2db4ff; + --webchat__toolbar-button--gradient-color-3: #d660ff; + --webchat__toolbar-button--gradient-color-4: #fea874; + + background-color: var(--webchat-colorNeutralForeground2BrandSelected); + border-radius: 50%; + color: var(--webchat-colorNeutralBackground1); + + @media (hover: hover) { + &:hover { + color: var(--webchat-colorNeutralBackground1); + } + } + } + + &.sendbox__toolbar-button--with-pulse::before { + animation: toolbar-button__pulse 1s linear infinite alternate; + background-color: var(--webchat-colorNeutralForeground2BrandSelected); + border-radius: 50%; + content: ''; + height: var(--webchat__toolbar-button--pulse-start-size); + opacity: var(--webchat__toolbar-button--pulse-opacity); + position: absolute; + width: var(--webchat__toolbar-button--pulse-start-size); + } + + &.sendbox__toolbar-button--with-gradient::after { + background: linear-gradient( + 90deg, + var(--webchat__toolbar-button--gradient-color-1) 0%, + color-mix(in srgb, var(--webchat__toolbar-button--gradient-color-1), var(--webchat__toolbar-button--gradient-color-2)) 33%, + color-mix(in srgb, var(--webchat__toolbar-button--gradient-color-1), var(--webchat__toolbar-button--gradient-color-3)) 66%, + color-mix(in srgb, var(--webchat__toolbar-button--gradient-color-1), var(--webchat__toolbar-button--gradient-color-4)) 100% + ); + border-radius: 50%; + content: ''; + height: var(--webchat__toolbar-button--pulse-end-size); + opacity: var(--webchat__toolbar-button--background-gradient-opacity); + position: absolute; + width: var(--webchat__toolbar-button--pulse-end-size); + } } :global(.webchat-fluent) .sendbox__toolbar-separator { @@ -59,3 +107,15 @@ display: none; } } + +@keyframes toolbar-button__pulse { + 0% { + height: var(--webchat__toolbar-button--pulse-start-size); + width: var(--webchat__toolbar-button--pulse-start-size); + } + + 100% { + height: var(--webchat__toolbar-button--pulse-end-size); + width: var(--webchat__toolbar-button--pulse-end-size); + } +} diff --git a/packages/fluent-theme/src/components/sendBox/private/useSpeechStateMessage.ts b/packages/fluent-theme/src/components/sendBox/private/useSpeechStateMessage.ts new file mode 100644 index 0000000000..cdc42e33b9 --- /dev/null +++ b/packages/fluent-theme/src/components/sendBox/private/useSpeechStateMessage.ts @@ -0,0 +1,29 @@ +import { hooks } from 'botframework-webchat'; +import { useMemo } from 'react'; + +const { useLocalizer, useVoiceState } = hooks; + +export default function useSpeechStateMessage(): string { + const [voiceState] = useVoiceState(); + const localize = useLocalizer(); + + return useMemo(() => { + switch (voiceState) { + case 'bot_speaking': + return
localize('TEXT_INPUT_SPEECH_BOT_SPEAKING_PLACEHOLDER'); + + case 'idle': + return localize('TEXT_INPUT_SPEECH_IDLE_PLACEHOLDER'); + + case 'listening': + case 'user_speaking': + return localize('TEXT_INPUT_SPEECH_LISTENING_PLACEHOLDER'); + + case 'processing': + return localize('TEXT_INPUT_SPEECH_PROCESSING_PLACEHOLDER'); + + default: + return localize('TEXT_INPUT_SPEECH_IDLE_PLACEHOLDER'); + } + }, [voiceState, localize]); +} diff --git a/packages/fluent-theme/src/external.umd/botframework-webchat-api/internal.ts b/packages/fluent-theme/src/external.umd/botframework-webchat-api/internal.ts index d3920b80ab..c2713f0336 100644 --- a/packages/fluent-theme/src/external.umd/botframework-webchat-api/internal.ts +++ b/packages/fluent-theme/src/external.umd/botframework-webchat-api/internal.ts @@ -1,5 +1,3 @@ /// -module.exports = { - internal: (globalThis as any).WebChat.internal -}; +module.exports = (globalThis as any).WebChat.internal; diff --git a/packages/fluent-theme/src/private/FluentThemeProvider.tsx b/packages/fluent-theme/src/private/FluentThemeProvider.tsx index bfbc8fc02d..fcb0613a43 100644 --- a/packages/fluent-theme/src/private/FluentThemeProvider.tsx +++ b/packages/fluent-theme/src/private/FluentThemeProvider.tsx @@ -8,10 +8,16 @@ import { WebChatDecorator, type DecoratorMiddleware } from 'botframework-webchat/decorator'; -import { type ActivityMiddleware, type TypingIndicatorMiddleware } from 'botframework-webchat/internal'; +import { + isVoiceTranscriptActivity, + type ActivityMiddleware, + type ActivityStatusMiddleware, + type TypingIndicatorMiddleware +} from 'botframework-webchat/internal'; import React, { memo, useMemo } from 'react'; import { custom, object, optional, pipe, readonly, string, type InferInput } from 'valibot'; +import VoiceTranscriptActivityStatus from '../components/activityStatus/VoiceTranscriptActivityStatus'; import ActivityLoader from '../components/activity/ActivityLoader'; import PartGroupDecorator from '../components/activity/PartGroupingDecorator'; import AssetComposer from '../components/assets/AssetComposer'; @@ -73,6 +79,17 @@ const decoratorMiddleware: readonly DecoratorMiddleware[] = Object.freeze([ }) ]); +const activityStatusMiddleware: readonly ActivityStatusMiddleware[] = Object.freeze([ + () => + next => + ({ activity, ...args }) => { + if (isVoiceTranscriptActivity(activity)) { + return ; + } + return next({ activity, ...args }); + } +]); + const typingIndicatorMiddleware: readonly TypingIndicatorMiddleware[] = Object.freeze([ () => next => @@ -99,6 +116,7 @@ function FluentThemeProvider(props: FluentThemeProviderProps) { { + // Auto-handle voice activities (continuous sending by mic) without requiring actPostActivity + // Voice activities are fire-and-forget and don't echo back + if (outgoingActivity.type === 'event' && outgoingActivity.name.includes('media')) { + const id = uniqueId(); + + return new Observable(observer => { + try { + observer.next(id); + observer.complete(); + } catch (error) { + observer.error(error); + } + }); + } + const returnPostActivityWithResolvers = withResolvers(); const deferred = postActivityCallDeferreds.shift(); @@ -185,6 +200,18 @@ export default function createDirectLineEmulator({ autoConnect = true, ponyfill 1000 )); }, + emulateIncomingVoiceActivity: activity => { + activity = updateIn(activity, ['timestamp'], timestamp => + typeof timestamp === 'number' + ? new Date(now + timestamp).toISOString() + : 'timestamp' in activity + ? 
timestamp + : getTimestamp() + ); + activity = updateIn(activity, ['type'], type => type || 'event'); + + activityDeferredObservable.next(activity); + }, emulateOutgoingActivity: (activity, options) => { if (typeof activity === 'string') { activity = {