From b68f4145ce4c9d378a55619cecc2d84b1085025c Mon Sep 17 00:00:00 2001 From: Nick Shirokov Date: Wed, 18 Mar 2026 21:05:13 +0300 Subject: [PATCH] fix(web-test): reduce smart TTS wait from 100ms/char to 70ms/char MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Measured real Edge TTS (ru-RU-DmitryNeural) durations: 57 chars → 4.4s (77ms/char) 72 chars → 6.0s (84ms/char) 126 chars → 8.2s (65ms/char) 745 chars → 48.0s (64ms/char) Old 100ms/char overestimated by 30-55%, causing long silent pauses after speech in showImage/showTitleSlide/showCaption. New 70ms/char gives a ~10% safety margin over the rate measured for longer texts (64-65ms/char) without excessive silence; note that it is below the rate measured for the two short samples (77-84ms/char), so short captions may get slightly less wait than their speech duration. Co-Authored-By: Claude Opus 4.6 (1M context) --- .claude/skills/web-test/recording.md | 2 +- .claude/skills/web-test/scripts/browser.mjs | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.claude/skills/web-test/recording.md b/.claude/skills/web-test/recording.md index bc8fd3d4..8b40f319 100644 --- a/.claude/skills/web-test/recording.md +++ b/.claude/skills/web-test/recording.md @@ -92,7 +92,7 @@ Display a text overlay on the page (visible in recording). Calling again updates The overlay uses `pointer-events: none` — does not interfere with clicking. -**Smart TTS wait** (during recording): `showCaption` automatically pauses for the estimated TTS speech duration (~100ms per character, min 2s). The next `wait()` call accounts for this — if the explicit pause is shorter than the TTS wait already done, no extra delay is added. If longer, only the remaining difference is waited. This means script authors don't need to calculate TTS timing manually. +**Smart TTS wait** (during recording): `showCaption` automatically pauses for the estimated TTS speech duration (~70ms per character, min 2s). The next `wait()` call accounts for this — if the explicit pause is shorter than the TTS wait already done, no extra delay is added. If longer, only the remaining difference is waited. 
This means script authors don't need to calculate TTS timing manually. ### `hideCaption()` diff --git a/.claude/skills/web-test/scripts/browser.mjs b/.claude/skills/web-test/scripts/browser.mjs index 2913da5f..a28b6337 100644 --- a/.claude/skills/web-test/scripts/browser.mjs +++ b/.claude/skills/web-test/scripts/browser.mjs @@ -3939,7 +3939,7 @@ export async function showCaption(text, opts = {}) { // Use video timeline position (accounts for frame duplication) instead of wall-clock recorder.captions.push({ text, speech, time: Math.round(recorder.videoTimeMs) }); // Estimate TTS duration and wait so the video has enough screen time for voiceover - smartWaitMs = Math.max(2000, speech.length * 100); + smartWaitMs = Math.max(2000, speech.length * 70); } const position = opts.position || 'bottom'; const fontSize = opts.fontSize || 24; @@ -4207,7 +4207,7 @@ export async function showTitleSlide(text, opts = {}) { const captionText = typeof speech === 'string' ? speech : text.replace(/\n/g, ' '); if (captionText) { recorder.captions.push({ text: captionText, speech: captionText, time: Math.round(recorder.videoTimeMs) }); - smartWaitMs = Math.max(2000, captionText.length * 100); + smartWaitMs = Math.max(2000, captionText.length * 70); } } @@ -4315,7 +4315,7 @@ export async function showImage(imagePath, opts = {}) { const captionText = typeof speech === 'string' ? speech : ''; if (captionText) { recorder.captions.push({ text: captionText, speech: captionText, time: Math.round(recorder.videoTimeMs) }); - smartWaitMs = Math.max(2000, captionText.length * 100); + smartWaitMs = Math.max(2000, captionText.length * 70); } }