diff --git a/.claude/skills/web-test/recording.md b/.claude/skills/web-test/recording.md index 91f16eda..ce1d0d2f 100644 --- a/.claude/skills/web-test/recording.md +++ b/.claude/skills/web-test/recording.md @@ -91,6 +91,8 @@ Display a text overlay on the page (visible in recording). Calling again updates The overlay uses `pointer-events: none` — does not interfere with clicking. +**Smart TTS wait** (during recording): `showCaption` automatically pauses for the estimated TTS speech duration (~100ms per character, min 2s). The next `wait()` call accounts for this — if the explicit pause is shorter than the TTS wait already done, no extra delay is added. If longer, only the remaining difference is waited. This means script authors don't need to calculate TTS timing manually. + ### `hideCaption()` Remove the caption overlay. @@ -181,7 +183,7 @@ const result = await stopRecording(); console.log(`Recorded ${result.duration}s, ${(result.size / 1024 / 1024).toFixed(1)} MB`); ``` -**Caption timing**: show the caption *before* the action with a `wait(1.5)` pause — the viewer reads what will happen, then sees it happen. Add `wait()` *after* the action only when the next step needs the result to load (e.g., form opening). +**Caption timing**: show the caption *before* the action — `showCaption` auto-waits for the estimated TTS duration during recording. The subsequent `wait()` is shortened by the time `showCaption` already waited (no double-waiting). Add `wait()` *after* the action only when the next step needs the result to load (e.g., form opening). **Highlight timing**: `setHighlight(true)` enables auto-mode — each action function highlights the target for 500ms, then removes the highlight before performing the action. No manual `highlight()`/`unhighlight()` calls needed. Enable after title slide, disable before `stopRecording()`. 
@@ -299,4 +301,4 @@ const result = await addNarration('recordings/demo.mp4', { voice: 'ru-RU-Svetlan | Recording stops on disconnect | Expected — auto-stop prevents orphaned ffmpeg processes | | "No captions available" | Use `showCaption()` during recording, or pass `opts.captions` | | TTS timeout | Check internet connection. Edge TTS requires network access | -| Audio cuts off between captions | TTS is auto-trimmed to fit the timeline. Add longer `wait()` pauses | +| Audio cuts off between captions | Smart TTS wait should handle this automatically. If warnings appear, add longer `wait()` after `showCaption` | diff --git a/.claude/skills/web-test/scripts/browser.mjs b/.claude/skills/web-test/scripts/browser.mjs index 36c69f62..96db4a4b 100644 --- a/.claude/skills/web-test/scripts/browser.mjs +++ b/.claude/skills/web-test/scripts/browser.mjs @@ -2955,7 +2955,15 @@ export async function screenshot() { /** Wait for a specified number of seconds. */ export async function wait(seconds) { ensureConnected(); - await page.waitForTimeout(seconds * 1000); + let ms = seconds * 1000; + // Credit system: if showCaption already waited for TTS, subtract that time + if (recorder && recorder.captionCredit) { + const elapsed = Date.now() - recorder.captionCredit.at; + const credit = Math.max(0, recorder.captionCredit.waitedMs - elapsed); + ms = Math.max(0, ms - credit); + recorder.captionCredit = null; + } + if (ms > 0) await page.waitForTimeout(ms); return await getFormState(); } @@ -3131,10 +3139,13 @@ export async function showCaption(text, opts = {}) { ensureConnected(); // Collect caption for TTS narration if recording + let smartWaitMs = 0; if (recorder && text.trim() && opts.speech !== false) { const speech = typeof opts.speech === 'string' ? 
opts.speech : text; // Use video timeline position (accounts for frame duplication) instead of wall-clock recorder.captions.push({ text, speech, time: Math.round(recorder.videoTimeMs) }); + // Estimate TTS duration and wait so the video has enough screen time for voiceover + smartWaitMs = Math.max(2000, speech.length * 100); } const position = opts.position || 'bottom'; const fontSize = opts.fontSize || 24; @@ -3160,6 +3171,12 @@ export async function showCaption(text, opts = {}) { el.style.color = color; el.textContent = text; }, { text, position, fontSize, bg, color }); + + // Smart TTS wait: pause for estimated speech duration so video has enough screen time + if (smartWaitMs > 0) { + await page.waitForTimeout(smartWaitMs); + recorder.captionCredit = { waitedMs: smartWaitMs, at: Date.now() }; + } } /** Remove the caption overlay from the page. */ @@ -3293,12 +3310,17 @@ export async function addNarration(videoPath, opts = {}) { ffmpegInputs.push('-i', ttsFile); const filters = []; - // Speed up TTS if it's longer than gap to next caption + // Speed up TTS slightly if it's longer than gap to next caption (max 1.3x) if (i < captions.length - 1) { const maxDuration = (captions[i + 1].time - captions[i].time) / 1000; if (ttsDuration > maxDuration && maxDuration > 0.1) { - const tempo = Math.min(ttsDuration / maxDuration, 2.5); - filters.push(`atempo=${tempo.toFixed(4)}`); + const tempo = ttsDuration / maxDuration; + if (tempo <= 1.3) { + filters.push(`atempo=${tempo.toFixed(4)}`); + } else { + // Too fast — let audio overlap instead of distorting + warnings.push(`Caption ${i + 1}/${captions.length}: TTS ${ttsDuration.toFixed(1)}s > gap ${maxDuration.toFixed(1)}s (need ${Math.round(ttsDuration - maxDuration)}s more pause)`); + } } } @@ -3328,7 +3350,8 @@ export async function addNarration(videoPath, opts = {}) { '-y', '-i', videoPath, '-i', narrationPath, '-c:v', 'copy', '-c:a', 'aac', '-b:a', '128k', '-map', '0:v:0', '-map', '1:a:0', - '-shortest', '-movflags', 
'+faststart', outputPath, + '-t', String(Math.ceil(videoDuration)), + '-movflags', '+faststart', outputPath, ], { stdio: 'pipe', timeout: 120000 }); const stats = statSync(outputPath);