refactor(web-test): этап A.2 — вынести recording/* в отдельные модули

Перенос ~1200 LOC из browser.mjs в recording/{tts,captions,capture,highlight,narration}.mjs:
  - tts.mjs: resolveFfmpeg, resolveEdgeTts, edge/openai/elevenlabs providers,
    getTtsProvider, getAudioDuration, generateSilence
  - captions.mjs: showCaption/hideCaption/getCaptions, showTitleSlide/
    hideTitleSlide, showImage/hideImage
  - capture.mjs: screenshot, wait, isRecording, startRecording, stopRecording
  - highlight.mjs: highlight, unhighlight, setHighlight, isHighlightMode
  - narration.mjs: addNarration

browser.mjs стал тоньше на 1200 строк, re-export через `export { ... } from './recording/*.mjs'`.
Публичный API сохранён (56 экспортов). state.mjs нормализован на CRLF.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Nick Shirokov
2026-05-25 22:07:32 +03:00
parent cecf4dd9a2
commit 398c515390
7 changed files with 1373 additions and 1306 deletions
File diff suppressed because it is too large Load Diff
+113 -113
View File
@@ -1,113 +1,113 @@
// web-test core/state v1.16 — module-level state for the web-test engine.
// Source: https://github.com/Nikolay-Shirokov/cc-1c-skills
//
// Holds the single browser/page/recorder slot plus the multi-context registry,
// constants, and small state-only utilities (ensureConnected, getPage,
// resolveProjectPath, normYo). Mutable values are exported as `let` bindings
// for live read access from consumer modules; writes go through setters so
// imported bindings stay read-only at the import site.
import { dirname, resolve as pathResolve } from 'path';
import { fileURLToPath } from 'url';
// Project root: five directory levels above this file's folder
// (.claude/skills/web-test/scripts/core → project root).
const __fn_state = fileURLToPath(import.meta.url);
export const projectRoot = pathResolve(dirname(__fn_state), '../../../../..');

/**
 * Resolve a user-provided path against the project root instead of the
 * process working directory.
 * @param {string} p — path to resolve (an absolute path is returned normalized)
 * @returns {string} absolute path
 */
export function resolveProjectPath(p) {
  return pathResolve(projectRoot, p);
}
// ──────────────────────────────────────────────────────────────────────────
// Mutable single-session state.
// Consumers import these as live bindings (read-only on their side) and
// change them exclusively through the set*() functions further down.
// ──────────────────────────────────────────────────────────────────────────
export let browser = null;
export let page = null;
export let sessionPrefix = null; // e.g. "http://localhost:8081/bpdemo/ru_RU"
export let seanceId = null;
export let recorder = null; // { cdp, ffmpeg, startTime, outputPath, ffmpegError, captions }
export let lastCaptions = []; // captions of the last completed recording (used by addNarration)
export let lastRecordingDuration = null; // wall-clock duration of the last recording, in seconds
export let highlightMode = false;
export let persistentUserDataDir = null; // temp dir for launchPersistentContext, cleaned on disconnect

// Clipboard preservation: the full clipboard contents (all MIME types) are
// saved right before each writeText+Ctrl+V pair and restored right after.
// Toggled via setPreserveClipboard() from run.mjs.
export let preserveClipboard = true;
export let clipboardWarnLogged = false;

// Setters. The three flag setters coerce their argument to a boolean;
// the others store the value as given.
export function setBrowser(v) { browser = v; }
export function setPage(v) { page = v; }
export function setSessionPrefix(v) { sessionPrefix = v; }
export function setSeanceId(v) { seanceId = v; }
export function setRecorder(v) { recorder = v; }
export function setLastCaptions(v) { lastCaptions = v; }
export function setLastRecordingDuration(v) { lastRecordingDuration = v; }
export function setHighlightMode(v) { highlightMode = Boolean(v); }
export function setPersistentUserDataDir(v) { persistentUserDataDir = v; }
export function setPreserveClipboard(v) { preserveClipboard = Boolean(v); }
export function setClipboardWarnLogged(v) { clipboardWarnLogged = Boolean(v); }
// ──────────────────────────────────────────────────────────────────────────
// Multi-context registry.
// Maps name → { context, page, sessionPrefix, seanceId, recorder,
// lastCaptions, lastRecordingDuration, highlightMode }. createContext()
// fills it in, and the module-level variables above mirror the active slot.
// connect() does NOT touch this Map — it keeps the legacy single-session
// behavior used by exec/run/start.
// ──────────────────────────────────────────────────────────────────────────
export const contexts = new Map();
export let activeContextName = null;

// Isolation mode of the current cmdTest session, fixed by the first
// createContext call:
//   'tab'    — every context shares one persistent context (one window,
//              several tabs, extension loads reliably);
//   'window' — each context gets its own BrowserContext (separate window,
//              full cookie isolation, extension may not load).
export let activeMode = null;

export function setActiveContextName(v) { activeContextName = v; }
export function setActiveMode(v) { activeMode = v; }
// ──────────────────────────────────────────────────────────────────────────
// Constants (all durations are in milliseconds).
// ──────────────────────────────────────────────────────────────────────────
export const LOAD_TIMEOUT = 60_000;
export const INIT_TIMEOUT = 60_000;
export const ACTION_WAIT = 2_000; // fallback minimum wait
export const MAX_WAIT = 10_000; // upper bound when waiting for stability
export const POLL_INTERVAL = 200; // polling interval
export const STABLE_CYCLES = 3; // consecutive stable cycles needed

// ID of the 1C browser extension (stable across versions, defined by the key in manifest.json)
export const EXT_ID = 'pbhelknnhilelbnhfpcjlcabhmfangik';
// ──────────────────────────────────────────────────────────────────────────
// Utilities that only depend on state.
// ──────────────────────────────────────────────────────────────────────────
/**
 * Normalize a string for fuzzy matching: every ё/Ё becomes a lowercase е and
 * every non-breaking space (U+00A0) becomes a regular space.
 * @param {string} s — text to normalize
 * @returns {string} normalized copy
 */
export function normYo(s) {
  const withoutYo = s.replace(/ё/gi, 'е');
  // The NBSP is written as an escape so the otherwise invisible character is explicit.
  return withoutYo.replace(/\u00A0/g, ' ');
}
/**
 * Report whether a browser is attached and the current page is still open.
 * @returns {boolean} result of the underlying isConnected() check, or false
 *   when there is no browser, no page, or the page is closed
 */
export function isConnected() {
  if (!browser) return false;
  if (!page) return false;
  if (page.isClosed()) return false;

  // A Browser (from launch) exposes isConnected() itself.
  const hasOwnCheck = typeof browser.isConnected === 'function';
  if (hasOwnCheck) return browser.isConnected();

  // A BrowserContext (from launchPersistentContext) has no isConnected();
  // ask the Browser it belongs to, if there is one.
  const owner = browser.browser();
  return owner?.isConnected() ?? false;
}
/**
 * Guard for operations that need a live browser and page.
 * @throws {Error} when isConnected() reports false
 */
export function ensureConnected() {
  if (isConnected()) return;
  throw new Error('Browser not connected. Call web_connect first.');
}
/**
 * Return the raw Playwright page object (for advanced scripting in skill mode).
 * @returns {object} the current page
 * @throws {Error} when the browser is not connected (raised by ensureConnected)
 */
export function getPage() {
  // Fail first, before handing out a stale or missing page.
  ensureConnected();
  const activePage = page;
  return activePage;
}
// web-test core/state v1.16 — module-level state for the web-test engine.
// Source: https://github.com/Nikolay-Shirokov/cc-1c-skills
//
// Holds the single browser/page/recorder slot plus the multi-context registry,
// constants, and small state-only utilities (ensureConnected, getPage,
// resolveProjectPath, normYo). Mutable values are exported as `let` bindings
// for live read access from consumer modules; writes go through setters so
// imported bindings stay read-only at the import site.
import { dirname, resolve as pathResolve } from 'path';
import { fileURLToPath } from 'url';
// Project root: five directory levels above this file's folder
// (.claude/skills/web-test/scripts/core → project root).
const __fn_state = fileURLToPath(import.meta.url);
export const projectRoot = pathResolve(dirname(__fn_state), '../../../../..');

/**
 * Resolve a user-provided path against the project root instead of the
 * process working directory.
 * @param {string} p — path to resolve (an absolute path is returned normalized)
 * @returns {string} absolute path
 */
export function resolveProjectPath(p) {
  return pathResolve(projectRoot, p);
}
// ──────────────────────────────────────────────────────────────────────────
// Mutable single-session state.
// Consumers import these as live bindings (read-only on their side) and
// change them exclusively through the set*() functions further down.
// ──────────────────────────────────────────────────────────────────────────
export let browser = null;
export let page = null;
export let sessionPrefix = null; // e.g. "http://localhost:8081/bpdemo/ru_RU"
export let seanceId = null;
export let recorder = null; // { cdp, ffmpeg, startTime, outputPath, ffmpegError, captions }
export let lastCaptions = []; // captions of the last completed recording (used by addNarration)
export let lastRecordingDuration = null; // wall-clock duration of the last recording, in seconds
export let highlightMode = false;
export let persistentUserDataDir = null; // temp dir for launchPersistentContext, cleaned on disconnect

// Clipboard preservation: the full clipboard contents (all MIME types) are
// saved right before each writeText+Ctrl+V pair and restored right after.
// Toggled via setPreserveClipboard() from run.mjs.
export let preserveClipboard = true;
export let clipboardWarnLogged = false;

// Setters. The three flag setters coerce their argument to a boolean;
// the others store the value as given.
export function setBrowser(v) { browser = v; }
export function setPage(v) { page = v; }
export function setSessionPrefix(v) { sessionPrefix = v; }
export function setSeanceId(v) { seanceId = v; }
export function setRecorder(v) { recorder = v; }
export function setLastCaptions(v) { lastCaptions = v; }
export function setLastRecordingDuration(v) { lastRecordingDuration = v; }
export function setHighlightMode(v) { highlightMode = Boolean(v); }
export function setPersistentUserDataDir(v) { persistentUserDataDir = v; }
export function setPreserveClipboard(v) { preserveClipboard = Boolean(v); }
export function setClipboardWarnLogged(v) { clipboardWarnLogged = Boolean(v); }
// ──────────────────────────────────────────────────────────────────────────
// Multi-context registry.
// Maps name → { context, page, sessionPrefix, seanceId, recorder,
// lastCaptions, lastRecordingDuration, highlightMode }. createContext()
// fills it in, and the module-level variables above mirror the active slot.
// connect() does NOT touch this Map — it keeps the legacy single-session
// behavior used by exec/run/start.
// ──────────────────────────────────────────────────────────────────────────
export const contexts = new Map();
export let activeContextName = null;

// Isolation mode of the current cmdTest session, fixed by the first
// createContext call:
//   'tab'    — every context shares one persistent context (one window,
//              several tabs, extension loads reliably);
//   'window' — each context gets its own BrowserContext (separate window,
//              full cookie isolation, extension may not load).
export let activeMode = null;

export function setActiveContextName(v) { activeContextName = v; }
export function setActiveMode(v) { activeMode = v; }
// ──────────────────────────────────────────────────────────────────────────
// Constants (all durations are in milliseconds).
// ──────────────────────────────────────────────────────────────────────────
export const LOAD_TIMEOUT = 60_000;
export const INIT_TIMEOUT = 60_000;
export const ACTION_WAIT = 2_000; // fallback minimum wait
export const MAX_WAIT = 10_000; // upper bound when waiting for stability
export const POLL_INTERVAL = 200; // polling interval
export const STABLE_CYCLES = 3; // consecutive stable cycles needed

// ID of the 1C browser extension (stable across versions, defined by the key in manifest.json)
export const EXT_ID = 'pbhelknnhilelbnhfpcjlcabhmfangik';
// ──────────────────────────────────────────────────────────────────────────
// Utilities that only depend on state.
// ──────────────────────────────────────────────────────────────────────────
/**
 * Normalize a string for fuzzy matching: every ё/Ё becomes a lowercase е and
 * every non-breaking space (U+00A0) becomes a regular space.
 * @param {string} s — text to normalize
 * @returns {string} normalized copy
 */
export function normYo(s) {
  const withoutYo = s.replace(/ё/gi, 'е');
  // The NBSP is written as an escape so the otherwise invisible character is explicit.
  return withoutYo.replace(/\u00A0/g, ' ');
}
/**
 * Report whether a browser is attached and the current page is still open.
 * @returns {boolean} result of the underlying isConnected() check, or false
 *   when there is no browser, no page, or the page is closed
 */
export function isConnected() {
  if (!browser) return false;
  if (!page) return false;
  if (page.isClosed()) return false;

  // A Browser (from launch) exposes isConnected() itself.
  const hasOwnCheck = typeof browser.isConnected === 'function';
  if (hasOwnCheck) return browser.isConnected();

  // A BrowserContext (from launchPersistentContext) has no isConnected();
  // ask the Browser it belongs to, if there is one.
  const owner = browser.browser();
  return owner?.isConnected() ?? false;
}
/**
 * Guard for operations that need a live browser and page.
 * @throws {Error} when isConnected() reports false
 */
export function ensureConnected() {
  if (isConnected()) return;
  throw new Error('Browser not connected. Call web_connect first.');
}
/**
 * Return the raw Playwright page object (for advanced scripting in skill mode).
 * @returns {object} the current page
 * @throws {Error} when the browser is not connected (raised by ensureConnected)
 */
export function getPage() {
  // Fail first, before handing out a stale or missing page.
  ensureConnected();
  const activePage = page;
  return activePage;
}
@@ -0,0 +1,292 @@
// web-test recording/captions v1.16 — Overlay primitives: captions, title slides, image overlays.
// Source: https://github.com/Nikolay-Shirokov/cc-1c-skills
import { existsSync as fsExistsSync, readFileSync } from 'fs';
import { extname } from 'path';
import {
page, recorder, lastCaptions, ensureConnected, resolveProjectPath,
} from '../core/state.mjs';
/**
 * Show a text caption overlay on the page (visible in recording).
 * Calling again updates the text without creating a new element.
 *
 * While a recording is active the caption is also registered for TTS
 * narration, and the call then pauses for the estimated speech duration
 * (at least 2 s) so the video has enough screen time for the voiceover.
 *
 * @param {string} text — caption text
 * @param {object} [opts]
 * @param {'top'|'bottom'} [opts.position='bottom'] — vertical position
 * @param {number} [opts.fontSize=24] — font size in pixels
 * @param {string} [opts.background='rgba(0,0,0,0.7)'] — background color
 * @param {string} [opts.color='#fff'] — text color
 * @param {string|false} [opts.speech] — TTS narration text. Omit to use displayed text,
 *   pass a string for custom narration, or false to skip narration for this caption.
 * @param {string} [opts.voice] — stored as `voice` on the collected caption record when set
 * @returns {Promise<void>}
 */
export async function showCaption(text, opts = {}) {
  ensureConnected();

  // Collect caption for TTS narration if recording
  let smartWaitMs = 0;
  if (recorder && (text.trim() || typeof opts.speech === 'string') && opts.speech !== false) {
    const speech = typeof opts.speech === 'string' ? opts.speech : text;
    // Use video timeline position (accounts for frame duplication) instead of wall-clock
    recorder.captions.push({
      text: text || speech,
      speech,
      time: Math.round(recorder.videoTimeMs),
      ...(opts.voice ? { voice: opts.voice } : {}),
    });
    // Estimate TTS duration and wait so the video has enough screen time for voiceover
    smartWaitMs = Math.max(2000, speech.length * (recorder.speechRate || 70));
  }

  const position = opts.position || 'bottom';
  const fontSize = opts.fontSize || 24;
  const bg = opts.background || 'rgba(0,0,0,0.7)';
  const color = opts.color || '#fff';

  await page.evaluate(({ text, position, fontSize, bg, color }) => {
    let el = document.getElementById('__web_test_caption');
    if (!el) {
      el = document.createElement('div');
      el.id = '__web_test_caption';
      el.style.cssText = `
        position: fixed; left: 0; right: 0; z-index: 99999;
        text-align: center; padding: 12px 24px;
        font-family: Arial, sans-serif; pointer-events: none;
      `;
      document.body.appendChild(el);
    }
    // Pin the requested edge and release the opposite one, so a repeated call
    // with a different position moves the existing element.
    el.style[position === 'top' ? 'top' : 'bottom'] = '20px';
    el.style[position === 'top' ? 'bottom' : 'top'] = 'auto';
    el.style.fontSize = fontSize + 'px';
    el.style.background = bg;
    el.style.color = color;
    el.textContent = text;
  }, { text, position, fontSize, bg, color });

  // Smart TTS wait: pause for estimated speech duration so video has enough screen time.
  // Split into chunks and flush frames periodically — CDP doesn't send screencast frames
  // for static pages, so we must write duplicate frames to keep video timeline in sync.
  if (smartWaitMs > 0) {
    let remaining = smartWaitMs;
    while (remaining > 0) {
      const chunk = Math.min(remaining, 1000);
      await page.waitForTimeout(chunk);
      remaining -= chunk;
      if (recorder?._flushFrames) recorder._flushFrames();
    }
    // `recorder` is a live binding: the recording may have been stopped while
    // we were waiting, so only leave the wait credit when it still exists.
    if (recorder) {
      recorder.captionCredit = { waitedMs: smartWaitMs, at: Date.now() };
    }
  }
}
/**
 * Remove the caption overlay from the page; does nothing when none is shown.
 * @returns {Promise<void>}
 */
export async function hideCaption() {
  ensureConnected();
  const dropOverlay = () => {
    document.getElementById('__web_test_caption')?.remove();
  };
  await page.evaluate(dropOverlay);
}
/**
 * Return a shallow copy of the captions gathered so far: those of the active
 * recording when one is running, otherwise those of the last finished one.
 * @returns {Array<{text: string, speech: string, time: number}>}
 */
export function getCaptions() {
  const source = recorder ? recorder.captions : lastCaptions;
  return Array.from(source);
}
/**
 * Show a full-screen title slide overlay (for video recordings).
 * Repeated calls update the content. Use hideTitleSlide() to remove.
 * Any image overlay currently shown is removed at the same time.
 *
 * @param {string} text Title text (\n → line break)
 * @param {object} [opts]
 * @param {string} [opts.subtitle] Smaller text below the title
 * @param {string} [opts.background] CSS background (default: dark gradient)
 * @param {string} [opts.color] Text color (default: '#fff')
 * @param {number} [opts.fontSize] Title font size in px (default: 36)
 * @param {string|boolean} [opts.speech] TTS narration while recording: a string is
 *   narrated as given, any other truthy value narrates the title text (line breaks
 *   replaced by spaces), omitted/falsy means no narration
 * @param {string} [opts.voice] Stored as `voice` on the collected caption record when set
 * @returns {Promise<void>}
 */
export async function showTitleSlide(text, opts = {}) {
  ensureConnected();
  const {
    subtitle = '',
    background = 'linear-gradient(135deg, #1a1a2e 0%, #16213e 50%, #0f3460 100%)',
    color = '#fff',
    fontSize = 36,
    speech,
  } = opts;

  // Collect caption for TTS narration if recording
  let smartWaitMs = 0;
  if (recorder && speech) {
    const captionText = typeof speech === 'string' ? speech : text.replace(/\n/g, ' ');
    if (captionText) {
      recorder.captions.push({
        text: captionText,
        speech: captionText,
        time: Math.round(recorder.videoTimeMs),
        ...(opts.voice ? { voice: opts.voice } : {}),
      });
      smartWaitMs = Math.max(2000, captionText.length * (recorder.speechRate || 70));
    }
  }

  await page.evaluate(({ text, subtitle, background, color, fontSize }) => {
    let div = document.getElementById('__web_test_title');
    if (!div) {
      div = document.createElement('div');
      div.id = '__web_test_title';
      document.body.appendChild(div);
    }
    div.style.cssText = [
      'position:fixed', 'top:0', 'left:0', 'width:100%', 'height:100%',
      `background:${background}`,
      'display:flex', 'align-items:center', 'justify-content:center',
      'z-index:999999', 'pointer-events:none',
    ].join(';');
    // Remove other overlays to prevent flash between slides
    const img = document.getElementById('__web_test_image');
    if (img) img.remove();
    // Escape the text for use as element content, then turn newlines into <br>.
    const esc = s => s.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/\n/g, '<br>');
    let html = `<div style="font-size:${fontSize}px;font-weight:600;line-height:1.4;">${esc(text)}</div>`;
    if (subtitle) {
      html += `<div style="font-size:${Math.round(fontSize * 0.5)}px;margin-top:16px;opacity:0.7;">${esc(subtitle)}</div>`;
    }
    div.innerHTML = `<div style="text-align:center;max-width:70%;color:${color};font-family:'Segoe UI',Arial,sans-serif;">${html}</div>`;
  }, { text, subtitle, background, color, fontSize });

  // Smart TTS wait (same pattern as showCaption/showImage): wait in chunks of
  // at most 1 s and flush duplicate frames so the video timeline keeps up.
  if (smartWaitMs > 0) {
    let remaining = smartWaitMs;
    while (remaining > 0) {
      const chunk = Math.min(remaining, 1000);
      await page.waitForTimeout(chunk);
      remaining -= chunk;
      if (recorder?._flushFrames) recorder._flushFrames();
    }
    // `recorder` is a live binding: the recording may have been stopped while
    // we were waiting, so only leave the wait credit when it still exists.
    if (recorder) {
      recorder.captionCredit = { waitedMs: smartWaitMs, at: Date.now() };
    }
  }
}
/**
 * Remove the title slide overlay; does nothing when none is shown.
 * @returns {Promise<void>}
 */
export async function hideTitleSlide() {
  ensureConnected();
  const dropOverlay = () => {
    document.getElementById('__web_test_title')?.remove();
  };
  await page.evaluate(dropOverlay);
}
/**
 * Show a full-screen image overlay (e.g. presentation slide screenshot).
 * Reads the image file, base64-encodes it, and renders as a fixed overlay
 * on the page — captured by CDP screencast automatically.
 * Any title slide currently shown is removed at the same time.
 *
 * Style presets:
 * - 'blur' (default) — blurred+dimmed copy as background, image centered with shadow
 * - 'dark' — dark background (#2a2a2a) with shadow
 * - 'light' — white background with shadow
 * - 'full' — image box spans the whole viewport (object-fit: contain), no shadow
 *
 * Custom background overrides the preset (e.g. background: '#003366') and
 * turns the blurred background layer off.
 *
 * @param {string} imagePath — path to the image file (PNG, JPG, etc.), resolved
 *   against the project root
 * @param {object} [opts]
 * @param {'blur'|'dark'|'light'|'full'} [opts.style='blur'] — display style preset
 * @param {string} [opts.background] — custom background color/gradient (overrides style preset)
 * @param {boolean} [opts.shadow] — show drop shadow (default: true for blur/dark/light, false for full)
 * @param {string|false} [opts.speech] — TTS narration text while image is shown.
 *   Pass a non-empty string for narration; anything else skips it (no auto-text for images).
 * @param {string} [opts.voice] — stored as `voice` on the collected caption record when set
 * @returns {Promise<void>}
 * @throws {Error} when the image file does not exist
 */
export async function showImage(imagePath, opts = {}) {
  ensureConnected();
  const style = opts.style || 'blur';
  const speech = opts.speech;

  // Style presets
  const presets = {
    blur: { bg: '#222', fit: 'contain', shadow: true, blur: true },
    dark: { bg: '#2a2a2a', fit: 'contain', shadow: true, blur: false },
    light: { bg: '#ffffff', fit: 'contain', shadow: true, blur: false },
    full: { bg: '#000', fit: 'contain', shadow: false, blur: false },
  };
  const preset = presets[style] || presets.blur;
  const bg = opts.background || preset.bg;
  const fit = preset.fit;
  const shadow = opts.shadow !== undefined ? opts.shadow : preset.shadow;
  const useBlur = opts.background ? false : preset.blur;

  // Read image and base64-encode
  const absPath = resolveProjectPath(imagePath);
  if (!fsExistsSync(absPath)) {
    throw new Error(`showImage: file not found: ${absPath}`);
  }
  const buf = readFileSync(absPath);
  const ext = extname(absPath).toLowerCase().replace('.', '');
  // Unknown or missing extensions are served as PNG.
  const mimeByExt = new Map([
    ['jpg', 'image/jpeg'],
    ['jpeg', 'image/jpeg'],
    ['png', 'image/png'],
    ['gif', 'image/gif'],
    ['webp', 'image/webp'],
    ['svg', 'image/svg+xml'],
  ]);
  const mime = mimeByExt.get(ext) ?? 'image/png';
  const dataUrl = `data:${mime};base64,${buf.toString('base64')}`;

  // Collect caption for TTS narration if recording (only an explicit,
  // non-empty string narrates — images have no text to fall back on)
  let smartWaitMs = 0;
  if (recorder && typeof speech === 'string' && speech) {
    recorder.captions.push({
      text: speech,
      speech,
      time: Math.round(recorder.videoTimeMs),
      ...(opts.voice ? { voice: opts.voice } : {}),
    });
    smartWaitMs = Math.max(2000, speech.length * (recorder.speechRate || 70));
  }

  // Padding: full style uses 100%, others use 92% for breathing room
  const isFull = style === 'full';
  const maxSize = isFull ? '100%' : '92%';

  await page.evaluate(({ dataUrl, fit, bg, useBlur, shadow, maxSize, isFull }) => {
    let div = document.getElementById('__web_test_image');
    if (!div) {
      div = document.createElement('div');
      div.id = '__web_test_image';
      document.body.appendChild(div);
    }
    // Remove other overlays to prevent flash between slides
    const title = document.getElementById('__web_test_title');
    if (title) title.remove();
    div.style.cssText = [
      'position:fixed', 'top:0', 'left:0', 'width:100%', 'height:100%',
      `background:${bg}`,
      'display:flex', 'align-items:center', 'justify-content:center',
      'z-index:999999', 'pointer-events:none', 'overflow:hidden'
    ].join(';');
    let html = '';
    // Blurred background layer: the same image stretched to cover, blurred and dimmed
    if (useBlur) {
      html += `<img src="${dataUrl}" style="position:absolute;top:0;left:0;width:100%;height:100%;object-fit:cover;filter:blur(30px) brightness(0.5);transform:scale(1.1);" />`;
    }
    // Main image
    const shadowCss = shadow ? 'box-shadow:0 4px 40px rgba(0,0,0,0.5);' : '';
    const sizeCss = isFull
      ? `width:100%;height:100%;object-fit:${fit};`
      : `max-width:${maxSize};max-height:${maxSize};min-width:50%;min-height:50%;object-fit:${fit};`;
    html += `<img src="${dataUrl}" style="position:relative;${sizeCss}${shadowCss}" />`;
    div.innerHTML = html;
  }, { dataUrl, fit, bg, useBlur, shadow, maxSize, isFull });

  // Smart TTS wait (same pattern as showCaption): wait in chunks of at most
  // 1 s and flush duplicate frames so the video timeline keeps up.
  if (smartWaitMs > 0) {
    let remaining = smartWaitMs;
    while (remaining > 0) {
      const chunk = Math.min(remaining, 1000);
      await page.waitForTimeout(chunk);
      remaining -= chunk;
      if (recorder?._flushFrames) recorder._flushFrames();
    }
    // `recorder` is a live binding: the recording may have been stopped while
    // we were waiting, so only leave the wait credit when it still exists.
    if (recorder) {
      recorder.captionCredit = { waitedMs: smartWaitMs, at: Date.now() };
    }
  }
}
/**
 * Remove the image overlay; does nothing when none is shown.
 * @returns {Promise<void>}
 */
export async function hideImage() {
  ensureConnected();
  const dropOverlay = () => {
    document.getElementById('__web_test_image')?.remove();
  };
  await page.evaluate(dropOverlay);
}
@@ -0,0 +1,244 @@
// web-test recording/capture v1.16 — Recording lifecycle (CDP screencast + ffmpeg pipe), screenshot, wait helpers.
// Source: https://github.com/Nikolay-Shirokov/cc-1c-skills
import { spawn } from 'child_process';
import { mkdirSync, statSync, writeFileSync } from 'fs';
import { dirname } from 'path';
import {
page, recorder, lastCaptions,
setRecorder, setLastCaptions, setLastRecordingDuration,
resolveProjectPath, ensureConnected,
} from '../core/state.mjs';
import { resolveFfmpeg } from './tts.mjs';
// getFormState lives in browser.mjs for now (moves to forms/ in a later stage).
// Imported lazily inside wait() to avoid initialization-time circular deps.
/**
 * Capture the current page as a PNG image.
 * @returns {Promise<Buffer>} PNG data as returned by page.screenshot()
 * @throws {Error} when the browser is not connected
 */
export async function screenshot() {
  ensureConnected();
  const options = { type: 'png' };
  const png = await page.screenshot(options);
  return png;
}
/**
 * Wait for a specified number of seconds, then report the form state.
 *
 * While recording, time already spent in a caption's smart TTS wait is
 * credited against this wait, and waits longer than one second are split
 * into chunks with a frame flush after each one.
 *
 * @param {number} seconds — how long to wait
 * @returns {Promise<*>} whatever getFormState() from browser.mjs returns
 * @throws {Error} when the browser is not connected
 */
export async function wait(seconds) {
  ensureConnected();
  let ms = seconds * 1000;

  // Credit system: if showCaption already waited for TTS, subtract that time
  if (recorder && recorder.captionCredit) {
    const elapsed = Date.now() - recorder.captionCredit.at;
    const credit = Math.max(0, recorder.captionCredit.waitedMs - elapsed);
    ms = Math.max(0, ms - credit);
    recorder.captionCredit = null; // a credit is consumed by the first wait() after it
  }

  if (ms > 0) {
    // During recording, split long waits into chunks and flush frames
    // to keep video timeline in sync (CDP may not send frames for static pages)
    if (recorder?._flushFrames && ms > 1000) {
      let remaining = ms;
      while (remaining > 0) {
        const chunk = Math.min(remaining, 1000);
        await page.waitForTimeout(chunk);
        remaining -= chunk;
        // `recorder` is a live binding: the recording may have been stopped
        // during the chunk, so re-check it before every flush.
        recorder?._flushFrames?.();
      }
    } else {
      await page.waitForTimeout(ms);
    }
  }

  // Imported lazily to avoid an initialization-time circular dependency.
  const { getFormState } = await import('../browser.mjs');
  return await getFormState();
}
// ============================================================
// Video recording — CDP screencast + ffmpeg
// ============================================================
/**
 * Tell whether a video recording is currently in progress.
 * @returns {boolean} true while the recorder slot is occupied
 */
export function isRecording() {
  const idle = recorder === null;
  return !idle;
}
/**
 * Start video recording via CDP screencast + ffmpeg.
 * Frames are captured as JPEG and piped to ffmpeg for MP4 encoding.
 * Resets the captions and duration remembered from the previous recording.
 *
 * @param {string} outputPath — output .mp4 file path (resolved against the project root)
 * @param {object} [opts]
 * @param {number} [opts.fps=25] — target framerate
 * @param {number} [opts.quality=80] — JPEG quality (1-100)
 * @param {string} [opts.ffmpegPath] — explicit path to ffmpeg binary
 * @param {boolean} [opts.force] — stop a recording already in progress instead of throwing
 * @param {number} [opts.speechRate=70] — ms per character used by the smart TTS wait
 * @returns {Promise<void>}
 * @throws {Error} when already recording without `force`, or when the
 *   screencast cannot be attached to the page
 */
export async function startRecording(outputPath, opts = {}) {
  ensureConnected();
  if (recorder) {
    if (opts.force) {
      // Best-effort: a failed stop must not prevent the forced restart.
      try { await stopRecording(); } catch {}
    } else {
      throw new Error('Already recording. Call stopRecording() first, or use { force: true }.');
    }
  }
  setLastCaptions([]);
  setLastRecordingDuration(null);

  const fps = opts.fps || 25;
  const quality = opts.quality || 80;
  const ffmpegPath = resolveFfmpeg(opts.ffmpegPath);

  // Ensure output directory exists
  const resolvedPath = resolveProjectPath(outputPath);
  mkdirSync(dirname(resolvedPath), { recursive: true });

  // Spawn ffmpeg process — single output file across context switches
  const ffmpeg = spawn(ffmpegPath, [
    '-y',                                           // overwrite output
    '-f', 'image2pipe',                             // input: piped images
    '-framerate', String(fps),                      // input framerate
    '-i', '-',                                      // read from stdin
    '-c:v', 'libx264',                              // H.264 codec
    '-preset', 'fast',                              // good quality/speed balance
    '-crf', '23',                                   // default quality (good for screen content)
    '-vf', 'scale=in_range=full:out_range=limited', // JPEG full→H.264 limited range
    '-pix_fmt', 'yuv420p',                          // broad compatibility
    '-color_range', 'tv',                           // limited range (16-235) — standard for H.264 players
    '-movflags', '+faststart',                      // web-friendly MP4
    resolvedPath
  ], { stdio: ['pipe', 'ignore', 'pipe'] });

  ffmpeg.on('error', (err) => { if (recorder) recorder.ffmpegError += err.message; });
  // If ffmpeg exits early, writes to its stdin fail (e.g. EPIPE). Without a
  // listener that stream 'error' would surface as an uncaught exception, so
  // record it next to the other ffmpeg diagnostics instead.
  ffmpeg.stdin.on('error', (err) => { if (recorder) recorder.ffmpegError += err.message; });

  const frameDuration = 1000 / fps;
  const speechRate = opts.speechRate || 70; // ms per character for smart TTS wait

  // Frame handler shared across CDP sessions (lives in recorder, not closure):
  // when the active context switches, we attach a new CDP session and route its
  // frames to the same ffmpeg pipe — preserving a single continuous timeline.
  const frameHandler = async ({ data, sessionId }, cdp) => {
    if (!recorder) return;
    const buf = Buffer.from(data, 'base64');
    const now = Date.now();
    if (!ffmpeg.stdin.destroyed) {
      let framesWritten = 0;
      if (recorder.lastFrameTime && recorder.lastFrameBuf) {
        // Repeat the previous frame to cover the wall-clock gap since it
        // arrived (capped at 30 seconds' worth of frames).
        const gap = now - recorder.lastFrameTime;
        const dupes = Math.round(gap / frameDuration) - 1;
        for (let i = 0; i < dupes && i < fps * 30; i++) {
          ffmpeg.stdin.write(recorder.lastFrameBuf);
          framesWritten++;
        }
      }
      ffmpeg.stdin.write(buf);
      framesWritten++;
      recorder.videoTimeMs += framesWritten * frameDuration;
    }
    recorder.lastFrameTime = now;
    recorder.lastFrameBuf = buf;
    // Best-effort ack: the session may already be detached.
    try { await cdp.send('Page.screencastFrameAck', { sessionId }); } catch {}
  };

  // Duplicate the last frame to fill wall-clock gaps (static periods, context switches).
  const _flushFrames = () => {
    if (!recorder || !recorder.lastFrameBuf || !recorder.lastFrameTime || ffmpeg.stdin.destroyed) return;
    const now = Date.now();
    const gap = now - recorder.lastFrameTime;
    const dupes = Math.round(gap / frameDuration);
    for (let i = 0; i < dupes; i++) {
      ffmpeg.stdin.write(recorder.lastFrameBuf);
      recorder.videoTimeMs += frameDuration;
    }
    if (dupes > 0) recorder.lastFrameTime = now;
  };

  // Attach screencast to a specific page. Stops the old CDP first (if any).
  // Called by startRecording for the initial page, and by setActiveContext when
  // the active context changes mid-recording.
  const _attachPage = async (targetPage) => {
    if (recorder.cdp) {
      _flushFrames(); // freeze the last frame of the outgoing page up to "now"
      try { await recorder.cdp.send('Page.stopScreencast'); } catch {}
      try { await recorder.cdp.detach(); } catch {}
      recorder.cdp = null;
    }
    const cdp = await targetPage.context().newCDPSession(targetPage);
    cdp.on('Page.screencastFrame', (ev) => frameHandler(ev, cdp));
    await cdp.send('Page.startScreencast', { format: 'jpeg', quality, everyNthFrame: 1 });
    recorder.cdp = cdp;
    recorder.activePage = targetPage;
  };

  setRecorder({
    cdp: null,
    activePage: null,
    ffmpeg,
    startTime: Date.now(),
    outputPath: resolvedPath,
    ffmpegError: '',
    captions: [],
    videoTimeMs: 0,
    frameDuration,
    lastFrameTime: null,
    lastFrameBuf: null,
    _flushFrames,
    _attachPage,
    speechRate,
  });

  // Guarded like the 'error' handler above: stderr output can still arrive
  // after the recorder slot has been cleared.
  ffmpeg.stderr.on('data', (d) => { if (recorder) recorder.ffmpegError += d.toString(); });

  try {
    await _attachPage(page);
  } catch (err) {
    // The screencast never started: release the slot and the encoder process
    // rather than leaving a recording that can never receive a frame.
    setRecorder(null);
    ffmpeg.kill();
    throw err;
  }
}
/**
 * Stop video recording. Finalizes the MP4 file, remembers the captions and
 * duration for addNarration(), and — when captions were collected — writes
 * them to a `.captions.json` file next to the video.
 *
 * The recorder slot is released even when finalizing fails, so a broken
 * encoder cannot leave the engine permanently "recording".
 *
 * @returns {Promise<{ file: string|null, duration: number, size: number, captions?: number }>}
 *   `{ file: null, duration: 0, size: 0 }` when nothing was being recorded;
 *   otherwise the output path, duration in seconds (one decimal), file size
 *   in bytes and the number of captions collected
 * @throws {Error} when ffmpeg exits with a non-zero code, emits an error, or
 *   does not finish within 30 seconds
 */
export async function stopRecording() {
  if (!recorder) return { file: null, duration: 0, size: 0 };

  // Work on a snapshot: `recorder` is a live binding that can change across awaits.
  const rec = recorder;
  const { cdp, ffmpeg, startTime, outputPath } = rec;

  // Final frame flush: write remaining frames to cover the gap since the last screencast frame
  if (rec._flushFrames) rec._flushFrames();

  // Stop CDP screencast (best-effort: the session or page may already be gone)
  try { await cdp.send('Page.stopScreencast'); } catch {}
  try { await cdp.detach(); } catch {}

  try {
    // Close ffmpeg stdin and wait for encoding to finish
    await new Promise((resolve, reject) => {
      const timeout = setTimeout(() => {
        ffmpeg.kill('SIGKILL');
        reject(new Error('ffmpeg timed out after 30s'));
      }, 30000);
      ffmpeg.on('close', (code) => {
        clearTimeout(timeout);
        if (code === 0) resolve();
        else reject(new Error(`ffmpeg exited with code ${code}: ${rec.ffmpegError || ''}`));
      });
      ffmpeg.on('error', (err) => {
        clearTimeout(timeout);
        reject(err);
      });
      ffmpeg.stdin.end();
    });

    const duration = (Date.now() - startTime) / 1000;
    const stats = statSync(outputPath);

    // Preserve captions for addNarration()
    const captions = rec.captions || [];
    setLastCaptions(captions);
    setLastRecordingDuration(duration);
    if (captions.length) {
      const captionsPath = outputPath.replace(/\.[^.]+$/, '.captions.json');
      const captionsData = { recordingDuration: duration, videoTimestamps: true, captions };
      writeFileSync(captionsPath, JSON.stringify(captionsData, null, 2), 'utf-8');
    }

    return {
      file: outputPath,
      duration: Math.round(duration * 10) / 10,
      size: stats.size,
      captions: captions.length,
    };
  } catch (err) {
    // The slot is about to be cleared: detach the stderr listeners first so
    // late output from a killed ffmpeg cannot touch a recorder that is gone,
    // and keep the collected captions readable through getCaptions().
    ffmpeg.stderr?.removeAllListeners('data');
    setLastCaptions(rec.captions || []);
    throw err;
  } finally {
    setRecorder(null);
  }
}
@@ -0,0 +1,340 @@
// web-test recording/highlight v1.16 — Visual highlight overlay (single + auto-mode for clickElement/fillFields/selectValue).
// Source: https://github.com/Nikolay-Shirokov/cc-1c-skills
import {
page, highlightMode, ensureConnected, normYo,
setHighlightMode,
} from '../core/state.mjs';
import {
readSubmenuScript, detectFormScript, resolveGridScript,
findClickTargetScript, resolveFieldsScript,
} from '../dom.mjs';
/**
 * Highlight an element on the page (visual accent for video recordings).
 * Uses an overlay div for visibility (not clipped by overflow:hidden). When the
 * target resolves to an element id, a requestAnimationFrame loop keeps the
 * overlay aligned with layout shifts (async banners etc); open-submenu items and
 * command groups get a one-shot overlay placed at their current rectangle.
 *
 * Lookup order: open submenu/popup items, function-panel commands, command
 * group headers, form groups/panels, form buttons/links/grid rows, form fields,
 * sidebar sections. Any previous highlight is removed first.
 *
 * @param {string} text Element text/label (fuzzy match, same as clickElement/fillFields)
 * @param {object} [opts]
 * @param {string} [opts.color] Outline color (default: '#e74c3c')
 * @param {number} [opts.padding] Extra padding around element (default: 4)
 * @param {string} [opts.table] Table (grid) name used to scope the form search
 * @returns {Promise<void>}
 * @throws {Error} When nothing matches; the message lists the names that are available
 */
export async function highlight(text, opts = {}) {
  ensureConnected();
  const { color = '#e74c3c', padding = 4, table } = opts;

  // Remove previous highlight first
  await unhighlight();

  let elId = null;

  // 0. Open submenu/popup — highest priority (submenu overlays the form,
  //    so form search would match grid rows behind the popup)
  const popupItems = await page.evaluate(readSubmenuScript());
  if (Array.isArray(popupItems) && popupItems.length > 0) {
    const target = normYo(text.toLowerCase());
    let found = popupItems.find(i => normYo(i.name.toLowerCase()) === target);
    if (!found) found = popupItems.find(i => normYo(i.name.toLowerCase()).startsWith(target));
    if (!found) found = popupItems.find(i => normYo(i.name.toLowerCase()).includes(target));
    if (found) {
      // 1C duplicates IDs in clouds — getElementById returns the hidden copy.
      // Use elementFromPoint to find the visible element and get its actual rect.
      await page.evaluate(({ x, y, color, padding }) => {
        const el = document.elementFromPoint(x, y);
        if (!el) return;
        const block = el.closest('.submenuBlock') || el.closest('a.press') || el;
        const r = block.getBoundingClientRect();
        let div = document.getElementById('__web_test_highlight');
        if (!div) {
          div = document.createElement('div');
          div.id = '__web_test_highlight';
          document.body.appendChild(div);
        }
        div.style.cssText = [
          'position:fixed', 'pointer-events:none', 'z-index:999998',
          `top:${r.y - padding}px`, `left:${r.x - padding}px`,
          `width:${r.width + padding * 2}px`, `height:${r.height + padding * 2}px`,
          `outline:3px solid ${color}`, 'border-radius:4px',
          `box-shadow:0 0 16px ${color}80`,
        ].join(';');
      }, { x: found.x, y: found.y, color, padding });
      return; // overlay placed, done
    }
  }

  // 1. Visible commands on the function panel (cmd_XXX_txt elements)
  //    Must be checked BEFORE form search: when the section content panel
  //    is showing, the form behind it is hidden but detectFormScript still
  //    finds it, and form buttons match before commands.
  if (!elId) {
    elId = await page.evaluate(`(() => {
      const norm = s => (s?.trim().replace(/\\u00a0/g, ' ') || '').replace(/ё/gi, 'е');
      const target = ${JSON.stringify(normYo(text.toLowerCase()))};
      const cmds = [...document.querySelectorAll('[id^="cmd_"][id$="_txt"]')].filter(e => e.offsetWidth > 0);
      if (cmds.length === 0) return null;
      let el = cmds.find(e => norm(e.innerText).toLowerCase() === target);
      if (!el) el = cmds.find(e => norm(e.innerText).toLowerCase().startsWith(target));
      if (!el) el = cmds.find(e => norm(e.innerText).toLowerCase().includes(target));
      return el ? el.id : null;
    })()`);
  }

  // 1b. Command group headers on the function panel (eAccentColor labels).
  //     Match header text, then highlight the header + commands below it
  //     until the next spacer/header/end.
  if (!elId) {
    const groupDone = await page.evaluate(({ target, color, padding }) => {
      const container = document.querySelector('#funcPanel_container');
      if (!container) return false;
      const norm = s => (s?.trim().replace(/\u00a0/g, ' ') || '').replace(/ё/gi, 'е').toLowerCase();
      const headers = [...container.querySelectorAll('.eAccentColor')].filter(e => e.offsetWidth > 0);
      if (!headers.length) return false;
      let headerEl = headers.find(h => norm(h.textContent) === target);
      if (!headerEl) headerEl = headers.find(h => norm(h.textContent).startsWith(target));
      if (!headerEl) headerEl = headers.find(h => norm(h.textContent).includes(target));
      if (!headerEl) return false;
      // Collect header + following cmd siblings until next spacer/header
      const parent = headerEl.parentElement;
      const children = [...parent.children];
      const startIdx = children.indexOf(headerEl);
      const groupEls = [headerEl];
      for (let i = startIdx + 1; i < children.length; i++) {
        const el = children[i];
        if (el.classList.contains('eAccentColor')) break;
        if (!el.id && !el.classList.contains('functionItem') && el.getBoundingClientRect().width < 10) break;
        groupEls.push(el);
      }
      // Bounding box
      let minX = Infinity, minY = Infinity, maxX = -Infinity, maxY = -Infinity;
      for (const el of groupEls) {
        const r = el.getBoundingClientRect();
        if (r.width === 0 && r.height === 0) continue;
        minX = Math.min(minX, r.left); minY = Math.min(minY, r.top);
        maxX = Math.max(maxX, r.right); maxY = Math.max(maxY, r.bottom);
      }
      if (minX === Infinity) return false;
      let div = document.getElementById('__web_test_highlight');
      if (!div) { div = document.createElement('div'); div.id = '__web_test_highlight'; document.body.appendChild(div); }
      div.style.cssText = [
        'position:fixed', 'pointer-events:none', 'z-index:999998',
        `top:${minY - padding}px`, `left:${minX - padding}px`,
        `width:${maxX - minX + padding * 2}px`, `height:${maxY - minY + padding * 2}px`,
        `outline:3px solid ${color}`, 'border-radius:4px',
        `box-shadow:0 0 16px ${color}80`,
      ].join(';');
      return true;
    }, { target: normYo(text.toLowerCase()), color, padding });
    if (groupDone) return;
  }

  // 2. Form groups/panels — checked BEFORE buttons/fields because group names
  //    often collide with command bar buttons (e.g. "БизнесПроцессы" is both a
  //    panel and a command bar element). Includes _container and _div elements
  //    but skips logicGroupContainer (Representation=None, height=0).
  if (!elId) {
    const formNum = await page.evaluate(detectFormScript());
    if (formNum !== null) {
      elId = await page.evaluate(`(() => {
        const norm = s => (s?.trim().replace(/\\u00a0/g, ' ') || '').replace(/ё/gi, 'е');
        const target = ${JSON.stringify(normYo(text.toLowerCase()))};
        const p = 'form' + ${formNum} + '_';
        // Group containers: _container or _div, but skip logicGroupContainer (invisible groups)
        const groups = [...document.querySelectorAll('[id^="' + p + '"][id$="_container"], [id^="' + p + '"][id$="_div"]')]
          .filter(el => el.offsetWidth > 0 && el.offsetHeight > 0 && !el.classList.contains('logicGroupContainer'));
        const items = groups.map(el => {
          const idName = el.id.replace(p, '').replace(/_(container|div)$/, '');
          const titleEl = document.getElementById(p + idName + '#title_text')
            || document.getElementById(p + idName + '_title_text');
          const label = norm(titleEl?.innerText || '').toLowerCase();
          const name = norm(idName).toLowerCase();
          const big = el.offsetWidth >= 100 && el.offsetHeight >= 50;
          return { id: el.id, name, label, big };
        });
        let found = items.find(i => i.label === target);
        if (!found) found = items.find(i => i.name === target);
        // Fuzzy match: only large groups (min 100x50) to avoid matching command bars
        if (!found) found = items.filter(i => i.big).find(i => i.label.startsWith(target) || i.name.startsWith(target));
        if (!found && target.length >= 4) found = items.filter(i => i.big).find(i => i.label.includes(target) || i.name.includes(target));
        return found ? found.id : null;
      })()`);
    }
  }

  // 3. Form-scoped search (buttons, links, fields, grid rows)
  if (!elId) {
    const formNum = await page.evaluate(detectFormScript());
    if (formNum !== null) {
      // 3a. Try button/link/tab/gridRow via findClickTargetScript
      let gridSelector;
      if (table) {
        const resolved = await page.evaluate(resolveGridScript(formNum, table));
        if (!resolved.error) gridSelector = resolved.gridSelector;
      }
      const target = await page.evaluate(findClickTargetScript(formNum, text, table ? { tableName: table, gridSelector } : undefined));
      if (target && !target.error) {
        if (target.id) {
          elId = target.id;
        } else if (target.x && target.y) {
          // Grid row — find the gridLine element and tag it
          elId = await page.evaluate(`(() => {
            const p = ${JSON.stringify(`form${formNum}_`)};
            const grid = document.querySelector('[id^="' + p + '"].grid');
            if (!grid) return null;
            const body = grid.querySelector('.gridBody');
            if (!body) return null;
            const norm = s => (s?.trim().replace(/\\u00a0/g, ' ') || '').replace(/ё/gi, 'е');
            const target = ${JSON.stringify(normYo(text.toLowerCase()))};
            for (const line of body.querySelectorAll('.gridLine')) {
              const cells = [...line.querySelectorAll('.gridBoxText')].filter(b => b.offsetWidth > 0);
              const rowText = cells.map(b => b.innerText?.trim() || '').join(' ').toLowerCase().replace(/ё/gi, 'е');
              if (rowText.includes(target)) {
                if (!line.id) line.id = '__wt_hl_tmp';
                return line.id;
              }
            }
            return null;
          })()`);
        }
      }
      // 3b. If not found as button — try as field via resolveFieldsScript
      if (!elId) {
        const dummyFields = { [text]: '' };
        const resolved = await page.evaluate(resolveFieldsScript(formNum, dummyFields));
        if (resolved?.length > 0 && !resolved[0].error && resolved[0].inputId) {
          elId = resolved[0].inputId;
        }
      }
    }
  }

  // 4. Fallback: sections (sidebar navigation)
  if (!elId) {
    elId = await page.evaluate(`(() => {
      const norm = s => (s?.trim().replace(/\\u00a0/g, ' ') || '').replace(/ё/gi, 'е');
      const target = ${JSON.stringify(normYo(text.toLowerCase()))};
      const secs = [...document.querySelectorAll('[id^="themesCell_theme_"]')];
      let el = secs.find(e => norm(e.innerText).toLowerCase() === target);
      if (!el) el = secs.find(e => norm(e.innerText).toLowerCase().startsWith(target));
      if (!el) el = secs.find(e => norm(e.innerText).toLowerCase().includes(target));
      return el ? el.id : null;
    })()`);
  }

  if (!elId) {
    // Collect available elements to help the caller fix the name
    const available = await page.evaluate(`(() => {
      const norm = s => (s?.trim().replace(/\\u00a0/g, ' ') || '').replace(/ё/gi, 'е');
      const result = {};
      // Commands
      const cmds = [...document.querySelectorAll('[id^="cmd_"][id$="_txt"]')].filter(e => e.offsetWidth > 0).map(e => norm(e.innerText));
      if (cmds.length) result.commands = cmds;
      // Command group headers
      const fp = document.querySelector('#funcPanel_container');
      if (fp) {
        const gh = [...fp.querySelectorAll('.eAccentColor')].filter(e => e.offsetWidth > 0).map(e => norm(e.textContent));
        if (gh.length) result.commandGroups = gh;
      }
      // Sections
      const secs = [...document.querySelectorAll('[id^="themesCell_theme_"]')].map(e => norm(e.innerText)).filter(Boolean);
      if (secs.length) result.sections = secs;
      // Form elements (form detected inline to avoid an extra evaluate round-trip)
      const forms = {};
      document.querySelectorAll('[id^="form"]').forEach(el => {
        const m = el.id.match(/^form(\\d+)_/);
        if (m) forms[m[1]] = (forms[m[1]] || 0) + 1;
      });
      let formNum = null, maxCount = 0;
      for (const [n, c] of Object.entries(forms)) {
        if (parseInt(n) > 0 && c > maxCount) { maxCount = c; formNum = n; }
      }
      if (formNum !== null) {
        const p = 'form' + formNum + '_';
        // Groups (_container or _div, skip logicGroupContainer, min 100x50)
        const groups = [...document.querySelectorAll('[id^="' + p + '"][id$="_container"], [id^="' + p + '"][id$="_div"]')]
          .filter(el => el.offsetWidth >= 100 && el.offsetHeight >= 50 && !el.classList.contains('logicGroupContainer'))
          .map(el => {
            const idName = el.id.replace(p, '').replace(/_(container|div)$/, '');
            const titleEl = document.getElementById(p + idName + '#title_text') || document.getElementById(p + idName + '_title_text');
            return norm(titleEl?.innerText || '') || idName;
          }).filter(Boolean);
        if (groups.length) result.groups = groups;
        // Buttons/links
        const btns = [...document.querySelectorAll('[id^="' + p + '"].btnText, [id^="' + p + '"] .btnText, [id^="' + p + '"].hplnk')]
          .filter(el => el.offsetWidth > 0).map(el => norm(el.innerText)).filter(Boolean);
        if (btns.length) result.buttons = [...new Set(btns)];
      }
      return result;
    })()`);
    const parts = [];
    for (const [cat, items] of Object.entries(available)) {
      parts.push(`  ${cat}: ${items.join(', ')}`);
    }
    const hint = parts.length ? `\nAvailable:\n${parts.join('\n')}` : '';
    throw new Error(`highlight: "${text}" not found${hint}`);
  }

  // Overlay div + rAF tracking loop (not clipped by overflow:hidden, follows layout shifts)
  await page.evaluate(({ elId, color, padding }) => {
    const target = document.getElementById(elId);
    if (!target) return;
    let div = document.getElementById('__web_test_highlight');
    if (!div) {
      div = document.createElement('div');
      div.id = '__web_test_highlight';
      document.body.appendChild(div);
    }
    function sync() {
      const r = target.getBoundingClientRect();
      div.style.cssText = [
        'position:fixed', 'pointer-events:none', 'z-index:999998',
        `top:${r.y - padding}px`, `left:${r.x - padding}px`,
        `width:${r.width + padding * 2}px`, `height:${r.height + padding * 2}px`,
        `outline:3px solid ${color}`, 'border-radius:4px',
        `box-shadow:0 0 16px ${color}80`,
      ].join(';');
    }
    sync();
    // Track position changes via rAF
    function tick() {
      // Stop as soon as THIS overlay node leaves the document. Looking the id up
      // again would keep an old loop alive forever when a newer highlight has
      // already re-created an element with the same id before the next frame.
      if (!div.isConnected) return;
      sync();
      requestAnimationFrame(tick);
    }
    requestAnimationFrame(tick);
  }, { elId, color, padding });
}
/**
 * Remove the highlight overlay from the page, if there is one.
 * Taking the overlay out of the document also ends the position-tracking
 * loop that highlight() starts for it.
 * @returns {Promise<void>}
 */
export async function unhighlight() {
  ensureConnected();
  await page.evaluate(() => {
    document.getElementById('__web_test_highlight')?.remove();
    // A grid row that had no id of its own was tagged temporarily — untag it
    document.getElementById('__wt_hl_tmp')?.removeAttribute('id');
  });
}
/**
 * Switch auto-highlight mode on or off. While it is on, clickElement/fillFields/
 * selectValue highlight their target element automatically before acting.
 * @param {boolean} on Truthy enables the mode, falsy disables it (coerced to a boolean)
 */
export function setHighlight(on) {
  const enabled = Boolean(on);
  setHighlightMode(enabled);
}
/**
 * Report whether auto-highlight mode is active.
 * Reads the `highlightMode` binding imported from core/state, which
 * setHighlight() updates through setHighlightMode().
 * @returns {boolean} Whether auto-highlight mode is active.
 */
export function isHighlightMode() {
return highlightMode;
}
@@ -0,0 +1,196 @@
// web-test recording/narration v1.16 — Post-process: generate TTS audio for captions and merge with recorded video.
// Source: https://github.com/Nikolay-Shirokov/cc-1c-skills
import { execFileSync } from 'child_process';
import { existsSync as fsExistsSync, mkdirSync, readFileSync, rmSync, statSync } from 'fs';
import { extname, join as pathJoin } from 'path';
import { tmpdir } from 'os';
import {
lastCaptions, lastRecordingDuration, resolveProjectPath,
} from '../core/state.mjs';
import {
resolveFfmpeg, getTtsProvider, getAudioDuration, generateSilence,
} from './tts.mjs';
/**
 * Add TTS narration to a recorded video.
 * Generates speech for every caption, places each clip at its caption timestamp
 * and muxes the resulting audio track with the (stream-copied) video.
 *
 * Caption source priority: `opts.captions` > captions kept from the last
 * recording > `<video>.captions.json` next to the video file.
 *
 * @param {string} videoPath — path to the recorded MP4 file (resolved against the project root)
 * @param {object} [opts]
 * @param {Array<{text: string, speech: string, time: number, voice?: string}>} [opts.captions] — explicit captions (default: from last recording or .captions.json). `time` is in milliseconds. Each caption may include a `voice` field to override the global voice for that segment
 * @param {string} [opts.provider='edge'] — TTS provider: 'edge', 'openai' or 'elevenlabs'
 * @param {string} [opts.voice] — voice name (provider-specific)
 * @param {string} [opts.apiKey] — API key (for openai/elevenlabs providers)
 * @param {string} [opts.apiUrl] — API endpoint (for openai/elevenlabs providers)
 * @param {string} [opts.model] — model name (provider-specific, e.g. 'tts-1' for openai)
 * @param {string} [opts.ffmpegPath] — path to ffmpeg binary
 * @param {string} [opts.outputPath] — output file path (default: video-narrated.mp4)
 * @returns {Promise<{ file: string, duration: number, size: number, captions: number, warnings?: string[] }>}
 * @throws {Error} When no captions are available, the video file is missing, or its duration cannot be read
 */
export async function addNarration(videoPath, opts = {}) {
  if (!videoPath) return { file: null, duration: 0, size: 0, captions: 0 };
  const videoFile = resolveProjectPath(videoPath);
  const ffmpegPath = resolveFfmpeg(opts.ffmpegPath);
  const ttsProvider = getTtsProvider(opts.provider || 'edge');
  const ttsOpts = { voice: opts.voice, apiKey: opts.apiKey, apiUrl: opts.apiUrl, model: opts.model };

  // Resolve captions: explicit > lastCaptions > .captions.json
  let captions = opts.captions;
  let videoTimestamps = true;   // new recordings use video-time timestamps (no scaling needed)
  let recordingDuration = null; // wall-clock duration (for legacy scaling fallback)
  if (!captions || !captions.length) {
    if (lastCaptions.length) {
      captions = [...lastCaptions];
      recordingDuration = lastRecordingDuration;
      // Runtime captions always use video timestamps (set in showCaption)
    }
  }
  if (!captions || !captions.length) {
    const captionsJsonPath = videoFile.replace(/\.[^.]+$/, '.captions.json');
    if (fsExistsSync(captionsJsonPath)) {
      const raw = JSON.parse(readFileSync(captionsJsonPath, 'utf-8'));
      // Support formats: array (old), { recordingDuration, captions } (v2), { videoTimestamps, captions } (v3)
      if (Array.isArray(raw)) {
        captions = raw;
        videoTimestamps = false;
      } else {
        captions = raw.captions;
        videoTimestamps = !!raw.videoTimestamps;
        recordingDuration = raw.recordingDuration || null;
      }
    }
  }
  if (!captions || !captions.length) {
    throw new Error('No captions available. Record with showCaption() first, or pass opts.captions.');
  }

  // Fail with a clear message instead of an opaque ffprobe error
  if (!fsExistsSync(videoFile)) {
    throw new Error(`Video file not found: ${videoFile}`);
  }
  const videoDuration = getAudioDuration(videoFile, ffmpegPath);
  // A zero/unknown duration would silently produce a 0-second narration and output
  if (!(videoDuration > 0)) {
    throw new Error(`Could not determine duration of video: ${videoFile}`);
  }

  // Legacy fallback: scale wall-clock timestamps to video duration
  // (only for old captions without videoTimestamps flag)
  if (!videoTimestamps && recordingDuration && recordingDuration > 0) {
    const timeScale = videoDuration / recordingDuration;
    if (Math.abs(timeScale - 1) > 0.005) {
      captions = captions.map(c => ({ ...c, time: Math.round(c.time * timeScale) }));
    }
  }

  // Output path. slice(0, -0) would yield '' for an extension-less path, so
  // only strip the extension when there is one.
  const ext = extname(videoFile);
  const base = ext ? videoFile.slice(0, -ext.length) : videoFile;
  const outputPath = opts.outputPath || `${base}-narrated${ext}`;

  // Temp directory
  const tempDir = pathJoin(tmpdir(), `web-test-tts-${Date.now()}`);
  mkdirSync(tempDir, { recursive: true });
  const warnings = [];
  try {
    // Phase 1: Generate TTS audio for each caption (in small parallel batches)
    const ttsFiles = [];
    const BATCH_SIZE = (opts.provider === 'elevenlabs') ? 2 : 5;
    for (let batchStart = 0; batchStart < captions.length; batchStart += BATCH_SIZE) {
      const batch = captions.slice(batchStart, batchStart + BATCH_SIZE);
      const promises = batch.map(async (cap, batchIdx) => {
        const idx = batchStart + batchIdx;
        const ttsFile = pathJoin(tempDir, `tts_${idx}.mp3`);
        const capTtsOpts = cap.voice ? { ...ttsOpts, voice: cap.voice } : ttsOpts;
        try {
          await ttsProvider(cap.speech, ttsFile, capTtsOpts);
        } catch (err) {
          // Retry once
          try {
            await ttsProvider(cap.speech, ttsFile, capTtsOpts);
          } catch (retryErr) {
            warnings.push(`TTS failed for caption ${idx}: ${retryErr.message || retryErr.cause?.message || String(retryErr)}`);
            // Generate 1s silence as placeholder
            generateSilence(ttsFile, 1, ffmpegPath);
          }
        }
        return ttsFile;
      });
      const results = await Promise.all(promises);
      ttsFiles.push(...results);
    }

    // Phase 2+3: Place each TTS at its exact timestamp using adelay + amix
    // This avoids MP3 frame quantization drift from silence-file concatenation
    const ffmpegInputs = [];
    const filterParts = [];
    const mixLabels = [];
    for (let i = 0; i < captions.length; i++) {
      const captionTimeMs = Math.round(captions[i].time);
      const ttsFile = ttsFiles[i];
      const ttsDuration = getAudioDuration(ttsFile, ffmpegPath);
      ffmpegInputs.push('-i', ttsFile);
      const filters = [];
      // Speed up TTS slightly if it's longer than gap to next caption (max 1.3x)
      if (i < captions.length - 1) {
        const maxDuration = (captions[i + 1].time - captions[i].time) / 1000;
        if (ttsDuration > maxDuration && maxDuration > 0.1) {
          const tempo = ttsDuration / maxDuration;
          if (tempo <= 1.3) {
            filters.push(`atempo=${tempo.toFixed(4)}`);
          } else {
            // Too fast — let audio overlap instead of distorting.
            // Round the shortfall UP so the hint never reads "0s more pause".
            warnings.push(`Caption ${i + 1}/${captions.length}: TTS ${ttsDuration.toFixed(1)}s > gap ${maxDuration.toFixed(1)}s (need ${Math.ceil(ttsDuration - maxDuration)}s more pause)`);
          }
        }
      }
      // Delay to exact caption timestamp (milliseconds)
      if (captionTimeMs > 0) {
        filters.push(`adelay=${captionTimeMs}|${captionTimeMs}`);
      }
      const label = `a${i}`;
      mixLabels.push(`[${label}]`);
      // Input indices are shifted by 1 because silence reference is input [0]
      filterParts.push(`[${i + 1}]${filters.length ? filters.join(',') : 'acopy'}[${label}]`);
    }

    // Generate a silence reference track as input [0] so amix runs for full video duration
    const silencePath = pathJoin(tempDir, 'silence.mp3');
    generateSilence(silencePath, Math.ceil(videoDuration), ffmpegPath);
    const filterComplex = filterParts.join(';') + ';' +
      `[0]${mixLabels.join('')}amix=inputs=${captions.length + 1}:normalize=0:duration=first`;
    const narrationPath = pathJoin(tempDir, 'narration.mp3');
    execFileSync(ffmpegPath, [
      '-y', '-i', silencePath, ...ffmpegInputs,
      '-filter_complex', filterComplex,
      '-t', String(Math.ceil(videoDuration)),
      '-c:a', 'libmp3lame', '-b:a', '128k', narrationPath,
    ], { stdio: 'pipe', timeout: 120000 });

    // Phase 4: Merge video + narration audio
    execFileSync(ffmpegPath, [
      '-y', '-i', videoFile, '-i', narrationPath,
      '-c:v', 'copy', '-c:a', 'aac', '-b:a', '128k',
      '-map', '0:v:0', '-map', '1:a:0',
      '-t', String(Math.ceil(videoDuration)),
      '-movflags', '+faststart', outputPath,
    ], { stdio: 'pipe', timeout: 120000 });

    const stats = statSync(outputPath);
    const duration = getAudioDuration(outputPath, ffmpegPath);
    const result = {
      file: outputPath,
      duration: Math.round(duration * 10) / 10,
      size: stats.size,
      captions: captions.length,
    };
    if (warnings.length) result.warnings = warnings;
    return result;
  } finally {
    // Cleanup temp directory (best effort — never mask the real outcome)
    try { rmSync(tempDir, { recursive: true, force: true }); } catch {}
  }
}
@@ -0,0 +1,175 @@
// web-test recording/tts v1.16 — TTS providers (edge/openai/elevenlabs) and ffmpeg/ffprobe helpers.
// Source: https://github.com/Nikolay-Shirokov/cc-1c-skills
import { execFileSync, spawn } from 'child_process';
import { existsSync as fsExistsSync, writeFileSync } from 'fs';
import { resolve as pathResolve } from 'path';
import { pathToFileURL } from 'url';
import { projectRoot } from '../core/state.mjs';
/**
 * Resolve the ffmpeg binary to use.
 *
 * Candidates are probed by running `<bin> -version` (5s timeout), in order:
 *   1. the explicit path argument (a failure here throws — no fallback),
 *   2. the FFMPEG_PATH environment variable,
 *   3. `ffmpeg` on the system PATH,
 *   4. a project-local copy in tools/ffmpeg/bin/ (`ffmpeg.exe`, then `ffmpeg`).
 *
 * @param {string} [explicit] Path to an ffmpeg binary chosen by the caller
 * @returns {string} The path or command name that ran successfully
 * @throws {Error} When the explicit path does not run, or no candidate works
 */
export function resolveFfmpeg(explicit) {
  // True when the binary starts and exits cleanly
  const runs = (bin) => {
    try {
      execFileSync(bin, ['-version'], { stdio: 'ignore', timeout: 5000 });
      return true;
    } catch {
      return false;
    }
  };

  // 1. Explicit path
  if (explicit) {
    if (runs(explicit)) return explicit;
    throw new Error(`ffmpeg not found at: ${explicit}`);
  }

  // 2. FFMPEG_PATH env var
  const envPath = process.env.FFMPEG_PATH;
  if (envPath && runs(envPath)) return envPath;

  // 3. System PATH
  if (runs('ffmpeg')) return 'ffmpeg';

  // 4. tools/ffmpeg/bin/ relative to project root. The Windows name is tried
  //    first (original behaviour); the bare name covers Linux/macOS, where the
  //    advertised "place in tools/ffmpeg/bin/" otherwise never worked.
  for (const binName of ['ffmpeg.exe', 'ffmpeg']) {
    const localPath = pathResolve(projectRoot, 'tools', 'ffmpeg', 'bin', binName);
    if (fsExistsSync(localPath) && runs(localPath)) return localPath;
  }

  // 5. Error with instructions
  throw new Error(
    'ffmpeg not found. Install it:\n' +
    '  - Download from https://www.gyan.dev/ffmpeg/builds/ (essentials build)\n' +
    '  - Add to PATH, or set FFMPEG_PATH env var, or place in tools/ffmpeg/bin/\n' +
    '  - Or pass ffmpegPath option to startRecording()'
  );
}
// ── TTS providers ──────────────────────────────────────────────────────────
// Cached node-edge-tts module namespace; stays null until a load succeeds.
let _edgeTtsModule = null;
/**
 * Load the node-edge-tts module, caching the result for later calls.
 * Tries standard Node resolution first, then the copy under tools/tts/ in the
 * project root (only if that file exists on disk).
 * @returns {Promise<object>} The imported module namespace
 * @throws {Error} With install instructions when neither location can be imported
 */
export async function resolveEdgeTts() {
  if (_edgeTtsModule) return _edgeTtsModule;

  // Import that reports failure as null instead of throwing
  const tryImport = async (specifier) => {
    try {
      return await import(specifier);
    } catch {
      return null;
    }
  };

  // 1. Global/project-level install (standard Node resolution)
  let loaded = await tryImport('node-edge-tts');

  // 2. tools/tts/ relative to project root
  if (!loaded) {
    const bundled = pathResolve(projectRoot, 'tools', 'tts', 'node_modules', 'node-edge-tts', 'dist', 'edge-tts.js');
    if (fsExistsSync(bundled)) {
      loaded = await tryImport(pathToFileURL(bundled).href);
    }
  }

  // 3. Error with instructions
  if (!loaded) {
    throw new Error(
      'node-edge-tts not found. Install it:\n' +
      '  - npm install --prefix tools/tts node-edge-tts\n' +
      '  - or: npm install node-edge-tts (global/project-level)'
    );
  }

  _edgeTtsModule = loaded;
  return _edgeTtsModule;
}
/**
 * Edge TTS provider (free, no API key). Uses node-edge-tts package.
 * Synthesis is abandoned with an error if it takes longer than 30 seconds.
 * @param {string} text — text to synthesize
 * @param {string} outputPath — path for the output mp3 file
 * @param {object} opts — { voice } (default voice: 'ru-RU-DmitryNeural')
 * @returns {Promise<void>}
 * @throws {Error} 'Edge TTS timeout (30s)' on timeout, or whatever the library rejects with
 */
export async function edgeTtsProvider(text, outputPath, opts = {}) {
  const { EdgeTTS } = await resolveEdgeTts();
  const voice = opts.voice || 'ru-RU-DmitryNeural';
  const tts = new EdgeTTS({ voice });

  let timer;
  const timeout = new Promise((_, reject) => {
    timer = setTimeout(() => reject(new Error('Edge TTS timeout (30s)')), 30000);
  });
  try {
    await Promise.race([tts.ttsPromise(text, outputPath), timeout]);
  } finally {
    // Without this the pending timer keeps the process alive for up to 30s
    // after every successful (or failed) synthesis.
    clearTimeout(timer);
  }
}
/**
 * OpenAI-compatible TTS provider. Requires apiKey.
 * POSTs the text to the speech endpoint and writes the returned mp3 bytes to disk.
 * @param {string} text — text to synthesize
 * @param {string} outputPath — path for the output mp3 file
 * @param {object} opts — { apiKey, apiUrl, voice, model }; defaults: OpenAI endpoint, voice 'alloy', model 'tts-1'
 * @returns {Promise<void>}
 * @throws {Error} When apiKey is missing or the endpoint answers with a non-2xx status
 */
export async function openaiTtsProvider(text, outputPath, opts = {}) {
  const { apiKey, apiUrl, model, voice } = opts;
  if (!apiKey) throw new Error('OpenAI TTS requires apiKey');

  const endpoint = apiUrl || 'https://api.openai.com/v1/audio/speech';
  const payload = {
    model: model || 'tts-1',
    input: text,
    voice: voice || 'alloy',
    response_format: 'mp3',
  };
  const response = await fetch(endpoint, {
    method: 'POST',
    headers: { 'Authorization': `Bearer ${apiKey}`, 'Content-Type': 'application/json' },
    body: JSON.stringify(payload),
  });
  if (!response.ok) {
    const detail = await response.text();
    throw new Error(`OpenAI TTS error ${response.status}: ${detail}`);
  }

  const audio = Buffer.from(await response.arrayBuffer());
  writeFileSync(outputPath, audio);
}
/**
 * ElevenLabs TTS provider. Requires apiKey.
 * POSTs the text to the text-to-speech endpoint and writes the response bytes to disk.
 * @param {string} text — text to synthesize
 * @param {string} outputPath — path for the output mp3 file
 * @param {object} opts — { apiKey, apiUrl, voice, model }; `voice` is a voice id that
 *   becomes part of the default URL (ignored when `apiUrl` is given); default model 'eleven_multilingual_v2'
 * @returns {Promise<void>}
 * @throws {Error} When apiKey is missing or the endpoint answers with a non-2xx status
 */
export async function elevenlabsTtsProvider(text, outputPath, opts = {}) {
  const { apiKey, apiUrl, model, voice } = opts;
  const voiceId = voice || 'JBFqnCBsd6RMkjVDRZzb'; // George
  const endpoint = apiUrl || `https://api.elevenlabs.io/v1/text-to-speech/${voiceId}`;
  if (!apiKey) throw new Error('ElevenLabs TTS requires apiKey');

  const payload = { text, model_id: model || 'eleven_multilingual_v2' };
  const response = await fetch(endpoint, {
    method: 'POST',
    headers: { 'xi-api-key': apiKey, 'Content-Type': 'application/json' },
    body: JSON.stringify(payload),
  });
  if (!response.ok) {
    const detail = await response.text();
    throw new Error(`ElevenLabs TTS error ${response.status}: ${detail}`);
  }

  const audio = Buffer.from(await response.arrayBuffer());
  writeFileSync(outputPath, audio);
}
/**
 * Look up the TTS provider function for a provider name.
 * @param {string} name 'openai', 'elevenlabs' or 'edge'
 * @returns {Function} The matching provider; any other name yields the Edge provider
 */
export function getTtsProvider(name) {
  if (name === 'openai') return openaiTtsProvider;
  if (name === 'elevenlabs') return elevenlabsTtsProvider;
  // 'edge' and every unrecognised name use Edge TTS
  return edgeTtsProvider;
}
// ── TTS audio helpers ──────────────────────────────────────────────────────
/**
 * Read a media file's duration in seconds by running ffprobe.
 * The ffprobe path is derived from the ffmpeg path by swapping a trailing
 * `ffmpeg` / `ffmpeg.exe` for `ffprobe` / `ffprobe.exe`.
 * @param {string} filePath — path to the media file
 * @param {string} ffmpegPath — path to ffmpeg binary (ffprobe is found next to it)
 * @returns {number} duration in seconds, or 0 when ffprobe's output is not a number
 */
export function getAudioDuration(filePath, ffmpegPath) {
  const probeBin = ffmpegPath.replace(/ffmpeg(\.exe)?$/i, 'ffprobe$1');
  const probeArgs = [
    '-v', 'error',
    '-show_entries', 'format=duration',
    '-of', 'default=noprint_wrappers=1:nokey=1',
    filePath,
  ];
  const stdout = execFileSync(probeBin, probeArgs, { encoding: 'utf8', timeout: 10000 });
  const seconds = parseFloat(stdout.trim());
  return seconds || 0;
}
/**
 * Write an mp3 file containing silence, produced by ffmpeg's `anullsrc` source
 * (24 kHz mono, encoded with libmp3lame at 32k).
 * @param {string} outputPath — path for the output mp3 file (overwritten if present)
 * @param {number} seconds — duration in seconds
 * @param {string} ffmpegPath — path to ffmpeg binary
 */
export function generateSilence(outputPath, seconds, ffmpegPath) {
  const inputArgs = ['-y', '-f', 'lavfi', '-i', 'anullsrc=r=24000:cl=mono'];
  const outputArgs = ['-t', String(seconds), '-c:a', 'libmp3lame', '-b:a', '32k', outputPath];
  execFileSync(ffmpegPath, [...inputArgs, ...outputArgs], { stdio: 'pipe', timeout: 10000 });
}