Merge branch 'dev'

This commit is contained in:
Nick Shirokov
2026-03-12 13:30:07 +03:00
5 changed files with 91 additions and 12 deletions
@@ -509,6 +509,8 @@ function Borrow-Form {
$autoCmdXml = $autoCmdXml -replace '<Autofill>true</Autofill>', '<Autofill>false</Autofill>'
# Strip ExcludedCommand (references to standard commands invalid in extension)
$autoCmdXml = [regex]::Replace($autoCmdXml, '\s*<ExcludedCommand>[^<]*</ExcludedCommand>', '')
# Strip DataPath in AutoCommandBar buttons (e.g. Объект.Ref — invalid in extension)
$autoCmdXml = [regex]::Replace($autoCmdXml, '\s*<DataPath>[^<]*</DataPath>', '')
}
# ChildItems: copy full tree, clean up base-config references
@@ -680,6 +680,8 @@ def main():
auto_cmd_xml = auto_cmd_xml.replace('<Autofill>true</Autofill>', '<Autofill>false</Autofill>')
# Strip ExcludedCommand (references to standard commands invalid in extension)
auto_cmd_xml = re.sub(r'\s*<ExcludedCommand>[^<]*</ExcludedCommand>', '', auto_cmd_xml)
# Strip DataPath in AutoCommandBar buttons (e.g. Объект.Ref — invalid in extension)
auto_cmd_xml = re.sub(r'\s*<DataPath>[^<]*</DataPath>', '', auto_cmd_xml)
# ChildItems: copy full tree, clean up base-config references
child_items_xml = ""
+4 -2
View File
@@ -91,6 +91,8 @@ Display a text overlay on the page (visible in recording). Calling again updates
The overlay uses `pointer-events: none` — does not interfere with clicking.
**Smart TTS wait** (during recording): `showCaption` automatically pauses for the estimated TTS speech duration (~100ms per character, min 2s). The next `wait()` call accounts for this — if the explicit pause is shorter than the TTS wait already done, no extra delay is added. If longer, only the remaining difference is waited. This means script authors don't need to calculate TTS timing manually.
### `hideCaption()`
Remove the caption overlay.
@@ -181,7 +183,7 @@ const result = await stopRecording();
console.log(`Recorded ${result.duration}s, ${(result.size / 1024 / 1024).toFixed(1)} MB`);
```
**Caption timing**: show the caption *before* the action with a `wait(1.5)` pause — the viewer reads what will happen, then sees it happen. Add `wait()` *after* the action only when the next step needs the result to load (e.g., form opening).
**Caption timing**: show the caption *before* the action `showCaption` auto-waits for estimated TTS duration during recording. The subsequent `wait()` is absorbed by the credit system (no double-waiting). Add `wait()` *after* the action only when the next step needs the result to load (e.g., form opening).
**Highlight timing**: `setHighlight(true)` enables auto-mode — each action function highlights the target for 500ms, then removes the highlight before performing the action. No manual `highlight()`/`unhighlight()` calls needed. Enable after title slide, disable before `stopRecording()`.
@@ -299,4 +301,4 @@ const result = await addNarration('recordings/demo.mp4', { voice: 'ru-RU-Svetlan
| Recording stops on disconnect | Expected — auto-stop prevents orphaned ffmpeg processes |
| "No captions available" | Use `showCaption()` during recording, or pass `opts.captions` |
| TTS timeout | Check internet connection. Edge TTS requires network access |
| Audio cuts off between captions | TTS is auto-trimmed to fit the timeline. Add longer `wait()` pauses |
| Audio cuts off between captions | Smart TTS wait should handle this automatically. If warnings appear, add longer `wait()` after `showCaption` |
+79 -10
View File
@@ -2955,7 +2955,29 @@ export async function screenshot() {
/** Wait for a specified number of seconds. */
export async function wait(seconds) {
ensureConnected();
await page.waitForTimeout(seconds * 1000);
let ms = seconds * 1000;
// Credit system: if showCaption already waited for TTS, subtract that time
if (recorder && recorder.captionCredit) {
const elapsed = Date.now() - recorder.captionCredit.at;
const credit = Math.max(0, recorder.captionCredit.waitedMs - elapsed);
ms = Math.max(0, ms - credit);
recorder.captionCredit = null;
}
if (ms > 0) {
// During recording, split long waits into chunks and flush frames
// to keep video timeline in sync (CDP may not send frames for static pages)
if (recorder?._flushFrames && ms > 1000) {
let remaining = ms;
while (remaining > 0) {
const chunk = Math.min(remaining, 1000);
await page.waitForTimeout(chunk);
remaining -= chunk;
recorder._flushFrames();
}
} else {
await page.waitForTimeout(ms);
}
}
return await getFormState();
}
@@ -3031,7 +3053,7 @@ export async function startRecording(outputPath, opts = {}) {
// Fill the gap with duplicates of the previous frame
const gap = now - lastFrameTime;
const dupes = Math.round(gap / frameDuration) - 1;
for (let i = 0; i < dupes && i < fps * 2; i++) {
for (let i = 0; i < dupes && i < fps * 30; i++) {
ffmpeg.stdin.write(lastFrameBuf);
framesWritten++;
}
@@ -3054,7 +3076,23 @@ export async function startRecording(outputPath, opts = {}) {
everyNthFrame: 1
});
recorder = { cdp, ffmpeg, startTime: Date.now(), outputPath: resolvedPath, ffmpegError: '', captions: [], videoTimeMs: 0 };
// Expose a frame-writing helper on the recorder object.
// During static periods (e.g. smart TTS pauses), CDP may not send screencast
// frames. Call _flushFrames() to fill the gap with duplicates of the last frame,
// keeping video timeline in sync with wall-clock time.
const _flushFrames = () => {
if (!lastFrameBuf || !lastFrameTime || ffmpeg.stdin.destroyed) return;
const now = Date.now();
const gap = now - lastFrameTime;
const dupes = Math.round(gap / frameDuration);
for (let i = 0; i < dupes; i++) {
ffmpeg.stdin.write(lastFrameBuf);
if (recorder) recorder.videoTimeMs += frameDuration;
}
if (dupes > 0) lastFrameTime = now;
};
recorder = { cdp, ffmpeg, startTime: Date.now(), outputPath: resolvedPath, ffmpegError: '', captions: [], videoTimeMs: 0, _flushFrames };
// Redirect stderr accumulation to the recorder object
ffmpeg.stderr.removeAllListeners('data');
ffmpeg.stderr.on('data', d => { recorder.ffmpegError += d.toString(); });
@@ -3069,6 +3107,9 @@ export async function stopRecording() {
const { cdp, ffmpeg, startTime, outputPath } = recorder;
// Final frame flush: write remaining frames to cover the gap since the last screencast frame
if (recorder._flushFrames) recorder._flushFrames();
// Stop CDP screencast
try { await cdp.send('Page.stopScreencast'); } catch {}
try { await cdp.detach(); } catch {}
@@ -3131,10 +3172,13 @@ export async function showCaption(text, opts = {}) {
ensureConnected();
// Collect caption for TTS narration if recording
let smartWaitMs = 0;
if (recorder && text.trim() && opts.speech !== false) {
const speech = typeof opts.speech === 'string' ? opts.speech : text;
// Use video timeline position (accounts for frame duplication) instead of wall-clock
recorder.captions.push({ text, speech, time: Math.round(recorder.videoTimeMs) });
// Estimate TTS duration and wait so the video has enough screen time for voiceover
smartWaitMs = Math.max(2000, speech.length * 100);
}
const position = opts.position || 'bottom';
const fontSize = opts.fontSize || 24;
@@ -3160,6 +3204,20 @@ export async function showCaption(text, opts = {}) {
el.style.color = color;
el.textContent = text;
}, { text, position, fontSize, bg, color });
// Smart TTS wait: pause for estimated speech duration so video has enough screen time.
// Split into chunks and flush frames periodically — CDP doesn't send screencast frames
// for static pages, so we must write duplicate frames to keep video timeline in sync.
if (smartWaitMs > 0) {
let remaining = smartWaitMs;
while (remaining > 0) {
const chunk = Math.min(remaining, 1000);
await page.waitForTimeout(chunk);
remaining -= chunk;
if (recorder?._flushFrames) recorder._flushFrames();
}
recorder.captionCredit = { waitedMs: smartWaitMs, at: Date.now() };
}
}
/** Remove the caption overlay from the page. */
@@ -3293,12 +3351,17 @@ export async function addNarration(videoPath, opts = {}) {
ffmpegInputs.push('-i', ttsFile);
const filters = [];
// Speed up TTS if it's longer than gap to next caption
// Speed up TTS slightly if it's longer than gap to next caption (max 1.3x)
if (i < captions.length - 1) {
const maxDuration = (captions[i + 1].time - captions[i].time) / 1000;
if (ttsDuration > maxDuration && maxDuration > 0.1) {
const tempo = Math.min(ttsDuration / maxDuration, 2.5);
filters.push(`atempo=${tempo.toFixed(4)}`);
const tempo = ttsDuration / maxDuration;
if (tempo <= 1.3) {
filters.push(`atempo=${tempo.toFixed(4)}`);
} else {
// Too fast — let audio overlap instead of distorting
warnings.push(`Caption ${i + 1}/${captions.length}: TTS ${ttsDuration.toFixed(1)}s > gap ${maxDuration.toFixed(1)}s (need ${Math.round(ttsDuration - maxDuration)}s more pause)`);
}
}
}
@@ -3309,15 +3372,20 @@ export async function addNarration(videoPath, opts = {}) {
const label = `a${i}`;
mixLabels.push(`[${label}]`);
filterParts.push(`[${i}]${filters.length ? filters.join(',') : 'acopy'}[${label}]`);
// Input indices are shifted by 1 because silence reference is input [0]
filterParts.push(`[${i + 1}]${filters.length ? filters.join(',') : 'acopy'}[${label}]`);
}
// Generate a silence reference track as input [0] so amix runs for full video duration
const silencePath = pathJoin(tempDir, 'silence.mp3');
generateSilence(silencePath, Math.ceil(videoDuration), ffmpegPath);
const filterComplex = filterParts.join(';') + ';' +
mixLabels.join('') + `amix=inputs=${captions.length}:normalize=0`;
`[0]${mixLabels.join('')}amix=inputs=${captions.length + 1}:normalize=0:duration=first`;
const narrationPath = pathJoin(tempDir, 'narration.mp3');
execFileSync(ffmpegPath, [
'-y', ...ffmpegInputs,
'-y', '-i', silencePath, ...ffmpegInputs,
'-filter_complex', filterComplex,
'-t', String(Math.ceil(videoDuration)),
'-c:a', 'libmp3lame', '-b:a', '128k', narrationPath,
@@ -3328,7 +3396,8 @@ export async function addNarration(videoPath, opts = {}) {
'-y', '-i', videoPath, '-i', narrationPath,
'-c:v', 'copy', '-c:a', 'aac', '-b:a', '128k',
'-map', '0:v:0', '-map', '1:a:0',
'-shortest', '-movflags', '+faststart', outputPath,
'-t', String(Math.ceil(videoDuration)),
'-movflags', '+faststart', outputPath,
], { stdio: 'pipe', timeout: 120000 });
const stats = statSync(outputPath);
+4
View File
@@ -905,12 +905,16 @@ export function checkErrorsScript() {
}
// Single-button modal: error dialog with pressDefault + staticText
// Skip forms with input fields — those are data entry forms (e.g. register record),
// not error dialogs. Real error modals only have staticText + buttons.
if (!result.confirmation) {
for (const [fn, buttons] of Object.entries(formButtons)) {
const p = 'form' + fn + '_';
const elCount = document.querySelectorAll('[id^="' + p + '"]').length;
if (elCount > 100) continue;
if (buttons.length !== 1 || !buttons[0].classList.contains('pressDefault')) continue;
const hasInputs = document.querySelectorAll('input.editInput[id^="' + p + '"]').length > 0;
if (hasInputs) continue;
const texts = [...document.querySelectorAll('[id^="' + p + '"].staticText')]
.filter(el => el.offsetWidth > 0)
.map(el => el.innerText?.trim())