feat(web-test): structured readSpreadsheet with header detection

Parse SpreadsheetDocument into { title, meta, headers, data, totals }:
- Auto-detect header row (most non-empty cells before the first numeric row)
- Group header prefix for duplicate column names (Сейчас/Доступно)
- Data rows as objects {column: value}, only non-empty cells
- Separate totals row (Итого/Всего)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-07-29 08:01:02 +03:00 · 2026-02-28 16:17:40 +03:00
parent 56203e2b71
commit 366378c4b5
1 changed files with 89 additions and 2 deletions
@@ -431,7 +431,10 @@ export async function readTable({ maxRows = 20, offset = 0 } = {}) {
 * Read report output (SpreadsheetDocumentField) rendered in iframes.
 * 1C renders spreadsheet documents as absolutely-positioned div cells inside iframes.
 * Each cell is a div[x] inside a row div[y], text content in <span>.
- * Returns { headers: string[][], data: string[][] } — arrays of cell arrays.
+ *
+ * Returns structured data:
+ *   { title, meta, headers, data: [{col: val}], totals: {col: val}, total }
+ * If header detection fails, falls back to { rows: string[][], total }.
 */
 export async function readSpreadsheet() {
  ensureConnected();
@@ -481,7 +484,91 @@ export async function readSpreadsheet() {
    return arr;
  });

-  return { rows, total: rows.length };
+  // --- Structured parsing ---
+  const hasNumber = (row) => row.some(c => /^[\d\s\u00a0]/.test(c) && /\d/.test(c));
+  const nonEmpty = (row) => row.filter(c => c !== '').length;
+
+  // 1. Find header row: row with most non-empty cells BEFORE first row with numbers
+  let headerIdx = -1;
+  let bestCount = 0;
+  for (let i = 0; i < rows.length; i++) {
+    if (hasNumber(rows[i])) break;
+    const cnt = nonEmpty(rows[i]);
+    if (cnt >= bestCount) { bestCount = cnt; headerIdx = i; }
+  }
+
+  if (headerIdx === -1 || bestCount < 2) return { rows, total: rows.length };
+
+  const headerRow = rows[headerIdx];
+
+  // 2. Check for group header row (row just before header, sparse text entries)
+  let groupRow = null;
+  if (headerIdx > 0) {
+    const prev = rows[headerIdx - 1];
+    const prevCnt = nonEmpty(prev);
+    if (prevCnt > 0 && prevCnt < bestCount) groupRow = prev;
+  }
+
+  // 3. Build column names; disambiguate duplicates with group prefix
+  const nameCounts = {};
+  for (let c = 0; c <= maxCol; c++) {
+    const n = headerRow[c];
+    if (n) nameCounts[n] = (nameCounts[n] || 0) + 1;
+  }
+
+  const colNames = [];
+  let curGroup = '';
+  for (let c = 0; c <= maxCol; c++) {
+    if (groupRow && groupRow[c]) curGroup = groupRow[c];
+    const name = headerRow[c];
+    if (!name) { colNames.push(null); continue; }
+    colNames.push(nameCounts[name] > 1 && curGroup ? `${curGroup} / ${name}` : name);
+  }
+
+  // 4. Skip sub-header rows after header (text-only rows before first numeric row)
+  let dataStart = headerIdx + 1;
+  while (dataStart < rows.length && !hasNumber(rows[dataStart])) dataStart++;
+
+  // 5. Convert data rows to objects
+  const data = [];
+  let totals = null;
+  const toObj = (row) => {
+    const obj = {};
+    for (let c = 0; c < colNames.length; c++) {
+      if (colNames[c] && row[c]) obj[colNames[c]] = row[c];
+    }
+    return obj;
+  };
+
+  for (let i = dataStart; i < rows.length; i++) {
+    if (!hasNumber(rows[i]) && nonEmpty(rows[i]) === 0) continue;
+    const first = rows[i][0]?.trim().toLowerCase();
+    if (first === 'итого' || first === 'всего') {
+      totals = toObj(rows[i]);
+    } else {
+      data.push(toObj(rows[i]));
+    }
+  }
+
+  // 6. Meta: title, params, filters from rows before header
+  const metaEnd = groupRow ? headerIdx - 1 : headerIdx;
+  let title = '';
+  const meta = [];
+  for (let i = 0; i < metaEnd; i++) {
+    const parts = rows[i].filter(c => c);
+    if (!parts.length) continue;
+    if (!title) { title = parts.join(' '); continue; }
+    meta.push(parts.join(' '));
+  }
+
+  return {
+    title: title || undefined,
+    meta: meta.length ? meta : undefined,
+    headers: colNames.filter(n => n),
+    data,
+    totals: totals || undefined,
+    total: data.length,
+  };
 }

 /**