feat(skd-edit): normalize line endings + diagnostics on patch-query not-found

patch-query теперь нормализует CRLF/CR → LF в old/new/query перед поиском, поэтому многострочные шаблоны с любым стилем переводов строк находятся корректно (XmlDocument декодирует text-узлы как LF). При not-found вместо сухого сообщения выводится воронка диагностики: 1) cross-dataset probe — «Found in dataset 'Y' instead — wrong -DataSet?» 2) tolerant probe (collapse whitespace + NBSP) — «would match with whitespace normalized» + точка расхождения 3) prefix divergence — «matched N of M chars, expected 'X' (U+...) but got 'Y' (U+...)» + короткий контекст Тесты: 4 новых кейса (positive CRLF-tolerant + 3 диагностических negative). Регрессия 45/45 PS + 45/45 Python. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-07-31 08:47:45 +03:00 · 2026-05-20 19:53:24 +03:00
parent 6e14f2502e
commit ce1ba0bab1
8 changed files with 398 additions and 11 deletions
@@ -265,7 +265,7 @@ Shorthand: `"старое => новое [@once]"`. По умолчанию за

 `@once` — упасть с ошибкой, если в запросе не **ровно одно** вхождение. Защищает от случайных замен в комментариях и однотипных идентификаторах.

-Многострочные подстроки поддерживаются — переводы строк в `старое`/`новое` сравниваются буквально (включая отступы).
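+Многострочные подстроки поддерживаются: переводы строк CRLF/CR в `старое`/`новое` и в тексте запроса нормализуются в LF перед поиском, отступы сравниваются буквально.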

 ### set-outputParameter — установить параметр вывода

@@ -1,4 +1,4 @@
-# skd-edit v1.23 — Atomic 1C DCS editor
+# skd-edit v1.24 — Atomic 1C DCS editor
 # Source: https://github.com/Nikolay-Shirokov/cc-1c-skills
 param(
 	[Parameter(Mandatory)]
@@ -1729,6 +1729,131 @@ function Set-OrCreateChildElementWithAttr($parent, [string]$localName, [string]$
 	}
 }

+function Get-AllDataSets {
+	# Collects every <dataSet> element that is a direct child of the document
+	# root and belongs to the DCS schema namespace. Reads $xmlDoc from the
+	# enclosing scope. Returns an array (possibly empty); the unary comma keeps
+	# PowerShell from unrolling it on return.
+	$dcsNamespace = "http://v8.1c.ru/8.1/data-composition-system/schema"
+	$found = New-Object System.Collections.ArrayList
+	foreach ($node in $xmlDoc.DocumentElement.ChildNodes) {
+		if ($node.NodeType -ne 'Element') { continue }
+		if ($node.LocalName -ne 'dataSet') { continue }
+		if ($node.NamespaceURI -ne $dcsNamespace) { continue }
+		[void]$found.Add($node)
+	}
+	return ,$found.ToArray()
+}
+
+function Normalize-LineEndings([string]$s) {
+	# Rewrites every CRLF pair and every lone CR as a single LF.
+	if ($null -eq $s) { return $s }
+	# One pass: a CR optionally followed by LF collapses to LF.
+	return [regex]::Replace($s, "`r`n?", "`n")
+}
+
+function Escape-Whitespace([string]$s) {
+	# Renders a string with invisible characters made visible:
+	#   LF, CR, TAB              -> \n, \r, \t
+	#   other code points < 32,
+	#   U+00A0, U+2000..U+200F,
+	#   U+FEFF                   -> \uXXXX (upper-case hex, 4 digits)
+	# Every other character is copied unchanged.
+	$named = @{ 10 = '\n'; 13 = '\r'; 9 = '\t' }
+	$out = New-Object System.Text.StringBuilder
+	foreach ($ch in $s.ToCharArray()) {
+		$cp = [int]$ch
+		if ($named.ContainsKey($cp)) {
+			[void]$out.Append($named[$cp])
+			continue
+		}
+		$hidden = ($cp -lt 32) -or ($cp -eq 0xA0) -or ($cp -ge 0x2000 -and $cp -le 0x200F) -or ($cp -eq 0xFEFF)
+		if ($hidden) {
+			[void]$out.AppendFormat('\u{0:X4}', $cp)
+		} else {
+			[void]$out.Append($ch)
+		}
+	}
+	return $out.ToString()
+}
+
+function Collapse-Whitespace([string]$s) {
+	# Replaces each run of whitespace (including NBSP, U+00A0) with a single
+	# space and trims both ends. Used for the tolerant "would it match if
+	# whitespace were normalized" probe.
+	# NBSP is written as an explicit \u00A0 escape so the pattern does not
+	# depend on an invisible literal character surviving editors and diffs.
+	return ([regex]::Replace($s, "[\s\u00A0]+", " ")).Trim()
+}
+
+function Find-LongestPrefixMatch([string]$haystack, [string]$needle) {
+	# Finds the largest L such that needle.Substring(0, L) occurs in haystack.
+	# Returns a hashtable: Length = L, Offset = index of the first occurrence of
+	# that prefix in haystack; Length = 0 / Offset = -1 when nothing matches.
+	#
+	# Binary search is valid because the property is monotonic: if a prefix of
+	# length L occurs at position P, the prefix of length L-1 occurs at P too.
+	#
+	# All lookups are ordinal. String.IndexOf(string) without a comparison
+	# argument is culture-sensitive and treats zero-width / ignorable characters
+	# as matching nothing, which would hide exactly the invisible differences
+	# this diagnostic is meant to expose.
+	$ordinal = [System.StringComparison]::Ordinal
+	if ($needle.Length -eq 0 -or $haystack.Length -eq 0) {
+		return @{ Length = 0; Offset = -1 }
+	}
+	$firstIdx = $haystack.IndexOf($needle.Substring(0, 1), $ordinal)
+	if ($firstIdx -lt 0) {
+		return @{ Length = 0; Offset = -1 }
+	}
+	$lo = 1; $hi = $needle.Length
+	$bestLen = 1; $bestOffset = $firstIdx
+	while ($lo -le $hi) {
+		# Explicit floor: a bare [int] cast rounds half to even.
+		$mid = [int][Math]::Floor(($lo + $hi) / 2)
+		$idx = $haystack.IndexOf($needle.Substring(0, $mid), $ordinal)
+		if ($idx -ge 0) { $bestLen = $mid; $bestOffset = $idx; $lo = $mid + 1 }
+		else { $hi = $mid - 1 }
+	}
+	return @{ Length = $bestLen; Offset = $bestOffset }
+}
+
+function Format-PatchQueryNotFound([string]$oldStr, [string]$queryText, $currentDsNode, [string]$dsName) {
+	# Builds the multi-line "substring not found" diagnostic for patch-query.
+	#   $oldStr        - search text (line endings already normalized by the caller)
+	#   $queryText     - query text of the selected dataset (also normalized)
+	#   $currentDsNode - the dataset node that was searched; skipped in step 1
+	#   $dsName        - name of that dataset, used in messages
+	# Returns one string with lines joined by LF. The checks run as a funnel and
+	# the first one that explains the miss ends the report:
+	#   1) the text exists in another dataset,
+	#   2) the text would match after whitespace collapsing,
+	#   3) the longest matching prefix and the first differing character.
+	$schNs = "http://v8.1c.ru/8.1/data-composition-system/schema"
+	$lines = @("Substring not found in query of dataset '$dsName'.")
+
+	# Step 1 — cross-dataset probe
+	foreach ($ds in (Get-AllDataSets)) {
+		if ($ds -eq $currentDsNode) { continue }
+		$q = Find-FirstElement $ds @("query") $schNs
+		if (-not $q) { continue }
+		$qt = Normalize-LineEndings $q.InnerText
+		if ($qt.Contains($oldStr)) {
+			$otherName = Get-DataSetName $ds
+			$lines += "Found in dataset '$otherName' instead — wrong -DataSet?"
+			return ($lines -join "`n")
+		}
+	}
+
+	# Step 2 — tolerant probe (whitespace + NBSP collapsed)
+	$normNeedle = Collapse-Whitespace $oldStr
+	$normHay = Collapse-Whitespace $queryText
+	$tolerant = ($normNeedle.Length -gt 0 -and $normHay.Contains($normNeedle))
+
+	# Step 3 — prefix divergence (used by both Step 2 reporting and standalone Step 3)
+	$prefix = Find-LongestPrefixMatch -haystack $queryText -needle $oldStr
+	$divergence = $null
+	if ($prefix.Length -gt 0 -and $prefix.Length -lt $oldStr.Length) {
+		$queryPos = $prefix.Offset + $prefix.Length
+		$beforeLen = [Math]::Min(20, $prefix.Length)
+		$divergence = [ordered]@{
+			matched = $prefix.Length
+			total = $oldStr.Length
+			# Up to 20 characters of the search text right before the mismatch.
+			before = $oldStr.Substring($prefix.Length - $beforeLen, $beforeLen)
+			searchChar = $oldStr[$prefix.Length]
+			# $null when the matched prefix runs to the very end of the query.
+			queryChar = $(if ($queryPos -lt $queryText.Length) { $queryText[$queryPos] } else { $null })
+		}
+	}
+
+	# Shared renderer for the "before / in search / in query" detail lines.
+	# Only the standalone report mentions "(end of query)".
+	$describe = {
+		param($d, [bool]$reportEndOfQuery)
+		"  before:    '$(Escape-Whitespace $d.before)'"
+		"  in search: '$(Escape-Whitespace ([string]$d.searchChar))' (U+$('{0:X4}' -f [int]$d.searchChar))"
+		if ($null -ne $d.queryChar) {
+			"  in query:  '$(Escape-Whitespace ([string]$d.queryChar))' (U+$('{0:X4}' -f [int]$d.queryChar))"
+		} elseif ($reportEndOfQuery) {
+			"  in query:  (end of query)"
+		}
+	}
+
+	if ($tolerant) {
+		$lines += "Not found exactly, but would match with whitespace normalized (tabs/spaces/NBSP)."
+		if ($divergence) {
+			$lines += "Diverged at offset $($divergence.matched) of $($divergence.total):"
+			$lines += (& $describe $divergence $false)
+		}
+		return ($lines -join "`n")
+	}
+
+	# Step 3 standalone
+	if ($prefix.Length -eq 0) {
+		$lines += "No common prefix with query. Check -DataSet (current: '$dsName')."
+		return ($lines -join "`n")
+	}
+	if ($null -eq $divergence) {
+		# The prefix probe claims the whole search text matched although the exact
+		# search failed; there is no differing character to show, so do not print
+		# an empty "Matched first  of  chars" report.
+		$lines += "Could not determine the exact divergence point."
+		return ($lines -join "`n")
+	}
+	$lines += "Matched first $($divergence.matched) of $($divergence.total) chars, then diverged:"
+	$lines += (& $describe $divergence $true)
+	return ($lines -join "`n")
+}
+
 function Resolve-DataSet {
 	$schNs = "http://v8.1c.ru/8.1/data-composition-system/schema"
 	$root = $xmlDoc.DocumentElement
@@ -2697,13 +2822,14 @@ switch ($Operation) {
 				Write-Error "patch-query value must contain ' => ' separator: old => new"
 				exit 1
 			}
-			$oldStr = $val.Substring(0, $sepIdx)
-			$newStr = $val.Substring($sepIdx + 4)
-			$queryText = $queryEl.InnerText
+			$oldStr = Normalize-LineEndings $val.Substring(0, $sepIdx)
+			$newStr = Normalize-LineEndings $val.Substring($sepIdx + 4)
+			$queryText = Normalize-LineEndings $queryEl.InnerText

 			$count = ([regex]::Matches($queryText, [regex]::Escape($oldStr))).Count
 			if ($count -eq 0) {
-				Write-Error "Substring not found in query of dataset '$dsName': $oldStr"
+				$diag = Format-PatchQueryNotFound $oldStr $queryText $dsNode $dsName
+				Write-Error $diag
 				exit 1
 			}
 			if ($once -and $count -ne 1) {
@@ -1,4 +1,4 @@
-# skd-edit v1.23 — Atomic 1C DCS editor (Python port)
+# skd-edit v1.24 — Atomic 1C DCS editor (Python port)
 # Source: https://github.com/Nikolay-Shirokov/cc-1c-skills
 import argparse
 import os
@@ -1524,6 +1524,118 @@ def set_or_create_child_element_with_attr(parent, ln, ns_uri, value, xsi_type, i
            insert_before_element(parent, node, None, indent)


+def get_all_data_sets():
+    return [c for c in xml_doc
+            if isinstance(c.tag, str) and local_name(c) == "dataSet" and etree.QName(c.tag).namespace == SCH_NS]
+
+
+def normalize_line_endings(s):
+    if s is None:
+        return s
+    return s.replace("\r\n", "\n").replace("\r", "\n")
+
+
+def escape_whitespace(s):
+    out = []
+    for ch in s:
+        code = ord(ch)
+        if ch == "\n": out.append("\\n")
+        elif ch == "\r": out.append("\\r")
+        elif ch == "\t": out.append("\\t")
+        elif code < 32 or code == 0xA0 or (0x2000 <= code <= 0x200F) or code == 0xFEFF:
+            out.append(f"\\u{code:04X}")
+        else:
+            out.append(ch)
+    return "".join(out)
+
+
+def collapse_whitespace(s):
+    return re.sub(r"[\s ]+", " ", s).strip()
+
+
+def find_longest_prefix_match(haystack, needle):
+    """Binary search: largest L such that needle[:L] is a substring of haystack."""
+    if not needle or not haystack:
+        return (0, -1)
+    first_idx = haystack.find(needle[0])
+    if first_idx < 0:
+        return (0, -1)
+    lo, hi = 1, len(needle)
+    best_len, best_off = 1, first_idx
+    while lo <= hi:
+        mid = (lo + hi) // 2
+        idx = haystack.find(needle[:mid])
+        if idx >= 0:
+            best_len, best_off = mid, idx
+            lo = mid + 1
+        else:
+            hi = mid - 1
+    return (best_len, best_off)
+
+
+def format_patch_query_not_found(old_str, query_text, current_ds_node, ds_name):
+    lines = [f"Substring not found in query of dataset '{ds_name}'."]
+
+    # Step 1 — cross-dataset probe
+    for ds in get_all_data_sets():
+        if ds is current_ds_node:
+            continue
+        q = find_first_element(ds, ["query"], SCH_NS)
+        if q is None:
+            continue
+        qt = normalize_line_endings(q.text or "")
+        if old_str in qt:
+            other = get_data_set_name(ds)
+            lines.append(f"Found in dataset '{other}' instead — wrong -DataSet?")
+            return "\n".join(lines)
+
+    # Step 2 — tolerant probe
+    norm_needle = collapse_whitespace(old_str)
+    norm_hay = collapse_whitespace(query_text)
+    tolerant = bool(norm_needle) and (norm_needle in norm_hay)
+
+    # Step 3 — divergence
+    matched, off = find_longest_prefix_match(query_text, old_str)
+    divergence = None
+    if 0 < matched < len(old_str):
+        query_pos = off + matched
+        before_len = min(20, matched)
+        divergence = {
+            "matched": matched,
+            "total": len(old_str),
+            "before": old_str[matched - before_len:matched],
+            "search_char": old_str[matched],
+            "query_char": query_text[query_pos] if query_pos < len(query_text) else None,
+        }
+
+    if tolerant:
+        lines.append("Not found exactly, but would match with whitespace normalized (tabs/spaces/NBSP).")
+        if divergence:
+            lines.append(f"Diverged at offset {divergence['matched']} of {divergence['total']}:")
+            lines.append(f"  before:    '{escape_whitespace(divergence['before'])}'")
+            sc = divergence['search_char']
+            lines.append(f"  in search: '{escape_whitespace(sc)}' (U+{ord(sc):04X})")
+            qc = divergence['query_char']
+            if qc is not None:
+                lines.append(f"  in query:  '{escape_whitespace(qc)}' (U+{ord(qc):04X})")
+        return "\n".join(lines)
+
+    if matched == 0:
+        lines.append(f"No common prefix with query. Check -DataSet (current: '{ds_name}').")
+        return "\n".join(lines)
+
+    lines.append(f"Matched first {divergence['matched']} of {divergence['total']} chars, then diverged:")
+    lines.append(f"  before:    '{escape_whitespace(divergence['before'])}'")
+    sc = divergence['search_char']
+    lines.append(f"  in search: '{escape_whitespace(sc)}' (U+{ord(sc):04X})")
+    qc = divergence['query_char']
+    if qc is not None:
+        lines.append(f"  in query:  '{escape_whitespace(qc)}' (U+{ord(qc):04X})")
+    else:
+        lines.append("  in query:  (end of query)")
+    return "\n".join(lines)
+
+
 def resolve_data_set():
    root_el = xml_doc

@@ -2255,13 +2367,13 @@ elif operation == "patch-query":
        if sep_idx < 0:
            print("patch-query value must contain ' => ' separator: old => new", file=sys.stderr)
            sys.exit(1)
-        old_str = val[:sep_idx]
-        new_str = val[sep_idx + 4:]
-        query_text = query_el.text or ""
+        old_str = normalize_line_endings(val[:sep_idx])
+        new_str = normalize_line_endings(val[sep_idx + 4:])
+        query_text = normalize_line_endings(query_el.text or "")

        count = query_text.count(old_str)
        if count == 0:
-            print(f"Substring not found in query of dataset '{ds_name}': {old_str}", file=sys.stderr)
+            print(format_patch_query_not_found(old_str, query_text, ds_node, ds_name), file=sys.stderr)
            sys.exit(1)
        if once and count != 1:
            print(f"@once: expected 1 occurrence of '{old_str}' in dataset '{ds_name}', found {count}", file=sys.stderr)
@@ -0,0 +1,21 @@
+{
+  "name": "patch-query: CRLF в -Value матчит LF-текст в InnerText",
+  "preRun": [
+    {
+      "script": "skd-compile/scripts/skd-compile",
+      "input": {
+        "dataSets": [{
+          "name": "Основной",
+          "query": "ВЫБРАТЬ\n\tТ.Поле\nИЗ\n\tРегистр КАК Т",
+          "fields": ["Поле"]
+        }]
+      },
+      "args": { "-DefinitionFile": "{inputFile}", "-OutputPath": "{workDir}/Template.xml" }
+    }
+  ],
+  "params": {
+    "templatePath": "Template.xml",
+    "operation": "patch-query",
+    "value": "ВЫБРАТЬ\r\n\tТ.Поле => ВЫБРАТЬ\r\n\tТ.Имя"
+  }
+}
@@ -0,0 +1,29 @@
+{
+  "name": "patch-query: diagnostic — NBSP в search улавливается tolerant probe",
+  "preRun": [
+    {
+      "script": "skd-compile/scripts/skd-compile",
+      "input": {
+        "dataSets": [
+          {
+            "name": "Основной",
+            "query": "ВЫБРАТЬ Т.Поле ИЗ Регистр КАК Т",
+            "fields": [
+              "Поле"
+            ]
+          }
+        ]
+      },
+      "args": {
+        "-DefinitionFile": "{inputFile}",
+        "-OutputPath": "{workDir}/Template.xml"
+      }
+    }
+  ],
+  "params": {
+    "templatePath": "Template.xml",
+    "operation": "patch-query",
+    "value": "ВЫБРАТЬ\u00A0Т.Поле => x"
+  },
+  "expectError": "whitespace normalized"
+}
@@ -0,0 +1,30 @@
+{
+  "name": "patch-query: diagnostic — подстрока в другом dataSet",
+  "preRun": [
+    {
+      "script": "skd-compile/scripts/skd-compile",
+      "input": {
+        "dataSets": [
+          {
+            "name": "Первый",
+            "query": "ВЫБРАТЬ 1 КАК Поле",
+            "fields": ["Поле"]
+          },
+          {
+            "name": "Второй",
+            "query": "ВЫБРАТЬ Т.УникальныйМаркер ИЗ Регистр КАК Т",
+            "fields": ["УникальныйМаркер: string"]
+          }
+        ]
+      },
+      "args": { "-DefinitionFile": "{inputFile}", "-OutputPath": "{workDir}/Template.xml" }
+    }
+  ],
+  "params": {
+    "templatePath": "Template.xml",
+    "operation": "patch-query",
+    "value": "УникальныйМаркер => Новый"
+  },
+  "args_extra": ["-DataSet", "Первый"],
+  "expectError": "Found in dataset 'Второй' instead"
+}
@@ -0,0 +1,22 @@
+{
+  "name": "patch-query: diagnostic — опечатка показывает точку расхождения",
+  "preRun": [
+    {
+      "script": "skd-compile/scripts/skd-compile",
+      "input": {
+        "dataSets": [{
+          "name": "Основной",
+          "query": "ВЫБРАТЬ Т.Наименование ИЗ Регистр КАК Т",
+          "fields": ["Наименование: string"]
+        }]
+      },
+      "args": { "-DefinitionFile": "{inputFile}", "-OutputPath": "{workDir}/Template.xml" }
+    }
+  ],
+  "params": {
+    "templatePath": "Template.xml",
+    "operation": "patch-query",
+    "value": "Т.Наеменование => Т.Имя"
+  },
+  "expectError": "diverged"
+}
@@ -0,0 +1,47 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<DataCompositionSchema xmlns="http://v8.1c.ru/8.1/data-composition-system/schema"
+		xmlns:dcscom="http://v8.1c.ru/8.1/data-composition-system/common"
+		xmlns:dcscor="http://v8.1c.ru/8.1/data-composition-system/core"
+		xmlns:dcsset="http://v8.1c.ru/8.1/data-composition-system/settings"
+		xmlns:v8="http://v8.1c.ru/8.1/data/core"
+		xmlns:v8ui="http://v8.1c.ru/8.1/data/ui"
+		xmlns:xs="http://www.w3.org/2001/XMLSchema"
+		xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+	<dataSource>
+		<name>ИсточникДанных1</name>
+		<dataSourceType>Local</dataSourceType>
+	</dataSource>
+	<dataSet xsi:type="DataSetQuery">
+		<name>Основной</name>
+		<field xsi:type="DataSetFieldField">
+			<dataPath>Поле</dataPath>
+			<field>Поле</field>
+		</field>
+		<dataSource>ИсточникДанных1</dataSource>
+		<query>ВЫБРАТЬ
+	Т.Имя
+ИЗ
+	Регистр КАК Т</query>
+	</dataSet>
+	<settingsVariant>
+		<dcsset:name>Основной</dcsset:name>
+		<dcsset:presentation xsi:type="v8:LocalStringType">
+			<v8:item>
+				<v8:lang>ru</v8:lang>
+				<v8:content>Основной</v8:content>
+			</v8:item>
+		</dcsset:presentation>
+		<dcsset:settings xmlns:style="http://v8.1c.ru/8.1/data/ui/style" xmlns:sys="http://v8.1c.ru/8.1/data/ui/fonts/system" xmlns:web="http://v8.1c.ru/8.1/data/ui/colors/web" xmlns:win="http://v8.1c.ru/8.1/data/ui/colors/windows">
+			<dcsset:selection>
+			</dcsset:selection>
+			<dcsset:item xsi:type="dcsset:StructureItemGroup">
+				<dcsset:order>
+					<dcsset:item xsi:type="dcsset:OrderItemAuto"/>
+				</dcsset:order>
+				<dcsset:selection>
+					<dcsset:item xsi:type="dcsset:SelectedItemAuto"/>
+				</dcsset:selection>
+			</dcsset:item>
+		</dcsset:settings>
+	</settingsVariant>
+</DataCompositionSchema>