feat(skd-edit): normalize line endings + diagnostics on patch-query not-found

patch-query теперь нормализует CRLF/CR → LF в old/new/query перед поиском,
поэтому многострочные шаблоны с любым стилем переводов строк находятся
корректно (XmlDocument декодирует text-узлы как LF).

При not-found вместо сухого сообщения выводится воронка диагностики:
  1) cross-dataset probe — «Found in dataset 'Y' instead — wrong -DataSet?»
  2) tolerant probe (collapse whitespace + NBSP) — «would match with
     whitespace normalized» + точка расхождения
  3) prefix divergence — «matched N of M chars, expected 'X' (U+...) but
     got 'Y' (U+...)» + короткий контекст

Тесты: 4 новых кейса (positive CRLF-tolerant + 3 диагностических negative).
Регрессия 45/45 PS + 45/45 Python.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Nick Shirokov
2026-05-20 19:53:24 +03:00
parent 6e14f2502e
commit ce1ba0bab1
8 changed files with 398 additions and 11 deletions
+1 -1
View File
@@ -265,7 +265,7 @@ Shorthand: `"старое => новое [@once]"`. По умолчанию за
`@once` — упасть с ошибкой, если в запросе не **ровно одно** вхождение. Защищает от случайных замен в комментариях и однотипных идентификаторах.
Многострочные подстроки поддерживаются — переводы строк в `старое`/`новое` сравниваются буквально (включая отступы).
Многострочные подстроки поддерживаются.
### set-outputParameter — установить параметр вывода
+131 -5
View File
@@ -1,4 +1,4 @@
# skd-edit v1.23 — Atomic 1C DCS editor
# skd-edit v1.24 — Atomic 1C DCS editor
# Source: https://github.com/Nikolay-Shirokov/cc-1c-skills
param(
[Parameter(Mandatory)]
@@ -1729,6 +1729,131 @@ function Set-OrCreateChildElementWithAttr($parent, [string]$localName, [string]$
}
}
# Returns every <dataSet> element that is a direct child of the document
# element of the script-level $xmlDoc and lives in the DCS schema namespace.
# The result is always an array (possibly empty); the unary comma keeps
# PowerShell from unrolling it on return.
function Get-AllDataSets {
    $dcsSchemaNs = "http://v8.1c.ru/8.1/data-composition-system/schema"
    $collected = New-Object System.Collections.Generic.List[object]
    foreach ($node in $xmlDoc.DocumentElement.ChildNodes) {
        # Guard clauses: skip anything that is not a schema-namespace <dataSet> element.
        if ($node.NodeType -ne 'Element') { continue }
        if ($node.LocalName -ne 'dataSet') { continue }
        if ($node.NamespaceURI -ne $dcsSchemaNs) { continue }
        $collected.Add($node)
    }
    return ,($collected.ToArray())
}
# Converts every CRLF pair and every lone CR in $s to a single LF.
# Existing LF characters are left untouched.
function Normalize-LineEndings([string]$s) {
    if ($null -eq $s) { return $s }
    # One regex pass: a CR optionally followed by LF becomes LF.
    return ($s -replace "\r\n?", "`n")
}
# Renders invisible characters of $s in a visible, escaped form for diagnostics.
# LF, CR and TAB become \n, \r and \t. Other control characters (< U+0020),
# NBSP (U+00A0), U+2000..U+200F and U+FEFF become \uXXXX (uppercase hex).
# Every other character is copied unchanged.
function Escape-Whitespace([string]$s) {
    $out = New-Object System.Text.StringBuilder
    foreach ($ch in $s.ToCharArray()) {
        $cp = [int]$ch
        switch ($cp) {
            10 { [void]$out.Append('\n') }
            13 { [void]$out.Append('\r') }
            9  { [void]$out.Append('\t') }
            default {
                $needsHex = ($cp -lt 32) -or ($cp -eq 0xA0) -or
                            ($cp -ge 0x2000 -and $cp -le 0x200F) -or ($cp -eq 0xFEFF)
                if ($needsHex) {
                    [void]$out.Append(('\u{0:X4}' -f $cp))
                } else {
                    [void]$out.Append($ch)
                }
            }
        }
    }
    return $out.ToString()
}
# Collapses every run of whitespace (including NBSP, U+00A0) in $s into a single
# ordinary space and trims the result. Used by the tolerant probe to compare
# texts while ignoring whitespace differences.
function Collapse-Whitespace([string]$s) {
    # NBSP is spelled as an explicit \u00A0 escape: a raw invisible character in
    # the pattern is indistinguishable from a space and easily lost by editors.
    return ([regex]::Replace($s, '[\s\u00A0]+', ' ')).Trim()
}
# Finds the longest prefix of $needle that occurs somewhere in $haystack.
# Returns a hashtable: Length = size of that prefix (0 if even the first
# character is absent), Offset = index of its first occurrence in $haystack
# (-1 when Length is 0).
#
# Binary search over the prefix length is valid because the predicate is
# monotonic: if the prefix of length L occurs at position P, the prefix of
# length L-1 also occurs at P.
#
# All lookups use ordinal comparison. The default String.IndexOf(string) is
# culture-sensitive and may treat zero-width/ignorable characters as absent,
# which would hide exactly the invisible-character mismatches this diagnostic
# is meant to pinpoint and would disagree with the caller's ordinal search.
function Find-LongestPrefixMatch([string]$haystack, [string]$needle) {
    $ordinal = [System.StringComparison]::Ordinal
    if ($needle.Length -eq 0 -or $haystack.Length -eq 0) {
        return @{ Length = 0; Offset = -1 }
    }
    $firstIdx = $haystack.IndexOf($needle.Substring(0, 1), $ordinal)
    if ($firstIdx -lt 0) {
        return @{ Length = 0; Offset = -1 }
    }
    $lo = 1; $hi = $needle.Length
    $bestLen = 1; $bestOffset = $firstIdx
    while ($lo -le $hi) {
        # Explicit floor: a bare [int] cast rounds half to even in PowerShell.
        $mid = [int][Math]::Floor(($lo + $hi) / 2.0)
        $idx = $haystack.IndexOf($needle.Substring(0, $mid), $ordinal)
        if ($idx -ge 0) { $bestLen = $mid; $bestOffset = $idx; $lo = $mid + 1 }
        else { $hi = $mid - 1 }
    }
    return @{ Length = $bestLen; Offset = $bestOffset }
}
# Builds the multi-line diagnostic shown when patch-query cannot find $oldStr
# in the query text of the selected data set.
#   $oldStr        - the search text (already line-ending-normalized by the caller)
#   $queryText     - the query text that was searched
#   $currentDsNode - the data set node that was searched (skipped in step 1)
#   $dsName        - its name, used in the messages
# Returns one string with lines joined by LF. The funnel stops at the first
# step that yields an explanation:
#   1) the text exists in another data set's query;
#   2) the text would match once whitespace/NBSP runs are collapsed;
#   3) the longest matching prefix and the first diverging character.
function Format-PatchQueryNotFound([string]$oldStr, [string]$queryText, $currentDsNode, [string]$dsName) {
    $schNs = "http://v8.1c.ru/8.1/data-composition-system/schema"
    $lines = @("Substring not found in query of dataset '$dsName'.")

    # Step 1 — cross-dataset probe
    foreach ($ds in (Get-AllDataSets)) {
        if ($ds -eq $currentDsNode) { continue }
        $q = Find-FirstElement $ds @("query") $schNs
        if (-not $q) { continue }
        $qt = Normalize-LineEndings $q.InnerText
        if ($qt.Contains($oldStr)) {
            $otherName = Get-DataSetName $ds
            $lines += "Found in dataset '$otherName' instead — wrong -DataSet?"
            return ($lines -join "`n")
        }
    }

    # Step 2 — tolerant probe (whitespace + NBSP collapsed)
    $normNeedle = Collapse-Whitespace $oldStr
    $normHay = Collapse-Whitespace $queryText
    $tolerant = ($normNeedle.Length -gt 0 -and $normHay.Contains($normNeedle))

    # Step 3 — prefix divergence (used by both Step 2 reporting and standalone Step 3)
    $prefix = Find-LongestPrefixMatch -haystack $queryText -needle $oldStr
    $divergence = $null
    if ($prefix.Length -gt 0 -and $prefix.Length -lt $oldStr.Length) {
        $queryPos = $prefix.Offset + $prefix.Length
        $searchChar = $oldStr[$prefix.Length]
        # Show at most the last 20 matched characters as context.
        $beforeLen = [Math]::Min(20, $prefix.Length)
        $before = $oldStr.Substring($prefix.Length - $beforeLen, $beforeLen)
        $divergence = [ordered]@{
            matched    = $prefix.Length
            total      = $oldStr.Length
            before     = $before
            searchChar = $searchChar
            # $null means the matched prefix ran up to the end of the query.
            queryChar  = $(if ($queryPos -lt $queryText.Length) { $queryText[$queryPos] } else { $null })
        }
    }

    # Detail lines shared by the tolerant and the standalone report.
    $detail = @()
    if ($divergence) {
        $detail += " before: '$(Escape-Whitespace $divergence.before)'"
        $detail += " in search: '$(Escape-Whitespace ([string]$divergence.searchChar))' (U+$('{0:X4}' -f [int]$divergence.searchChar))"
        if ($null -ne $divergence.queryChar) {
            $detail += " in query: '$(Escape-Whitespace ([string]$divergence.queryChar))' (U+$('{0:X4}' -f [int]$divergence.queryChar))"
        }
    }

    if ($tolerant) {
        $lines += "Not found exactly, but would match with whitespace normalized (tabs/spaces/NBSP)."
        if ($divergence) {
            $lines += "Diverged at offset $($divergence.matched) of $($divergence.total):"
            $lines += $detail
        }
        return ($lines -join "`n")
    }

    # Step 3 standalone
    if ($prefix.Length -eq 0) {
        $lines += "No common prefix with query. Check -DataSet (current: '$dsName')."
        return ($lines -join "`n")
    }
    if (-not $divergence) {
        # The prefix probe reported a full-length match, so there is no
        # diverging character to show; avoid printing empty/U+0000 details.
        $lines += "No divergence point could be determined."
        return ($lines -join "`n")
    }
    $lines += "Matched first $($divergence.matched) of $($divergence.total) chars, then diverged:"
    $lines += $detail
    if ($null -eq $divergence.queryChar) {
        $lines += " in query: (end of query)"
    }
    return ($lines -join "`n")
}
function Resolve-DataSet {
$schNs = "http://v8.1c.ru/8.1/data-composition-system/schema"
$root = $xmlDoc.DocumentElement
@@ -2697,13 +2822,14 @@ switch ($Operation) {
Write-Error "patch-query value must contain ' => ' separator: old => new"
exit 1
}
$oldStr = $val.Substring(0, $sepIdx)
$newStr = $val.Substring($sepIdx + 4)
$queryText = $queryEl.InnerText
$oldStr = Normalize-LineEndings $val.Substring(0, $sepIdx)
$newStr = Normalize-LineEndings $val.Substring($sepIdx + 4)
$queryText = Normalize-LineEndings $queryEl.InnerText
$count = ([regex]::Matches($queryText, [regex]::Escape($oldStr))).Count
if ($count -eq 0) {
Write-Error "Substring not found in query of dataset '$dsName': $oldStr"
$diag = Format-PatchQueryNotFound $oldStr $queryText $dsNode $dsName
Write-Error $diag
exit 1
}
if ($once -and $count -ne 1) {
+117 -5
View File
@@ -1,4 +1,4 @@
# skd-edit v1.23 — Atomic 1C DCS editor (Python port)
# skd-edit v1.24 — Atomic 1C DCS editor (Python port)
# Source: https://github.com/Nikolay-Shirokov/cc-1c-skills
import argparse
import os
@@ -1524,6 +1524,118 @@ def set_or_create_child_element_with_attr(parent, ln, ns_uri, value, xsi_type, i
insert_before_element(parent, node, None, indent)
def get_all_data_sets():
    """Return the direct ``dataSet`` children of ``xml_doc`` in the DCS schema namespace.

    Iterates the module-level ``xml_doc`` element and keeps the children whose
    local name is ``dataSet`` and whose namespace equals ``SCH_NS``.
    """
    found = []
    for child in xml_doc:
        # Children with a non-string tag are skipped (presumably lxml
        # comments / processing instructions — confirm against the parser used).
        if not isinstance(child.tag, str):
            continue
        if local_name(child) != "dataSet":
            continue
        if etree.QName(child.tag).namespace != SCH_NS:
            continue
        found.append(child)
    return found
def normalize_line_endings(s):
    """Convert every CRLF pair and every lone CR in ``s`` to LF.

    ``None`` is passed through unchanged; existing LF characters are kept.
    """
    if s is None:
        return s
    # Fold CRLF down to a bare CR first, then turn every CR into LF by
    # splitting on it and re-joining with LF.
    return "\n".join(s.replace("\r\n", "\r").split("\r"))
def escape_whitespace(s):
    """Render invisible characters of ``s`` in a visible, escaped form.

    LF, CR and TAB become ``\\n``, ``\\r`` and ``\\t``. Other control characters
    (< U+0020), NBSP (U+00A0), U+2000..U+200F and U+FEFF become ``\\uXXXX``
    with uppercase hex digits. All other characters are copied unchanged.
    """
    named = {"\n": "\\n", "\r": "\\r", "\t": "\\t"}

    def render(ch):
        if ch in named:
            return named[ch]
        code = ord(ch)
        if code < 32 or code in (0xA0, 0xFEFF) or 0x2000 <= code <= 0x200F:
            return f"\\u{code:04X}"
        return ch

    return "".join(render(ch) for ch in s)
def collapse_whitespace(s):
    """Collapse every run of whitespace (including NBSP) into one space and strip.

    Used by the tolerant probe to compare texts while ignoring whitespace
    differences.
    """
    # NBSP is written as an explicit \u00A0 escape: a raw invisible character in
    # the pattern is indistinguishable from a space and easily lost by editors.
    return re.sub(r"[\s\u00A0]+", " ", s).strip()
def find_longest_prefix_match(haystack, needle):
    """Find the longest prefix of ``needle`` that occurs in ``haystack``.

    Returns ``(length, offset)``: the size of that prefix and the index of its
    first occurrence in ``haystack``; ``(0, -1)`` when either string is empty
    or even the first character of ``needle`` is absent.

    Bisection over the prefix length works because presence is monotonic: if a
    prefix of length L occurs, every shorter prefix occurs at the same place.
    """
    if not needle or not haystack:
        return (0, -1)
    if needle[0] not in haystack:
        return (0, -1)
    # Invariant: the prefix of length `present` occurs in haystack, while
    # `absent` is a length that does not occur (or one past the needle).
    present = 1
    absent = len(needle) + 1
    while absent - present > 1:
        probe = (present + absent) // 2
        if needle[:probe] in haystack:
            present = probe
        else:
            absent = probe
    return (present, haystack.find(needle[:present]))
def format_patch_query_not_found(old_str, query_text, current_ds_node, ds_name):
    """Build the multi-line diagnostic for a patch-query search that found nothing.

    Args:
        old_str: the search text.
        query_text: the query text that was searched.
        current_ds_node: the data set element that was searched (skipped in step 1).
        ds_name: its name, used in the messages.

    Returns:
        Lines joined by LF. The funnel stops at the first step that explains
        the miss: (1) the text exists in another data set's query, (2) it would
        match with whitespace/NBSP runs collapsed, (3) longest matching prefix
        and the first diverging character.
    """
    report = [f"Substring not found in query of dataset '{ds_name}'."]

    # Step 1 — cross-dataset probe
    for candidate in get_all_data_sets():
        if candidate is current_ds_node:
            continue
        query_el = find_first_element(candidate, ["query"], SCH_NS)
        if query_el is None:
            continue
        if old_str in normalize_line_endings(query_el.text or ""):
            other = get_data_set_name(candidate)
            report.append(f"Found in dataset '{other}' instead — wrong -DataSet?")
            return "\n".join(report)

    # Step 2 — tolerant probe
    squeezed_needle = collapse_whitespace(old_str)
    squeezed_hay = collapse_whitespace(query_text)
    tolerant = bool(squeezed_needle) and (squeezed_needle in squeezed_hay)

    # Step 3 — divergence
    matched, off = find_longest_prefix_match(query_text, old_str)
    divergence = None
    if 0 < matched < len(old_str):
        next_pos = off + matched
        # Show at most the last 20 matched characters as context.
        context_start = matched - min(20, matched)
        divergence = {
            "matched": matched,
            "total": len(old_str),
            "before": old_str[context_start:matched],
            "search_char": old_str[matched],
            # None means the matched prefix ran up to the end of the query.
            "query_char": query_text[next_pos] if next_pos < len(query_text) else None,
        }

    def detail_lines(mark_end_of_query):
        """Lines describing the divergence point; shared by both reports."""
        sc = divergence["search_char"]
        qc = divergence["query_char"]
        out = [
            f" before: '{escape_whitespace(divergence['before'])}'",
            f" in search: '{escape_whitespace(sc)}' (U+{ord(sc):04X})",
        ]
        if qc is not None:
            out.append(f" in query: '{escape_whitespace(qc)}' (U+{ord(qc):04X})")
        elif mark_end_of_query:
            out.append(" in query: (end of query)")
        return out

    if tolerant:
        report.append("Not found exactly, but would match with whitespace normalized (tabs/spaces/NBSP).")
        if divergence:
            report.append(f"Diverged at offset {divergence['matched']} of {divergence['total']}:")
            report.extend(detail_lines(False))
        return "\n".join(report)

    if matched == 0:
        report.append(f"No common prefix with query. Check -DataSet (current: '{ds_name}').")
        return "\n".join(report)

    report.append(f"Matched first {divergence['matched']} of {divergence['total']} chars, then diverged:")
    report.extend(detail_lines(True))
    return "\n".join(report)
def resolve_data_set():
root_el = xml_doc
@@ -2255,13 +2367,13 @@ elif operation == "patch-query":
if sep_idx < 0:
print("patch-query value must contain ' => ' separator: old => new", file=sys.stderr)
sys.exit(1)
old_str = val[:sep_idx]
new_str = val[sep_idx + 4:]
query_text = query_el.text or ""
old_str = normalize_line_endings(val[:sep_idx])
new_str = normalize_line_endings(val[sep_idx + 4:])
query_text = normalize_line_endings(query_el.text or "")
count = query_text.count(old_str)
if count == 0:
print(f"Substring not found in query of dataset '{ds_name}': {old_str}", file=sys.stderr)
print(format_patch_query_not_found(old_str, query_text, ds_node, ds_name), file=sys.stderr)
sys.exit(1)
if once and count != 1:
print(f"@once: expected 1 occurrence of '{old_str}' in dataset '{ds_name}', found {count}", file=sys.stderr)
@@ -0,0 +1,21 @@
{
"name": "patch-query: CRLF в -Value матчит LF-текст в InnerText",
"preRun": [
{
"script": "skd-compile/scripts/skd-compile",
"input": {
"dataSets": [{
"name": "Основной",
"query": "ВЫБРАТЬ\n\tТ.Поле\nИЗ\n\tРегистр КАК Т",
"fields": ["Поле"]
}]
},
"args": { "-DefinitionFile": "{inputFile}", "-OutputPath": "{workDir}/Template.xml" }
}
],
"params": {
"templatePath": "Template.xml",
"operation": "patch-query",
"value": "ВЫБРАТЬ\r\n\tТ.Поле => ВЫБРАТЬ\r\n\tТ.Имя"
}
}
@@ -0,0 +1,29 @@
{
"name": "patch-query: diagnostic — NBSP в search улавливается tolerant probe",
"preRun": [
{
"script": "skd-compile/scripts/skd-compile",
"input": {
"dataSets": [
{
"name": "Основной",
"query": "ВЫБРАТЬ Т.Поле ИЗ Регистр КАК Т",
"fields": [
"Поле"
]
}
]
},
"args": {
"-DefinitionFile": "{inputFile}",
"-OutputPath": "{workDir}/Template.xml"
}
}
],
"params": {
"templatePath": "Template.xml",
"operation": "patch-query",
"value": "ВЫБРАТЬ Т.Поле => x"
},
"expectError": "whitespace normalized"
}
@@ -0,0 +1,30 @@
{
"name": "patch-query: diagnostic — подстрока в другом dataSet",
"preRun": [
{
"script": "skd-compile/scripts/skd-compile",
"input": {
"dataSets": [
{
"name": "Первый",
"query": "ВЫБРАТЬ 1 КАК Поле",
"fields": ["Поле"]
},
{
"name": "Второй",
"query": "ВЫБРАТЬ Т.УникальныйМаркер ИЗ Регистр КАК Т",
"fields": ["УникальныйМаркер: string"]
}
]
},
"args": { "-DefinitionFile": "{inputFile}", "-OutputPath": "{workDir}/Template.xml" }
}
],
"params": {
"templatePath": "Template.xml",
"operation": "patch-query",
"value": "УникальныйМаркер => Новый"
},
"args_extra": ["-DataSet", "Первый"],
"expectError": "Found in dataset 'Второй' instead"
}
@@ -0,0 +1,22 @@
{
"name": "patch-query: diagnostic — опечатка показывает точку расхождения",
"preRun": [
{
"script": "skd-compile/scripts/skd-compile",
"input": {
"dataSets": [{
"name": "Основной",
"query": "ВЫБРАТЬ Т.Наименование ИЗ Регистр КАК Т",
"fields": ["Наименование: string"]
}]
},
"args": { "-DefinitionFile": "{inputFile}", "-OutputPath": "{workDir}/Template.xml" }
}
],
"params": {
"templatePath": "Template.xml",
"operation": "patch-query",
"value": "Т.Наеменование => Т.Имя"
},
"expectError": "diverged"
}
@@ -0,0 +1,47 @@
<?xml version="1.0" encoding="UTF-8"?>
<DataCompositionSchema xmlns="http://v8.1c.ru/8.1/data-composition-system/schema"
xmlns:dcscom="http://v8.1c.ru/8.1/data-composition-system/common"
xmlns:dcscor="http://v8.1c.ru/8.1/data-composition-system/core"
xmlns:dcsset="http://v8.1c.ru/8.1/data-composition-system/settings"
xmlns:v8="http://v8.1c.ru/8.1/data/core"
xmlns:v8ui="http://v8.1c.ru/8.1/data/ui"
xmlns:xs="http://www.w3.org/2001/XMLSchema"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<dataSource>
<name>ИсточникДанных1</name>
<dataSourceType>Local</dataSourceType>
</dataSource>
<dataSet xsi:type="DataSetQuery">
<name>Основной</name>
<field xsi:type="DataSetFieldField">
<dataPath>Поле</dataPath>
<field>Поле</field>
</field>
<dataSource>ИсточникДанных1</dataSource>
<query>ВЫБРАТЬ
Т.Имя
ИЗ
Регистр КАК Т</query>
</dataSet>
<settingsVariant>
<dcsset:name>Основной</dcsset:name>
<dcsset:presentation xsi:type="v8:LocalStringType">
<v8:item>
<v8:lang>ru</v8:lang>
<v8:content>Основной</v8:content>
</v8:item>
</dcsset:presentation>
<dcsset:settings xmlns:style="http://v8.1c.ru/8.1/data/ui/style" xmlns:sys="http://v8.1c.ru/8.1/data/ui/fonts/system" xmlns:web="http://v8.1c.ru/8.1/data/ui/colors/web" xmlns:win="http://v8.1c.ru/8.1/data/ui/colors/windows">
<dcsset:selection>
</dcsset:selection>
<dcsset:item xsi:type="dcsset:StructureItemGroup">
<dcsset:order>
<dcsset:item xsi:type="dcsset:OrderItemAuto"/>
</dcsset:order>
<dcsset:selection>
<dcsset:item xsi:type="dcsset:SelectedItemAuto"/>
</dcsset:selection>
</dcsset:item>
</dcsset:settings>
</settingsVariant>
</DataCompositionSchema>