Files
2026-06-04 09:28:00 +00:00

706 lines
22 KiB
Python

#!/usr/bin/env python3
# mxl-decompile v1.0 — Decompile 1C spreadsheet to JSON
# Source: https://github.com/Nikolay-Shirokov/cc-1c-skills
import argparse
import json
import os
import sys
from collections import OrderedDict
from lxml import etree
# --- Namespace map ---
# Prefix -> namespace URI table. find() and findall() pass it to lxml so the
# XPath expressions in this file can use the short prefixes ("d:", "v8:", ...).
NSMAP = {
    "d": "http://v8.1c.ru/8.2/data/spreadsheet",
    "v8": "http://v8.1c.ru/8.1/data/core",
    "v8ui": "http://v8.1c.ru/8.1/data/ui",
    "xsi": "http://www.w3.org/2001/XMLSchema-instance",
}
# Same URI as NSMAP["xsi"]; used to build the "{uri}type" attribute name when
# reading xsi:type from an element.
XSI_NS = "http://www.w3.org/2001/XMLSchema-instance"
def find(node, xpath):
    """Return the first element under *node* matching *xpath*, or None.

    Namespace prefixes in *xpath* are resolved through the module-level
    NSMAP. A *node* of None yields None instead of raising, so a lookup can
    be chained on the result of an earlier lookup that found nothing.
    """
    if node is None:
        return None
    return node.find(xpath, NSMAP)
def findall(node, xpath):
    """Return all elements under *node* matching *xpath*.

    Namespace prefixes in *xpath* are resolved through the module-level
    NSMAP. A *node* of None yields an empty list instead of raising, so
    callers can iterate over the children of an optional parent directly.
    """
    if node is None:
        return []
    return node.findall(xpath, NSMAP)
def text_of(node):
    """Return the text of *node*, or None if the node is missing or has no text."""
    if node is None:
        return None
    # Empty string and None are both reported as "no text".
    return node.text or None
def int_of(node, default=0):
    """Return the text of *node* converted with int(), or *default*.

    *default* is returned when the node is missing or its text is empty.
    """
    content = None if node is None else node.text
    return int(content) if content else default
# --- Main ---
def main():
    """Decompile a 1C spreadsheet template XML file into a JSON description.

    Command-line arguments:
        -TemplatePath / -Path: path to the template XML file (required).
        -OutputPath: file to write the JSON to; when omitted the JSON is
            printed to stdout.

    The resulting JSON object has the keys "columns", "defaultWidth",
    optionally "columnWidths", then "fonts", "styles" and "areas". Two summary
    lines are printed to stderr at the end.

    Exits with status 1 and a message on stderr when the template file does
    not exist or cannot be read/parsed as XML.
    """
    # Streams that were replaced by objects without reconfigure() are left as is.
    for stream in (sys.stdout, sys.stderr):
        if hasattr(stream, "reconfigure"):
            stream.reconfigure(encoding="utf-8")

    parser = argparse.ArgumentParser(description="Decompile 1C spreadsheet to JSON", allow_abbrev=False)
    parser.add_argument("-TemplatePath", "-Path", required=True, help="Path to Template.xml")
    parser.add_argument("-OutputPath", default=None, help="Output JSON path (stdout if omitted)")
    args = parser.parse_args()
    template_path = args.TemplatePath
    output_path = args.OutputPath

    # --- 1. Load and parse XML ---
    if not os.path.isfile(template_path):
        print(f"File not found: {template_path}", file=sys.stderr)
        sys.exit(1)
    parser_xml = etree.XMLParser(remove_blank_text=False)
    try:
        tree = etree.parse(template_path, parser_xml)
    except (etree.XMLSyntaxError, OSError) as exc:
        # Report unreadable or malformed input the same way as a missing file.
        print(f"Cannot parse XML: {template_path}: {exc}", file=sys.stderr)
        sys.exit(1)
    root = tree.getroot()

    # --- 2. Extract font palette ---
    raw_fonts = [
        {
            "Face": f_node.get("faceName", ""),
            "Size": int(f_node.get("height", "0")),
            "Bold": f_node.get("bold") == "true",
            "Italic": f_node.get("italic") == "true",
            "Underline": f_node.get("underline") == "true",
            "Strikeout": f_node.get("strikeout") == "true",
        }
        for f_node in findall(root, "d:font")
    ]

    # --- 3. Extract line palette ---
    raw_lines = [
        {"Width": int(l_node.get("width", "0"))}
        for l_node in findall(root, "d:line")
    ]

    # --- 4. Extract format palette ---
    # Missing or empty child elements fall back to -1 (indices), 0 (sizes)
    # or "" (text values).
    raw_formats = []
    for fmt_node in findall(root, "d:format"):
        raw_formats.append({
            "FontIdx": int_of(find(fmt_node, "d:font"), -1),
            "LB": int_of(find(fmt_node, "d:leftBorder"), -1),
            "TB": int_of(find(fmt_node, "d:topBorder"), -1),
            "RB": int_of(find(fmt_node, "d:rightBorder"), -1),
            "BB": int_of(find(fmt_node, "d:bottomBorder"), -1),
            "Width": int_of(find(fmt_node, "d:width")),
            "Height": int_of(find(fmt_node, "d:height")),
            "HA": text_of(find(fmt_node, "d:horizontalAlignment")) or "",
            "VA": text_of(find(fmt_node, "d:verticalAlignment")) or "",
            "Wrap": text_of(find(fmt_node, "d:textPlacement")) == "Wrap",
            "FillType": text_of(find(fmt_node, "d:fillType")) or "",
            "DataFormat": text_of(find(fmt_node, "d:format/v8:item/v8:content")) or "",
        })

    def get_format(idx):
        """Return the format for 1-based index *idx*, or None if out of range."""
        if idx <= 0 or idx > len(raw_formats):
            return None
        return raw_formats[idx - 1]

    # --- 5. Extract columns and default width ---
    col_node = find(root, "d:columns")
    total_columns = 0
    col_format_indices = {}
    # A template without a <columns> element simply has no column information.
    if col_node is not None:
        total_columns = int_of(find(col_node, "d:size"))
        for ci in findall(col_node, "d:columnsItem"):
            col_idx = int_of(find(ci, "d:index"))
            col_format_indices[col_idx] = int_of(find(ci, "d:column/d:formatIndex"))

    default_fmt_idx = int_of(find(root, "d:defaultFormatIndex"))
    default_width = 10
    def_fmt = get_format(default_fmt_idx)  # None when the index is 0 or invalid
    if def_fmt and def_fmt["Width"] > 0:
        default_width = def_fmt["Width"]

    # Build column width map (1-based col -> width), only non-default
    col_width_map = {}
    for col0 in sorted(col_format_indices):
        fmt = get_format(col_format_indices[col0])
        if fmt and fmt["Width"] > 0 and fmt["Width"] != default_width:
            col_width_map[col0 + 1] = fmt["Width"]

    # --- 6. Extract merges ---
    # Keyed by "row,col"; W/H are stored as read (a missing element counts as 0).
    merge_map = {}
    for m_node in findall(root, "d:merge"):
        r = int_of(find(m_node, "d:r"))
        c = int_of(find(m_node, "d:c"))
        merge_map[f"{r},{c}"] = {
            "W": int_of(find(m_node, "d:w")),
            "H": int_of(find(m_node, "d:h")),
        }

    # --- 7. Extract named items ---
    # Only NamedItemCells entries whose area type is "Rows" are kept.
    named_areas = []
    for ni_node in findall(root, "d:namedItem"):
        if ni_node.get(f"{{{XSI_NS}}}type", "") != "NamedItemCells":
            continue
        area_node = find(ni_node, "d:area")
        if area_node is None:
            continue
        if (text_of(find(area_node, "d:type")) or "") != "Rows":
            continue
        named_areas.append({
            "Name": text_of(find(ni_node, "d:name")) or "",
            "BeginRow": int_of(find(area_node, "d:beginRow")),
            "EndRow": int_of(find(area_node, "d:endRow")),
        })

    # --- 8. Extract rows ---
    row_data = {}
    for ri_node in findall(root, "d:rowsItem"):
        row_node = find(ri_node, "d:row")
        if row_node is None:
            # Nothing to read for this item; the row is later treated as absent.
            continue
        row_idx = int_of(find(ri_node, "d:index"))
        index_to = int_of(find(ri_node, "d:indexTo"), row_idx)
        row_fmt_idx = int_of(find(row_node, "d:formatIndex"))
        is_empty = text_of(find(row_node, "d:empty")) == "true"

        cells = []
        if not is_empty:
            col = -1
            for c_group in findall(row_node, "d:c"):
                # An explicit <i> sets the column; otherwise it follows the previous one.
                col = int_of(find(c_group, "d:i"), col + 1)
                c_content = find(c_group, "d:c")
                if c_content is None:
                    continue
                cells.append({
                    "Col": col,
                    "FormatIdx": int_of(find(c_content, "d:f")),
                    "Param": text_of(find(c_content, "d:parameter")),
                    "Detail": text_of(find(c_content, "d:detailParameter")),
                    "Text": text_of(find(c_content, "d:tl/v8:item/v8:content")),
                })

        # One rowsItem may describe a whole range of identical rows.
        for r in range(row_idx, index_to + 1):
            row_data[r] = {
                "FormatIdx": row_fmt_idx,
                "Cells": cells,
                "Empty": is_empty,
            }

    # --- 9. Build style key (ignoring fillType) ---
    def get_border_desc(fmt):
        """Describe which sides of *fmt* have a border and whether any is thick."""
        if not fmt:
            return {"Border": "none", "Thick": False}
        # Insertion order defines the order of side names in the result.
        line_idx = {
            "top": fmt["TB"],
            "bottom": fmt["BB"],
            "left": fmt["LB"],
            "right": fmt["RB"],
        }
        sides = [side for side, idx in line_idx.items() if idx >= 0]
        if not sides:
            return {"Border": "none", "Thick": False}
        thick = any(
            0 <= idx < len(raw_lines) and raw_lines[idx]["Width"] >= 2
            for idx in line_idx.values()
        )
        border = "all" if len(sides) == 4 else ",".join(sides)
        return {"Border": border, "Thick": thick}

    def get_style_key(fmt):
        """Return a string identifying the visual style of *fmt* ("empty" for None)."""
        if not fmt:
            return "empty"
        fi = fmt["FontIdx"] if fmt["FontIdx"] >= 0 else 0
        bd = get_border_desc(fmt)
        return f"f={fi}|b={bd['Border']}|bw={bd['Thick']}|ha={fmt['HA']}|va={fmt['VA']}|wr={fmt['Wrap']}|df={fmt['DataFormat']}"

    # --- 10. Name fonts ---
    def get_font_key(f):
        """Return a string that is equal for fonts with identical attributes."""
        return f"{f['Face']}|{f['Size']}|{f['Bold']}|{f['Italic']}|{f['Underline']}|{f['Strikeout']}"

    font_names = {}
    font_defs = OrderedDict()
    font_key_map = {}
    if raw_fonts:
        # The first font of the palette is always called "default".
        font_names[0] = "default"
        font_defs["default"] = raw_fonts[0]
        font_key_map[get_font_key(raw_fonts[0])] = "default"

    for i, f in enumerate(raw_fonts[1:], start=1):
        df = raw_fonts[0]
        # Dedup: if identical font already named, reuse
        f_key = get_font_key(f)
        if f_key in font_key_map:
            font_names[i] = font_key_map[f_key]
            continue

        # Try a short name relative to the default font first.
        name = None
        if f["Face"] == df["Face"] and f["Size"] == df["Size"]:
            if f["Bold"] and not df["Bold"] and not f["Italic"] and not f["Underline"] and not f["Strikeout"]:
                name = "bold"
            elif f["Italic"] and not df["Italic"] and not f["Bold"]:
                name = "italic"
            elif f["Underline"] and not df["Underline"] and not f["Bold"] and not f["Italic"]:
                name = "underline"
        elif f["Face"] == df["Face"] and f["Size"] > df["Size"] and f["Bold"]:
            name = "header"
        elif f["Face"] == df["Face"] and f["Size"] < df["Size"]:
            name = "small"

        # Otherwise compose a descriptive name from the font attributes.
        if not name:
            parts = []
            if f["Face"] and f["Face"] != df["Face"]:
                parts.append(f["Face"].lower())
            parts.append(str(f["Size"]))
            if f["Bold"]:
                parts.append("bold")
            if f["Italic"]:
                parts.append("italic")
            if f["Underline"]:
                parts.append("underline")
            if f["Strikeout"]:
                parts.append("strikeout")
            name = "-".join(parts)

        # Make the name unique by appending 2, 3, ...
        base_name = name
        suffix = 2
        while name in font_defs:
            name = f"{base_name}{suffix}"
            suffix += 1
        font_names[i] = name
        font_defs[name] = f
        font_key_map[f_key] = name

    # --- 11. Collect and name styles ---
    style_keys = OrderedDict()
    format_to_style_key = {}
    for rd in row_data.values():
        for cell in rd["Cells"]:
            fmt = get_format(cell["FormatIdx"])
            if not fmt:
                continue
            key = get_style_key(fmt)
            if key not in style_keys:
                style_keys[key] = fmt
            format_to_style_key[cell["FormatIdx"]] = key

    def name_style(fmt):
        """Derive a readable style name from *fmt* ("default" if nothing stands out)."""
        if not fmt:
            return "default"
        parts = []
        fi = fmt["FontIdx"] if fmt["FontIdx"] >= 0 else 0
        if fi in font_names and font_names[fi] != "default":
            parts.append(font_names[fi])
        bd = get_border_desc(fmt)
        if bd["Border"] == "all":
            parts.append("bordered")
        elif bd["Border"] != "none":
            parts.append(f"border-{bd['Border']}")
        if fmt["HA"] == "Center":
            parts.append("center")
        elif fmt["HA"] == "Right":
            parts.append("right")
        if fmt["VA"] == "Center":
            parts.append("vcenter")
        elif fmt["VA"] == "Top":
            parts.append("vtop")
        if fmt["Wrap"]:
            parts.append("wrap")
        if fmt["DataFormat"]:
            parts.append("fmt")
        if not parts:
            return "default"
        return "-".join(parts)

    align_map = {"Left": "left", "Center": "center", "Right": "right"}
    valign_map = {"Top": "top", "Center": "center"}
    style_names = OrderedDict()
    style_defs = OrderedDict()
    for key, fmt in style_keys.items():
        # Make the name unique by appending 2, 3, ...
        name = name_style(fmt)
        base_name = name
        suffix = 2
        while name in style_defs:
            name = f"{base_name}{suffix}"
            suffix += 1
        style_names[key] = name

        s_def = OrderedDict()
        fi = fmt["FontIdx"] if fmt["FontIdx"] >= 0 else 0
        if fi in font_names and font_names[fi] != "default":
            s_def["font"] = font_names[fi]
        # Alignment values outside the maps are not emitted.
        align = align_map.get(fmt["HA"])
        if align:
            s_def["align"] = align
        valign = valign_map.get(fmt["VA"])
        if valign:
            s_def["valign"] = valign
        bd = get_border_desc(fmt)
        if bd["Border"] != "none":
            s_def["border"] = bd["Border"]
            if bd["Thick"]:
                s_def["borderWidth"] = "thick"
        if fmt["Wrap"]:
            s_def["wrap"] = True
        if fmt["DataFormat"]:
            s_def["format"] = fmt["DataFormat"]
        style_defs[name] = s_def

    def get_style_name(fmt_idx):
        """Return the style name assigned to format index *fmt_idx*, or "default"."""
        key = format_to_style_key.get(fmt_idx)
        if key and key in style_names:
            return style_names[key]
        return "default"

    def empty_marker(count):
        """Return the row entry standing for *count* consecutive empty rows."""
        if count == 1:
            return OrderedDict()
        return OrderedDict([("empty", count)])

    # --- 12. Build areas ---
    dsl_areas = []
    for area in named_areas:
        area_rows = []
        for global_row in range(area["BeginRow"], area["EndRow"] + 1):
            rd = row_data.get(global_row)
            if not rd or rd["Empty"]:
                area_rows.append(OrderedDict())
                continue
            dsl_row = OrderedDict()

            # Row height
            if rd["FormatIdx"] > 0:
                row_fmt = get_format(rd["FormatIdx"])
                if row_fmt and row_fmt["Height"] > 0:
                    dsl_row["height"] = row_fmt["Height"]

            # Separate content cells from gap-fill cells
            content_cells = []
            gap_cells = []
            for cell in rd["Cells"]:
                has_content = cell["Param"] or cell["Text"]
                has_merge = f"{global_row},{cell['Col']}" in merge_map
                if has_content or has_merge:
                    content_cells.append(cell)
                else:
                    gap_cells.append(cell)

            # Detect rowStyle: only when all gap cells share a single style key
            row_style_name = None
            row_style_key = None
            gap_keys = {get_style_key(get_format(gc["FormatIdx"])) for gc in gap_cells}
            if len(gap_keys) == 1:
                row_style_key = next(iter(gap_keys))
                row_style_name = style_names.get(row_style_key)
            if row_style_name and row_style_name != "default":
                dsl_row["rowStyle"] = row_style_name

            # Build cell list
            dsl_cells = []
            for cell in sorted(content_cells, key=lambda c: c["Col"]):
                dsl_cell = OrderedDict()
                dsl_cell["col"] = cell["Col"] + 1

                # Span/rowspan from merge
                m = merge_map.get(f"{global_row},{cell['Col']}")
                if m is not None:
                    if m["W"] > 0:
                        dsl_cell["span"] = m["W"] + 1
                    if m["H"] > 0:
                        dsl_cell["rowspan"] = m["H"] + 1

                # Style (omitted when the cell inherits the row style)
                cell_fmt = get_format(cell["FormatIdx"])
                cell_style_key = get_style_key(cell_fmt)
                if not (row_style_key and cell_style_key == row_style_key):
                    sn = get_style_name(cell["FormatIdx"])
                    if sn != "default" or not row_style_name:
                        dsl_cell["style"] = sn

                # Content
                fill_type = cell_fmt["FillType"] if cell_fmt else ""
                if cell["Param"]:
                    dsl_cell["param"] = cell["Param"]
                    if cell["Detail"]:
                        dsl_cell["detail"] = cell["Detail"]
                elif fill_type == "Template" and cell["Text"]:
                    dsl_cell["template"] = cell["Text"]
                elif cell["Text"]:
                    dsl_cell["text"] = cell["Text"]
                dsl_cells.append(dsl_cell)

            if dsl_cells:
                dsl_row["cells"] = dsl_cells
            area_rows.append(dsl_row)

        # Compress consecutive empty rows ({}) into { empty = N }
        compressed_rows = []
        empty_run = 0
        for r in area_rows:
            if len(r) == 0:
                empty_run += 1
                continue
            if empty_run > 0:
                compressed_rows.append(empty_marker(empty_run))
                empty_run = 0
            compressed_rows.append(r)
        if empty_run > 0:
            compressed_rows.append(empty_marker(empty_run))

        dsl_areas.append(OrderedDict([
            ("name", area["Name"]),
            ("rows", compressed_rows),
        ]))

    # --- 13. Compress columnWidths ---
    # Group columns by width, then collapse consecutive columns into "a-b" keys.
    compressed_widths = OrderedDict()
    width_to_cols = {}
    for col1, width in col_width_map.items():
        width_to_cols.setdefault(width, []).append(col1)
    for width, cols in width_to_cols.items():
        cols_sorted = sorted(cols)
        spans = []
        range_start = range_prev = cols_sorted[0]
        for col1 in cols_sorted[1:]:
            if col1 != range_prev + 1:
                spans.append((range_start, range_prev))
                range_start = col1
            range_prev = col1
        spans.append((range_start, range_prev))
        for first, last in spans:
            rng = str(first) if first == last else f"{first}-{last}"
            compressed_widths[rng] = width

    # --- 14. Build fonts output ---
    fonts_out = OrderedDict()
    for name, f in font_defs.items():
        f_out = OrderedDict()
        f_out["face"] = f["Face"]
        f_out["size"] = f["Size"]
        # Boolean attributes are emitted only when set.
        if f["Bold"]:
            f_out["bold"] = True
        if f["Italic"]:
            f_out["italic"] = True
        if f["Underline"]:
            f_out["underline"] = True
        if f["Strikeout"]:
            f_out["strikeout"] = True
        fonts_out[name] = f_out

    # --- 15. Assemble result ---
    result = OrderedDict()
    result["columns"] = total_columns
    result["defaultWidth"] = default_width
    if compressed_widths:
        result["columnWidths"] = compressed_widths

    # Remove empty "default" style
    if "default" in style_defs and len(style_defs["default"]) == 0:
        del style_defs["default"]

    # Remove unused styles
    used_styles = set()
    for a in dsl_areas:
        for r in a["rows"]:
            if "rowStyle" in r:
                used_styles.add(r["rowStyle"])
            for c in r.get("cells", []):
                if "style" in c:
                    used_styles.add(c["style"])
    for s in [s for s in style_defs if s not in used_styles]:
        del style_defs[s]

    result["fonts"] = fonts_out
    result["styles"] = style_defs
    result["areas"] = dsl_areas

    # --- 16. Convert to JSON ---
    json_str = json.dumps(result, ensure_ascii=False, indent=2)

    # --- 17. Output ---
    if output_path:
        abs_path = os.path.join(os.getcwd(), output_path) if not os.path.isabs(output_path) else output_path
        with open(abs_path, "w", encoding="utf-8") as fh:
            fh.write(json_str)
        print(f"[OK] Decompiled: {output_path}")
    else:
        print(json_str)
    # Summary goes to stderr so it never mixes with JSON written to stdout.
    print(f" Areas: {len(named_areas)}, Rows: {len(row_data)}, Columns: {total_columns}", file=sys.stderr)
    print(f" Fonts: {len(font_defs)}, Styles: {len(style_defs)}, Merges: {len(merge_map)}", file=sys.stderr)
# Script entry point: run the CLI only when executed directly, not on import.
if __name__ == "__main__":
    main()