Please enter the commit message for your changes. Lines starting

with '#' will be ignored, and an empty message aborts the commit.

On branch main
Your branch is up to date with 'origin/main'.

Changes to be committed:
	new file:   .claude/skills/algorithmic-art/.openskills.json
	new file:   .claude/skills/algorithmic-art/LICENSE.txt
	new file:   .claude/skills/algorithmic-art/SKILL.md
	new file:   .claude/skills/algorithmic-art/templates/generator_template.js
	new file:   .claude/skills/algorithmic-art/templates/viewer.html
	new file:   .claude/skills/brand-guidelines/.openskills.json
	new file:   .claude/skills/brand-guidelines/LICENSE.txt
	new file:   .claude/skills/brand-guidelines/SKILL.md
	new file:   .claude/skills/canvas-design/.openskills.json
	new file:   .claude/skills/canvas-design/LICENSE.txt
	new file:   .claude/skills/canvas-design/SKILL.md
	new file:   .claude/skills/canvas-design/canvas-fonts/ArsenalSC-OFL.txt
	new file:   .claude/skills/canvas-design/canvas-fonts/ArsenalSC-Regular.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/BigShoulders-Bold.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/BigShoulders-OFL.txt
	new file:   .claude/skills/canvas-design/canvas-fonts/BigShoulders-Regular.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/Boldonse-OFL.txt
	new file:   .claude/skills/canvas-design/canvas-fonts/Boldonse-Regular.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/BricolageGrotesque-Bold.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/BricolageGrotesque-OFL.txt
	new file:   .claude/skills/canvas-design/canvas-fonts/BricolageGrotesque-Regular.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/CrimsonPro-Bold.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/CrimsonPro-Italic.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/CrimsonPro-OFL.txt
	new file:   .claude/skills/canvas-design/canvas-fonts/CrimsonPro-Regular.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/DMMono-OFL.txt
	new file:   .claude/skills/canvas-design/canvas-fonts/DMMono-Regular.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/EricaOne-OFL.txt
	new file:   .claude/skills/canvas-design/canvas-fonts/EricaOne-Regular.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/GeistMono-Bold.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/GeistMono-OFL.txt
	new file:   .claude/skills/canvas-design/canvas-fonts/GeistMono-Regular.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/Gloock-OFL.txt
	new file:   .claude/skills/canvas-design/canvas-fonts/Gloock-Regular.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/IBMPlexMono-Bold.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/IBMPlexMono-OFL.txt
	new file:   .claude/skills/canvas-design/canvas-fonts/IBMPlexMono-Regular.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/IBMPlexSerif-Bold.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/IBMPlexSerif-BoldItalic.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/IBMPlexSerif-Italic.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/IBMPlexSerif-Regular.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/InstrumentSans-Bold.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/InstrumentSans-BoldItalic.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/InstrumentSans-Italic.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/InstrumentSans-OFL.txt
	new file:   .claude/skills/canvas-design/canvas-fonts/InstrumentSans-Regular.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/InstrumentSerif-Italic.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/InstrumentSerif-Regular.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/Italiana-OFL.txt
	new file:   .claude/skills/canvas-design/canvas-fonts/Italiana-Regular.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/JetBrainsMono-Bold.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/JetBrainsMono-OFL.txt
	new file:   .claude/skills/canvas-design/canvas-fonts/JetBrainsMono-Regular.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/Jura-Light.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/Jura-Medium.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/Jura-OFL.txt
	new file:   .claude/skills/canvas-design/canvas-fonts/LibreBaskerville-OFL.txt
	new file:   .claude/skills/canvas-design/canvas-fonts/LibreBaskerville-Regular.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/Lora-Bold.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/Lora-BoldItalic.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/Lora-Italic.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/Lora-OFL.txt
	new file:   .claude/skills/canvas-design/canvas-fonts/Lora-Regular.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/NationalPark-Bold.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/NationalPark-OFL.txt
	new file:   .claude/skills/canvas-design/canvas-fonts/NationalPark-Regular.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/NothingYouCouldDo-OFL.txt
	new file:   .claude/skills/canvas-design/canvas-fonts/NothingYouCouldDo-Regular.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/Outfit-Bold.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/Outfit-OFL.txt
	new file:   .claude/skills/canvas-design/canvas-fonts/Outfit-Regular.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/PixelifySans-Medium.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/PixelifySans-OFL.txt
	new file:   .claude/skills/canvas-design/canvas-fonts/PoiretOne-OFL.txt
	new file:   .claude/skills/canvas-design/canvas-fonts/PoiretOne-Regular.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/RedHatMono-Bold.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/RedHatMono-OFL.txt
	new file:   .claude/skills/canvas-design/canvas-fonts/RedHatMono-Regular.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/Silkscreen-OFL.txt
	new file:   .claude/skills/canvas-design/canvas-fonts/Silkscreen-Regular.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/SmoochSans-Medium.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/SmoochSans-OFL.txt
	new file:   .claude/skills/canvas-design/canvas-fonts/Tektur-Medium.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/Tektur-OFL.txt
	new file:   .claude/skills/canvas-design/canvas-fonts/Tektur-Regular.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/WorkSans-Bold.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/WorkSans-BoldItalic.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/WorkSans-Italic.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/WorkSans-OFL.txt
	new file:   .claude/skills/canvas-design/canvas-fonts/WorkSans-Regular.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/YoungSerif-OFL.txt
	new file:   .claude/skills/canvas-design/canvas-fonts/YoungSerif-Regular.ttf
	new file:   .claude/skills/doc-coauthoring/.openskills.json
	new file:   .claude/skills/doc-coauthoring/SKILL.md
	new file:   .claude/skills/docx/.openskills.json
	new file:   .claude/skills/docx/LICENSE.txt
	new file:   .claude/skills/docx/SKILL.md
	new file:   .claude/skills/docx/scripts/__init__.py
	new file:   .claude/skills/docx/scripts/accept_changes.py
	new file:   .claude/skills/docx/scripts/comment.py
	new file:   .claude/skills/docx/scripts/office/helpers/__init__.py
	new file:   .claude/skills/docx/scripts/office/helpers/merge_runs.py
	new file:   .claude/skills/docx/scripts/office/helpers/simplify_redlines.py
	new file:   .claude/skills/docx/scripts/office/pack.py
	new file:   .claude/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chart.xsd
	new file:   .claude/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd
	new file:   .claude/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd
	new file:   .claude/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd
	new file:   .claude/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-main.xsd
	new file:   .claude/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-picture.xsd
	new file:   .claude/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd
	new file:   .claude/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd
	new file:   .claude/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/pml.xsd
	new file:   .claude/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd
	new file:   .claude/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd
	new file:   .claude/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd
	new file:   .claude/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd
	new file:   .claude/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd
	new file:   .claude/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd
	new file:   .claude/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd
	new file:   .claude/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd
	new file:   .claude/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-math.xsd
	new file:   .claude/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd
	new file:   .claude/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/sml.xsd
	new file:   .claude/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-main.xsd
	new file:   .claude/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd
	new file:   .claude/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd
	new file:   .claude/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd
	new file:   .claude/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd
	new file:   .claude/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/wml.xsd
	new file:   .claude/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/xml.xsd
	new file:   .claude/skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-contentTypes.xsd
	new file:   .claude/skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-coreProperties.xsd
	new file:   .claude/skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-digSig.xsd
	new file:   .claude/skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-relationships.xsd
	new file:   .claude/skills/docx/scripts/office/schemas/mce/mc.xsd
	new file:   .claude/skills/docx/scripts/office/schemas/microsoft/wml-2010.xsd
	new file:   .claude/skills/docx/scripts/office/schemas/microsoft/wml-2012.xsd
	new file:   .claude/skills/docx/scripts/office/schemas/microsoft/wml-2018.xsd
	new file:   .claude/skills/docx/scripts/office/schemas/microsoft/wml-cex-2018.xsd
	new file:   .claude/skills/docx/scripts/office/schemas/microsoft/wml-cid-2016.xsd
	new file:   .claude/skills/docx/scripts/office/schemas/microsoft/wml-sdtdatahash-2020.xsd
	new file:   .claude/skills/docx/scripts/office/schemas/microsoft/wml-symex-2015.xsd
	new file:   .claude/skills/docx/scripts/office/soffice.py
	new file:   .claude/skills/docx/scripts/office/unpack.py
	new file:   .claude/skills/docx/scripts/office/validate.py
	new file:   .claude/skills/docx/scripts/office/validators/__init__.py
	new file:   .claude/skills/docx/scripts/office/validators/base.py
	new file:   .claude/skills/docx/scripts/office/validators/docx.py
	new file:   .claude/skills/docx/scripts/office/validators/pptx.py
	new file:   .claude/skills/docx/scripts/office/validators/redlining.py
	new file:   .claude/skills/docx/scripts/templates/comments.xml
	new file:   .claude/skills/docx/scripts/templates/commentsExtended.xml
	new file:   .claude/skills/docx/scripts/templates/commentsExtensible.xml
	new file:   .claude/skills/docx/scripts/templates/commentsIds.xml
	new file:   .claude/skills/docx/scripts/templates/people.xml
	new file:   .claude/skills/frontend-design/.openskills.json
	new file:   .claude/skills/frontend-design/LICENSE.txt
	new file:   .claude/skills/frontend-design/SKILL.md
	new file:   .claude/skills/internal-comms/.openskills.json
	new file:   .claude/skills/internal-comms/LICENSE.txt
	new file:   .claude/skills/internal-comms/SKILL.md
	new file:   .claude/skills/internal-comms/examples/3p-updates.md
	new file:   .claude/skills/internal-comms/examples/company-newsletter.md
	new file:   .claude/skills/internal-comms/examples/faq-answers.md
	new file:   .claude/skills/internal-comms/examples/general-comms.md
	new file:   .claude/skills/mcp-builder/.openskills.json
	new file:   .claude/skills/mcp-builder/LICENSE.txt
	new file:   .claude/skills/mcp-builder/SKILL.md
	new file:   .claude/skills/mcp-builder/reference/evaluation.md
	new file:   .claude/skills/mcp-builder/reference/mcp_best_practices.md
	new file:   .claude/skills/mcp-builder/reference/node_mcp_server.md
	new file:   .claude/skills/mcp-builder/reference/python_mcp_server.md
	new file:   .claude/skills/mcp-builder/scripts/connections.py
	new file:   .claude/skills/mcp-builder/scripts/evaluation.py
	new file:   .claude/skills/mcp-builder/scripts/example_evaluation.xml
	new file:   .claude/skills/mcp-builder/scripts/requirements.txt
	new file:   .claude/skills/pdf/.openskills.json
	new file:   .claude/skills/pdf/LICENSE.txt
	new file:   .claude/skills/pdf/SKILL.md
	new file:   .claude/skills/pdf/forms.md
	new file:   .claude/skills/pdf/reference.md
	new file:   .claude/skills/pdf/scripts/check_bounding_boxes.py
	new file:   .claude/skills/pdf/scripts/check_fillable_fields.py
	new file:   .claude/skills/pdf/scripts/convert_pdf_to_images.py
	new file:   .claude/skills/pdf/scripts/create_validation_image.py
	new file:   .claude/skills/pdf/scripts/extract_form_field_info.py
	new file:   .claude/skills/pdf/scripts/extract_form_structure.py
	new file:   .claude/skills/pdf/scripts/fill_fillable_fields.py
	new file:   .claude/skills/pdf/scripts/fill_pdf_form_with_annotations.py
	new file:   .claude/skills/pptx/.openskills.json
	new file:   .claude/skills/pptx/LICENSE.txt
	new file:   .claude/skills/pptx/SKILL.md
	new file:   .claude/skills/pptx/editing.md
	new file:   .claude/skills/pptx/pptxgenjs.md
	new file:   .claude/skills/pptx/scripts/__init__.py
	new file:   .claude/skills/pptx/scripts/add_slide.py
	new file:   .claude/skills/pptx/scripts/clean.py
	new file:   .claude/skills/pptx/scripts/office/helpers/__init__.py
	new file:   .claude/skills/pptx/scripts/office/helpers/merge_runs.py
	new file:   .claude/skills/pptx/scripts/office/helpers/simplify_redlines.py
	new file:   .claude/skills/pptx/scripts/office/pack.py
	new file:   .claude/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chart.xsd
	new file:   .claude/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd
	new file:   .claude/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd
	new file:   .claude/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd
	new file:   .claude/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-main.xsd
	new file:   .claude/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-picture.xsd
	new file:   .claude/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd
	new file:   .claude/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd
	new file:   .claude/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/pml.xsd
	new file:   .claude/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd
	new file:   .claude/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd
	new file:   .claude/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd
	new file:   .claude/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd
	new file:   .claude/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd
	new file:   .claude/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd
	new file:   .claude/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd
	new file:   .claude/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd
	new file:   .claude/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-math.xsd
	new file:   .claude/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd
	new file:   .claude/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/sml.xsd
	new file:   .claude/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-main.xsd
	new file:   .claude/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd
	new file:   .claude/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd
	new file:   .claude/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd
	new file:   .claude/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd
	new file:   .claude/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/wml.xsd
	new file:   .claude/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/xml.xsd
	new file:   .claude/skills/pptx/scripts/office/schemas/ecma/fouth-edition/opc-contentTypes.xsd
	new file:   .claude/skills/pptx/scripts/office/schemas/ecma/fouth-edition/opc-coreProperties.xsd
	new file:   .claude/skills/pptx/scripts/office/schemas/ecma/fouth-edition/opc-digSig.xsd
	new file:   .claude/skills/pptx/scripts/office/schemas/ecma/fouth-edition/opc-relationships.xsd
	new file:   .claude/skills/pptx/scripts/office/schemas/mce/mc.xsd
	new file:   .claude/skills/pptx/scripts/office/schemas/microsoft/wml-2010.xsd
	new file:   .claude/skills/pptx/scripts/office/schemas/microsoft/wml-2012.xsd
	new file:   .claude/skills/pptx/scripts/office/schemas/microsoft/wml-2018.xsd
	new file:   .claude/skills/pptx/scripts/office/schemas/microsoft/wml-cex-2018.xsd
	new file:   .claude/skills/pptx/scripts/office/schemas/microsoft/wml-cid-2016.xsd
	new file:   .claude/skills/pptx/scripts/office/schemas/microsoft/wml-sdtdatahash-2020.xsd
	new file:   .claude/skills/pptx/scripts/office/schemas/microsoft/wml-symex-2015.xsd
	new file:   .claude/skills/pptx/scripts/office/soffice.py
	new file:   .claude/skills/pptx/scripts/office/unpack.py
	new file:   .claude/skills/pptx/scripts/office/validate.py
	new file:   .claude/skills/pptx/scripts/office/validators/__init__.py
	new file:   .claude/skills/pptx/scripts/office/validators/base.py
	new file:   .claude/skills/pptx/scripts/office/validators/docx.py
	new file:   .claude/skills/pptx/scripts/office/validators/pptx.py
	new file:   .claude/skills/pptx/scripts/office/validators/redlining.py
	new file:   .claude/skills/pptx/scripts/thumbnail.py
	new file:   .claude/skills/skill-creator/.openskills.json
	new file:   .claude/skills/skill-creator/LICENSE.txt
	new file:   .claude/skills/skill-creator/SKILL.md
	new file:   .claude/skills/skill-creator/agents/analyzer.md
	new file:   .claude/skills/skill-creator/agents/comparator.md
	new file:   .claude/skills/skill-creator/agents/grader.md
	new file:   .claude/skills/skill-creator/assets/eval_review.html
	new file:   .claude/skills/skill-creator/eval-viewer/generate_review.py
	new file:   .claude/skills/skill-creator/eval-viewer/viewer.html
	new file:   .claude/skills/skill-creator/references/schemas.md
	new file:   .claude/skills/skill-creator/scripts/__init__.py
	new file:   .claude/skills/skill-creator/scripts/aggregate_benchmark.py
	new file:   .claude/skills/skill-creator/scripts/generate_report.py
	new file:   .claude/skills/skill-creator/scripts/improve_description.py
	new file:   .claude/skills/skill-creator/scripts/package_skill.py
	new file:   .claude/skills/skill-creator/scripts/quick_validate.py
	new file:   .claude/skills/skill-creator/scripts/run_eval.py
	new file:   .claude/skills/skill-creator/scripts/run_loop.py
	new file:   .claude/skills/skill-creator/scripts/utils.py
	new file:   .claude/skills/slack-gif-creator/.openskills.json
	new file:   .claude/skills/slack-gif-creator/LICENSE.txt
	new file:   .claude/skills/slack-gif-creator/SKILL.md
	new file:   .claude/skills/slack-gif-creator/core/easing.py
	new file:   .claude/skills/slack-gif-creator/core/frame_composer.py
	new file:   .claude/skills/slack-gif-creator/core/gif_builder.py
	new file:   .claude/skills/slack-gif-creator/core/validators.py
	new file:   .claude/skills/slack-gif-creator/requirements.txt
	new file:   .claude/skills/template/.openskills.json
	new file:   .claude/skills/template/SKILL.md
	new file:   .claude/skills/theme-factory/.openskills.json
	new file:   .claude/skills/theme-factory/LICENSE.txt
	new file:   .claude/skills/theme-factory/SKILL.md
	new file:   .claude/skills/theme-factory/theme-showcase.pdf
	new file:   .claude/skills/theme-factory/themes/arctic-frost.md
	new file:   .claude/skills/theme-factory/themes/botanical-garden.md
	new file:   .claude/skills/theme-factory/themes/desert-rose.md
	new file:   .claude/skills/theme-factory/themes/forest-canopy.md
	new file:   .claude/skills/theme-factory/themes/golden-hour.md
	new file:   .claude/skills/theme-factory/themes/midnight-galaxy.md
	new file:   .claude/skills/theme-factory/themes/modern-minimalist.md
	new file:   .claude/skills/theme-factory/themes/ocean-depths.md
	new file:   .claude/skills/theme-factory/themes/sunset-boulevard.md
	new file:   .claude/skills/theme-factory/themes/tech-innovation.md
	new file:   .claude/skills/web-artifacts-builder/.openskills.json
	new file:   .claude/skills/web-artifacts-builder/LICENSE.txt
	new file:   .claude/skills/web-artifacts-builder/SKILL.md
	new file:   .claude/skills/web-artifacts-builder/scripts/bundle-artifact.sh
	new file:   .claude/skills/web-artifacts-builder/scripts/init-artifact.sh
	new file:   .claude/skills/web-artifacts-builder/scripts/shadcn-components.tar.gz
	new file:   .claude/skills/webapp-testing/.openskills.json
	new file:   .claude/skills/webapp-testing/LICENSE.txt
	new file:   .claude/skills/webapp-testing/SKILL.md
	new file:   .claude/skills/webapp-testing/examples/console_logging.py
	new file:   .claude/skills/webapp-testing/examples/element_discovery.py
	new file:   .claude/skills/webapp-testing/examples/static_html_automation.py
	new file:   .claude/skills/webapp-testing/scripts/with_server.py
	new file:   .claude/skills/xlsx/.openskills.json
	new file:   .claude/skills/xlsx/LICENSE.txt
	new file:   .claude/skills/xlsx/SKILL.md
	new file:   .claude/skills/xlsx/scripts/office/helpers/__init__.py
	new file:   .claude/skills/xlsx/scripts/office/helpers/merge_runs.py
	new file:   .claude/skills/xlsx/scripts/office/helpers/simplify_redlines.py
	new file:   .claude/skills/xlsx/scripts/office/pack.py
	new file:   .claude/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chart.xsd
	new file:   .claude/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd
	new file:   .claude/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd
	new file:   .claude/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd
	new file:   .claude/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-main.xsd
	new file:   .claude/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-picture.xsd
	new file:   .claude/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd
	new file:   .claude/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd
	new file:   .claude/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/pml.xsd
	new file:   .claude/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd
	new file:   .claude/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd
	new file:   .claude/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd
	new file:   .claude/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd
	new file:   .claude/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd
	new file:   .claude/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd
	new file:   .claude/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd
	new file:   .claude/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd
	new file:   .claude/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-math.xsd
	new file:   .claude/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd
	new file:   .claude/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/sml.xsd
	new file:   .claude/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-main.xsd
	new file:   .claude/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd
	new file:   .claude/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd
	new file:   .claude/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd
	new file:   .claude/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd
	new file:   .claude/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/wml.xsd
	new file:   .claude/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/xml.xsd
	new file:   .claude/skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-contentTypes.xsd
	new file:   .claude/skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-coreProperties.xsd
	new file:   .claude/skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-digSig.xsd
	new file:   .claude/skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-relationships.xsd
	new file:   .claude/skills/xlsx/scripts/office/schemas/mce/mc.xsd
	new file:   .claude/skills/xlsx/scripts/office/schemas/microsoft/wml-2010.xsd
	new file:   .claude/skills/xlsx/scripts/office/schemas/microsoft/wml-2012.xsd
	new file:   .claude/skills/xlsx/scripts/office/schemas/microsoft/wml-2018.xsd
	new file:   .claude/skills/xlsx/scripts/office/schemas/microsoft/wml-cex-2018.xsd
	new file:   .claude/skills/xlsx/scripts/office/schemas/microsoft/wml-cid-2016.xsd
	new file:   .claude/skills/xlsx/scripts/office/schemas/microsoft/wml-sdtdatahash-2020.xsd
	new file:   .claude/skills/xlsx/scripts/office/schemas/microsoft/wml-symex-2015.xsd
	new file:   .claude/skills/xlsx/scripts/office/soffice.py
	new file:   .claude/skills/xlsx/scripts/office/unpack.py
	new file:   .claude/skills/xlsx/scripts/office/validate.py
	new file:   .claude/skills/xlsx/scripts/office/validators/__init__.py
	new file:   .claude/skills/xlsx/scripts/office/validators/base.py
	new file:   .claude/skills/xlsx/scripts/office/validators/docx.py
	new file:   .claude/skills/xlsx/scripts/office/validators/pptx.py
	new file:   .claude/skills/xlsx/scripts/office/validators/redlining.py
	new file:   .claude/skills/xlsx/scripts/recalc.py
	new file:   .env.example
	new file:   .gitignore
	new file:   config/mcp.json
	new file:   config/models.json
	new file:   config/personalities.json
	new file:   docs/AGENTS.md
	new file:   docs/AI_IMPLEMENTATION.md
	new file:   docs/AI_INTEGRATION_COMPLETE.md
	new file:   docs/AI_QUICKSTART.md
	new file:   docs/AI_SUMMARY.md
	new file:   docs/CHANGELOG.md
	new file:   docs/CONFIG_GUIDE.md
	new file:   docs/FIXES.md
	new file:   docs/PROJECT_REFACTOR.md
	new file:   docs/README.md
	new file:   docs/README_INDEX.md
	new file:   examples/ai_example.py
	new file:   main.py
	new file:   pytest.ini
	new file:   requirements.txt
	new file:   scripts/migrate_to_vector_db.py
	new file:   skills/cmd_zip_skill/README.md
	new file:   skills/cmd_zip_skill/__init__.py
	new file:   skills/cmd_zip_skill/main.py
	new file:   skills/cmd_zip_skill/skill.json
	new file:   skills/cmd_zip_skill_1772465404375/README.md
	new file:   skills/cmd_zip_skill_1772465404375/__init__.py
	new file:   skills/cmd_zip_skill_1772465404375/main.py
	new file:   skills/cmd_zip_skill_1772465404375/skill.json
	new file:   skills/cmd_zip_skill_1772465434774/README.md
	new file:   skills/cmd_zip_skill_1772465434774/__init__.py
	new file:   skills/cmd_zip_skill_1772465434774/main.py
	new file:   skills/cmd_zip_skill_1772465434774/skill.json
	new file:   skills/cmd_zip_skill_1772465467809/README.md
	new file:   skills/cmd_zip_skill_1772465467809/__init__.py
	new file:   skills/cmd_zip_skill_1772465467809/main.py
	new file:   skills/cmd_zip_skill_1772465467809/skill.json
	new file:   skills/cmd_zip_skill_1772465652075/README.md
	new file:   skills/cmd_zip_skill_1772465652075/__init__.py
	new file:   skills/cmd_zip_skill_1772465652075/main.py
	new file:   skills/cmd_zip_skill_1772465652075/skill.json
	new file:   skills/cmd_zip_skill_1772465685352/README.md
	new file:   skills/cmd_zip_skill_1772465685352/__init__.py
	new file:   skills/cmd_zip_skill_1772465685352/main.py
	new file:   skills/cmd_zip_skill_1772465685352/skill.json
	new file:   skills/cmd_zip_skill_1772465936294/README.md
	new file:   skills/cmd_zip_skill_1772465936294/__init__.py
	new file:   skills/cmd_zip_skill_1772465936294/main.py
	new file:   skills/cmd_zip_skill_1772465936294/skill.json
	new file:   skills/cmd_zip_skill_1772465966322/README.md
	new file:   skills/cmd_zip_skill_1772465966322/__init__.py
	new file:   skills/cmd_zip_skill_1772465966322/main.py
	new file:   skills/cmd_zip_skill_1772465966322/skill.json
	new file:   skills/cmd_zip_skill_1772466071278/README.md
	new file:   skills/cmd_zip_skill_1772466071278/__init__.py
	new file:   skills/cmd_zip_skill_1772466071278/main.py
	new file:   skills/cmd_zip_skill_1772466071278/skill.json
	new file:   skills/skills_creator/README.md
	new file:   skills/skills_creator/__init__.py
	new file:   skills/skills_creator/main.py
	new file:   skills/skills_creator/skill.json
	new file:   src/__init__.py
	new file:   src/ai/__init__.py
	new file:   src/ai/base.py
	new file:   src/ai/client.py
	new file:   src/ai/docs/README.md
	new file:   src/ai/mcp/__init__.py
	new file:   src/ai/mcp/base.py
	new file:   src/ai/mcp/servers/__init__.py
	new file:   src/ai/mcp/servers/filesystem.py
	new file:   src/ai/memory.py
	new file:   src/ai/models/__init__.py
	new file:   src/ai/models/anthropic_model.py
	new file:   src/ai/models/openai_model.py
	new file:   src/ai/personality.py
	new file:   src/ai/skills/__init__.py
	new file:   src/ai/skills/base.py
	new file:   src/ai/task_manager.py
	new file:   src/ai/vector_store/__init__.py
	new file:   src/ai/vector_store/base.py
	new file:   src/ai/vector_store/chroma_store.py
	new file:   src/ai/vector_store/json_store.py
	new file:   src/core/__init__.py
	new file:   src/core/bot.py
	new file:   src/core/config.py
	new file:   src/handlers/__init__.py
	new file:   src/handlers/message_handler.py
	new file:   src/handlers/message_handler_ai.py
	new file:   src/utils/__init__.py
	new file:   src/utils/logger.py
	new file:   start.bat
	new file:   tests/test_ai.py
This commit is contained in:
Mimikko-zeus
2026-03-03 01:23:23 +08:00
parent b7940f2ff6
commit ae208af6a9
453 changed files with 99883 additions and 0 deletions

View File

@@ -0,0 +1,15 @@
"""
Validation modules for Word document processing.
"""
from .base import BaseSchemaValidator
from .docx import DOCXSchemaValidator
from .pptx import PPTXSchemaValidator
from .redlining import RedliningValidator
__all__ = [
"BaseSchemaValidator",
"DOCXSchemaValidator",
"PPTXSchemaValidator",
"RedliningValidator",
]

View File

@@ -0,0 +1,847 @@
"""
Base validator with common validation logic for document files.
"""
import re
from pathlib import Path
import defusedxml.minidom
import lxml.etree
class BaseSchemaValidator:
IGNORED_VALIDATION_ERRORS = [
"hyphenationZone",
"purl.org/dc/terms",
]
UNIQUE_ID_REQUIREMENTS = {
"comment": ("id", "file"),
"commentrangestart": ("id", "file"),
"commentrangeend": ("id", "file"),
"bookmarkstart": ("id", "file"),
"bookmarkend": ("id", "file"),
"sldid": ("id", "file"),
"sldmasterid": ("id", "global"),
"sldlayoutid": ("id", "global"),
"cm": ("authorid", "file"),
"sheet": ("sheetid", "file"),
"definedname": ("id", "file"),
"cxnsp": ("id", "file"),
"sp": ("id", "file"),
"pic": ("id", "file"),
"grpsp": ("id", "file"),
}
EXCLUDED_ID_CONTAINERS = {
"sectionlst",
}
ELEMENT_RELATIONSHIP_TYPES = {}
SCHEMA_MAPPINGS = {
"word": "ISO-IEC29500-4_2016/wml.xsd",
"ppt": "ISO-IEC29500-4_2016/pml.xsd",
"xl": "ISO-IEC29500-4_2016/sml.xsd",
"[Content_Types].xml": "ecma/fouth-edition/opc-contentTypes.xsd",
"app.xml": "ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd",
"core.xml": "ecma/fouth-edition/opc-coreProperties.xsd",
"custom.xml": "ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd",
".rels": "ecma/fouth-edition/opc-relationships.xsd",
"people.xml": "microsoft/wml-2012.xsd",
"commentsIds.xml": "microsoft/wml-cid-2016.xsd",
"commentsExtensible.xml": "microsoft/wml-cex-2018.xsd",
"commentsExtended.xml": "microsoft/wml-2012.xsd",
"chart": "ISO-IEC29500-4_2016/dml-chart.xsd",
"theme": "ISO-IEC29500-4_2016/dml-main.xsd",
"drawing": "ISO-IEC29500-4_2016/dml-main.xsd",
}
MC_NAMESPACE = "http://schemas.openxmlformats.org/markup-compatibility/2006"
XML_NAMESPACE = "http://www.w3.org/XML/1998/namespace"
PACKAGE_RELATIONSHIPS_NAMESPACE = (
"http://schemas.openxmlformats.org/package/2006/relationships"
)
OFFICE_RELATIONSHIPS_NAMESPACE = (
"http://schemas.openxmlformats.org/officeDocument/2006/relationships"
)
CONTENT_TYPES_NAMESPACE = (
"http://schemas.openxmlformats.org/package/2006/content-types"
)
MAIN_CONTENT_FOLDERS = {"word", "ppt", "xl"}
OOXML_NAMESPACES = {
"http://schemas.openxmlformats.org/officeDocument/2006/math",
"http://schemas.openxmlformats.org/officeDocument/2006/relationships",
"http://schemas.openxmlformats.org/schemaLibrary/2006/main",
"http://schemas.openxmlformats.org/drawingml/2006/main",
"http://schemas.openxmlformats.org/drawingml/2006/chart",
"http://schemas.openxmlformats.org/drawingml/2006/chartDrawing",
"http://schemas.openxmlformats.org/drawingml/2006/diagram",
"http://schemas.openxmlformats.org/drawingml/2006/picture",
"http://schemas.openxmlformats.org/drawingml/2006/spreadsheetDrawing",
"http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing",
"http://schemas.openxmlformats.org/wordprocessingml/2006/main",
"http://schemas.openxmlformats.org/presentationml/2006/main",
"http://schemas.openxmlformats.org/spreadsheetml/2006/main",
"http://schemas.openxmlformats.org/officeDocument/2006/sharedTypes",
"http://www.w3.org/XML/1998/namespace",
}
def __init__(self, unpacked_dir, original_file=None, verbose=False):
self.unpacked_dir = Path(unpacked_dir).resolve()
self.original_file = Path(original_file) if original_file else None
self.verbose = verbose
self.schemas_dir = Path(__file__).parent.parent / "schemas"
patterns = ["*.xml", "*.rels"]
self.xml_files = [
f for pattern in patterns for f in self.unpacked_dir.rglob(pattern)
]
if not self.xml_files:
print(f"Warning: No XML files found in {self.unpacked_dir}")
def validate(self):
raise NotImplementedError("Subclasses must implement the validate method")
def repair(self) -> int:
return self.repair_whitespace_preservation()
def repair_whitespace_preservation(self) -> int:
repairs = 0
for xml_file in self.xml_files:
try:
content = xml_file.read_text(encoding="utf-8")
dom = defusedxml.minidom.parseString(content)
modified = False
for elem in dom.getElementsByTagName("*"):
if elem.tagName.endswith(":t") and elem.firstChild:
text = elem.firstChild.nodeValue
if text and (text.startswith((' ', '\t')) or text.endswith((' ', '\t'))):
if elem.getAttribute("xml:space") != "preserve":
elem.setAttribute("xml:space", "preserve")
text_preview = repr(text[:30]) + "..." if len(text) > 30 else repr(text)
print(f" Repaired: {xml_file.name}: Added xml:space='preserve' to {elem.tagName}: {text_preview}")
repairs += 1
modified = True
if modified:
xml_file.write_bytes(dom.toxml(encoding="UTF-8"))
except Exception:
pass
return repairs
def validate_xml(self):
errors = []
for xml_file in self.xml_files:
try:
lxml.etree.parse(str(xml_file))
except lxml.etree.XMLSyntaxError as e:
errors.append(
f" {xml_file.relative_to(self.unpacked_dir)}: "
f"Line {e.lineno}: {e.msg}"
)
except Exception as e:
errors.append(
f" {xml_file.relative_to(self.unpacked_dir)}: "
f"Unexpected error: {str(e)}"
)
if errors:
print(f"FAILED - Found {len(errors)} XML violations:")
for error in errors:
print(error)
return False
else:
if self.verbose:
print("PASSED - All XML files are well-formed")
return True
def validate_namespaces(self):
errors = []
for xml_file in self.xml_files:
try:
root = lxml.etree.parse(str(xml_file)).getroot()
declared = set(root.nsmap.keys()) - {None}
for attr_val in [
v for k, v in root.attrib.items() if k.endswith("Ignorable")
]:
undeclared = set(attr_val.split()) - declared
errors.extend(
f" {xml_file.relative_to(self.unpacked_dir)}: "
f"Namespace '{ns}' in Ignorable but not declared"
for ns in undeclared
)
except lxml.etree.XMLSyntaxError:
continue
if errors:
print(f"FAILED - {len(errors)} namespace issues:")
for error in errors:
print(error)
return False
if self.verbose:
print("PASSED - All namespace prefixes properly declared")
return True
def validate_unique_ids(self):
errors = []
global_ids = {}
for xml_file in self.xml_files:
try:
root = lxml.etree.parse(str(xml_file)).getroot()
file_ids = {}
mc_elements = root.xpath(
".//mc:AlternateContent", namespaces={"mc": self.MC_NAMESPACE}
)
for elem in mc_elements:
elem.getparent().remove(elem)
for elem in root.iter():
tag = (
elem.tag.split("}")[-1].lower()
if "}" in elem.tag
else elem.tag.lower()
)
if tag in self.UNIQUE_ID_REQUIREMENTS:
in_excluded_container = any(
ancestor.tag.split("}")[-1].lower() in self.EXCLUDED_ID_CONTAINERS
for ancestor in elem.iterancestors()
)
if in_excluded_container:
continue
attr_name, scope = self.UNIQUE_ID_REQUIREMENTS[tag]
id_value = None
for attr, value in elem.attrib.items():
attr_local = (
attr.split("}")[-1].lower()
if "}" in attr
else attr.lower()
)
if attr_local == attr_name:
id_value = value
break
if id_value is not None:
if scope == "global":
if id_value in global_ids:
prev_file, prev_line, prev_tag = global_ids[
id_value
]
errors.append(
f" {xml_file.relative_to(self.unpacked_dir)}: "
f"Line {elem.sourceline}: Global ID '{id_value}' in <{tag}> "
f"already used in {prev_file} at line {prev_line} in <{prev_tag}>"
)
else:
global_ids[id_value] = (
xml_file.relative_to(self.unpacked_dir),
elem.sourceline,
tag,
)
elif scope == "file":
key = (tag, attr_name)
if key not in file_ids:
file_ids[key] = {}
if id_value in file_ids[key]:
prev_line = file_ids[key][id_value]
errors.append(
f" {xml_file.relative_to(self.unpacked_dir)}: "
f"Line {elem.sourceline}: Duplicate {attr_name}='{id_value}' in <{tag}> "
f"(first occurrence at line {prev_line})"
)
else:
file_ids[key][id_value] = elem.sourceline
except (lxml.etree.XMLSyntaxError, Exception) as e:
errors.append(
f" {xml_file.relative_to(self.unpacked_dir)}: Error: {e}"
)
if errors:
print(f"FAILED - Found {len(errors)} ID uniqueness violations:")
for error in errors:
print(error)
return False
else:
if self.verbose:
print("PASSED - All required IDs are unique")
return True
def validate_file_references(self):
errors = []
rels_files = list(self.unpacked_dir.rglob("*.rels"))
if not rels_files:
if self.verbose:
print("PASSED - No .rels files found")
return True
all_files = []
for file_path in self.unpacked_dir.rglob("*"):
if (
file_path.is_file()
and file_path.name != "[Content_Types].xml"
and not file_path.name.endswith(".rels")
):
all_files.append(file_path.resolve())
all_referenced_files = set()
if self.verbose:
print(
f"Found {len(rels_files)} .rels files and {len(all_files)} target files"
)
for rels_file in rels_files:
try:
rels_root = lxml.etree.parse(str(rels_file)).getroot()
rels_dir = rels_file.parent
referenced_files = set()
broken_refs = []
for rel in rels_root.findall(
".//ns:Relationship",
namespaces={"ns": self.PACKAGE_RELATIONSHIPS_NAMESPACE},
):
target = rel.get("Target")
if target and not target.startswith(
("http", "mailto:")
):
if target.startswith("/"):
target_path = self.unpacked_dir / target.lstrip("/")
elif rels_file.name == ".rels":
target_path = self.unpacked_dir / target
else:
base_dir = rels_dir.parent
target_path = base_dir / target
try:
target_path = target_path.resolve()
if target_path.exists() and target_path.is_file():
referenced_files.add(target_path)
all_referenced_files.add(target_path)
else:
broken_refs.append((target, rel.sourceline))
except (OSError, ValueError):
broken_refs.append((target, rel.sourceline))
if broken_refs:
rel_path = rels_file.relative_to(self.unpacked_dir)
for broken_ref, line_num in broken_refs:
errors.append(
f" {rel_path}: Line {line_num}: Broken reference to {broken_ref}"
)
except Exception as e:
rel_path = rels_file.relative_to(self.unpacked_dir)
errors.append(f" Error parsing {rel_path}: {e}")
unreferenced_files = set(all_files) - all_referenced_files
if unreferenced_files:
for unref_file in sorted(unreferenced_files):
unref_rel_path = unref_file.relative_to(self.unpacked_dir)
errors.append(f" Unreferenced file: {unref_rel_path}")
if errors:
print(f"FAILED - Found {len(errors)} relationship validation errors:")
for error in errors:
print(error)
print(
"CRITICAL: These errors will cause the document to appear corrupt. "
+ "Broken references MUST be fixed, "
+ "and unreferenced files MUST be referenced or removed."
)
return False
else:
if self.verbose:
print(
"PASSED - All references are valid and all files are properly referenced"
)
return True
def validate_all_relationship_ids(self):
import lxml.etree
errors = []
for xml_file in self.xml_files:
if xml_file.suffix == ".rels":
continue
rels_dir = xml_file.parent / "_rels"
rels_file = rels_dir / f"{xml_file.name}.rels"
if not rels_file.exists():
continue
try:
rels_root = lxml.etree.parse(str(rels_file)).getroot()
rid_to_type = {}
for rel in rels_root.findall(
f".//{{{self.PACKAGE_RELATIONSHIPS_NAMESPACE}}}Relationship"
):
rid = rel.get("Id")
rel_type = rel.get("Type", "")
if rid:
if rid in rid_to_type:
rels_rel_path = rels_file.relative_to(self.unpacked_dir)
errors.append(
f" {rels_rel_path}: Line {rel.sourceline}: "
f"Duplicate relationship ID '{rid}' (IDs must be unique)"
)
type_name = (
rel_type.split("/")[-1] if "/" in rel_type else rel_type
)
rid_to_type[rid] = type_name
xml_root = lxml.etree.parse(str(xml_file)).getroot()
r_ns = self.OFFICE_RELATIONSHIPS_NAMESPACE
rid_attrs_to_check = ["id", "embed", "link"]
for elem in xml_root.iter():
for attr_name in rid_attrs_to_check:
rid_attr = elem.get(f"{{{r_ns}}}{attr_name}")
if not rid_attr:
continue
xml_rel_path = xml_file.relative_to(self.unpacked_dir)
elem_name = (
elem.tag.split("}")[-1] if "}" in elem.tag else elem.tag
)
if rid_attr not in rid_to_type:
errors.append(
f" {xml_rel_path}: Line {elem.sourceline}: "
f"<{elem_name}> r:{attr_name} references non-existent relationship '{rid_attr}' "
f"(valid IDs: {', '.join(sorted(rid_to_type.keys())[:5])}{'...' if len(rid_to_type) > 5 else ''})"
)
elif attr_name == "id" and self.ELEMENT_RELATIONSHIP_TYPES:
expected_type = self._get_expected_relationship_type(
elem_name
)
if expected_type:
actual_type = rid_to_type[rid_attr]
if expected_type not in actual_type.lower():
errors.append(
f" {xml_rel_path}: Line {elem.sourceline}: "
f"<{elem_name}> references '{rid_attr}' which points to '{actual_type}' "
f"but should point to a '{expected_type}' relationship"
)
except Exception as e:
xml_rel_path = xml_file.relative_to(self.unpacked_dir)
errors.append(f" Error processing {xml_rel_path}: {e}")
if errors:
print(f"FAILED - Found {len(errors)} relationship ID reference errors:")
for error in errors:
print(error)
print("\nThese ID mismatches will cause the document to appear corrupt!")
return False
else:
if self.verbose:
print("PASSED - All relationship ID references are valid")
return True
def _get_expected_relationship_type(self, element_name):
elem_lower = element_name.lower()
if elem_lower in self.ELEMENT_RELATIONSHIP_TYPES:
return self.ELEMENT_RELATIONSHIP_TYPES[elem_lower]
if elem_lower.endswith("id") and len(elem_lower) > 2:
prefix = elem_lower[:-2]
if prefix.endswith("master"):
return prefix.lower()
elif prefix.endswith("layout"):
return prefix.lower()
else:
if prefix == "sld":
return "slide"
return prefix.lower()
if elem_lower.endswith("reference") and len(elem_lower) > 9:
prefix = elem_lower[:-9]
return prefix.lower()
return None
def validate_content_types(self):
errors = []
content_types_file = self.unpacked_dir / "[Content_Types].xml"
if not content_types_file.exists():
print("FAILED - [Content_Types].xml file not found")
return False
try:
root = lxml.etree.parse(str(content_types_file)).getroot()
declared_parts = set()
declared_extensions = set()
for override in root.findall(
f".//{{{self.CONTENT_TYPES_NAMESPACE}}}Override"
):
part_name = override.get("PartName")
if part_name is not None:
declared_parts.add(part_name.lstrip("/"))
for default in root.findall(
f".//{{{self.CONTENT_TYPES_NAMESPACE}}}Default"
):
extension = default.get("Extension")
if extension is not None:
declared_extensions.add(extension.lower())
declarable_roots = {
"sld",
"sldLayout",
"sldMaster",
"presentation",
"document",
"workbook",
"worksheet",
"theme",
}
media_extensions = {
"png": "image/png",
"jpg": "image/jpeg",
"jpeg": "image/jpeg",
"gif": "image/gif",
"bmp": "image/bmp",
"tiff": "image/tiff",
"wmf": "image/x-wmf",
"emf": "image/x-emf",
}
all_files = list(self.unpacked_dir.rglob("*"))
all_files = [f for f in all_files if f.is_file()]
for xml_file in self.xml_files:
path_str = str(xml_file.relative_to(self.unpacked_dir)).replace(
"\\", "/"
)
if any(
skip in path_str
for skip in [".rels", "[Content_Types]", "docProps/", "_rels/"]
):
continue
try:
root_tag = lxml.etree.parse(str(xml_file)).getroot().tag
root_name = root_tag.split("}")[-1] if "}" in root_tag else root_tag
if root_name in declarable_roots and path_str not in declared_parts:
errors.append(
f" {path_str}: File with <{root_name}> root not declared in [Content_Types].xml"
)
except Exception:
continue
for file_path in all_files:
if file_path.suffix.lower() in {".xml", ".rels"}:
continue
if file_path.name == "[Content_Types].xml":
continue
if "_rels" in file_path.parts or "docProps" in file_path.parts:
continue
extension = file_path.suffix.lstrip(".").lower()
if extension and extension not in declared_extensions:
if extension in media_extensions:
relative_path = file_path.relative_to(self.unpacked_dir)
errors.append(
f' {relative_path}: File with extension \'{extension}\' not declared in [Content_Types].xml - should add: <Default Extension="{extension}" ContentType="{media_extensions[extension]}"/>'
)
except Exception as e:
errors.append(f" Error parsing [Content_Types].xml: {e}")
if errors:
print(f"FAILED - Found {len(errors)} content type declaration errors:")
for error in errors:
print(error)
return False
else:
if self.verbose:
print(
"PASSED - All content files are properly declared in [Content_Types].xml"
)
return True
def validate_file_against_xsd(self, xml_file, verbose=False):
xml_file = Path(xml_file).resolve()
unpacked_dir = self.unpacked_dir.resolve()
is_valid, current_errors = self._validate_single_file_xsd(
xml_file, unpacked_dir
)
if is_valid is None:
return None, set()
elif is_valid:
return True, set()
original_errors = self._get_original_file_errors(xml_file)
assert current_errors is not None
new_errors = current_errors - original_errors
new_errors = {
e for e in new_errors
if not any(pattern in e for pattern in self.IGNORED_VALIDATION_ERRORS)
}
if new_errors:
if verbose:
relative_path = xml_file.relative_to(unpacked_dir)
print(f"FAILED - {relative_path}: {len(new_errors)} new error(s)")
for error in list(new_errors)[:3]:
truncated = error[:250] + "..." if len(error) > 250 else error
print(f" - {truncated}")
return False, new_errors
else:
if verbose:
print(
f"PASSED - No new errors (original had {len(current_errors)} errors)"
)
return True, set()
def validate_against_xsd(self):
new_errors = []
original_error_count = 0
valid_count = 0
skipped_count = 0
for xml_file in self.xml_files:
relative_path = str(xml_file.relative_to(self.unpacked_dir))
is_valid, new_file_errors = self.validate_file_against_xsd(
xml_file, verbose=False
)
if is_valid is None:
skipped_count += 1
continue
elif is_valid and not new_file_errors:
valid_count += 1
continue
elif is_valid:
original_error_count += 1
valid_count += 1
continue
new_errors.append(f" {relative_path}: {len(new_file_errors)} new error(s)")
for error in list(new_file_errors)[:3]:
new_errors.append(
f" - {error[:250]}..." if len(error) > 250 else f" - {error}"
)
if self.verbose:
print(f"Validated {len(self.xml_files)} files:")
print(f" - Valid: {valid_count}")
print(f" - Skipped (no schema): {skipped_count}")
if original_error_count:
print(f" - With original errors (ignored): {original_error_count}")
print(
f" - With NEW errors: {len(new_errors) > 0 and len([e for e in new_errors if not e.startswith(' ')]) or 0}"
)
if new_errors:
print("\nFAILED - Found NEW validation errors:")
for error in new_errors:
print(error)
return False
else:
if self.verbose:
print("\nPASSED - No new XSD validation errors introduced")
return True
def _get_schema_path(self, xml_file):
if xml_file.name in self.SCHEMA_MAPPINGS:
return self.schemas_dir / self.SCHEMA_MAPPINGS[xml_file.name]
if xml_file.suffix == ".rels":
return self.schemas_dir / self.SCHEMA_MAPPINGS[".rels"]
if "charts/" in str(xml_file) and xml_file.name.startswith("chart"):
return self.schemas_dir / self.SCHEMA_MAPPINGS["chart"]
if "theme/" in str(xml_file) and xml_file.name.startswith("theme"):
return self.schemas_dir / self.SCHEMA_MAPPINGS["theme"]
if xml_file.parent.name in self.MAIN_CONTENT_FOLDERS:
return self.schemas_dir / self.SCHEMA_MAPPINGS[xml_file.parent.name]
return None
def _clean_ignorable_namespaces(self, xml_doc):
xml_string = lxml.etree.tostring(xml_doc, encoding="unicode")
xml_copy = lxml.etree.fromstring(xml_string)
for elem in xml_copy.iter():
attrs_to_remove = []
for attr in elem.attrib:
if "{" in attr:
ns = attr.split("}")[0][1:]
if ns not in self.OOXML_NAMESPACES:
attrs_to_remove.append(attr)
for attr in attrs_to_remove:
del elem.attrib[attr]
self._remove_ignorable_elements(xml_copy)
return lxml.etree.ElementTree(xml_copy)
def _remove_ignorable_elements(self, root):
elements_to_remove = []
for elem in list(root):
if not hasattr(elem, "tag") or callable(elem.tag):
continue
tag_str = str(elem.tag)
if tag_str.startswith("{"):
ns = tag_str.split("}")[0][1:]
if ns not in self.OOXML_NAMESPACES:
elements_to_remove.append(elem)
continue
self._remove_ignorable_elements(elem)
for elem in elements_to_remove:
root.remove(elem)
def _preprocess_for_mc_ignorable(self, xml_doc):
root = xml_doc.getroot()
if f"{{{self.MC_NAMESPACE}}}Ignorable" in root.attrib:
del root.attrib[f"{{{self.MC_NAMESPACE}}}Ignorable"]
return xml_doc
def _validate_single_file_xsd(self, xml_file, base_path):
schema_path = self._get_schema_path(xml_file)
if not schema_path:
return None, None
try:
with open(schema_path, "rb") as xsd_file:
parser = lxml.etree.XMLParser()
xsd_doc = lxml.etree.parse(
xsd_file, parser=parser, base_url=str(schema_path)
)
schema = lxml.etree.XMLSchema(xsd_doc)
with open(xml_file, "r") as f:
xml_doc = lxml.etree.parse(f)
xml_doc, _ = self._remove_template_tags_from_text_nodes(xml_doc)
xml_doc = self._preprocess_for_mc_ignorable(xml_doc)
relative_path = xml_file.relative_to(base_path)
if (
relative_path.parts
and relative_path.parts[0] in self.MAIN_CONTENT_FOLDERS
):
xml_doc = self._clean_ignorable_namespaces(xml_doc)
if schema.validate(xml_doc):
return True, set()
else:
errors = set()
for error in schema.error_log:
errors.add(error.message)
return False, errors
except Exception as e:
return False, {str(e)}
def _get_original_file_errors(self, xml_file):
if self.original_file is None:
return set()
import tempfile
import zipfile
xml_file = Path(xml_file).resolve()
unpacked_dir = self.unpacked_dir.resolve()
relative_path = xml_file.relative_to(unpacked_dir)
with tempfile.TemporaryDirectory() as temp_dir:
temp_path = Path(temp_dir)
with zipfile.ZipFile(self.original_file, "r") as zip_ref:
zip_ref.extractall(temp_path)
original_xml_file = temp_path / relative_path
if not original_xml_file.exists():
return set()
is_valid, errors = self._validate_single_file_xsd(
original_xml_file, temp_path
)
return errors if errors else set()
def _remove_template_tags_from_text_nodes(self, xml_doc):
warnings = []
template_pattern = re.compile(r"\{\{[^}]*\}\}")
xml_string = lxml.etree.tostring(xml_doc, encoding="unicode")
xml_copy = lxml.etree.fromstring(xml_string)
def process_text_content(text, content_type):
if not text:
return text
matches = list(template_pattern.finditer(text))
if matches:
for match in matches:
warnings.append(
f"Found template tag in {content_type}: {match.group()}"
)
return template_pattern.sub("", text)
return text
for elem in xml_copy.iter():
if not hasattr(elem, "tag") or callable(elem.tag):
continue
tag_str = str(elem.tag)
if tag_str.endswith("}t") or tag_str == "t":
continue
elem.text = process_text_content(elem.text, "text content")
elem.tail = process_text_content(elem.tail, "tail content")
return lxml.etree.ElementTree(xml_copy), warnings
if __name__ == "__main__":
raise RuntimeError("This module should not be run directly.")

View File

@@ -0,0 +1,446 @@
"""
Validator for Word document XML files against XSD schemas.
"""
import random
import re
import tempfile
import zipfile
import defusedxml.minidom
import lxml.etree
from .base import BaseSchemaValidator
class DOCXSchemaValidator(BaseSchemaValidator):
WORD_2006_NAMESPACE = "http://schemas.openxmlformats.org/wordprocessingml/2006/main"
W14_NAMESPACE = "http://schemas.microsoft.com/office/word/2010/wordml"
W16CID_NAMESPACE = "http://schemas.microsoft.com/office/word/2016/wordml/cid"
ELEMENT_RELATIONSHIP_TYPES = {}
def validate(self):
if not self.validate_xml():
return False
all_valid = True
if not self.validate_namespaces():
all_valid = False
if not self.validate_unique_ids():
all_valid = False
if not self.validate_file_references():
all_valid = False
if not self.validate_content_types():
all_valid = False
if not self.validate_against_xsd():
all_valid = False
if not self.validate_whitespace_preservation():
all_valid = False
if not self.validate_deletions():
all_valid = False
if not self.validate_insertions():
all_valid = False
if not self.validate_all_relationship_ids():
all_valid = False
if not self.validate_id_constraints():
all_valid = False
if not self.validate_comment_markers():
all_valid = False
self.compare_paragraph_counts()
return all_valid
def validate_whitespace_preservation(self):
errors = []
for xml_file in self.xml_files:
if xml_file.name != "document.xml":
continue
try:
root = lxml.etree.parse(str(xml_file)).getroot()
for elem in root.iter(f"{{{self.WORD_2006_NAMESPACE}}}t"):
if elem.text:
text = elem.text
if re.search(r"^[ \t\n\r]", text) or re.search(
r"[ \t\n\r]$", text
):
xml_space_attr = f"{{{self.XML_NAMESPACE}}}space"
if (
xml_space_attr not in elem.attrib
or elem.attrib[xml_space_attr] != "preserve"
):
text_preview = (
repr(text)[:50] + "..."
if len(repr(text)) > 50
else repr(text)
)
errors.append(
f" {xml_file.relative_to(self.unpacked_dir)}: "
f"Line {elem.sourceline}: w:t element with whitespace missing xml:space='preserve': {text_preview}"
)
except (lxml.etree.XMLSyntaxError, Exception) as e:
errors.append(
f" {xml_file.relative_to(self.unpacked_dir)}: Error: {e}"
)
if errors:
print(f"FAILED - Found {len(errors)} whitespace preservation violations:")
for error in errors:
print(error)
return False
else:
if self.verbose:
print("PASSED - All whitespace is properly preserved")
return True
def validate_deletions(self):
errors = []
for xml_file in self.xml_files:
if xml_file.name != "document.xml":
continue
try:
root = lxml.etree.parse(str(xml_file)).getroot()
namespaces = {"w": self.WORD_2006_NAMESPACE}
for t_elem in root.xpath(".//w:del//w:t", namespaces=namespaces):
if t_elem.text:
text_preview = (
repr(t_elem.text)[:50] + "..."
if len(repr(t_elem.text)) > 50
else repr(t_elem.text)
)
errors.append(
f" {xml_file.relative_to(self.unpacked_dir)}: "
f"Line {t_elem.sourceline}: <w:t> found within <w:del>: {text_preview}"
)
for instr_elem in root.xpath(
".//w:del//w:instrText", namespaces=namespaces
):
text_preview = (
repr(instr_elem.text or "")[:50] + "..."
if len(repr(instr_elem.text or "")) > 50
else repr(instr_elem.text or "")
)
errors.append(
f" {xml_file.relative_to(self.unpacked_dir)}: "
f"Line {instr_elem.sourceline}: <w:instrText> found within <w:del> (use <w:delInstrText>): {text_preview}"
)
except (lxml.etree.XMLSyntaxError, Exception) as e:
errors.append(
f" {xml_file.relative_to(self.unpacked_dir)}: Error: {e}"
)
if errors:
print(f"FAILED - Found {len(errors)} deletion validation violations:")
for error in errors:
print(error)
return False
else:
if self.verbose:
print("PASSED - No w:t elements found within w:del elements")
return True
def count_paragraphs_in_unpacked(self):
count = 0
for xml_file in self.xml_files:
if xml_file.name != "document.xml":
continue
try:
root = lxml.etree.parse(str(xml_file)).getroot()
paragraphs = root.findall(f".//{{{self.WORD_2006_NAMESPACE}}}p")
count = len(paragraphs)
except Exception as e:
print(f"Error counting paragraphs in unpacked document: {e}")
return count
def count_paragraphs_in_original(self):
original = self.original_file
if original is None:
return 0
count = 0
try:
with tempfile.TemporaryDirectory() as temp_dir:
with zipfile.ZipFile(original, "r") as zip_ref:
zip_ref.extractall(temp_dir)
doc_xml_path = temp_dir + "/word/document.xml"
root = lxml.etree.parse(doc_xml_path).getroot()
paragraphs = root.findall(f".//{{{self.WORD_2006_NAMESPACE}}}p")
count = len(paragraphs)
except Exception as e:
print(f"Error counting paragraphs in original document: {e}")
return count
def validate_insertions(self):
errors = []
for xml_file in self.xml_files:
if xml_file.name != "document.xml":
continue
try:
root = lxml.etree.parse(str(xml_file)).getroot()
namespaces = {"w": self.WORD_2006_NAMESPACE}
invalid_elements = root.xpath(
".//w:ins//w:delText[not(ancestor::w:del)]", namespaces=namespaces
)
for elem in invalid_elements:
text_preview = (
repr(elem.text or "")[:50] + "..."
if len(repr(elem.text or "")) > 50
else repr(elem.text or "")
)
errors.append(
f" {xml_file.relative_to(self.unpacked_dir)}: "
f"Line {elem.sourceline}: <w:delText> within <w:ins>: {text_preview}"
)
except (lxml.etree.XMLSyntaxError, Exception) as e:
errors.append(
f" {xml_file.relative_to(self.unpacked_dir)}: Error: {e}"
)
if errors:
print(f"FAILED - Found {len(errors)} insertion validation violations:")
for error in errors:
print(error)
return False
else:
if self.verbose:
print("PASSED - No w:delText elements within w:ins elements")
return True
def compare_paragraph_counts(self):
original_count = self.count_paragraphs_in_original()
new_count = self.count_paragraphs_in_unpacked()
diff = new_count - original_count
diff_str = f"+{diff}" if diff > 0 else str(diff)
print(f"\nParagraphs: {original_count}{new_count} ({diff_str})")
def _parse_id_value(self, val: str, base: int = 16) -> int:
return int(val, base)
def validate_id_constraints(self):
errors = []
para_id_attr = f"{{{self.W14_NAMESPACE}}}paraId"
durable_id_attr = f"{{{self.W16CID_NAMESPACE}}}durableId"
for xml_file in self.xml_files:
try:
for elem in lxml.etree.parse(str(xml_file)).iter():
if val := elem.get(para_id_attr):
if self._parse_id_value(val, base=16) >= 0x80000000:
errors.append(
f" {xml_file.name}:{elem.sourceline}: paraId={val} >= 0x80000000"
)
if val := elem.get(durable_id_attr):
if xml_file.name == "numbering.xml":
try:
if self._parse_id_value(val, base=10) >= 0x7FFFFFFF:
errors.append(
f" {xml_file.name}:{elem.sourceline}: "
f"durableId={val} >= 0x7FFFFFFF"
)
except ValueError:
errors.append(
f" {xml_file.name}:{elem.sourceline}: "
f"durableId={val} must be decimal in numbering.xml"
)
else:
if self._parse_id_value(val, base=16) >= 0x7FFFFFFF:
errors.append(
f" {xml_file.name}:{elem.sourceline}: "
f"durableId={val} >= 0x7FFFFFFF"
)
except Exception:
pass
if errors:
print(f"FAILED - {len(errors)} ID constraint violations:")
for e in errors:
print(e)
elif self.verbose:
print("PASSED - All paraId/durableId values within constraints")
return not errors
def validate_comment_markers(self):
errors = []
document_xml = None
comments_xml = None
for xml_file in self.xml_files:
if xml_file.name == "document.xml" and "word" in str(xml_file):
document_xml = xml_file
elif xml_file.name == "comments.xml":
comments_xml = xml_file
if not document_xml:
if self.verbose:
print("PASSED - No document.xml found (skipping comment validation)")
return True
try:
doc_root = lxml.etree.parse(str(document_xml)).getroot()
namespaces = {"w": self.WORD_2006_NAMESPACE}
range_starts = {
elem.get(f"{{{self.WORD_2006_NAMESPACE}}}id")
for elem in doc_root.xpath(
".//w:commentRangeStart", namespaces=namespaces
)
}
range_ends = {
elem.get(f"{{{self.WORD_2006_NAMESPACE}}}id")
for elem in doc_root.xpath(
".//w:commentRangeEnd", namespaces=namespaces
)
}
references = {
elem.get(f"{{{self.WORD_2006_NAMESPACE}}}id")
for elem in doc_root.xpath(
".//w:commentReference", namespaces=namespaces
)
}
orphaned_ends = range_ends - range_starts
for comment_id in sorted(
orphaned_ends, key=lambda x: int(x) if x and x.isdigit() else 0
):
errors.append(
f' document.xml: commentRangeEnd id="{comment_id}" has no matching commentRangeStart'
)
orphaned_starts = range_starts - range_ends
for comment_id in sorted(
orphaned_starts, key=lambda x: int(x) if x and x.isdigit() else 0
):
errors.append(
f' document.xml: commentRangeStart id="{comment_id}" has no matching commentRangeEnd'
)
comment_ids = set()
if comments_xml and comments_xml.exists():
comments_root = lxml.etree.parse(str(comments_xml)).getroot()
comment_ids = {
elem.get(f"{{{self.WORD_2006_NAMESPACE}}}id")
for elem in comments_root.xpath(
".//w:comment", namespaces=namespaces
)
}
marker_ids = range_starts | range_ends | references
invalid_refs = marker_ids - comment_ids
for comment_id in sorted(
invalid_refs, key=lambda x: int(x) if x and x.isdigit() else 0
):
if comment_id:
errors.append(
f' document.xml: marker id="{comment_id}" references non-existent comment'
)
except (lxml.etree.XMLSyntaxError, Exception) as e:
errors.append(f" Error parsing XML: {e}")
if errors:
print(f"FAILED - {len(errors)} comment marker violations:")
for error in errors:
print(error)
return False
else:
if self.verbose:
print("PASSED - All comment markers properly paired")
return True
def repair(self) -> int:
repairs = super().repair()
repairs += self.repair_durableId()
return repairs
def repair_durableId(self) -> int:
repairs = 0
for xml_file in self.xml_files:
try:
content = xml_file.read_text(encoding="utf-8")
dom = defusedxml.minidom.parseString(content)
modified = False
for elem in dom.getElementsByTagName("*"):
if not elem.hasAttribute("w16cid:durableId"):
continue
durable_id = elem.getAttribute("w16cid:durableId")
needs_repair = False
if xml_file.name == "numbering.xml":
try:
needs_repair = (
self._parse_id_value(durable_id, base=10) >= 0x7FFFFFFF
)
except ValueError:
needs_repair = True
else:
try:
needs_repair = (
self._parse_id_value(durable_id, base=16) >= 0x7FFFFFFF
)
except ValueError:
needs_repair = True
if needs_repair:
value = random.randint(1, 0x7FFFFFFE)
if xml_file.name == "numbering.xml":
new_id = str(value)
else:
new_id = f"{value:08X}"
elem.setAttribute("w16cid:durableId", new_id)
print(
f" Repaired: {xml_file.name}: durableId {durable_id}{new_id}"
)
repairs += 1
modified = True
if modified:
xml_file.write_bytes(dom.toxml(encoding="UTF-8"))
except Exception:
pass
return repairs
if __name__ == "__main__":
raise RuntimeError("This module should not be run directly.")

View File

@@ -0,0 +1,275 @@
"""
Validator for PowerPoint presentation XML files against XSD schemas.
"""
import re
from .base import BaseSchemaValidator
class PPTXSchemaValidator(BaseSchemaValidator):
PRESENTATIONML_NAMESPACE = (
"http://schemas.openxmlformats.org/presentationml/2006/main"
)
ELEMENT_RELATIONSHIP_TYPES = {
"sldid": "slide",
"sldmasterid": "slidemaster",
"notesmasterid": "notesmaster",
"sldlayoutid": "slidelayout",
"themeid": "theme",
"tablestyleid": "tablestyles",
}
def validate(self):
if not self.validate_xml():
return False
all_valid = True
if not self.validate_namespaces():
all_valid = False
if not self.validate_unique_ids():
all_valid = False
if not self.validate_uuid_ids():
all_valid = False
if not self.validate_file_references():
all_valid = False
if not self.validate_slide_layout_ids():
all_valid = False
if not self.validate_content_types():
all_valid = False
if not self.validate_against_xsd():
all_valid = False
if not self.validate_notes_slide_references():
all_valid = False
if not self.validate_all_relationship_ids():
all_valid = False
if not self.validate_no_duplicate_slide_layouts():
all_valid = False
return all_valid
def validate_uuid_ids(self):
import lxml.etree
errors = []
uuid_pattern = re.compile(
r"^[\{\(]?[0-9A-Fa-f]{8}-?[0-9A-Fa-f]{4}-?[0-9A-Fa-f]{4}-?[0-9A-Fa-f]{4}-?[0-9A-Fa-f]{12}[\}\)]?$"
)
for xml_file in self.xml_files:
try:
root = lxml.etree.parse(str(xml_file)).getroot()
for elem in root.iter():
for attr, value in elem.attrib.items():
attr_name = attr.split("}")[-1].lower()
if attr_name == "id" or attr_name.endswith("id"):
if self._looks_like_uuid(value):
if not uuid_pattern.match(value):
errors.append(
f" {xml_file.relative_to(self.unpacked_dir)}: "
f"Line {elem.sourceline}: ID '{value}' appears to be a UUID but contains invalid hex characters"
)
except (lxml.etree.XMLSyntaxError, Exception) as e:
errors.append(
f" {xml_file.relative_to(self.unpacked_dir)}: Error: {e}"
)
if errors:
print(f"FAILED - Found {len(errors)} UUID ID validation errors:")
for error in errors:
print(error)
return False
else:
if self.verbose:
print("PASSED - All UUID-like IDs contain valid hex values")
return True
def _looks_like_uuid(self, value):
clean_value = value.strip("{}()").replace("-", "")
return len(clean_value) == 32 and all(c.isalnum() for c in clean_value)
def validate_slide_layout_ids(self):
import lxml.etree
errors = []
slide_masters = list(self.unpacked_dir.glob("ppt/slideMasters/*.xml"))
if not slide_masters:
if self.verbose:
print("PASSED - No slide masters found")
return True
for slide_master in slide_masters:
try:
root = lxml.etree.parse(str(slide_master)).getroot()
rels_file = slide_master.parent / "_rels" / f"{slide_master.name}.rels"
if not rels_file.exists():
errors.append(
f" {slide_master.relative_to(self.unpacked_dir)}: "
f"Missing relationships file: {rels_file.relative_to(self.unpacked_dir)}"
)
continue
rels_root = lxml.etree.parse(str(rels_file)).getroot()
valid_layout_rids = set()
for rel in rels_root.findall(
f".//{{{self.PACKAGE_RELATIONSHIPS_NAMESPACE}}}Relationship"
):
rel_type = rel.get("Type", "")
if "slideLayout" in rel_type:
valid_layout_rids.add(rel.get("Id"))
for sld_layout_id in root.findall(
f".//{{{self.PRESENTATIONML_NAMESPACE}}}sldLayoutId"
):
r_id = sld_layout_id.get(
f"{{{self.OFFICE_RELATIONSHIPS_NAMESPACE}}}id"
)
layout_id = sld_layout_id.get("id")
if r_id and r_id not in valid_layout_rids:
errors.append(
f" {slide_master.relative_to(self.unpacked_dir)}: "
f"Line {sld_layout_id.sourceline}: sldLayoutId with id='{layout_id}' "
f"references r:id='{r_id}' which is not found in slide layout relationships"
)
except (lxml.etree.XMLSyntaxError, Exception) as e:
errors.append(
f" {slide_master.relative_to(self.unpacked_dir)}: Error: {e}"
)
if errors:
print(f"FAILED - Found {len(errors)} slide layout ID validation errors:")
for error in errors:
print(error)
print(
"Remove invalid references or add missing slide layouts to the relationships file."
)
return False
else:
if self.verbose:
print("PASSED - All slide layout IDs reference valid slide layouts")
return True
def validate_no_duplicate_slide_layouts(self):
import lxml.etree
errors = []
slide_rels_files = list(self.unpacked_dir.glob("ppt/slides/_rels/*.xml.rels"))
for rels_file in slide_rels_files:
try:
root = lxml.etree.parse(str(rels_file)).getroot()
layout_rels = [
rel
for rel in root.findall(
f".//{{{self.PACKAGE_RELATIONSHIPS_NAMESPACE}}}Relationship"
)
if "slideLayout" in rel.get("Type", "")
]
if len(layout_rels) > 1:
errors.append(
f" {rels_file.relative_to(self.unpacked_dir)}: has {len(layout_rels)} slideLayout references"
)
except Exception as e:
errors.append(
f" {rels_file.relative_to(self.unpacked_dir)}: Error: {e}"
)
if errors:
print("FAILED - Found slides with duplicate slideLayout references:")
for error in errors:
print(error)
return False
else:
if self.verbose:
print("PASSED - All slides have exactly one slideLayout reference")
return True
def validate_notes_slide_references(self):
import lxml.etree
errors = []
notes_slide_references = {}
slide_rels_files = list(self.unpacked_dir.glob("ppt/slides/_rels/*.xml.rels"))
if not slide_rels_files:
if self.verbose:
print("PASSED - No slide relationship files found")
return True
for rels_file in slide_rels_files:
try:
root = lxml.etree.parse(str(rels_file)).getroot()
for rel in root.findall(
f".//{{{self.PACKAGE_RELATIONSHIPS_NAMESPACE}}}Relationship"
):
rel_type = rel.get("Type", "")
if "notesSlide" in rel_type:
target = rel.get("Target", "")
if target:
normalized_target = target.replace("../", "")
slide_name = rels_file.stem.replace(
".xml", ""
)
if normalized_target not in notes_slide_references:
notes_slide_references[normalized_target] = []
notes_slide_references[normalized_target].append(
(slide_name, rels_file)
)
except (lxml.etree.XMLSyntaxError, Exception) as e:
errors.append(
f" {rels_file.relative_to(self.unpacked_dir)}: Error: {e}"
)
for target, references in notes_slide_references.items():
if len(references) > 1:
slide_names = [ref[0] for ref in references]
errors.append(
f" Notes slide '{target}' is referenced by multiple slides: {', '.join(slide_names)}"
)
for slide_name, rels_file in references:
errors.append(f" - {rels_file.relative_to(self.unpacked_dir)}")
if errors:
print(
f"FAILED - Found {len([e for e in errors if not e.startswith(' ')])} notes slide reference validation errors:"
)
for error in errors:
print(error)
print("Each slide may optionally have its own slide file.")
return False
else:
if self.verbose:
print("PASSED - All notes slide references are unique")
return True
if __name__ == "__main__":
raise RuntimeError("This module should not be run directly.")

View File

@@ -0,0 +1,247 @@
"""
Validator for tracked changes in Word documents.
"""
import subprocess
import tempfile
import zipfile
from pathlib import Path
class RedliningValidator:
def __init__(self, unpacked_dir, original_docx, verbose=False, author="Claude"):
self.unpacked_dir = Path(unpacked_dir)
self.original_docx = Path(original_docx)
self.verbose = verbose
self.author = author
self.namespaces = {
"w": "http://schemas.openxmlformats.org/wordprocessingml/2006/main"
}
def repair(self) -> int:
return 0
def validate(self):
modified_file = self.unpacked_dir / "word" / "document.xml"
if not modified_file.exists():
print(f"FAILED - Modified document.xml not found at {modified_file}")
return False
try:
import xml.etree.ElementTree as ET
tree = ET.parse(modified_file)
root = tree.getroot()
del_elements = root.findall(".//w:del", self.namespaces)
ins_elements = root.findall(".//w:ins", self.namespaces)
author_del_elements = [
elem
for elem in del_elements
if elem.get(f"{{{self.namespaces['w']}}}author") == self.author
]
author_ins_elements = [
elem
for elem in ins_elements
if elem.get(f"{{{self.namespaces['w']}}}author") == self.author
]
if not author_del_elements and not author_ins_elements:
if self.verbose:
print(f"PASSED - No tracked changes by {self.author} found.")
return True
except Exception:
pass
with tempfile.TemporaryDirectory() as temp_dir:
temp_path = Path(temp_dir)
try:
with zipfile.ZipFile(self.original_docx, "r") as zip_ref:
zip_ref.extractall(temp_path)
except Exception as e:
print(f"FAILED - Error unpacking original docx: {e}")
return False
original_file = temp_path / "word" / "document.xml"
if not original_file.exists():
print(
f"FAILED - Original document.xml not found in {self.original_docx}"
)
return False
try:
import xml.etree.ElementTree as ET
modified_tree = ET.parse(modified_file)
modified_root = modified_tree.getroot()
original_tree = ET.parse(original_file)
original_root = original_tree.getroot()
except ET.ParseError as e:
print(f"FAILED - Error parsing XML files: {e}")
return False
self._remove_author_tracked_changes(original_root)
self._remove_author_tracked_changes(modified_root)
modified_text = self._extract_text_content(modified_root)
original_text = self._extract_text_content(original_root)
if modified_text != original_text:
error_message = self._generate_detailed_diff(
original_text, modified_text
)
print(error_message)
return False
if self.verbose:
print(f"PASSED - All changes by {self.author} are properly tracked")
return True
def _generate_detailed_diff(self, original_text, modified_text):
error_parts = [
f"FAILED - Document text doesn't match after removing {self.author}'s tracked changes",
"",
"Likely causes:",
" 1. Modified text inside another author's <w:ins> or <w:del> tags",
" 2. Made edits without proper tracked changes",
" 3. Didn't nest <w:del> inside <w:ins> when deleting another's insertion",
"",
"For pre-redlined documents, use correct patterns:",
" - To reject another's INSERTION: Nest <w:del> inside their <w:ins>",
" - To restore another's DELETION: Add new <w:ins> AFTER their <w:del>",
"",
]
git_diff = self._get_git_word_diff(original_text, modified_text)
if git_diff:
error_parts.extend(["Differences:", "============", git_diff])
else:
error_parts.append("Unable to generate word diff (git not available)")
return "\n".join(error_parts)
def _get_git_word_diff(self, original_text, modified_text):
try:
with tempfile.TemporaryDirectory() as temp_dir:
temp_path = Path(temp_dir)
original_file = temp_path / "original.txt"
modified_file = temp_path / "modified.txt"
original_file.write_text(original_text, encoding="utf-8")
modified_file.write_text(modified_text, encoding="utf-8")
result = subprocess.run(
[
"git",
"diff",
"--word-diff=plain",
"--word-diff-regex=.",
"-U0",
"--no-index",
str(original_file),
str(modified_file),
],
capture_output=True,
text=True,
)
if result.stdout.strip():
lines = result.stdout.split("\n")
content_lines = []
in_content = False
for line in lines:
if line.startswith("@@"):
in_content = True
continue
if in_content and line.strip():
content_lines.append(line)
if content_lines:
return "\n".join(content_lines)
result = subprocess.run(
[
"git",
"diff",
"--word-diff=plain",
"-U0",
"--no-index",
str(original_file),
str(modified_file),
],
capture_output=True,
text=True,
)
if result.stdout.strip():
lines = result.stdout.split("\n")
content_lines = []
in_content = False
for line in lines:
if line.startswith("@@"):
in_content = True
continue
if in_content and line.strip():
content_lines.append(line)
return "\n".join(content_lines)
except (subprocess.CalledProcessError, FileNotFoundError, Exception):
pass
return None
def _remove_author_tracked_changes(self, root):
ins_tag = f"{{{self.namespaces['w']}}}ins"
del_tag = f"{{{self.namespaces['w']}}}del"
author_attr = f"{{{self.namespaces['w']}}}author"
for parent in root.iter():
to_remove = []
for child in parent:
if child.tag == ins_tag and child.get(author_attr) == self.author:
to_remove.append(child)
for elem in to_remove:
parent.remove(elem)
deltext_tag = f"{{{self.namespaces['w']}}}delText"
t_tag = f"{{{self.namespaces['w']}}}t"
for parent in root.iter():
to_process = []
for child in parent:
if child.tag == del_tag and child.get(author_attr) == self.author:
to_process.append((child, list(parent).index(child)))
for del_elem, del_index in reversed(to_process):
for elem in del_elem.iter():
if elem.tag == deltext_tag:
elem.tag = t_tag
for child in reversed(list(del_elem)):
parent.insert(del_index, child)
parent.remove(del_elem)
def _extract_text_content(self, root):
p_tag = f"{{{self.namespaces['w']}}}p"
t_tag = f"{{{self.namespaces['w']}}}t"
paragraphs = []
for p_elem in root.findall(f".//{p_tag}"):
text_parts = []
for t_elem in p_elem.findall(f".//{t_tag}"):
if t_elem.text:
text_parts.append(t_elem.text)
paragraph_text = "".join(text_parts)
if paragraph_text:
paragraphs.append(paragraph_text)
return "\n".join(paragraphs)
if __name__ == "__main__":
raise RuntimeError("This module should not be run directly.")