Files
QQbot/.claude/skills/skill-creator/scripts/aggregate_benchmark.py
Mimikko-zeus ae208af6a9 Please enter the commit message for your changes. Lines starting
with '#' will be ignored, and an empty message aborts the commit.

On branch main
Your branch is up to date with 'origin/main'.

Changes to be committed:
	new file:   .claude/skills/algorithmic-art/.openskills.json
	new file:   .claude/skills/algorithmic-art/LICENSE.txt
	new file:   .claude/skills/algorithmic-art/SKILL.md
	new file:   .claude/skills/algorithmic-art/templates/generator_template.js
	new file:   .claude/skills/algorithmic-art/templates/viewer.html
	new file:   .claude/skills/brand-guidelines/.openskills.json
	new file:   .claude/skills/brand-guidelines/LICENSE.txt
	new file:   .claude/skills/brand-guidelines/SKILL.md
	new file:   .claude/skills/canvas-design/.openskills.json
	new file:   .claude/skills/canvas-design/LICENSE.txt
	new file:   .claude/skills/canvas-design/SKILL.md
	new file:   .claude/skills/canvas-design/canvas-fonts/ArsenalSC-OFL.txt
	new file:   .claude/skills/canvas-design/canvas-fonts/ArsenalSC-Regular.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/BigShoulders-Bold.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/BigShoulders-OFL.txt
	new file:   .claude/skills/canvas-design/canvas-fonts/BigShoulders-Regular.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/Boldonse-OFL.txt
	new file:   .claude/skills/canvas-design/canvas-fonts/Boldonse-Regular.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/BricolageGrotesque-Bold.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/BricolageGrotesque-OFL.txt
	new file:   .claude/skills/canvas-design/canvas-fonts/BricolageGrotesque-Regular.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/CrimsonPro-Bold.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/CrimsonPro-Italic.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/CrimsonPro-OFL.txt
	new file:   .claude/skills/canvas-design/canvas-fonts/CrimsonPro-Regular.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/DMMono-OFL.txt
	new file:   .claude/skills/canvas-design/canvas-fonts/DMMono-Regular.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/EricaOne-OFL.txt
	new file:   .claude/skills/canvas-design/canvas-fonts/EricaOne-Regular.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/GeistMono-Bold.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/GeistMono-OFL.txt
	new file:   .claude/skills/canvas-design/canvas-fonts/GeistMono-Regular.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/Gloock-OFL.txt
	new file:   .claude/skills/canvas-design/canvas-fonts/Gloock-Regular.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/IBMPlexMono-Bold.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/IBMPlexMono-OFL.txt
	new file:   .claude/skills/canvas-design/canvas-fonts/IBMPlexMono-Regular.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/IBMPlexSerif-Bold.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/IBMPlexSerif-BoldItalic.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/IBMPlexSerif-Italic.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/IBMPlexSerif-Regular.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/InstrumentSans-Bold.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/InstrumentSans-BoldItalic.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/InstrumentSans-Italic.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/InstrumentSans-OFL.txt
	new file:   .claude/skills/canvas-design/canvas-fonts/InstrumentSans-Regular.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/InstrumentSerif-Italic.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/InstrumentSerif-Regular.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/Italiana-OFL.txt
	new file:   .claude/skills/canvas-design/canvas-fonts/Italiana-Regular.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/JetBrainsMono-Bold.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/JetBrainsMono-OFL.txt
	new file:   .claude/skills/canvas-design/canvas-fonts/JetBrainsMono-Regular.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/Jura-Light.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/Jura-Medium.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/Jura-OFL.txt
	new file:   .claude/skills/canvas-design/canvas-fonts/LibreBaskerville-OFL.txt
	new file:   .claude/skills/canvas-design/canvas-fonts/LibreBaskerville-Regular.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/Lora-Bold.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/Lora-BoldItalic.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/Lora-Italic.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/Lora-OFL.txt
	new file:   .claude/skills/canvas-design/canvas-fonts/Lora-Regular.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/NationalPark-Bold.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/NationalPark-OFL.txt
	new file:   .claude/skills/canvas-design/canvas-fonts/NationalPark-Regular.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/NothingYouCouldDo-OFL.txt
	new file:   .claude/skills/canvas-design/canvas-fonts/NothingYouCouldDo-Regular.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/Outfit-Bold.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/Outfit-OFL.txt
	new file:   .claude/skills/canvas-design/canvas-fonts/Outfit-Regular.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/PixelifySans-Medium.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/PixelifySans-OFL.txt
	new file:   .claude/skills/canvas-design/canvas-fonts/PoiretOne-OFL.txt
	new file:   .claude/skills/canvas-design/canvas-fonts/PoiretOne-Regular.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/RedHatMono-Bold.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/RedHatMono-OFL.txt
	new file:   .claude/skills/canvas-design/canvas-fonts/RedHatMono-Regular.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/Silkscreen-OFL.txt
	new file:   .claude/skills/canvas-design/canvas-fonts/Silkscreen-Regular.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/SmoochSans-Medium.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/SmoochSans-OFL.txt
	new file:   .claude/skills/canvas-design/canvas-fonts/Tektur-Medium.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/Tektur-OFL.txt
	new file:   .claude/skills/canvas-design/canvas-fonts/Tektur-Regular.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/WorkSans-Bold.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/WorkSans-BoldItalic.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/WorkSans-Italic.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/WorkSans-OFL.txt
	new file:   .claude/skills/canvas-design/canvas-fonts/WorkSans-Regular.ttf
	new file:   .claude/skills/canvas-design/canvas-fonts/YoungSerif-OFL.txt
	new file:   .claude/skills/canvas-design/canvas-fonts/YoungSerif-Regular.ttf
	new file:   .claude/skills/doc-coauthoring/.openskills.json
	new file:   .claude/skills/doc-coauthoring/SKILL.md
	new file:   .claude/skills/docx/.openskills.json
	new file:   .claude/skills/docx/LICENSE.txt
	new file:   .claude/skills/docx/SKILL.md
	new file:   .claude/skills/docx/scripts/__init__.py
	new file:   .claude/skills/docx/scripts/accept_changes.py
	new file:   .claude/skills/docx/scripts/comment.py
	new file:   .claude/skills/docx/scripts/office/helpers/__init__.py
	new file:   .claude/skills/docx/scripts/office/helpers/merge_runs.py
	new file:   .claude/skills/docx/scripts/office/helpers/simplify_redlines.py
	new file:   .claude/skills/docx/scripts/office/pack.py
	new file:   .claude/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chart.xsd
	new file:   .claude/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd
	new file:   .claude/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd
	new file:   .claude/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd
	new file:   .claude/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-main.xsd
	new file:   .claude/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-picture.xsd
	new file:   .claude/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd
	new file:   .claude/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd
	new file:   .claude/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/pml.xsd
	new file:   .claude/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd
	new file:   .claude/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd
	new file:   .claude/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd
	new file:   .claude/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd
	new file:   .claude/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd
	new file:   .claude/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd
	new file:   .claude/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd
	new file:   .claude/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd
	new file:   .claude/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-math.xsd
	new file:   .claude/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd
	new file:   .claude/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/sml.xsd
	new file:   .claude/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-main.xsd
	new file:   .claude/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd
	new file:   .claude/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd
	new file:   .claude/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd
	new file:   .claude/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd
	new file:   .claude/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/wml.xsd
	new file:   .claude/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/xml.xsd
	new file:   .claude/skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-contentTypes.xsd
	new file:   .claude/skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-coreProperties.xsd
	new file:   .claude/skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-digSig.xsd
	new file:   .claude/skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-relationships.xsd
	new file:   .claude/skills/docx/scripts/office/schemas/mce/mc.xsd
	new file:   .claude/skills/docx/scripts/office/schemas/microsoft/wml-2010.xsd
	new file:   .claude/skills/docx/scripts/office/schemas/microsoft/wml-2012.xsd
	new file:   .claude/skills/docx/scripts/office/schemas/microsoft/wml-2018.xsd
	new file:   .claude/skills/docx/scripts/office/schemas/microsoft/wml-cex-2018.xsd
	new file:   .claude/skills/docx/scripts/office/schemas/microsoft/wml-cid-2016.xsd
	new file:   .claude/skills/docx/scripts/office/schemas/microsoft/wml-sdtdatahash-2020.xsd
	new file:   .claude/skills/docx/scripts/office/schemas/microsoft/wml-symex-2015.xsd
	new file:   .claude/skills/docx/scripts/office/soffice.py
	new file:   .claude/skills/docx/scripts/office/unpack.py
	new file:   .claude/skills/docx/scripts/office/validate.py
	new file:   .claude/skills/docx/scripts/office/validators/__init__.py
	new file:   .claude/skills/docx/scripts/office/validators/base.py
	new file:   .claude/skills/docx/scripts/office/validators/docx.py
	new file:   .claude/skills/docx/scripts/office/validators/pptx.py
	new file:   .claude/skills/docx/scripts/office/validators/redlining.py
	new file:   .claude/skills/docx/scripts/templates/comments.xml
	new file:   .claude/skills/docx/scripts/templates/commentsExtended.xml
	new file:   .claude/skills/docx/scripts/templates/commentsExtensible.xml
	new file:   .claude/skills/docx/scripts/templates/commentsIds.xml
	new file:   .claude/skills/docx/scripts/templates/people.xml
	new file:   .claude/skills/frontend-design/.openskills.json
	new file:   .claude/skills/frontend-design/LICENSE.txt
	new file:   .claude/skills/frontend-design/SKILL.md
	new file:   .claude/skills/internal-comms/.openskills.json
	new file:   .claude/skills/internal-comms/LICENSE.txt
	new file:   .claude/skills/internal-comms/SKILL.md
	new file:   .claude/skills/internal-comms/examples/3p-updates.md
	new file:   .claude/skills/internal-comms/examples/company-newsletter.md
	new file:   .claude/skills/internal-comms/examples/faq-answers.md
	new file:   .claude/skills/internal-comms/examples/general-comms.md
	new file:   .claude/skills/mcp-builder/.openskills.json
	new file:   .claude/skills/mcp-builder/LICENSE.txt
	new file:   .claude/skills/mcp-builder/SKILL.md
	new file:   .claude/skills/mcp-builder/reference/evaluation.md
	new file:   .claude/skills/mcp-builder/reference/mcp_best_practices.md
	new file:   .claude/skills/mcp-builder/reference/node_mcp_server.md
	new file:   .claude/skills/mcp-builder/reference/python_mcp_server.md
	new file:   .claude/skills/mcp-builder/scripts/connections.py
	new file:   .claude/skills/mcp-builder/scripts/evaluation.py
	new file:   .claude/skills/mcp-builder/scripts/example_evaluation.xml
	new file:   .claude/skills/mcp-builder/scripts/requirements.txt
	new file:   .claude/skills/pdf/.openskills.json
	new file:   .claude/skills/pdf/LICENSE.txt
	new file:   .claude/skills/pdf/SKILL.md
	new file:   .claude/skills/pdf/forms.md
	new file:   .claude/skills/pdf/reference.md
	new file:   .claude/skills/pdf/scripts/check_bounding_boxes.py
	new file:   .claude/skills/pdf/scripts/check_fillable_fields.py
	new file:   .claude/skills/pdf/scripts/convert_pdf_to_images.py
	new file:   .claude/skills/pdf/scripts/create_validation_image.py
	new file:   .claude/skills/pdf/scripts/extract_form_field_info.py
	new file:   .claude/skills/pdf/scripts/extract_form_structure.py
	new file:   .claude/skills/pdf/scripts/fill_fillable_fields.py
	new file:   .claude/skills/pdf/scripts/fill_pdf_form_with_annotations.py
	new file:   .claude/skills/pptx/.openskills.json
	new file:   .claude/skills/pptx/LICENSE.txt
	new file:   .claude/skills/pptx/SKILL.md
	new file:   .claude/skills/pptx/editing.md
	new file:   .claude/skills/pptx/pptxgenjs.md
	new file:   .claude/skills/pptx/scripts/__init__.py
	new file:   .claude/skills/pptx/scripts/add_slide.py
	new file:   .claude/skills/pptx/scripts/clean.py
	new file:   .claude/skills/pptx/scripts/office/helpers/__init__.py
	new file:   .claude/skills/pptx/scripts/office/helpers/merge_runs.py
	new file:   .claude/skills/pptx/scripts/office/helpers/simplify_redlines.py
	new file:   .claude/skills/pptx/scripts/office/pack.py
	new file:   .claude/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chart.xsd
	new file:   .claude/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd
	new file:   .claude/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd
	new file:   .claude/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd
	new file:   .claude/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-main.xsd
	new file:   .claude/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-picture.xsd
	new file:   .claude/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd
	new file:   .claude/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd
	new file:   .claude/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/pml.xsd
	new file:   .claude/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd
	new file:   .claude/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd
	new file:   .claude/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd
	new file:   .claude/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd
	new file:   .claude/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd
	new file:   .claude/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd
	new file:   .claude/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd
	new file:   .claude/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd
	new file:   .claude/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-math.xsd
	new file:   .claude/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd
	new file:   .claude/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/sml.xsd
	new file:   .claude/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-main.xsd
	new file:   .claude/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd
	new file:   .claude/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd
	new file:   .claude/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd
	new file:   .claude/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd
	new file:   .claude/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/wml.xsd
	new file:   .claude/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/xml.xsd
	new file:   .claude/skills/pptx/scripts/office/schemas/ecma/fouth-edition/opc-contentTypes.xsd
	new file:   .claude/skills/pptx/scripts/office/schemas/ecma/fouth-edition/opc-coreProperties.xsd
	new file:   .claude/skills/pptx/scripts/office/schemas/ecma/fouth-edition/opc-digSig.xsd
	new file:   .claude/skills/pptx/scripts/office/schemas/ecma/fouth-edition/opc-relationships.xsd
	new file:   .claude/skills/pptx/scripts/office/schemas/mce/mc.xsd
	new file:   .claude/skills/pptx/scripts/office/schemas/microsoft/wml-2010.xsd
	new file:   .claude/skills/pptx/scripts/office/schemas/microsoft/wml-2012.xsd
	new file:   .claude/skills/pptx/scripts/office/schemas/microsoft/wml-2018.xsd
	new file:   .claude/skills/pptx/scripts/office/schemas/microsoft/wml-cex-2018.xsd
	new file:   .claude/skills/pptx/scripts/office/schemas/microsoft/wml-cid-2016.xsd
	new file:   .claude/skills/pptx/scripts/office/schemas/microsoft/wml-sdtdatahash-2020.xsd
	new file:   .claude/skills/pptx/scripts/office/schemas/microsoft/wml-symex-2015.xsd
	new file:   .claude/skills/pptx/scripts/office/soffice.py
	new file:   .claude/skills/pptx/scripts/office/unpack.py
	new file:   .claude/skills/pptx/scripts/office/validate.py
	new file:   .claude/skills/pptx/scripts/office/validators/__init__.py
	new file:   .claude/skills/pptx/scripts/office/validators/base.py
	new file:   .claude/skills/pptx/scripts/office/validators/docx.py
	new file:   .claude/skills/pptx/scripts/office/validators/pptx.py
	new file:   .claude/skills/pptx/scripts/office/validators/redlining.py
	new file:   .claude/skills/pptx/scripts/thumbnail.py
	new file:   .claude/skills/skill-creator/.openskills.json
	new file:   .claude/skills/skill-creator/LICENSE.txt
	new file:   .claude/skills/skill-creator/SKILL.md
	new file:   .claude/skills/skill-creator/agents/analyzer.md
	new file:   .claude/skills/skill-creator/agents/comparator.md
	new file:   .claude/skills/skill-creator/agents/grader.md
	new file:   .claude/skills/skill-creator/assets/eval_review.html
	new file:   .claude/skills/skill-creator/eval-viewer/generate_review.py
	new file:   .claude/skills/skill-creator/eval-viewer/viewer.html
	new file:   .claude/skills/skill-creator/references/schemas.md
	new file:   .claude/skills/skill-creator/scripts/__init__.py
	new file:   .claude/skills/skill-creator/scripts/aggregate_benchmark.py
	new file:   .claude/skills/skill-creator/scripts/generate_report.py
	new file:   .claude/skills/skill-creator/scripts/improve_description.py
	new file:   .claude/skills/skill-creator/scripts/package_skill.py
	new file:   .claude/skills/skill-creator/scripts/quick_validate.py
	new file:   .claude/skills/skill-creator/scripts/run_eval.py
	new file:   .claude/skills/skill-creator/scripts/run_loop.py
	new file:   .claude/skills/skill-creator/scripts/utils.py
	new file:   .claude/skills/slack-gif-creator/.openskills.json
	new file:   .claude/skills/slack-gif-creator/LICENSE.txt
	new file:   .claude/skills/slack-gif-creator/SKILL.md
	new file:   .claude/skills/slack-gif-creator/core/easing.py
	new file:   .claude/skills/slack-gif-creator/core/frame_composer.py
	new file:   .claude/skills/slack-gif-creator/core/gif_builder.py
	new file:   .claude/skills/slack-gif-creator/core/validators.py
	new file:   .claude/skills/slack-gif-creator/requirements.txt
	new file:   .claude/skills/template/.openskills.json
	new file:   .claude/skills/template/SKILL.md
	new file:   .claude/skills/theme-factory/.openskills.json
	new file:   .claude/skills/theme-factory/LICENSE.txt
	new file:   .claude/skills/theme-factory/SKILL.md
	new file:   .claude/skills/theme-factory/theme-showcase.pdf
	new file:   .claude/skills/theme-factory/themes/arctic-frost.md
	new file:   .claude/skills/theme-factory/themes/botanical-garden.md
	new file:   .claude/skills/theme-factory/themes/desert-rose.md
	new file:   .claude/skills/theme-factory/themes/forest-canopy.md
	new file:   .claude/skills/theme-factory/themes/golden-hour.md
	new file:   .claude/skills/theme-factory/themes/midnight-galaxy.md
	new file:   .claude/skills/theme-factory/themes/modern-minimalist.md
	new file:   .claude/skills/theme-factory/themes/ocean-depths.md
	new file:   .claude/skills/theme-factory/themes/sunset-boulevard.md
	new file:   .claude/skills/theme-factory/themes/tech-innovation.md
	new file:   .claude/skills/web-artifacts-builder/.openskills.json
	new file:   .claude/skills/web-artifacts-builder/LICENSE.txt
	new file:   .claude/skills/web-artifacts-builder/SKILL.md
	new file:   .claude/skills/web-artifacts-builder/scripts/bundle-artifact.sh
	new file:   .claude/skills/web-artifacts-builder/scripts/init-artifact.sh
	new file:   .claude/skills/web-artifacts-builder/scripts/shadcn-components.tar.gz
	new file:   .claude/skills/webapp-testing/.openskills.json
	new file:   .claude/skills/webapp-testing/LICENSE.txt
	new file:   .claude/skills/webapp-testing/SKILL.md
	new file:   .claude/skills/webapp-testing/examples/console_logging.py
	new file:   .claude/skills/webapp-testing/examples/element_discovery.py
	new file:   .claude/skills/webapp-testing/examples/static_html_automation.py
	new file:   .claude/skills/webapp-testing/scripts/with_server.py
	new file:   .claude/skills/xlsx/.openskills.json
	new file:   .claude/skills/xlsx/LICENSE.txt
	new file:   .claude/skills/xlsx/SKILL.md
	new file:   .claude/skills/xlsx/scripts/office/helpers/__init__.py
	new file:   .claude/skills/xlsx/scripts/office/helpers/merge_runs.py
	new file:   .claude/skills/xlsx/scripts/office/helpers/simplify_redlines.py
	new file:   .claude/skills/xlsx/scripts/office/pack.py
	new file:   .claude/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chart.xsd
	new file:   .claude/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd
	new file:   .claude/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd
	new file:   .claude/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd
	new file:   .claude/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-main.xsd
	new file:   .claude/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-picture.xsd
	new file:   .claude/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd
	new file:   .claude/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd
	new file:   .claude/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/pml.xsd
	new file:   .claude/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd
	new file:   .claude/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd
	new file:   .claude/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd
	new file:   .claude/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd
	new file:   .claude/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd
	new file:   .claude/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd
	new file:   .claude/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd
	new file:   .claude/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd
	new file:   .claude/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-math.xsd
	new file:   .claude/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd
	new file:   .claude/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/sml.xsd
	new file:   .claude/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-main.xsd
	new file:   .claude/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd
	new file:   .claude/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd
	new file:   .claude/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd
	new file:   .claude/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd
	new file:   .claude/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/wml.xsd
	new file:   .claude/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/xml.xsd
	new file:   .claude/skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-contentTypes.xsd
	new file:   .claude/skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-coreProperties.xsd
	new file:   .claude/skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-digSig.xsd
	new file:   .claude/skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-relationships.xsd
	new file:   .claude/skills/xlsx/scripts/office/schemas/mce/mc.xsd
	new file:   .claude/skills/xlsx/scripts/office/schemas/microsoft/wml-2010.xsd
	new file:   .claude/skills/xlsx/scripts/office/schemas/microsoft/wml-2012.xsd
	new file:   .claude/skills/xlsx/scripts/office/schemas/microsoft/wml-2018.xsd
	new file:   .claude/skills/xlsx/scripts/office/schemas/microsoft/wml-cex-2018.xsd
	new file:   .claude/skills/xlsx/scripts/office/schemas/microsoft/wml-cid-2016.xsd
	new file:   .claude/skills/xlsx/scripts/office/schemas/microsoft/wml-sdtdatahash-2020.xsd
	new file:   .claude/skills/xlsx/scripts/office/schemas/microsoft/wml-symex-2015.xsd
	new file:   .claude/skills/xlsx/scripts/office/soffice.py
	new file:   .claude/skills/xlsx/scripts/office/unpack.py
	new file:   .claude/skills/xlsx/scripts/office/validate.py
	new file:   .claude/skills/xlsx/scripts/office/validators/__init__.py
	new file:   .claude/skills/xlsx/scripts/office/validators/base.py
	new file:   .claude/skills/xlsx/scripts/office/validators/docx.py
	new file:   .claude/skills/xlsx/scripts/office/validators/pptx.py
	new file:   .claude/skills/xlsx/scripts/office/validators/redlining.py
	new file:   .claude/skills/xlsx/scripts/recalc.py
	new file:   .env.example
	new file:   .gitignore
	new file:   config/mcp.json
	new file:   config/models.json
	new file:   config/personalities.json
	new file:   docs/AGENTS.md
	new file:   docs/AI_IMPLEMENTATION.md
	new file:   docs/AI_INTEGRATION_COMPLETE.md
	new file:   docs/AI_QUICKSTART.md
	new file:   docs/AI_SUMMARY.md
	new file:   docs/CHANGELOG.md
	new file:   docs/CONFIG_GUIDE.md
	new file:   docs/FIXES.md
	new file:   docs/PROJECT_REFACTOR.md
	new file:   docs/README.md
	new file:   docs/README_INDEX.md
	new file:   examples/ai_example.py
	new file:   main.py
	new file:   pytest.ini
	new file:   requirements.txt
	new file:   scripts/migrate_to_vector_db.py
	new file:   skills/cmd_zip_skill/README.md
	new file:   skills/cmd_zip_skill/__init__.py
	new file:   skills/cmd_zip_skill/main.py
	new file:   skills/cmd_zip_skill/skill.json
	new file:   skills/cmd_zip_skill_1772465404375/README.md
	new file:   skills/cmd_zip_skill_1772465404375/__init__.py
	new file:   skills/cmd_zip_skill_1772465404375/main.py
	new file:   skills/cmd_zip_skill_1772465404375/skill.json
	new file:   skills/cmd_zip_skill_1772465434774/README.md
	new file:   skills/cmd_zip_skill_1772465434774/__init__.py
	new file:   skills/cmd_zip_skill_1772465434774/main.py
	new file:   skills/cmd_zip_skill_1772465434774/skill.json
	new file:   skills/cmd_zip_skill_1772465467809/README.md
	new file:   skills/cmd_zip_skill_1772465467809/__init__.py
	new file:   skills/cmd_zip_skill_1772465467809/main.py
	new file:   skills/cmd_zip_skill_1772465467809/skill.json
	new file:   skills/cmd_zip_skill_1772465652075/README.md
	new file:   skills/cmd_zip_skill_1772465652075/__init__.py
	new file:   skills/cmd_zip_skill_1772465652075/main.py
	new file:   skills/cmd_zip_skill_1772465652075/skill.json
	new file:   skills/cmd_zip_skill_1772465685352/README.md
	new file:   skills/cmd_zip_skill_1772465685352/__init__.py
	new file:   skills/cmd_zip_skill_1772465685352/main.py
	new file:   skills/cmd_zip_skill_1772465685352/skill.json
	new file:   skills/cmd_zip_skill_1772465936294/README.md
	new file:   skills/cmd_zip_skill_1772465936294/__init__.py
	new file:   skills/cmd_zip_skill_1772465936294/main.py
	new file:   skills/cmd_zip_skill_1772465936294/skill.json
	new file:   skills/cmd_zip_skill_1772465966322/README.md
	new file:   skills/cmd_zip_skill_1772465966322/__init__.py
	new file:   skills/cmd_zip_skill_1772465966322/main.py
	new file:   skills/cmd_zip_skill_1772465966322/skill.json
	new file:   skills/cmd_zip_skill_1772466071278/README.md
	new file:   skills/cmd_zip_skill_1772466071278/__init__.py
	new file:   skills/cmd_zip_skill_1772466071278/main.py
	new file:   skills/cmd_zip_skill_1772466071278/skill.json
	new file:   skills/skills_creator/README.md
	new file:   skills/skills_creator/__init__.py
	new file:   skills/skills_creator/main.py
	new file:   skills/skills_creator/skill.json
	new file:   src/__init__.py
	new file:   src/ai/__init__.py
	new file:   src/ai/base.py
	new file:   src/ai/client.py
	new file:   src/ai/docs/README.md
	new file:   src/ai/mcp/__init__.py
	new file:   src/ai/mcp/base.py
	new file:   src/ai/mcp/servers/__init__.py
	new file:   src/ai/mcp/servers/filesystem.py
	new file:   src/ai/memory.py
	new file:   src/ai/models/__init__.py
	new file:   src/ai/models/anthropic_model.py
	new file:   src/ai/models/openai_model.py
	new file:   src/ai/personality.py
	new file:   src/ai/skills/__init__.py
	new file:   src/ai/skills/base.py
	new file:   src/ai/task_manager.py
	new file:   src/ai/vector_store/__init__.py
	new file:   src/ai/vector_store/base.py
	new file:   src/ai/vector_store/chroma_store.py
	new file:   src/ai/vector_store/json_store.py
	new file:   src/core/__init__.py
	new file:   src/core/bot.py
	new file:   src/core/config.py
	new file:   src/handlers/__init__.py
	new file:   src/handlers/message_handler.py
	new file:   src/handlers/message_handler_ai.py
	new file:   src/utils/__init__.py
	new file:   src/utils/logger.py
	new file:   start.bat
	new file:   tests/test_ai.py
2026-03-03 01:23:23 +08:00

402 lines
14 KiB
Python

#!/usr/bin/env python3
"""
Aggregate individual run results into benchmark summary statistics.
Reads grading.json files from run directories and produces:
- run_summary with mean, stddev, min, max for each metric
- delta between with_skill and without_skill configurations
Usage:
python aggregate_benchmark.py <benchmark_dir>
Example:
python aggregate_benchmark.py benchmarks/2026-01-15T10-30-00/
The script supports two directory layouts:

    Workspace layout (from skill-creator iterations):
    <benchmark_dir>/
    └── eval-N/
        ├── with_skill/
        │   ├── run-1/grading.json
        │   └── run-2/grading.json
        └── without_skill/
            ├── run-1/grading.json
            └── run-2/grading.json

    Legacy layout (with runs/ subdirectory):
    <benchmark_dir>/
    └── runs/
        └── eval-N/
            ├── with_skill/
            │   └── run-1/grading.json
            └── without_skill/
                └── run-1/grading.json
"""
import argparse
import json
import math
import sys
from datetime import datetime, timezone
from pathlib import Path
def calculate_stats(values: list[float]) -> dict:
    """Summarise a list of numbers as mean, sample stddev, min and max.

    An empty list yields all-zero statistics. The standard deviation uses
    the sample (n - 1) denominator and is 0.0 when only one value is given.
    Every figure is rounded to four decimal places.
    """
    if not values:
        return {"mean": 0.0, "stddev": 0.0, "min": 0.0, "max": 0.0}

    count = len(values)
    average = sum(values) / count

    # A single observation has no spread to measure.
    spread = 0.0
    if count > 1:
        squared_deviations = sum((value - average) ** 2 for value in values)
        spread = math.sqrt(squared_deviations / (count - 1))

    return {
        "mean": round(average, 4),
        "stddev": round(spread, 4),
        "min": round(min(values), 4),
        "max": round(max(values), 4),
    }
def _resolve_eval_id(eval_dir: Path, fallback: int):
    """Return the eval id for *eval_dir*, or *fallback* if none can be read.

    The id comes from the "eval_id" key of eval_metadata.json when that file
    exists; otherwise from the number after the first "-" in the directory
    name.
    """
    metadata_path = eval_dir / "eval_metadata.json"
    if metadata_path.exists():
        try:
            with open(metadata_path, encoding="utf-8") as mf:
                metadata = json.load(mf)
        except (json.JSONDecodeError, UnicodeDecodeError, OSError):
            return fallback
        # A metadata file holding a list or scalar carries no eval_id.
        if isinstance(metadata, dict):
            return metadata.get("eval_id", fallback)
        return fallback

    try:
        return int(eval_dir.name.split("-")[1])
    except ValueError:
        return fallback


def _load_single_run(run_dir: Path, eval_id, fallback_run_number: int) -> "dict | None":
    """Build the result record for one run directory.

    Returns None (after printing a warning) when grading.json is missing,
    unreadable, or not a JSON object, so one bad run cannot abort the whole
    aggregation.
    """
    grading_file = run_dir / "grading.json"
    if not grading_file.exists():
        print(f"Warning: grading.json not found in {run_dir}")
        return None

    try:
        # Explicit UTF-8: the platform default encoding is not UTF-8 on
        # every system, and grading text may contain non-ASCII characters.
        with open(grading_file, encoding="utf-8") as f:
            grading = json.load(f)
    except json.JSONDecodeError as e:
        print(f"Warning: Invalid JSON in {grading_file}: {e}")
        return None
    except (UnicodeDecodeError, OSError) as e:
        print(f"Warning: Could not read {grading_file}: {e}")
        return None

    if not isinstance(grading, dict):
        print(f"Warning: {grading_file} does not contain a JSON object")
        return None

    # Mirror the eval-id handling: a suffix such as "run-final" falls back
    # to the run's position instead of raising ValueError.
    try:
        run_number = int(run_dir.name.split("-")[1])
    except ValueError:
        print(
            f"Warning: non-numeric run directory name {run_dir.name!r}; "
            f"using position {fallback_run_number} as run number"
        )
        run_number = fallback_run_number

    summary = grading.get("summary", {})
    result = {
        "eval_id": eval_id,
        "run_number": run_number,
        "pass_rate": summary.get("pass_rate", 0.0),
        "passed": summary.get("passed", 0),
        "failed": summary.get("failed", 0),
        "total": summary.get("total", 0),
    }

    # Timing: prefer grading.json, then the sibling timing.json.
    timing = grading.get("timing", {})
    result["time_seconds"] = timing.get("total_duration_seconds", 0.0)
    timing_file = run_dir / "timing.json"
    if result["time_seconds"] == 0.0 and timing_file.exists():
        try:
            with open(timing_file, encoding="utf-8") as tf:
                timing_data = json.load(tf)
            result["time_seconds"] = timing_data.get("total_duration_seconds", 0.0)
            result["tokens"] = timing_data.get("total_tokens", 0)
        except (json.JSONDecodeError, UnicodeDecodeError, OSError, AttributeError) as e:
            # Timing is best-effort: keep the run, but say why it has no timing.
            print(f"Warning: Could not read timing from {timing_file}: {e}")

    # Execution metrics; output_chars stands in for tokens when timing.json
    # supplied no token count.
    metrics = grading.get("execution_metrics", {})
    result["tool_calls"] = metrics.get("total_tool_calls", 0)
    if not result.get("tokens"):
        result["tokens"] = metrics.get("output_chars", 0)
    result["errors"] = metrics.get("errors_encountered", 0)

    # Expectations are passed through unchanged; malformed entries only warn.
    raw_expectations = grading.get("expectations", [])
    for exp in raw_expectations:
        if not isinstance(exp, dict) or "text" not in exp or "passed" not in exp:
            print(f"Warning: expectation in {grading_file} missing required fields (text, passed, evidence): {exp}")
    result["expectations"] = raw_expectations

    # Flatten the three note categories of user_notes_summary into one list.
    notes_summary = grading.get("user_notes_summary", {})
    notes = []
    notes.extend(notes_summary.get("uncertainties", []))
    notes.extend(notes_summary.get("needs_review", []))
    notes.extend(notes_summary.get("workarounds", []))
    result["notes"] = notes

    return result


def load_run_results(benchmark_dir: Path) -> dict:
    """
    Load all run results from a benchmark directory.

    Eval directories ("eval-*") are looked up under <benchmark_dir>/runs when
    that path exists, otherwise directly under <benchmark_dir>. Inside each
    eval directory, every subdirectory that contains "run-*" entries is
    treated as a configuration.

    Returns a dict keyed by config name (e.g. "with_skill"/"without_skill",
    or "new_skill"/"old_skill"), each containing a list of run result dicts.
    Returns an empty dict (after printing a message) when no eval directories
    are found. Runs whose grading.json is missing or unreadable are skipped
    with a warning.
    """
    # Support both layouts: eval dirs directly under benchmark_dir, or under runs/
    runs_dir = benchmark_dir / "runs"
    if runs_dir.exists():
        search_dir = runs_dir
    elif any(benchmark_dir.glob("eval-*")):
        search_dir = benchmark_dir
    else:
        print(f"No eval directories found in {benchmark_dir} or {benchmark_dir / 'runs'}")
        return {}

    results: dict[str, list] = {}

    for eval_idx, eval_dir in enumerate(sorted(search_dir.glob("eval-*"))):
        # A stray file named eval-* cannot be iterated; skip it.
        if not eval_dir.is_dir():
            continue
        eval_id = _resolve_eval_id(eval_dir, eval_idx)

        # Discover config directories dynamically rather than hardcoding names
        for config_dir in sorted(eval_dir.iterdir()):
            if not config_dir.is_dir():
                continue
            # Skip non-config directories (inputs, outputs, etc.)
            run_dirs = sorted(config_dir.glob("run-*"))
            if not run_dirs:
                continue

            config_runs = results.setdefault(config_dir.name, [])
            for position, run_dir in enumerate(run_dirs, start=1):
                result = _load_single_run(run_dir, eval_id, position)
                if result is not None:
                    config_runs.append(result)

    return results
def aggregate_results(results: dict) -> dict:
    """
    Condense per-run results into per-configuration summary statistics.

    For every configuration in *results* the returned dict holds stats for
    "pass_rate", "time_seconds" and "tokens". A "delta" entry is always added:
    the difference of the means between the first and second configuration
    (in dict order), formatted as signed strings. A missing configuration
    contributes 0 to the delta.
    """
    config_names = list(results.keys())
    run_summary = {}

    for name in config_names:
        config_runs = results.get(name, [])
        if config_runs:
            run_summary[name] = {
                "pass_rate": calculate_stats([run["pass_rate"] for run in config_runs]),
                "time_seconds": calculate_stats([run["time_seconds"] for run in config_runs]),
                "tokens": calculate_stats([run.get("tokens", 0) for run in config_runs]),
            }
        else:
            # A configuration with no loaded runs gets all-zero statistics.
            run_summary[name] = {
                "pass_rate": {"mean": 0.0, "stddev": 0.0, "min": 0.0, "max": 0.0},
                "time_seconds": {"mean": 0.0, "stddev": 0.0, "min": 0.0, "max": 0.0},
                "tokens": {"mean": 0, "stddev": 0, "min": 0, "max": 0},
            }

    # The first configuration is compared against the second, if present.
    primary = run_summary.get(config_names[0], {}) if config_names else {}
    baseline = run_summary.get(config_names[1], {}) if len(config_names) >= 2 else {}

    def mean_gap(metric):
        """Difference of the metric's mean between primary and baseline."""
        return primary.get(metric, {}).get("mean", 0) - baseline.get(metric, {}).get("mean", 0)

    run_summary["delta"] = {
        "pass_rate": f"{mean_gap('pass_rate'):+.2f}",
        "time_seconds": f"{mean_gap('time_seconds'):+.1f}",
        "tokens": f"{mean_gap('tokens'):+.0f}",
    }
    return run_summary
def generate_benchmark(benchmark_dir: Path, skill_name: str = "", skill_path: str = "") -> dict:
    """
    Generate the complete benchmark.json structure from run results.

    Args:
        benchmark_dir: Directory holding the eval/run directories.
        skill_name: Skill name for the metadata; a placeholder is used when empty.
        skill_path: Skill path for the metadata; a placeholder is used when empty.

    Returns:
        A dict with "metadata", "runs" (one entry per loaded run),
        "run_summary" (per-configuration statistics plus delta) and an empty
        "notes" list. "runs_per_configuration" in the metadata is the largest
        number of runs loaded for any single (configuration, eval) pair, or 0
        when no runs were loaded.
    """
    results = load_run_results(benchmark_dir)
    run_summary = aggregate_results(results)

    # Build the runs array and count runs per (configuration, eval) pair so
    # the metadata reports the real run count instead of a fixed number.
    runs = []
    runs_per_cell: dict = {}
    for config, config_results in results.items():
        for result in config_results:
            runs.append({
                "eval_id": result["eval_id"],
                "configuration": config,
                "run_number": result["run_number"],
                "result": {
                    "pass_rate": result["pass_rate"],
                    "passed": result["passed"],
                    "failed": result["failed"],
                    "total": result["total"],
                    "time_seconds": result["time_seconds"],
                    "tokens": result.get("tokens", 0),
                    "tool_calls": result.get("tool_calls", 0),
                    "errors": result.get("errors", 0),
                },
                "expectations": result["expectations"],
                "notes": result["notes"],
            })
            cell = (config, result["eval_id"])
            runs_per_cell[cell] = runs_per_cell.get(cell, 0) + 1

    runs_per_configuration = max(runs_per_cell.values(), default=0)

    # Determine eval IDs from results
    unique_eval_ids = {
        result["eval_id"]
        for config_results in results.values()
        for result in config_results
    }
    try:
        eval_ids = sorted(unique_eval_ids)
    except TypeError:
        # Ids read from metadata may be strings while fallback ids are ints;
        # such a mix is not orderable, so order by string form instead.
        eval_ids = sorted(unique_eval_ids, key=str)

    benchmark = {
        "metadata": {
            "skill_name": skill_name or "<skill-name>",
            "skill_path": skill_path or "<path/to/skill>",
            "executor_model": "<model-name>",
            "analyzer_model": "<model-name>",
            "timestamp": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
            "evals_run": eval_ids,
            "runs_per_configuration": runs_per_configuration,
        },
        "runs": runs,
        "run_summary": run_summary,
        "notes": [],  # To be filled by analyzer
    }
    return benchmark
def generate_markdown(benchmark: dict) -> str:
    """Render the benchmark data as a human-readable Markdown report.

    The report has a header built from the metadata, a summary table that
    compares the first two configurations in run_summary (placeholder names
    are used when fewer than two exist), and a notes section when
    benchmark["notes"] is non-empty.
    """
    metadata = benchmark["metadata"]
    run_summary = benchmark["run_summary"]

    # Every key except "delta" names a configuration.
    config_names = [name for name in run_summary if name != "delta"]
    config_a = config_names[0] if config_names else "config_a"
    config_b = config_names[1] if len(config_names) > 1 else "config_b"
    label_a = config_a.replace("_", " ").title()
    label_b = config_b.replace("_", " ").title()

    out = [f"# Skill Benchmark: {metadata['skill_name']}", ""]
    out.append(f"**Model**: {metadata['executor_model']}")
    out.append(f"**Date**: {metadata['timestamp']}")
    evals_text = ", ".join(str(eval_id) for eval_id in metadata["evals_run"])
    out.append(
        f"**Evals**: {evals_text} "
        f"({metadata['runs_per_configuration']} runs each per configuration)"
    )
    out += [
        "",
        "## Summary",
        "",
        f"| Metric | {label_a} | {label_b} | Delta |",
        "|--------|------------|---------------|-------|",
    ]

    stats_a = run_summary.get(config_a, {})
    stats_b = run_summary.get(config_b, {})
    delta = run_summary.get("delta", {})

    # Pass rate row: means and deviations are shown as whole percentages.
    pr_a = stats_a.get("pass_rate", {})
    pr_b = stats_b.get("pass_rate", {})
    out.append(
        f"| Pass Rate | {pr_a.get('mean', 0) * 100:.0f}% ± {pr_a.get('stddev', 0) * 100:.0f}% "
        f"| {pr_b.get('mean', 0) * 100:.0f}% ± {pr_b.get('stddev', 0) * 100:.0f}% "
        f"| {delta.get('pass_rate', '')} |"
    )

    # Time row: seconds with one decimal place.
    time_a = stats_a.get("time_seconds", {})
    time_b = stats_b.get("time_seconds", {})
    out.append(
        f"| Time | {time_a.get('mean', 0):.1f}s ± {time_a.get('stddev', 0):.1f}s "
        f"| {time_b.get('mean', 0):.1f}s ± {time_b.get('stddev', 0):.1f}s "
        f"| {delta.get('time_seconds', '')}s |"
    )

    # Tokens row: whole numbers.
    tok_a = stats_a.get("tokens", {})
    tok_b = stats_b.get("tokens", {})
    out.append(
        f"| Tokens | {tok_a.get('mean', 0):.0f} ± {tok_a.get('stddev', 0):.0f} "
        f"| {tok_b.get('mean', 0):.0f} ± {tok_b.get('stddev', 0):.0f} "
        f"| {delta.get('tokens', '')} |"
    )

    # Notes section, only when there is something to show.
    notes = benchmark.get("notes")
    if notes:
        out += ["", "## Notes", ""]
        out.extend(f"- {note}" for note in notes)

    return "\n".join(out)
def main():
    """Command-line entry point.

    Parses the arguments, generates the benchmark from the given directory,
    writes benchmark.json plus a Markdown report with the same stem next to
    it, and prints a short pass-rate summary. Exits with status 1 when the
    benchmark directory does not exist or is not a directory.
    """
    parser = argparse.ArgumentParser(
        description="Aggregate benchmark run results into summary statistics"
    )
    parser.add_argument(
        "benchmark_dir",
        type=Path,
        help="Path to the benchmark directory"
    )
    parser.add_argument(
        "--skill-name",
        default="",
        help="Name of the skill being benchmarked"
    )
    parser.add_argument(
        "--skill-path",
        default="",
        help="Path to the skill being benchmarked"
    )
    parser.add_argument(
        "--output", "-o",
        type=Path,
        help="Output path for benchmark.json (default: <benchmark_dir>/benchmark.json)"
    )

    args = parser.parse_args()

    # is_dir() also rejects a regular file, which would otherwise fail later
    # with a traceback when the output is written "inside" it.
    if not args.benchmark_dir.is_dir():
        print(f"Directory not found: {args.benchmark_dir}")
        sys.exit(1)

    # Generate benchmark
    benchmark = generate_benchmark(args.benchmark_dir, args.skill_name, args.skill_path)

    # Determine output paths
    output_json = args.output or (args.benchmark_dir / "benchmark.json")
    output_md = output_json.with_suffix(".md")

    # Write benchmark.json
    with open(output_json, "w", encoding="utf-8") as f:
        json.dump(benchmark, f, indent=2)
    print(f"Generated: {output_json}")

    # Write benchmark.md. Explicit UTF-8: the report contains "±" and
    # free-form notes, which the platform default encoding may mangle or
    # reject.
    markdown = generate_markdown(benchmark)
    with open(output_md, "w", encoding="utf-8") as f:
        f.write(markdown)
    print(f"Generated: {output_md}")

    # Print summary
    run_summary = benchmark["run_summary"]
    configs = [k for k in run_summary if k != "delta"]
    delta = run_summary.get("delta", {})

    print("\nSummary:")
    for config in configs:
        pr = run_summary[config]["pass_rate"]["mean"]
        label = config.replace("_", " ").title()
        print(f" {label}: {pr*100:.1f}% pass rate")
    print(f" Delta: {delta.get('pass_rate', '')}")


if __name__ == "__main__":
    main()