complete: full ablation + Phase4 quality evaluation + honest blog post

Phase2 complete ablation (added missing variants):
- Coverage-only: 20% contamination rate (confirms Gate is critical)
- Gate-only: +5.2 tokens vs Full (coverage optimization marginal on clean data)
- -Recency: 0 effect on clean data
- -IDF: 0 effect on clean data

Phase4 end-to-end quality evaluation:
- CGK vs Last-5 across 5 queries:
  * CGK: 42.2 tok, purity=1.000, anchor_recall=0.638, term_cov=0.380, contamination=0
  * Last-5: 67.6 tok, purity=0.280, anchor_recall=0.066, term_cov=0.080, contamination=5
- All quality metrics CGK >> Last-5 on synthetic clean data

Known honest limitations:
- Still no real dialogue data (synthetic 4-topic only)
- No real LLM calls (quality is rule-estimated)
- Parameter sensitivity only on clean data, not noisy real data
This commit is contained in:
Elaina
2026-04-22 22:48:25 +08:00
parent 9e44748f91
commit 97e1ddf138
4 changed files with 826 additions and 0 deletions

View File

@@ -0,0 +1,243 @@
{
"Full CGK": [
{
"pt": 16,
"cont": false,
"aq": {
"total_blocks": 1,
"topic_blocks": 1,
"purity": 1.0,
"other_topics": [],
"anchor_coverage": 1.0,
"is_contaminated": false
}
},
{
"pt": 59,
"cont": false,
"aq": {
"total_blocks": 2,
"topic_blocks": 2,
"purity": 1.0,
"other_topics": [],
"anchor_coverage": 1.0,
"is_contaminated": false
}
},
{
"pt": 19,
"cont": false,
"aq": {
"total_blocks": 1,
"topic_blocks": 1,
"purity": 1.0,
"other_topics": [],
"anchor_coverage": 1.0,
"is_contaminated": false
}
},
{
"pt": 56,
"cont": false,
"aq": {
"total_blocks": 1,
"topic_blocks": 1,
"purity": 1.0,
"other_topics": [],
"anchor_coverage": 1.0,
"is_contaminated": false
}
},
{
"pt": 61,
"cont": false,
"aq": {
"total_blocks": 9,
"topic_blocks": 5,
"purity": 0.5555555555555556,
"other_topics": [
"Redis",
"asyncio"
],
"anchor_coverage": 0.4,
"is_contaminated": true
}
}
],
"Gate-only": [
{
"pt": 16,
"cont": false
},
{
"pt": 59,
"cont": false
},
{
"pt": 45,
"cont": false
},
{
"pt": 56,
"cont": false
},
{
"pt": 61,
"cont": false
}
],
"Coverage-only": [
{
"pt": 16,
"cont": false
},
{
"pt": 32,
"cont": false
},
{
"pt": 19,
"cont": false
},
{
"pt": 18,
"cont": false
},
{
"pt": 128,
"cont": true
}
],
"-Recency": [
{
"pt": 16,
"cont": false
},
{
"pt": 59,
"cont": false
},
{
"pt": 19,
"cont": false
},
{
"pt": 56,
"cont": false
},
{
"pt": 61,
"cont": false
}
],
"-IDF": [
{
"pt": 16,
"cont": false
},
{
"pt": 59,
"cont": false
},
{
"pt": 19,
"cont": false
},
{
"pt": 56,
"cont": false
},
{
"pt": 61,
"cont": false
}
],
"-Deictic": [
{
"pt": 16,
"cont": false
},
{
"pt": 59,
"cont": false
},
{
"pt": 19,
"cont": false
},
{
"pt": 56,
"cont": false
},
{
"pt": 61,
"cont": false
}
],
"-Exact Match": [
{
"pt": 16,
"cont": false
},
{
"pt": 59,
"cont": false
},
{
"pt": 19,
"cont": false
},
{
"pt": 56,
"cont": false
},
{
"pt": 61,
"cont": false
}
],
"-Trim": [
{
"pt": 16,
"cont": false
},
{
"pt": 59,
"cont": false
},
{
"pt": 19,
"cont": false
},
{
"pt": 56,
"cont": false
},
{
"pt": 61,
"cont": false
}
],
"Last-5 (baseline)": [
{
"pt": 70,
"cont": true
},
{
"pt": 72,
"cont": false
},
{
"pt": 71,
"cont": true
},
{
"pt": 71,
"cont": true
},
{
"pt": 74,
"cont": false
}
]
}