Phase 2: complete ablation (added the missing variants):
- Coverage-only: 20% contamination rate (confirms the Gate is critical)
- Gate-only: +5.2 tokens vs. Full (coverage optimization is marginal on clean data)
- -Recency: no effect on clean data
- -IDF: no effect on clean data

Phase 4: end-to-end quality evaluation:
- CGK vs. Last-5 across 5 queries:
  * CGK: 42.2 tokens, purity=1.000, anchor_recall=0.638, term_cov=0.380, contamination=0
  * Last-5: 67.6 tokens, purity=0.280, anchor_recall=0.066, term_cov=0.080, contamination=5
- CGK is far better than Last-5 on every quality metric on synthetic clean data

Known limitations, stated honestly:
- Still no real dialogue data (synthetic, 4 topics only)
- No real LLM calls (quality is rule-estimated)
- Parameter sensitivity was measured only on clean data, not on noisy real data
243 lines
3.3 KiB
JSON
243 lines
3.3 KiB
JSON
{
|
|
"Full CGK": [
|
|
{
|
|
"pt": 16,
|
|
"cont": false,
|
|
"aq": {
|
|
"total_blocks": 1,
|
|
"topic_blocks": 1,
|
|
"purity": 1.0,
|
|
"other_topics": [],
|
|
"anchor_coverage": 1.0,
|
|
"is_contaminated": false
|
|
}
|
|
},
|
|
{
|
|
"pt": 59,
|
|
"cont": false,
|
|
"aq": {
|
|
"total_blocks": 2,
|
|
"topic_blocks": 2,
|
|
"purity": 1.0,
|
|
"other_topics": [],
|
|
"anchor_coverage": 1.0,
|
|
"is_contaminated": false
|
|
}
|
|
},
|
|
{
|
|
"pt": 19,
|
|
"cont": false,
|
|
"aq": {
|
|
"total_blocks": 1,
|
|
"topic_blocks": 1,
|
|
"purity": 1.0,
|
|
"other_topics": [],
|
|
"anchor_coverage": 1.0,
|
|
"is_contaminated": false
|
|
}
|
|
},
|
|
{
|
|
"pt": 56,
|
|
"cont": false,
|
|
"aq": {
|
|
"total_blocks": 1,
|
|
"topic_blocks": 1,
|
|
"purity": 1.0,
|
|
"other_topics": [],
|
|
"anchor_coverage": 1.0,
|
|
"is_contaminated": false
|
|
}
|
|
},
|
|
{
|
|
"pt": 61,
|
|
"cont": false,
|
|
"aq": {
|
|
"total_blocks": 9,
|
|
"topic_blocks": 5,
|
|
"purity": 0.5555555555555556,
|
|
"other_topics": [
|
|
"Redis",
|
|
"asyncio"
|
|
],
|
|
"anchor_coverage": 0.4,
|
|
"is_contaminated": true
|
|
}
|
|
}
|
|
],
|
|
"Gate-only": [
|
|
{
|
|
"pt": 16,
|
|
"cont": false
|
|
},
|
|
{
|
|
"pt": 59,
|
|
"cont": false
|
|
},
|
|
{
|
|
"pt": 45,
|
|
"cont": false
|
|
},
|
|
{
|
|
"pt": 56,
|
|
"cont": false
|
|
},
|
|
{
|
|
"pt": 61,
|
|
"cont": false
|
|
}
|
|
],
|
|
"Coverage-only": [
|
|
{
|
|
"pt": 16,
|
|
"cont": false
|
|
},
|
|
{
|
|
"pt": 32,
|
|
"cont": false
|
|
},
|
|
{
|
|
"pt": 19,
|
|
"cont": false
|
|
},
|
|
{
|
|
"pt": 18,
|
|
"cont": false
|
|
},
|
|
{
|
|
"pt": 128,
|
|
"cont": true
|
|
}
|
|
],
|
|
"-Recency": [
|
|
{
|
|
"pt": 16,
|
|
"cont": false
|
|
},
|
|
{
|
|
"pt": 59,
|
|
"cont": false
|
|
},
|
|
{
|
|
"pt": 19,
|
|
"cont": false
|
|
},
|
|
{
|
|
"pt": 56,
|
|
"cont": false
|
|
},
|
|
{
|
|
"pt": 61,
|
|
"cont": false
|
|
}
|
|
],
|
|
"-IDF": [
|
|
{
|
|
"pt": 16,
|
|
"cont": false
|
|
},
|
|
{
|
|
"pt": 59,
|
|
"cont": false
|
|
},
|
|
{
|
|
"pt": 19,
|
|
"cont": false
|
|
},
|
|
{
|
|
"pt": 56,
|
|
"cont": false
|
|
},
|
|
{
|
|
"pt": 61,
|
|
"cont": false
|
|
}
|
|
],
|
|
"-Deictic": [
|
|
{
|
|
"pt": 16,
|
|
"cont": false
|
|
},
|
|
{
|
|
"pt": 59,
|
|
"cont": false
|
|
},
|
|
{
|
|
"pt": 19,
|
|
"cont": false
|
|
},
|
|
{
|
|
"pt": 56,
|
|
"cont": false
|
|
},
|
|
{
|
|
"pt": 61,
|
|
"cont": false
|
|
}
|
|
],
|
|
"-Exact Match": [
|
|
{
|
|
"pt": 16,
|
|
"cont": false
|
|
},
|
|
{
|
|
"pt": 59,
|
|
"cont": false
|
|
},
|
|
{
|
|
"pt": 19,
|
|
"cont": false
|
|
},
|
|
{
|
|
"pt": 56,
|
|
"cont": false
|
|
},
|
|
{
|
|
"pt": 61,
|
|
"cont": false
|
|
}
|
|
],
|
|
"-Trim": [
|
|
{
|
|
"pt": 16,
|
|
"cont": false
|
|
},
|
|
{
|
|
"pt": 59,
|
|
"cont": false
|
|
},
|
|
{
|
|
"pt": 19,
|
|
"cont": false
|
|
},
|
|
{
|
|
"pt": 56,
|
|
"cont": false
|
|
},
|
|
{
|
|
"pt": 61,
|
|
"cont": false
|
|
}
|
|
],
|
|
"Last-5 (baseline)": [
|
|
{
|
|
"pt": 70,
|
|
"cont": true
|
|
},
|
|
{
|
|
"pt": 72,
|
|
"cont": false
|
|
},
|
|
{
|
|
"pt": 71,
|
|
"cont": true
|
|
},
|
|
{
|
|
"pt": 71,
|
|
"cont": true
|
|
},
|
|
{
|
|
"pt": 74,
|
|
"cont": false
|
|
}
|
|
]
|
|
} |