fix: selector gain函数使用IDF加权,与文档一致
- selector.select() 接收 idf_cache 参数 - gain = ΣIDF(t) for t ∈ new_anchors / cost^α(与文档公式一致) - gatekeeper.select() 将 anchor_extractor._idf_cache 传入selector - sparse.py recency 注释澄清为'新鲜度奖励'而非'时间衰减' - 所有测试 9/9 通过
This commit is contained in:
473
evaluation_results.json
Normal file
473
evaluation_results.json
Normal file
@@ -0,0 +1,473 @@
|
||||
{
|
||||
"timestamp": "2026-04-22T01:33:35.948796",
|
||||
"stages": [
|
||||
{
|
||||
"name": "Redis分布式锁话题",
|
||||
"turns": [
|
||||
{
|
||||
"turn_id": 1,
|
||||
"query": "Redis 锁续租为什么会脑裂?",
|
||||
"prompt": "你是一个有帮助的助手。\n\n【相关上下文】\n(无相关上下文)\n\n【当前问题】\n用户: Redis 锁续租为什么会脑裂?",
|
||||
"response": "",
|
||||
"usage": {
|
||||
"total_tokens": 371,
|
||||
"total_characters": 0,
|
||||
"prompt_tokens": 71,
|
||||
"completion_tokens": 300,
|
||||
"completion_tokens_details": {
|
||||
"reasoning_tokens": 300
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"turn_id": 2,
|
||||
"query": "如何避免这种情况?",
|
||||
"prompt": "你是一个有帮助的助手。\n\n【相关上下文】\n(无相关上下文)\n\n【当前问题】\n用户: 如何避免这种情况?",
|
||||
"response": "",
|
||||
"usage": {
|
||||
"total_tokens": 366,
|
||||
"total_characters": 0,
|
||||
"prompt_tokens": 66,
|
||||
"completion_tokens": 300,
|
||||
"completion_tokens_details": {
|
||||
"reasoning_tokens": 300
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"turn_id": 3,
|
||||
"query": "锁的 TTL 应该怎么设置才合理?",
|
||||
"selected_context_turns": [],
|
||||
"prompt": "你是一个有帮助的助手。\n\n【相关上下文】\n(无相关上下文)\n\n【当前问题】\n用户: 锁的 TTL 应该怎么设置才合理?",
|
||||
"response": "",
|
||||
"usage": {
|
||||
"total_tokens": 372,
|
||||
"total_characters": 0,
|
||||
"prompt_tokens": 72,
|
||||
"completion_tokens": 300,
|
||||
"completion_tokens_details": {
|
||||
"reasoning_tokens": 300
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "Python异步编程话题",
|
||||
"turns": [
|
||||
{
|
||||
"turn_id": 4,
|
||||
"query": "Python 异步编程怎么做?请用 asyncio 举例子",
|
||||
"prompt": "你是一个有帮助的助手。\n\n【相关上下文】\n(无相关上下文)\n\n【当前问题】\n用户: Python 异步编程怎么做?请用 asyncio 举例子",
|
||||
"response": "## Python 异步编程概",
|
||||
"usage": {
|
||||
"total_tokens": 373,
|
||||
"total_characters": 0,
|
||||
"prompt_tokens": 73,
|
||||
"completion_tokens": 300,
|
||||
"completion_tokens_details": {
|
||||
"reasoning_tokens": 294
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"turn_id": 5,
|
||||
"query": "asyncio 的并发性能怎么样?",
|
||||
"selected_context_turns": [
|
||||
3
|
||||
],
|
||||
"topic_switch_correct": false,
|
||||
"prompt": "你是一个有帮助的助手。\n\n【相关上下文】\n【轮次 3】\n用户: Python 异步编程怎么做?请用 asyncio 举例子\n助手: ## Python 异步编程概\n\n【当前问题】\n用户: asyncio 的并发性能怎么样?",
|
||||
"response": "",
|
||||
"usage": {
|
||||
"total_tokens": 392,
|
||||
"total_characters": 0,
|
||||
"prompt_tokens": 92,
|
||||
"completion_tokens": 300,
|
||||
"completion_tokens_details": {
|
||||
"reasoning_tokens": 300
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "指代词强制继承",
|
||||
"turns": [
|
||||
{
|
||||
"turn_id": 6,
|
||||
"query": "它的生态系统和社区支持如何?",
|
||||
"selected_context_turns": [
|
||||
2,
|
||||
3
|
||||
],
|
||||
"deictic_triggered": false,
|
||||
"prompt": "你是一个有帮助的助手。\n\n【相关上下文】\n【轮次 2】\n用户: 如何避免这种情况?\n助手: \n\n【轮次 3】\n用户: Python 异步编程怎么做?请用 asyncio 举例子\n助手: ## Python 异步编程概\n\n【当前问题】\n用户: 它的生态系统和社区支持如何?",
|
||||
"response": "",
|
||||
"usage": {
|
||||
"total_tokens": 409,
|
||||
"total_characters": 0,
|
||||
"prompt_tokens": 109,
|
||||
"completion_tokens": 300,
|
||||
"completion_tokens_details": {
|
||||
"reasoning_tokens": 300
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "长对话20轮测试",
|
||||
"total_turns": 20,
|
||||
"avg_prompt_length": 661.1,
|
||||
"max_prompt_length": 2342,
|
||||
"turns": [
|
||||
{
|
||||
"turn": 1,
|
||||
"query": "关于Redi,再说说",
|
||||
"context_turns": [],
|
||||
"prompt_length": 52,
|
||||
"token_usage": {
|
||||
"total_tokens": 367,
|
||||
"total_characters": 0,
|
||||
"prompt_tokens": 67,
|
||||
"completion_tokens": 300,
|
||||
"completion_tokens_details": {
|
||||
"reasoning_tokens": 85
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"turn": 2,
|
||||
"query": "Redis 和 Memcached 区别是什么",
|
||||
"context_turns": [
|
||||
1
|
||||
],
|
||||
"prompt_length": 566,
|
||||
"token_usage": {
|
||||
"total_tokens": 596,
|
||||
"total_characters": 0,
|
||||
"prompt_tokens": 296,
|
||||
"completion_tokens": 300,
|
||||
"completion_tokens_details": {
|
||||
"reasoning_tokens": 300
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"turn": 3,
|
||||
"query": "Python 深拷贝和浅拷贝区别",
|
||||
"context_turns": [
|
||||
1,
|
||||
2
|
||||
],
|
||||
"prompt_length": 600,
|
||||
"token_usage": {
|
||||
"total_tokens": 615,
|
||||
"total_characters": 0,
|
||||
"prompt_tokens": 315,
|
||||
"completion_tokens": 300,
|
||||
"completion_tokens_details": {
|
||||
"reasoning_tokens": 77
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"turn": 4,
|
||||
"query": "关于Pyth,再说说",
|
||||
"context_turns": [
|
||||
1,
|
||||
3
|
||||
],
|
||||
"prompt_length": 1129,
|
||||
"token_usage": {
|
||||
"total_tokens": 836,
|
||||
"total_characters": 0,
|
||||
"prompt_tokens": 536,
|
||||
"completion_tokens": 300,
|
||||
"completion_tokens_details": {
|
||||
"reasoning_tokens": 87
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"turn": 5,
|
||||
"query": "Go 语言的 goroutine 原理",
|
||||
"context_turns": [
|
||||
3,
|
||||
1,
|
||||
4
|
||||
],
|
||||
"prompt_length": 1648,
|
||||
"token_usage": {
|
||||
"total_tokens": 1068,
|
||||
"total_characters": 0,
|
||||
"prompt_tokens": 768,
|
||||
"completion_tokens": 300,
|
||||
"completion_tokens_details": {
|
||||
"reasoning_tokens": 86
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"turn": 6,
|
||||
"query": "Go 的 channel 用法",
|
||||
"context_turns": [
|
||||
5,
|
||||
3,
|
||||
1,
|
||||
4
|
||||
],
|
||||
"prompt_length": 2342,
|
||||
"token_usage": {
|
||||
"total_tokens": 1299,
|
||||
"total_characters": 0,
|
||||
"prompt_tokens": 999,
|
||||
"completion_tokens": 300,
|
||||
"completion_tokens_details": {
|
||||
"reasoning_tokens": 87
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"turn": 7,
|
||||
"query": "关于Redi,再说说",
|
||||
"context_turns": [
|
||||
1
|
||||
],
|
||||
"prompt_length": 553,
|
||||
"token_usage": {
|
||||
"total_tokens": 594,
|
||||
"total_characters": 0,
|
||||
"prompt_tokens": 294,
|
||||
"completion_tokens": 300,
|
||||
"completion_tokens_details": {
|
||||
"reasoning_tokens": 145
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"turn": 8,
|
||||
"query": "Redis 和 Memcached 区别是什么",
|
||||
"context_turns": [
|
||||
2
|
||||
],
|
||||
"prompt_length": 96,
|
||||
"token_usage": {
|
||||
"total_tokens": 383,
|
||||
"total_characters": 0,
|
||||
"prompt_tokens": 83,
|
||||
"completion_tokens": 300,
|
||||
"completion_tokens_details": {
|
||||
"reasoning_tokens": 300
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"turn": 9,
|
||||
"query": "Python 深拷贝和浅拷贝区别",
|
||||
"context_turns": [
|
||||
3
|
||||
],
|
||||
"prompt_length": 624,
|
||||
"token_usage": {
|
||||
"total_tokens": 606,
|
||||
"total_characters": 0,
|
||||
"prompt_tokens": 306,
|
||||
"completion_tokens": 300,
|
||||
"completion_tokens_details": {
|
||||
"reasoning_tokens": 77
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"turn": 10,
|
||||
"query": "关于Pyth,再说说",
|
||||
"context_turns": [
|
||||
4
|
||||
],
|
||||
"prompt_length": 552,
|
||||
"token_usage": {
|
||||
"total_tokens": 592,
|
||||
"total_characters": 0,
|
||||
"prompt_tokens": 292,
|
||||
"completion_tokens": 300,
|
||||
"completion_tokens_details": {
|
||||
"reasoning_tokens": 186
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"turn": 11,
|
||||
"query": "Go 语言的 goroutine 原理",
|
||||
"context_turns": [
|
||||
5
|
||||
],
|
||||
"prompt_length": 749,
|
||||
"token_usage": {
|
||||
"total_tokens": 597,
|
||||
"total_characters": 0,
|
||||
"prompt_tokens": 297,
|
||||
"completion_tokens": 300,
|
||||
"completion_tokens_details": {
|
||||
"reasoning_tokens": 106
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"turn": 12,
|
||||
"query": "Go 的 channel 用法",
|
||||
"context_turns": [
|
||||
6
|
||||
],
|
||||
"prompt_length": 646,
|
||||
"token_usage": {
|
||||
"total_tokens": 591,
|
||||
"total_characters": 0,
|
||||
"prompt_tokens": 291,
|
||||
"completion_tokens": 300,
|
||||
"completion_tokens_details": {
|
||||
"reasoning_tokens": 98
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"turn": 13,
|
||||
"query": "关于Redi,再说说",
|
||||
"context_turns": [
|
||||
1
|
||||
],
|
||||
"prompt_length": 553,
|
||||
"token_usage": {
|
||||
"total_tokens": 594,
|
||||
"total_characters": 0,
|
||||
"prompt_tokens": 294,
|
||||
"completion_tokens": 300,
|
||||
"completion_tokens_details": {
|
||||
"reasoning_tokens": 94
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"turn": 14,
|
||||
"query": "Redis 和 Memcached 区别是什么",
|
||||
"context_turns": [
|
||||
8
|
||||
],
|
||||
"prompt_length": 96,
|
||||
"token_usage": {
|
||||
"total_tokens": 383,
|
||||
"total_characters": 0,
|
||||
"prompt_tokens": 83,
|
||||
"completion_tokens": 300,
|
||||
"completion_tokens_details": {
|
||||
"reasoning_tokens": 300
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"turn": 15,
|
||||
"query": "Python 深拷贝和浅拷贝区别",
|
||||
"context_turns": [
|
||||
3
|
||||
],
|
||||
"prompt_length": 624,
|
||||
"token_usage": {
|
||||
"total_tokens": 606,
|
||||
"total_characters": 0,
|
||||
"prompt_tokens": 306,
|
||||
"completion_tokens": 300,
|
||||
"completion_tokens_details": {
|
||||
"reasoning_tokens": 70
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"turn": 16,
|
||||
"query": "关于Pyth,再说说",
|
||||
"context_turns": [
|
||||
10
|
||||
],
|
||||
"prompt_length": 347,
|
||||
"token_usage": {
|
||||
"total_tokens": 493,
|
||||
"total_characters": 0,
|
||||
"prompt_tokens": 193,
|
||||
"completion_tokens": 300,
|
||||
"completion_tokens_details": {
|
||||
"reasoning_tokens": 50
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"turn": 17,
|
||||
"query": "Go 语言的 goroutine 原理",
|
||||
"context_turns": [
|
||||
5
|
||||
],
|
||||
"prompt_length": 749,
|
||||
"token_usage": {
|
||||
"total_tokens": 597,
|
||||
"total_characters": 0,
|
||||
"prompt_tokens": 297,
|
||||
"completion_tokens": 300,
|
||||
"completion_tokens_details": {
|
||||
"reasoning_tokens": 107
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"turn": 18,
|
||||
"query": "Go 的 channel 用法",
|
||||
"context_turns": [
|
||||
6
|
||||
],
|
||||
"prompt_length": 646,
|
||||
"token_usage": {
|
||||
"total_tokens": 591,
|
||||
"total_characters": 0,
|
||||
"prompt_tokens": 291,
|
||||
"completion_tokens": 300,
|
||||
"completion_tokens_details": {
|
||||
"reasoning_tokens": 197
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"turn": 19,
|
||||
"query": "关于Redi,再说说",
|
||||
"context_turns": [
|
||||
1
|
||||
],
|
||||
"prompt_length": 553,
|
||||
"token_usage": {
|
||||
"total_tokens": 594,
|
||||
"total_characters": 0,
|
||||
"prompt_tokens": 294,
|
||||
"completion_tokens": 300,
|
||||
"completion_tokens_details": {
|
||||
"reasoning_tokens": 124
|
||||
}
|
||||
}
|
||||
},
|
||||
{
|
||||
"turn": 20,
|
||||
"query": "Redis 和 Memcached 区别是什么",
|
||||
"context_turns": [
|
||||
14
|
||||
],
|
||||
"prompt_length": 97,
|
||||
"token_usage": {
|
||||
"total_tokens": 383,
|
||||
"total_characters": 0,
|
||||
"prompt_tokens": 83,
|
||||
"completion_tokens": 300,
|
||||
"completion_tokens_details": {
|
||||
"reasoning_tokens": 300
|
||||
}
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
Reference in New Issue
Block a user