diff --git a/src/gatekeeper.py b/src/gatekeeper.py
index 92bf873..49950c6 100644
--- a/src/gatekeeper.py
+++ b/src/gatekeeper.py
@@ -192,13 +192,16 @@ class ContextGatekeeper:
             if user_to_keep or kept_asst_sents:
                 new_user = '。'.join(user_to_keep) + ('。' if user_to_keep and kept_asst_sents else '')
                 new_asst = '。'.join(kept_asst_sents)
+                # 裁剪后重新估算 token 数，不用原始值
+                new_tokens_user = Block._estimate_tokens(new_user)
+                new_tokens_asst = Block._estimate_tokens(new_asst)
                 trimmed_block = Block(
                     user_text=new_user or block.user_text,
                     assistant_text=new_asst or block.assistant_text,
                     turn_id=block.turn_id,
                     anchors=block.anchors,
-                    tokens_user=block.tokens_user,
-                    tokens_assistant=block.tokens_assistant
+                    tokens_user=new_tokens_user,
+                    tokens_assistant=new_tokens_asst
                 )
                 trimmed.append(trimmed_block)
             else:
@@ -262,4 +265,6 @@ class ContextGatekeeper:
         self.blocks.clear()
         self.turn_counter = 0
         self._active_topic = None
+        self.anchor_extractor._idf_cache.clear()
+        self.anchor_extractor._doc_count = 0
         # constraints 保留
\ No newline at end of file
diff --git a/src/sparse.py b/src/sparse.py
index 484c8a3..a2efdc7 100644
--- a/src/sparse.py
+++ b/src/sparse.py
@@ -92,18 +92,18 @@ class SparseRetriever:
         q_anchors_lower = [a.lower() for a in query_anchors]
 
         # 内容词: 从 query 原文提取的 topic-discriminative 词汇
-        # 只包括: 英文术语、代码标识符、版本号
-        # 中文通用词（如"怎么"、"执行"）不具有话题区分度，排除
+        # 包括: 英文术语/标识符、版本号、2+字符中文词
+        # Only single-character Chinese runs are excluded; common 2-character words (e.g. "怎么") are NOT filtered out by this pattern
         content_words = set()
-        # 英文单词和代码标识符（长度>=2）
+        # 英文单词和代码标识符（所有长度 >= 2）
         for w in re.findall(r'[a-zA-Z_][a-zA-Z0-9_-]*', query_text):
             if len(w) >= 2:
                 content_words.add(w.lower())
         # 版本号
         for v in re.findall(r'v?\d+(\.\d+)*', query_text):
             content_words.add(v.lower())
-        # 完整中文术语（连续中文字符 >= 4，足够具体的术语）
-        for chunk in re.findall(r'[\u4e00-\u9fff]{4,}', query_text):
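+        # Runs of 2+ consecutive Chinese characters (covers short Chinese terms; ASCII names such as "PostgreSQL" are matched by the identifier pattern above)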
+        for chunk in re.findall(r'[\u4e00-\u9fff]{2,}', query_text):
             content_words.add(chunk.lower())
 
         for i, block in enumerate(blocks):
@@ -111,16 +111,13 @@ class SparseRetriever:
 
             # 话题切换时: 过滤掉不包含任何内容词的块
             # 这些块属于旧话题，不应参与当前查询的候选
-            # 例如: 问 PostgreSQL 时，只有包含 'postgresql' 或 'explain' 等词的块才能通过
             if topic_switched and content_words:
                 block_text = (block.user_text + ' ' + block.assistant_text).lower()
-                # 检查 block 是否包含 query 的任意一个内容词
                 block_contains_content = any(
                     cw in block_text for cw in content_words
                 )
                 if not block_contains_content:
-                    scored.append((block, 0.0))
-                    continue
+                    continue  # 直接跳过，不加入 scored 列表
 
             s = self.score(block, query_anchors, recency, idf_cache)
             scored.append((block, s))