Refactor MemorySystem to enhance Chroma vector store initialization with conflict handling and backup creation. Added methods for conflict detection and store repair, improving robustness against schema conflicts.

This commit is contained in:
Mimikko-zeus
2026-03-03 12:59:17 +08:00
parent 6e7ef24e35
commit 586f09c3a5
2 changed files with 128 additions and 7 deletions

View File

@@ -3,6 +3,8 @@
""" """
import asyncio import asyncio
import hashlib import hashlib
import shutil
import time
import uuid import uuid
from typing import List, Dict, Optional, Tuple, Callable, Awaitable from typing import List, Dict, Optional, Tuple, Callable, Awaitable
from dataclasses import dataclass, field from dataclasses import dataclass, field
@@ -98,19 +100,61 @@ class MemorySystem:
# 初始化向量存储 # 初始化向量存储
if use_vector_db: if use_vector_db:
try:
# 使用 Chroma 向量数据库
chroma_path = storage_path.parent / "chroma_db" chroma_path = storage_path.parent / "chroma_db"
self.vector_store: VectorStore = ChromaVectorStore(chroma_path) chroma_store = self._init_chroma_store(chroma_path)
if chroma_store is not None:
self.vector_store = chroma_store
logger.info("Using Chroma vector store") logger.info("Using Chroma vector store")
except Exception as e: else:
logger.warning(f"Chroma 初始化失败,降级为 JSON 存储: {e}")
self.vector_store = JSONVectorStore(storage_path) self.vector_store = JSONVectorStore(storage_path)
else: else:
# 使用 JSON 存储(向后兼容) # 使用 JSON 存储(向后兼容)
self.vector_store = JSONVectorStore(storage_path) self.vector_store = JSONVectorStore(storage_path)
logger.info("使用 JSON 存储") logger.info("使用 JSON 存储")
@staticmethod
def _is_chroma_table_conflict(error: Exception) -> bool:
msg = str(error).lower()
return "table embeddings already exists" in msg
def _init_chroma_store(self, chroma_path: Path) -> Optional[VectorStore]:
"""初始化 Chroma遇到已知 sqlite schema 冲突时尝试修复。"""
try:
return ChromaVectorStore(chroma_path)
except Exception as error:
if not self._is_chroma_table_conflict(error):
logger.warning(f"Chroma 初始化失败,降级为 JSON 存储: {error}")
return None
# 先做一次短暂重试,处理并发启动时的瞬时冲突。
logger.warning(f"Chroma 初始化出现 schema 冲突,正在重试: {error}")
time.sleep(0.2)
try:
return ChromaVectorStore(chroma_path)
except Exception as retry_error:
if not self._is_chroma_table_conflict(retry_error):
logger.warning(f"Chroma 重试失败,降级为 JSON 存储: {retry_error}")
return None
backup_name = (
f"{chroma_path.name}_backup_conflict_"
f"{datetime.now().strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:8]}"
)
backup_path = chroma_path.parent / backup_name
try:
if chroma_path.exists():
shutil.move(str(chroma_path), str(backup_path))
chroma_path.mkdir(parents=True, exist_ok=True)
repaired = ChromaVectorStore(chroma_path)
logger.warning(
f"检测到 Chroma 元数据库冲突,已重建目录并保留备份: {backup_path}"
)
return repaired
except Exception as repair_error:
logger.warning(f"Chroma 修复失败,降级为 JSON 存储: {repair_error}")
return None
@staticmethod @staticmethod
def _normalize_embedding(values: List[float], dim: int = 1024) -> List[float]: def _normalize_embedding(values: List[float], dim: int = 1024) -> List[float]:
if not values: if not values:
@@ -463,4 +507,3 @@ class MemorySystem:
async def close(self): async def close(self):
"""关闭记忆系统。""" """关闭记忆系统。"""
await self.vector_store.close() await self.vector_store.close()

View File

@@ -0,0 +1,78 @@
from pathlib import Path
import src.ai.memory as memory_module
class _FakeJSONStore:
def __init__(self, storage_path: Path):
self.storage_path = storage_path
def test_memory_retries_once_on_chroma_conflict(monkeypatch, tmp_path: Path):
class _FlakyChroma:
calls = 0
def __init__(self, persist_directory: Path):
type(self).calls += 1
self.persist_directory = persist_directory
if type(self).calls == 1:
raise RuntimeError("table embeddings already exists")
monkeypatch.setattr(memory_module, "ChromaVectorStore", _FlakyChroma)
monkeypatch.setattr(memory_module, "JSONVectorStore", _FakeJSONStore)
memory = memory_module.MemorySystem(
storage_path=tmp_path / "long_term_memory.json",
use_vector_db=True,
)
assert isinstance(memory.vector_store, _FlakyChroma)
assert _FlakyChroma.calls == 2
def test_memory_rebuilds_chroma_directory_when_conflict_persists(monkeypatch, tmp_path: Path):
class _RecoverOnThirdTryChroma:
calls = 0
def __init__(self, persist_directory: Path):
type(self).calls += 1
self.persist_directory = persist_directory
if type(self).calls <= 2:
raise RuntimeError("table embeddings already exists")
monkeypatch.setattr(memory_module, "ChromaVectorStore", _RecoverOnThirdTryChroma)
monkeypatch.setattr(memory_module, "JSONVectorStore", _FakeJSONStore)
storage_root = tmp_path / "ai"
chroma_dir = storage_root / "chroma_db"
chroma_dir.mkdir(parents=True, exist_ok=True)
(chroma_dir / "marker.txt").write_text("legacy-data", encoding="utf-8")
memory = memory_module.MemorySystem(
storage_path=storage_root / "long_term_memory.json",
use_vector_db=True,
)
assert isinstance(memory.vector_store, _RecoverOnThirdTryChroma)
assert _RecoverOnThirdTryChroma.calls == 3
backups = sorted(storage_root.glob("chroma_db_backup_conflict_*"))
assert backups, "expected backup directory after repair"
assert (backups[-1] / "marker.txt").exists()
assert chroma_dir.exists()
def test_memory_falls_back_to_json_if_chroma_repair_fails(monkeypatch, tmp_path: Path):
class _AlwaysFailChroma:
def __init__(self, persist_directory: Path):
raise RuntimeError("table embeddings already exists")
monkeypatch.setattr(memory_module, "ChromaVectorStore", _AlwaysFailChroma)
monkeypatch.setattr(memory_module, "JSONVectorStore", _FakeJSONStore)
memory = memory_module.MemorySystem(
storage_path=tmp_path / "long_term_memory.json",
use_vector_db=True,
)
assert isinstance(memory.vector_store, _FakeJSONStore)