Refactor MemorySystem to make Chroma vector store initialization more robust against schema conflicts. Add a method that detects the known table conflict and a method that retries initialization, then repairs the store by rebuilding its directory after creating a backup of the conflicting one.

This commit is contained in:
Mimikko-zeus
2026-03-03 12:59:17 +08:00
parent 6e7ef24e35
commit 586f09c3a5
2 changed files with 128 additions and 7 deletions

View File

@@ -3,6 +3,8 @@
"""
import asyncio
import hashlib
import shutil
import time
import uuid
from typing import List, Dict, Optional, Tuple, Callable, Awaitable
from dataclasses import dataclass, field
@@ -98,18 +100,60 @@ class MemorySystem:
# 初始化向量存储
if use_vector_db:
try:
# 使用 Chroma 向量数据库
chroma_path = storage_path.parent / "chroma_db"
self.vector_store: VectorStore = ChromaVectorStore(chroma_path)
chroma_path = storage_path.parent / "chroma_db"
chroma_store = self._init_chroma_store(chroma_path)
if chroma_store is not None:
self.vector_store = chroma_store
logger.info("Using Chroma vector store")
except Exception as e:
logger.warning(f"Chroma 初始化失败,降级为 JSON 存储: {e}")
else:
self.vector_store = JSONVectorStore(storage_path)
else:
# 使用 JSON 存储(向后兼容)
self.vector_store = JSONVectorStore(storage_path)
logger.info("使用 JSON 存储")
@staticmethod
def _is_chroma_table_conflict(error: Exception) -> bool:
    """Return True when the error text reports the embeddings-table conflict.

    The exception is converted to text, lower-cased, and searched for the
    phrase "table embeddings already exists".
    """
    conflict_marker = "table embeddings already exists"
    error_text = str(error)
    return conflict_marker in error_text.lower()
def _init_chroma_store(self, chroma_path: Path) -> Optional[VectorStore]:
    """Initialize Chroma, attempting a repair on the known schema conflict.

    Stages:
      1. Try to create the store at ``chroma_path``.
      2. If that fails with the embeddings-table conflict, sleep briefly and
         try once more (covers a transient conflict during concurrent
         start-up).
      3. If the conflict persists, move the existing directory to a uniquely
         named backup next to it and create a fresh store in its place.

    Args:
        chroma_path: Directory used by the Chroma store.

    Returns:
        The created store, or ``None`` when initialization or repair failed.
        Every failure path logs that the system degrades to JSON storage.
    """
    # Stage 1: plain initialization.
    try:
        return ChromaVectorStore(chroma_path)
    except Exception as error:
        if not self._is_chroma_table_conflict(error):
            logger.warning(f"Chroma 初始化失败,降级为 JSON 存储: {error}")
            return None
        logger.warning(f"Chroma 初始化出现 schema 冲突,正在重试: {error}")

    # Stage 2: one short-delay retry to ride out a transient conflict caused
    # by concurrent start-up.
    time.sleep(0.2)
    try:
        return ChromaVectorStore(chroma_path)
    except Exception as retry_error:
        if not self._is_chroma_table_conflict(retry_error):
            logger.warning(f"Chroma 重试失败,降级为 JSON 存储: {retry_error}")
            return None

    # Stage 3: the conflict persists. Move the directory aside and rebuild.
    # The timestamp plus a random suffix keeps backup names from colliding.
    backup_name = (
        f"{chroma_path.name}_backup_conflict_"
        f"{datetime.now().strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:8]}"
    )
    backup_path = chroma_path.parent / backup_name
    moved = False  # True once the original directory has been relocated.
    try:
        if chroma_path.exists():
            shutil.move(str(chroma_path), str(backup_path))
            moved = True
        chroma_path.mkdir(parents=True, exist_ok=True)
        repaired = ChromaVectorStore(chroma_path)
    except Exception as repair_error:
        # If the directory was already moved, say where it went; otherwise
        # the relocated data would be orphaned without any trace in the logs.
        backup_note = f" (原目录已移动到备份: {backup_path})" if moved else ""
        logger.warning(
            f"Chroma 修复失败,降级为 JSON 存储: {repair_error}{backup_note}"
        )
        return None

    logger.warning(
        f"检测到 Chroma 元数据库冲突,已重建目录并保留备份: {backup_path}"
    )
    return repaired
@staticmethod
def _normalize_embedding(values: List[float], dim: int = 1024) -> List[float]:
@@ -463,4 +507,3 @@ class MemorySystem:
async def close(self):
    """Close the memory system by closing its vector store."""
    store = self.vector_store
    await store.close()