Refactor MemorySystem to make Chroma vector store initialization more robust, with schema-conflict handling and backup creation. Add methods for conflict detection and store repair.
This commit is contained in:
@@ -3,6 +3,8 @@
|
||||
"""
|
||||
import asyncio
|
||||
import hashlib
|
||||
import shutil
|
||||
import time
|
||||
import uuid
|
||||
from typing import List, Dict, Optional, Tuple, Callable, Awaitable
|
||||
from dataclasses import dataclass, field
|
||||
@@ -98,18 +100,60 @@ class MemorySystem:
|
||||
|
||||
# 初始化向量存储
|
||||
if use_vector_db:
|
||||
try:
|
||||
# 使用 Chroma 向量数据库
|
||||
chroma_path = storage_path.parent / "chroma_db"
|
||||
self.vector_store: VectorStore = ChromaVectorStore(chroma_path)
|
||||
chroma_path = storage_path.parent / "chroma_db"
|
||||
chroma_store = self._init_chroma_store(chroma_path)
|
||||
if chroma_store is not None:
|
||||
self.vector_store = chroma_store
|
||||
logger.info("Using Chroma vector store")
|
||||
except Exception as e:
|
||||
logger.warning(f"Chroma 初始化失败,降级为 JSON 存储: {e}")
|
||||
else:
|
||||
self.vector_store = JSONVectorStore(storage_path)
|
||||
else:
|
||||
# 使用 JSON 存储(向后兼容)
|
||||
self.vector_store = JSONVectorStore(storage_path)
|
||||
logger.info("使用 JSON 存储")
|
||||
|
||||
@staticmethod
|
||||
def _is_chroma_table_conflict(error: Exception) -> bool:
|
||||
msg = str(error).lower()
|
||||
return "table embeddings already exists" in msg
|
||||
|
||||
def _init_chroma_store(self, chroma_path: Path) -> Optional[VectorStore]:
    """Create the Chroma store, repairing a known sqlite schema conflict if needed.

    Strategy:
      1. Try to open the store; on a schema conflict, wait briefly and try
         once more (covers transient conflicts from concurrent start-up).
      2. If the conflict persists, move the existing directory aside to a
         uniquely named backup next to it and build a fresh store.

    Any error that is not the known conflict, or a failed repair, is logged
    and reported as ``None``; this method does not raise.

    Args:
        chroma_path: Directory holding the Chroma database.

    Returns:
        The initialized store, or ``None`` when Chroma could not be
        initialized.
    """
    # First attempt plus one short retry; the label selects the log text
    # used when that attempt fails for a reason other than the conflict.
    conflict: Optional[Exception] = None
    for failure_label in ("Chroma 初始化失败", "Chroma 重试失败"):
        if conflict is not None:
            logger.warning(f"Chroma 初始化出现 schema 冲突,正在重试: {conflict}")
            time.sleep(0.2)
        try:
            return ChromaVectorStore(chroma_path)
        except Exception as error:
            if not self._is_chroma_table_conflict(error):
                logger.warning(f"{failure_label},降级为 JSON 存储: {error}")
                return None
            # Keep a reference: the ``as`` name is cleared after the handler.
            conflict = error

    # The conflict survived the retry: set the old directory aside and rebuild.
    # The timestamp plus random suffix keeps backup names unique.
    backup_name = (
        f"{chroma_path.name}_backup_conflict_"
        f"{time.strftime('%Y%m%d_%H%M%S')}_{uuid.uuid4().hex[:8]}"
    )
    backup_path = chroma_path.parent / backup_name

    moved = False  # True only once the old directory really is at backup_path.
    try:
        if chroma_path.exists():
            shutil.move(str(chroma_path), str(backup_path))
            moved = True
        chroma_path.mkdir(parents=True, exist_ok=True)
        repaired = ChromaVectorStore(chroma_path)
    except Exception as repair_error:
        # Tell the operator where the data went if it was already moved.
        backup_note = f"(原目录已备份至: {backup_path})" if moved else ""
        logger.warning(
            f"Chroma 修复失败,降级为 JSON 存储: {repair_error}{backup_note}"
        )
        return None

    if moved:
        logger.warning(
            f"检测到 Chroma 元数据库冲突,已重建目录并保留备份: {backup_path}"
        )
    else:
        # Nothing existed to back up, so do not advertise a backup path.
        logger.warning(
            "检测到 Chroma 元数据库冲突,已重建目录(原目录不存在,未创建备份)"
        )
    return repaired
|
||||
|
||||
@staticmethod
|
||||
def _normalize_embedding(values: List[float], dim: int = 1024) -> List[float]:
|
||||
@@ -463,4 +507,3 @@ class MemorySystem:
|
||||
async def close(self):
    """Shut down the memory system by closing its vector store."""
    store = self.vector_store
    await store.close()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user