Files
agent-skills/sn-image-base/scripts/sn_image_base/vlm/vlm_adapter.py
Hermes Agent ccc63d1e70 first commit
2026-05-10 13:52:46 +08:00

56 lines
2.1 KiB
Python

"""Abstract base class for VLM (Vision Language Model) adapters."""
from __future__ import annotations
from abc import ABC, abstractmethod
class VlmAdapter(ABC):
"""Uniform async interface for a single Vision Language Model backend.
Each concrete adapter wraps one LLM endpoint + model combination and
exposes a single :meth:`vision_completion` coroutine. Synchronous
calling is intentionally **not** supported; callers must run inside an
asyncio event loop.
**Client ownership contract** — when a shared
:class:`httpx.AsyncClient` is supplied at construction time the adapter
*reuses* it and must **not** close it; the caller retains full ownership
of the client's lifecycle. When no external client is provided the
adapter creates and owns an internal client and must close it in
:meth:`aclose`.
"""
@abstractmethod
async def vision_completion(
self,
user_prompt: str,
images: list[str | bytes],
system_prompt: str = "",
model: str | None = None,
) -> str:
"""Send image(s) and a text prompt to the model; return the reply.
Args:
user_prompt: User-facing text instruction.
images: One or more images to pass to the model. Each element
is either a file-path string or raw image bytes.
system_prompt: System-level instruction prepended to the
conversation. Defaults to ''.
model: Model name to use. If None, uses the default set at
initialization.
Returns:
str: Raw text response from the model (may contain JSON or
markdown-wrapped JSON depending on the model and prompt).
"""
@abstractmethod
async def aclose(self) -> None:
"""Release async resources owned by this adapter.
Must be called when the adapter is no longer needed. Adapters that
were given an external shared client must implement this as a no-op;
adapters that created their own internal client must close it here.
"""