first commit
This commit is contained in:
6
sn-image-base/.gitattributes
vendored
Normal file
6
sn-image-base/.gitattributes
vendored
Normal file
@@ -0,0 +1,6 @@
|
||||
*.py text eol=lf encoding=utf-8
|
||||
*.yml text eol=lf encoding=utf-8
*.yaml text eol=lf encoding=utf-8
|
||||
*.toml text eol=lf encoding=utf-8
|
||||
*.json text eol=lf encoding=utf-8
|
||||
*.md text eol=lf encoding=utf-8
*.txt text eol=lf encoding=utf-8
|
||||
*.png binary
*.jpg binary
*.jpeg binary
*.gif binary
*.ico binary
*.tiff binary
*.bmp binary
|
||||
214
sn-image-base/.gitignore
vendored
Normal file
214
sn-image-base/.gitignore
vendored
Normal file
@@ -0,0 +1,214 @@
|
||||
# File created using '.gitignore Generator' for Visual Studio Code: https://bit.ly/vscode-gig
|
||||
# Created by https://www.toptal.com/developers/gitignore/api/visualstudiocode,linux,python
|
||||
# Edit at https://www.toptal.com/developers/gitignore?templates=visualstudiocode,linux,python
|
||||
|
||||
### Linux ###
|
||||
*~
|
||||
|
||||
# temporary files which can be created if a process still has a handle open of a deleted file
|
||||
.fuse_hidden*
|
||||
|
||||
# KDE directory preferences
|
||||
.directory
|
||||
|
||||
# Linux trash folder which might appear on any partition or disk
|
||||
.Trash-*
|
||||
|
||||
# .nfs files are created when an open file is removed but is still being accessed
|
||||
.nfs*
|
||||
|
||||
### Python ###
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
|
||||
# C extensions
|
||||
*.so
|
||||
|
||||
# Distribution / packaging
|
||||
.Python
|
||||
build/
|
||||
develop-eggs/
|
||||
dist/
|
||||
downloads/
|
||||
eggs/
|
||||
.eggs/
|
||||
lib/
|
||||
lib64/
|
||||
parts/
|
||||
sdist/
|
||||
var/
|
||||
wheels/
|
||||
share/python-wheels/
|
||||
*.egg-info/
|
||||
.installed.cfg
|
||||
*.egg
|
||||
MANIFEST
|
||||
|
||||
# PyInstaller
|
||||
# Usually these files are written by a python script from a template
|
||||
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||
*.manifest
|
||||
*.spec
|
||||
|
||||
# Installer logs
|
||||
pip-log.txt
|
||||
pip-delete-this-directory.txt
|
||||
|
||||
# Unit test / coverage reports
|
||||
htmlcov/
|
||||
.tox/
|
||||
.nox/
|
||||
.coverage
|
||||
.coverage.*
|
||||
.cache
|
||||
nosetests.xml
|
||||
coverage.xml
|
||||
*.cover
|
||||
*.py,cover
|
||||
.hypothesis/
|
||||
.pytest_cache/
|
||||
cover/
|
||||
|
||||
# Translations
|
||||
*.mo
|
||||
*.pot
|
||||
|
||||
# Django stuff:
|
||||
*.log
|
||||
local_settings.py
|
||||
db.sqlite3
|
||||
db.sqlite3-journal
|
||||
|
||||
# Flask stuff:
|
||||
instance/
|
||||
.webassets-cache
|
||||
|
||||
# Scrapy stuff:
|
||||
.scrapy
|
||||
|
||||
# Sphinx documentation
|
||||
docs/_build/
|
||||
|
||||
# PyBuilder
|
||||
.pybuilder/
|
||||
target/
|
||||
|
||||
# Jupyter Notebook
|
||||
.ipynb_checkpoints
|
||||
|
||||
# IPython
|
||||
profile_default/
|
||||
ipython_config.py
|
||||
|
||||
# pyenv
|
||||
# For a library or package, you might want to ignore these files since the code is
|
||||
# intended to run in multiple environments; otherwise, check them in:
|
||||
# .python-version
|
||||
|
||||
# pipenv
|
||||
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
||||
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
||||
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
||||
# install all needed dependencies.
|
||||
#Pipfile.lock
|
||||
|
||||
# poetry
|
||||
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
||||
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
||||
# commonly ignored for libraries.
|
||||
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
||||
#poetry.lock
|
||||
|
||||
# pdm
|
||||
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
||||
#pdm.lock
|
||||
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
||||
# in version control.
|
||||
# https://pdm.fming.dev/#use-with-ide
|
||||
.pdm.toml
|
||||
|
||||
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
||||
__pypackages__/
|
||||
|
||||
# Celery stuff
|
||||
celerybeat-schedule
|
||||
celerybeat.pid
|
||||
|
||||
# SageMath parsed files
|
||||
*.sage.py
|
||||
|
||||
# Environments
|
||||
.env
|
||||
.venv
|
||||
env/
|
||||
venv/
|
||||
ENV/
|
||||
env.bak/
|
||||
venv.bak/
|
||||
|
||||
# Spyder project settings
|
||||
.spyderproject
|
||||
.spyproject
|
||||
|
||||
# Rope project settings
|
||||
.ropeproject
|
||||
|
||||
# mkdocs documentation
|
||||
/site
|
||||
|
||||
# mypy
|
||||
.mypy_cache/
|
||||
.dmypy.json
|
||||
dmypy.json
|
||||
|
||||
# Pyre type checker
|
||||
.pyre/
|
||||
|
||||
# pytype static type analyzer
|
||||
.pytype/
|
||||
|
||||
# Cython debug symbols
|
||||
cython_debug/
|
||||
|
||||
# PyCharm
|
||||
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
||||
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
||||
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
||||
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
||||
#.idea/
|
||||
|
||||
### Python Patch ###
|
||||
# Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration
|
||||
poetry.toml
|
||||
|
||||
# ruff
|
||||
.ruff_cache/
|
||||
|
||||
# LSP config files
|
||||
pyrightconfig.json
|
||||
|
||||
### VisualStudioCode ###
|
||||
.vscode/*
|
||||
!.vscode/settings.json
|
||||
!.vscode/tasks.json
|
||||
!.vscode/launch.json
|
||||
!.vscode/extensions.json
|
||||
!.vscode/*.code-snippets
|
||||
|
||||
# Local History for Visual Studio Code
|
||||
.history/
|
||||
|
||||
# Built Visual Studio Code Extensions
|
||||
*.vsix
|
||||
|
||||
### VisualStudioCode Patch ###
|
||||
# Ignore all local history of files
|
||||
.history
|
||||
.ionide
|
||||
|
||||
# End of https://www.toptal.com/developers/gitignore/api/visualstudiocode,linux,python
|
||||
|
||||
# Custom rules (everything added below won't be overridden by 'Generate .gitignore File' if you use 'Update' option)
|
||||
|
||||
240
sn-image-base/README.md
Normal file
240
sn-image-base/README.md
Normal file
@@ -0,0 +1,240 @@
|
||||
# sn-image-base
|
||||
|
||||
The skill for the [SenseNova-Skills](https://github.com/OpenSenseNova/SenseNova-Skills) project, providing low-level APIs for image generation, recognition (VLM), and text optimization (LLM).
|
||||
|
||||
See [SKILL.md](SKILL.md) for full behavior.
|
||||
|
||||
This document describes detailed configurations for the skill.
|
||||
|
||||
For installation and usage, please refer to the project's [README.md](https://github.com/OpenSenseNova/SenseNova-Skills/blob/main/README.md).
|
||||
|
||||
## Overview
|
||||
|
||||
The skill provides the following subcommands:
|
||||
|
||||
- `sn-image-generate`: image generation
|
||||
- `sn-image-recognize`: image recognition (VLM)
|
||||
- `sn-text-optimize`: text optimization (LLM)
|
||||
|
||||
The skill supports the following model services:
|
||||
|
||||
- For image generation:
|
||||
- [SenseNova](https://platform.sensenova.cn/)
|
||||
- Nano Banana API
|
||||
- OpenAI Image Generation API (e.g. GPT-Image-2)
|
||||
|
||||
- For text and vision chat:
|
||||
- [SenseNova](https://platform.sensenova.cn/)
|
||||
- Models via Anthropic Messages API (e.g. Claude Sonnet 4.6)
|
||||
- Models via OpenAI Chat Completion API (e.g. GPT, Gemini, Qwen, and other models served in an OpenAI-compatible API format)
|
||||
|
||||
## Configurations
|
||||
|
||||
### Quick Start
|
||||
|
||||
We recommend trying out the [SenseNova Token Plan](https://platform.sensenova.cn/token-plan).
|
||||
|
||||
Go to <https://platform.sensenova.cn/token-plan/> to register a free account and get your API key for image generation and chat calls.
|
||||
|
||||
Set the following environment variables in `~/.openclaw/.env` (or `~/.hermes/.env` if you are using Hermes):
|
||||
|
||||
```ini
|
||||
# If all capabilities use the same gateway, these two variables are enough.
|
||||
SN_BASE_URL="https://token.sensenova.cn/v1"
|
||||
SN_API_KEY="<sensenova-token-plan-api-key>"
|
||||
|
||||
# Optional model overrides
|
||||
SN_IMAGE_GEN_MODEL="sensenova-u1-fast" # or other image generation models available in the SenseNova Token Plan
|
||||
SN_CHAT_MODEL="sensenova-6.7-flash-lite"
|
||||
```
|
||||
|
||||
### Detailed Configurations
|
||||
|
||||
With the [Quick Start](#quick-start), you can already use this skill.
|
||||
|
||||
If you want to customize the skill further (e.g. use different models or change the base URL), see the following configurations.
|
||||
|
||||
Multiple configuration sources are supported. Their priority, from high to low, is:
|
||||
|
||||
- (Recommended) `~/.openclaw/.env` (for OpenClaw) or `~/.hermes/.env` (for Hermes)
|
||||
- `.env` in the current working directory (may not exist, depending on how the agent runs the skill)
|
||||
- process environment variables
|
||||
|
||||
> For experienced developers, see [configs.py](scripts/sn_image_base/configs.py) for the full list of variables and defaults.
|
||||
>
|
||||
> Helpful symbols for tracing behavior quickly:
|
||||
>
|
||||
> - `prepare_env()` for `.env` loading order
|
||||
> - `Field.resolve()` for env-var fallback order ("first set value wins")
|
||||
> - `Configs` for all defaults and env-name mapping
|
||||
|
||||
#### Image Generation
|
||||
|
||||
Environment variables are resolved as: dedicated variable > domain shared variable > global variable.
|
||||
|
||||
| Capability | API key fallback | Base URL fallback |
|
||||
| ---------- | ---------------- | ----------------- |
|
||||
| Text model | `SN_TEXT_API_KEY` -> `SN_CHAT_API_KEY` -> `SN_API_KEY` | `SN_TEXT_BASE_URL` -> `SN_CHAT_BASE_URL` -> `SN_BASE_URL` |
|
||||
| Vision model | `SN_VISION_API_KEY` -> `SN_CHAT_API_KEY` -> `SN_API_KEY` | `SN_VISION_BASE_URL` -> `SN_CHAT_BASE_URL` -> `SN_BASE_URL` |
|
||||
| Image generation | `SN_IMAGE_GEN_API_KEY` -> `SN_API_KEY` | `SN_IMAGE_GEN_BASE_URL` -> `SN_BASE_URL` |
|
||||
|
||||
Full configuration for image generation:
|
||||
|
||||
| Config Key | Description | Default |
|
||||
| ---------- | ----------- | ------- |
|
||||
| `SN_API_KEY` | Global API key used when capability-specific keys are unset | `""` |
|
||||
| `SN_BASE_URL` | Global base URL used when capability-specific base URLs are unset | `""` |
|
||||
| `SN_IMAGE_GEN_API_KEY` | Optional image-generation-only API key override | `SN_API_KEY` |
|
||||
| `SN_IMAGE_GEN_MODEL_TYPE` | The type of image generation model to use | `"sensenova"` |
|
||||
| `SN_IMAGE_GEN_MODEL` | The name of the image generation model to use | `"sensenova-u1-fast"` |
|
||||
| `SN_IMAGE_GEN_BASE_URL` | The base URL for the image generation API | `SN_BASE_URL`, then `"https://token.sensenova.cn/v1"` |
|
||||
|
||||
The default values are recommended for [SenseNova](https://platform.sensenova.cn/).
|
||||
|
||||
When all capabilities use one gateway, set only `SN_BASE_URL` and `SN_API_KEY`.
|
||||
Set `SN_IMAGE_GEN_*` only when image generation needs a different provider.
|
||||
|
||||
To use non-default image generation models, please:
|
||||
|
||||
1. Set `SN_IMAGE_GEN_MODEL_TYPE` according to the model type. Available values are:
|
||||
|
||||
```ini
|
||||
# (Default) For [SenseNova](https://platform.sensenova.cn/)
|
||||
SN_IMAGE_GEN_MODEL_TYPE="sensenova"
|
||||
# For Google's Nano Banana model API
|
||||
SN_IMAGE_GEN_MODEL_TYPE="nano-banana"
|
||||
# For OpenAI's image generation API
|
||||
SN_IMAGE_GEN_MODEL_TYPE="openai-image"
|
||||
```
|
||||
|
||||
2. Set `SN_IMAGE_GEN_BASE_URL` to the base URL for the image generation API. For example:
|
||||
|
||||
```ini
|
||||
# (Default) For [SenseNova](https://platform.sensenova.cn/)
|
||||
SN_IMAGE_GEN_BASE_URL="https://token.sensenova.cn/v1"
|
||||
# For Google's Nano Banana model API
|
||||
SN_IMAGE_GEN_BASE_URL="https://generativelanguage.googleapis.com"
|
||||
# For OpenAI's image generation API
|
||||
SN_IMAGE_GEN_BASE_URL="https://api.openai.com/v1"
|
||||
```
|
||||
|
||||
3. Set `SN_IMAGE_GEN_MODEL` to the model name provided by the model type. For example:
|
||||
|
||||
```ini
|
||||
# (Default) For [SenseNova](https://platform.sensenova.cn/)
|
||||
SN_IMAGE_GEN_MODEL="sensenova-u1-fast"
|
||||
# For Google's Nano Banana model API
|
||||
SN_IMAGE_GEN_MODEL="gemini-3.1-flash-image-preview"
|
||||
# For OpenAI's image generation API
|
||||
SN_IMAGE_GEN_MODEL="gpt-image-2"
|
||||
```
|
||||
|
||||
4. If image generation uses a different key than the global key, set `SN_IMAGE_GEN_API_KEY`. If `SN_API_KEY` already works for image generation, skip this.
|
||||
|
||||
```ini
|
||||
SN_IMAGE_GEN_API_KEY="sk-your-image-generation-api-key"
|
||||
```
|
||||
|
||||
#### Text and Vision Chat
|
||||
|
||||
##### Configure the shared chat runtime
|
||||
|
||||
Text optimization and image recognition now share one chat runtime. Configure the
|
||||
protocol, endpoint, API key, and default model once, then override text or vision
|
||||
models only when needed:
|
||||
|
||||
| Config Keys | Description | Default |
|
||||
| ----------- | ----------- | ------- |
|
||||
| `SN_CHAT_API_KEY` | API key for text and vision chat calls | `SN_API_KEY` |
|
||||
| `SN_CHAT_BASE_URL` | Shared base URL for the chat API | `SN_BASE_URL`, then `"https://token.sensenova.cn/v1"` |
|
||||
| `SN_CHAT_TYPE` | Shared chat protocol type | `"openai-completions"` |
|
||||
| `SN_CHAT_MODEL` | Shared default model for text and vision chat calls | `"sensenova-6.7-flash-lite"` |
|
||||
| `SN_TEXT_API_KEY` | Optional text-only provider API key | `SN_CHAT_API_KEY` -> `SN_API_KEY` |
|
||||
| `SN_TEXT_BASE_URL` | Optional text-only provider base URL | `SN_CHAT_BASE_URL` -> `SN_BASE_URL` |
|
||||
| `SN_TEXT_TYPE` | Optional text-only protocol type | `SN_CHAT_TYPE` |
|
||||
| `SN_TEXT_MODEL` | Optional model override for `sn-text-optimize` | `SN_CHAT_MODEL` |
|
||||
| `SN_VISION_API_KEY` | Optional vision provider API key | `SN_CHAT_API_KEY` -> `SN_API_KEY` |
|
||||
| `SN_VISION_BASE_URL` | Optional vision provider base URL | `SN_CHAT_BASE_URL` -> `SN_BASE_URL` |
|
||||
| `SN_VISION_TYPE` | Optional vision protocol type | `SN_CHAT_TYPE` |
|
||||
| `SN_VISION_MODEL` | Optional vision-capable model override for `sn-image-recognize` | `SN_CHAT_MODEL` |
|
||||
|
||||
The default values are recommended for [SenseNova](https://platform.sensenova.cn/).
|
||||
|
||||
Configure `SN_TEXT_*` or `SN_VISION_*` only when a command needs a different provider than the shared `SN_CHAT_*` provider.
|
||||
|
||||
For chat calls, the runner also accepts host-only base URLs such as
|
||||
`https://token.sensenova.cn`: if no URL path is present, it appends the API
|
||||
version path before the interface endpoint. Prefer the documented versioned
|
||||
base URL, for example `https://token.sensenova.cn/v1`, for consistency with the
|
||||
built-in defaults.
|
||||
|
||||
To use non-default shared chat settings, please:
|
||||
|
||||
1. Set `SN_CHAT_TYPE` according to the chat API protocol. Available values are:
|
||||
|
||||
```ini
|
||||
# (Default) OpenAI-compatible `/chat/completions` interface (most widely supported)
|
||||
SN_CHAT_TYPE="openai-completions"
|
||||
# Anthropic Messages `/messages` interface
|
||||
SN_CHAT_TYPE="anthropic-messages"
|
||||
```
|
||||
|
||||
2. Set `SN_CHAT_BASE_URL` to the shared chat endpoint base URL. For example:
|
||||
|
||||
```ini
|
||||
# (Default) For [SenseNova](https://platform.sensenova.cn/)
|
||||
SN_CHAT_BASE_URL="https://token.sensenova.cn/v1"
|
||||
# For Anthropic Messages API
|
||||
SN_CHAT_BASE_URL="https://api.anthropic.com/v1"
|
||||
# For OpenAI's chat completion API
|
||||
SN_CHAT_BASE_URL="https://api.openai.com/v1"
|
||||
# For Google Gemini API (OpenAI-compatible)
|
||||
SN_CHAT_BASE_URL="https://generativelanguage.googleapis.com/v1beta/openai/"
|
||||
```
|
||||
|
||||
3. Set `SN_CHAT_MODEL`, or set `SN_TEXT_MODEL` / `SN_VISION_MODEL` only when a command needs a different model:
|
||||
|
||||
```ini
|
||||
# (Default) SenseNova 6.7 Flash Lite
|
||||
SN_CHAT_MODEL="sensenova-6.7-flash-lite"
|
||||
# Anthropic Claude Sonnet 4.6
|
||||
SN_VISION_MODEL="claude-sonnet-4-6"
|
||||
# Google Gemini 3 Flash Preview
|
||||
SN_VISION_MODEL="gemini-3-flash-preview"
|
||||
# OpenAI GPT 5.5
|
||||
SN_TEXT_MODEL="gpt-5.5"
|
||||
```
|
||||
|
||||
4. Set `SN_CHAT_API_KEY` to the API key for the shared chat endpoint, or use global `SN_API_KEY`.
|
||||
|
||||
```ini
|
||||
SN_CHAT_API_KEY="sk-your-api-key"
|
||||
```
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Missing API key
|
||||
|
||||
- Symptom: errors like "required but not set", "missing api key", or request unauthorized.
|
||||
- Fix: set global `SN_API_KEY` when all capabilities use one key. Do not also set `SN_IMAGE_GEN_API_KEY` unless image generation needs a different provider or key. Use `SN_CHAT_API_KEY`, `SN_TEXT_API_KEY`, or `SN_VISION_API_KEY` only when chat/text/vision needs a different provider.
|
||||
|
||||
### Wrong base URL
|
||||
|
||||
- Symptom: request fails immediately, or URL validation/auth endpoint errors.
|
||||
- Fix: verify `SN_BASE_URL` or capability-specific base URLs are full base URLs (with scheme + host), for example `https://token.sensenova.cn/v1`.
|
||||
|
||||
### Unsupported model name
|
||||
|
||||
- Symptom: provider returns HTTP 404 / model-not-found / bad request.
|
||||
- Fix: ensure `*_MODEL_TYPE` / `*_TYPE` and `*_MODEL` are from the same provider, and that the model is available in your account.
|
||||
|
||||
### Auth / permission errors
|
||||
|
||||
- Symptom: HTTP 401/403, "permission denied", "forbidden".
|
||||
- Fix: check whether the key matches the selected provider endpoint, confirm account quotas/permissions, and retry with a known-valid model.
|
||||
|
||||
## Security Notes
|
||||
|
||||
- **Never** commit `.env` files or API keys to git.
|
||||
- If a key is leaked, rotate it immediately and update local env files.
|
||||
- Prefer local secret management (`~/.openclaw/.env` or `~/.hermes/.env`) over hardcoding keys in scripts or prompts.
|
||||
239
sn-image-base/README_CN.md
Normal file
239
sn-image-base/README_CN.md
Normal file
@@ -0,0 +1,239 @@
|
||||
# sn-image-base
|
||||
|
||||
该技能属于 [SenseNova-Skills](https://github.com/OpenSenseNova/SenseNova-Skills) 项目,提供图像生成、图像识别(VLM)和文本优化(LLM)的底层 API 能力。
|
||||
|
||||
完整行为请见 [SKILL.md](SKILL.md)。
|
||||
|
||||
本文档主要介绍该技能的详细配置。
|
||||
|
||||
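概览与技能安装、使用方法请参考项目根目录下的 [README.md](https://github.com/OpenSenseNova/SenseNova-Skills/blob/main/README.md)(中文可见 [README_CN.md](https://github.com/OpenSenseNova/SenseNova-Skills/blob/main/README_CN.md));详细配置以本文档为准。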
|
||||
|
||||
## 概览
|
||||
|
||||
该技能提供以下子命令:
|
||||
|
||||
- `sn-image-generate`:图像生成
|
||||
- `sn-image-recognize`:图像识别(VLM)
|
||||
- `sn-text-optimize`:文本优化(LLM)
|
||||
|
||||
支持的模型服务如下:
|
||||
|
||||
- 图像生成:
|
||||
- [SenseNova](https://platform.sensenova.cn/)
|
||||
- Nano Banana API
|
||||
- OpenAI 图像生成 API(例如 GPT-Image-2)
|
||||
|
||||
- 文本与视觉 Chat:
|
||||
- [SenseNova](https://platform.sensenova.cn/)
|
||||
- 通过 Anthropic Messages API 接入的模型(例如 Claude Sonnet 4.6)
|
||||
- 通过 OpenAI Chat Completion API 接入的模型(例如 GPT、Gemini/Qwen 等 OpenAI 兼容格式模型)
|
||||
|
||||
## 配置
|
||||
|
||||
### 快速开始
|
||||
|
||||
推荐使用 [SenseNova Token Plan](https://platform.sensenova.cn/token-plan)。
|
||||
|
||||
前往 <https://platform.sensenova.cn/token-plan/> 注册免费账号,并获取可用于图像生成和 chat 调用的 API Key。
|
||||
|
||||
将以下环境变量写入 `~/.openclaw/.env`(OpenClaw)或 `~/.hermes/.env`(Hermes):
|
||||
|
||||
```ini
|
||||
# 如果所有能力都走同一个网关,只需要配置这两个变量。
|
||||
SN_BASE_URL="https://token.sensenova.cn/v1"
|
||||
SN_API_KEY="<sensenova-token-plan-api-key>"
|
||||
|
||||
# 可选模型覆盖
|
||||
SN_IMAGE_GEN_MODEL="sensenova-u1-fast" # 或 Token Plan 中可用的其他图像生成模型
|
||||
SN_CHAT_MODEL="sensenova-6.7-flash-lite"
|
||||
```
|
||||
|
||||
**注意:不要将 `.env` 文件或 API key 提交到 git。**
|
||||
|
||||
### 详细配置
|
||||
|
||||
完成 [快速开始](#快速开始) 后即可使用本技能。
|
||||
|
||||
若需更进一步配置(例如使用不同模型、修改 base URL 等),请参考以下内容。
|
||||
|
||||
支持多重配置来源,优先级(从高到低)如下:
|
||||
|
||||
- (推荐)`~/.openclaw/.env`(OpenClaw)或 `~/.hermes/.env`(Hermes)
|
||||
- 当前工作目录 `.env`(不一定存在,取决于 agent 运行技能的方式)
|
||||
- 进程环境变量
|
||||
|
||||
> 进阶开发者可查看 [configs.py](scripts/sn_image_base/configs.py) 获取完整变量与默认值。
|
||||
>
|
||||
> 便于快速追踪行为的关键符号:
|
||||
>
|
||||
> - `prepare_env()`:`.env` 加载顺序
|
||||
> - `Field.resolve()`:环境变量回退顺序(“第一个已设置值优先”)
|
||||
> - `Configs`:默认值与环境变量映射
|
||||
|
||||
#### 图像生成
|
||||
|
||||
环境变量解析优先级为:专用变量 > 领域共享变量 > 全局变量。
|
||||
|
||||
| 能力 | API key fallback | Base URL fallback |
|
||||
| ---- | ---------------- | ----------------- |
|
||||
| 文本模型 | `SN_TEXT_API_KEY` -> `SN_CHAT_API_KEY` -> `SN_API_KEY` | `SN_TEXT_BASE_URL` -> `SN_CHAT_BASE_URL` -> `SN_BASE_URL` |
|
||||
| 视觉模型 | `SN_VISION_API_KEY` -> `SN_CHAT_API_KEY` -> `SN_API_KEY` | `SN_VISION_BASE_URL` -> `SN_CHAT_BASE_URL` -> `SN_BASE_URL` |
|
||||
| 图像生成 | `SN_IMAGE_GEN_API_KEY` -> `SN_API_KEY` | `SN_IMAGE_GEN_BASE_URL` -> `SN_BASE_URL` |
|
||||
|
||||
图像生成完整配置如下:
|
||||
|
||||
| 配置键 | 说明 | 默认值 |
|
||||
| ------ | ---- | ------ |
|
||||
| `SN_API_KEY` | 所有能力共用的全局 API Key | `""` |
|
||||
| `SN_BASE_URL` | 所有能力共用的全局基础 URL | `""` |
|
||||
| `SN_IMAGE_GEN_API_KEY` | 可选的图像生成专用 API key 覆盖 | `SN_API_KEY` |
|
||||
| `SN_IMAGE_GEN_MODEL_TYPE` | 图像生成模型类型 | `"sensenova"` |
|
||||
| `SN_IMAGE_GEN_MODEL` | 图像生成模型名 | `"sensenova-u1-fast"` |
|
||||
| `SN_IMAGE_GEN_BASE_URL` | 图像生成 API 的基础 URL | `SN_BASE_URL`,然后 `"https://token.sensenova.cn/v1"` |
|
||||
|
||||
默认值适用于 [SenseNova](https://platform.sensenova.cn/)。
|
||||
|
||||
如果所有能力走同一个网关,只需要设置 `SN_BASE_URL` 和 `SN_API_KEY`。
|
||||
仅当图像生成需要不同 provider 时,再设置 `SN_IMAGE_GEN_*`。
|
||||
|
||||
如需使用非默认图像生成模型,请按以下步骤:
|
||||
|
||||
1. 设置 `SN_IMAGE_GEN_MODEL_TYPE` 为对应模型类型,可选值如下:
|
||||
|
||||
```ini
|
||||
# (默认)用于 [SenseNova](https://platform.sensenova.cn/)
|
||||
SN_IMAGE_GEN_MODEL_TYPE="sensenova"
|
||||
# 用于 Google Nano Banana 模型 API
|
||||
SN_IMAGE_GEN_MODEL_TYPE="nano-banana"
|
||||
# 用于 OpenAI 图像生成 API
|
||||
SN_IMAGE_GEN_MODEL_TYPE="openai-image"
|
||||
```
|
||||
|
||||
2. 设置 `SN_IMAGE_GEN_BASE_URL` 为图像生成 API 的基础 URL,例如:
|
||||
|
||||
```ini
|
||||
# (默认)用于 [SenseNova](https://platform.sensenova.cn/)
|
||||
SN_IMAGE_GEN_BASE_URL="https://token.sensenova.cn/v1"
|
||||
# 用于 Google Nano Banana 模型 API
|
||||
SN_IMAGE_GEN_BASE_URL="https://generativelanguage.googleapis.com"
|
||||
# 用于 OpenAI 图像生成 API
|
||||
SN_IMAGE_GEN_BASE_URL="https://api.openai.com/v1"
|
||||
```
|
||||
|
||||
3. 设置 `SN_IMAGE_GEN_MODEL` 为对应类型下的模型名,例如:
|
||||
|
||||
```ini
|
||||
# (默认)用于 [SenseNova](https://platform.sensenova.cn/)
|
||||
SN_IMAGE_GEN_MODEL="sensenova-u1-fast"
|
||||
# 用于 Google Nano Banana 模型 API
|
||||
SN_IMAGE_GEN_MODEL="gemini-3.1-flash-image-preview"
|
||||
# 用于 OpenAI 图像生成 API
|
||||
SN_IMAGE_GEN_MODEL="gpt-image-2"
|
||||
```
|
||||
|
||||
4. 如果图像生成使用不同于全局 key 的密钥,再设置 `SN_IMAGE_GEN_API_KEY`。如果 `SN_API_KEY` 已可用于图像生成,则无需设置。
|
||||
|
||||
```ini
|
||||
SN_IMAGE_GEN_API_KEY="sk-your-image-generation-api-key"
|
||||
```
|
||||
|
||||
#### 文本与视觉 Chat
|
||||
|
||||
##### 配置共享 Chat Runtime
|
||||
|
||||
文本优化和图像识别现在共享一套 chat runtime。协议、endpoint、API key 与默认模型配置一次,仅在需要时分别覆盖文本或视觉模型:
|
||||
|
||||
| 配置键 | 说明 | 默认值 |
|
||||
| ------ | ---- | ------ |
|
||||
| `SN_CHAT_API_KEY` | text/vision chat 调用共用 API key | `SN_API_KEY` |
|
||||
| `SN_CHAT_BASE_URL` | 共享 Chat API 基础 URL | `SN_BASE_URL`,然后 `"https://token.sensenova.cn/v1"` |
|
||||
| `SN_CHAT_TYPE` | 共享 Chat 协议类型 | `"openai-completions"` |
|
||||
| `SN_CHAT_MODEL` | text/vision chat 调用共用默认模型 | `"sensenova-6.7-flash-lite"` |
|
||||
| `SN_TEXT_API_KEY` | 可选文本 provider API key | `SN_CHAT_API_KEY` -> `SN_API_KEY` |
|
||||
| `SN_TEXT_BASE_URL` | 可选文本 provider 基础 URL | `SN_CHAT_BASE_URL` -> `SN_BASE_URL` |
|
||||
| `SN_TEXT_TYPE` | 可选文本协议类型 | `SN_CHAT_TYPE` |
|
||||
| `SN_TEXT_MODEL` | 可选的 `sn-text-optimize` 模型覆盖 | `SN_CHAT_MODEL` |
|
||||
| `SN_VISION_API_KEY` | 可选视觉 provider API key | `SN_CHAT_API_KEY` -> `SN_API_KEY` |
|
||||
| `SN_VISION_BASE_URL` | 可选视觉 provider 基础 URL | `SN_CHAT_BASE_URL` -> `SN_BASE_URL` |
|
||||
| `SN_VISION_TYPE` | 可选视觉协议类型 | `SN_CHAT_TYPE` |
|
||||
| `SN_VISION_MODEL` | 可选的 `sn-image-recognize` 视觉模型覆盖 | `SN_CHAT_MODEL` |
|
||||
|
||||
默认值适用于 [SenseNova](https://platform.sensenova.cn/)。
|
||||
|
||||
仅当文本或视觉命令需要使用不同 provider 时,才需要配置 `SN_TEXT_*` 或 `SN_VISION_*`。
|
||||
|
||||
对于 chat 调用,runner 也兼容不带路径的 host-only base URL,例如
|
||||
`https://token.sensenova.cn`:如果 URL 中没有 path,会先补上 API 版本路径再追加具体接口。
|
||||
为保持和内置默认值一致,建议优先使用带版本路径的 base URL,例如
|
||||
`https://token.sensenova.cn/v1`。
|
||||
|
||||
如需使用非默认 chat 设置,请按以下步骤:
|
||||
|
||||
1. 按 chat API 协议设置 `SN_CHAT_TYPE`。可选值如下:
|
||||
|
||||
```ini
|
||||
# (默认)OpenAI 兼容 `/chat/completions` 接口(最常见)
|
||||
SN_CHAT_TYPE="openai-completions"
|
||||
# Anthropic Messages `/messages` 接口
|
||||
SN_CHAT_TYPE="anthropic-messages"
|
||||
```
|
||||
|
||||
2. 将 `SN_CHAT_BASE_URL` 设置为共享 chat endpoint 的基础 URL,例如:
|
||||
|
||||
```ini
|
||||
# (默认)用于 [SenseNova](https://platform.sensenova.cn/)
|
||||
SN_CHAT_BASE_URL="https://token.sensenova.cn/v1"
|
||||
# 用于 Anthropic Messages API
|
||||
SN_CHAT_BASE_URL="https://api.anthropic.com/v1"
|
||||
# 用于 OpenAI Chat Completion API
|
||||
SN_CHAT_BASE_URL="https://api.openai.com/v1"
|
||||
# 用于 Google Gemini API(OpenAI 兼容)
|
||||
SN_CHAT_BASE_URL="https://generativelanguage.googleapis.com/v1beta/openai/"
|
||||
```
|
||||
|
||||
3. 设置 `SN_CHAT_MODEL`,仅在文本或视觉命令需要不同模型时再设置 `SN_TEXT_MODEL` / `SN_VISION_MODEL`:
|
||||
|
||||
```ini
|
||||
# (默认)SenseNova 6.7 Flash Lite
|
||||
SN_CHAT_MODEL="sensenova-6.7-flash-lite"
|
||||
# Anthropic Claude Sonnet 4.6
|
||||
SN_VISION_MODEL="claude-sonnet-4-6"
|
||||
# Google Gemini 3 Flash Preview
|
||||
SN_VISION_MODEL="gemini-3-flash-preview"
|
||||
# OpenAI GPT 5.5
|
||||
SN_TEXT_MODEL="gpt-5.5"
|
||||
```
|
||||
|
||||
4. 设置 `SN_CHAT_API_KEY` 为共享 chat endpoint 的 API key,或使用全局 `SN_API_KEY`:
|
||||
|
||||
```ini
|
||||
SN_CHAT_API_KEY="sk-your-api-key"
|
||||
```
|
||||
|
||||
## 故障排查
|
||||
|
||||
### 缺少 API key
|
||||
|
||||
- 现象:报错包含 "required but not set"、"missing api key" 或请求未授权。
|
||||
- 处理:如果所有能力使用同一个 key,设置全局 `SN_API_KEY` 即可。不要重复设置 `SN_IMAGE_GEN_API_KEY`,除非图像生成需要不同 provider 或 key;仅当 chat/text/vision 需要不同 provider 时,再设置 `SN_CHAT_API_KEY`、`SN_TEXT_API_KEY` 或 `SN_VISION_API_KEY`。
|
||||
|
||||
### base URL 配置错误
|
||||
|
||||
- 现象:请求立即失败,或出现 URL 校验 / endpoint 相关错误。
|
||||
- 处理:检查 `SN_BASE_URL` 或能力专用 base URL 是否为完整基础 URL(包含 scheme + host),例如 `https://token.sensenova.cn/v1`。
|
||||
|
||||
### 模型名不支持
|
||||
|
||||
- 现象:provider 返回 HTTP 404 / model-not-found / bad request。
|
||||
- 处理:确认 `*_MODEL_TYPE` / `*_TYPE` 与 `*_MODEL` 来自同一 provider,且模型在当前账号下可用。
|
||||
|
||||
### 鉴权 / 权限错误
|
||||
|
||||
- 现象:HTTP 401/403、"permission denied"、"forbidden"。
|
||||
- 处理:确认密钥与所选 provider endpoint 匹配,检查账号配额/权限,并使用已知可用模型重试。
|
||||
|
||||
## 安全说明
|
||||
|
||||
- **不要**将 `.env` 文件或 API key 提交到 git。
|
||||
- 若密钥泄露,请立即轮换并更新本地环境变量文件。
|
||||
- 优先使用本地密钥管理(`~/.openclaw/.env` 或 `~/.hermes/.env`),避免在脚本或提示词中硬编码密钥。
|
||||
276
sn-image-base/SKILL.md
Normal file
276
sn-image-base/SKILL.md
Normal file
@@ -0,0 +1,276 @@
|
||||
---
|
||||
name: sn-image-base
|
||||
description: |
|
||||
Base-layer skill for the SenseNova-Skills project, providing low-level APIs for image generation, recognition (VLM), and text optimization (LLM).
|
||||
This skill does not preprocess inputs; it only calls backend services and returns results.
|
||||
This skill is not user-facing and is intended for upper-layer skills only.
|
||||
triggers:
|
||||
- "SenseNova-Skills Image Generation"
|
||||
- "SenseNova-Skills 图像基础工具"
|
||||
- "sn 图像基础工具"
|
||||
- "SenseNova 图像基础工具"
|
||||
- "SenseNova Image Generation"
|
||||
- "sn-image-base"
|
||||
metadata:
|
||||
project: SenseNova-Skills
|
||||
tier: 0
|
||||
category: infrastructure
|
||||
user_visible: false
|
||||
---
|
||||
|
||||
# sn-image-base
|
||||
|
||||
## Dependency Installation
|
||||
|
||||
```bash
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
## Overview
|
||||
|
||||
`sn-image-base` is the base-layer skill (tier 0) of the SenseNova-Skills project and provides three low-level tools:
|
||||
|
||||
- `sn-image-generate`: image generation (calls text-to-image-no-enhance API)
|
||||
- `sn-image-recognize`: image recognition (uses VLM to analyze image content)
|
||||
- `sn-text-optimize`: text optimization (uses LLM to process text)
|
||||
|
||||
This skill **does not perform any input preprocessing** and only calls backend services to return results.
|
||||
|
||||
## Tools List
|
||||
|
||||
### sn-image-generate
|
||||
|
||||
Image generation tool that calls the text-to-image-no-enhance API.
|
||||
|
||||
`--prompt` is required; all other parameters are optional:
|
||||
|
||||
| Parameter | Type | Default | Description |
|
||||
|------|------|--------|------|
|
||||
| `--prompt` | string | **Required** | Prompt text for image generation |
|
||||
| `--negative-prompt` | string | `""` | Negative prompt |
|
||||
| `--image-size` | string | `2k` | Image size preset, supports `2k` only |
|
||||
| `--aspect-ratio` | string | `16:9` | Aspect ratio, e.g. `1:1`, `16:9`, `9:16` |
|
||||
| `--seed` | int | `None` | Random seed for reproducible generation |
|
||||
| `--unet-name` | string | `None` | Specify a UNet model name |
|
||||
| `--api-key` | string | `SN_IMAGE_GEN_API_KEY` -> `SN_API_KEY` | API key (CLI argument has priority; `MissingApiKeyError` is raised when all are empty) |
|
||||
| `--base-url` | string | `SN_IMAGE_GEN_BASE_URL` -> `SN_BASE_URL` | API base URL (CLI argument has priority) |
|
||||
| `--poll-interval` | float | `5.0` | Polling interval (seconds) |
|
||||
| `--timeout` | float | `300.0` | Timeout (seconds) |
|
||||
| `--insecure` | flag | `False` | Disable TLS verification |
|
||||
| `--save-path` | Path | Auto-generated | Save path |
|
||||
|
||||
### sn-image-recognize
|
||||
|
||||
Image recognition tool that uses VLM (Vision Language Model) to analyze image content. Supports multiple image inputs.
|
||||
|
||||
`--images` and `--user-prompt` (or `--user-prompt-path`) are required. All other parameters use three-level defaults (CLI > env var > built-in default):
|
||||
|
||||
| Parameter | Type | Built-in Default | Env Var | Description |
|
||||
|------|------|-----------|---------|------|
|
||||
| `--api-key` | string | No hardcoded default | `SN_VISION_API_KEY` -> `SN_CHAT_API_KEY` -> `SN_API_KEY` | Chat runtime API key; raises `MissingApiKeyError` when all are unset |
|
||||
| `--base-url` | string | `https://token.sensenova.cn/v1` | `SN_VISION_BASE_URL` -> `SN_CHAT_BASE_URL` -> `SN_BASE_URL` | Vision provider base URL; falls back to the shared chat/global provider, then the built-in default |
|
||||
| `--model` | string | `sensenova-6.7-flash-lite` | `SN_VISION_MODEL` -> `SN_CHAT_MODEL` | Vision-capable model name |
|
||||
| `--vlm-type` | string | `openai-completions` | `SN_VISION_TYPE` -> `SN_CHAT_TYPE` | Chat protocol type override |
|
||||
| `--user-prompt-path` | string | `None` | - | Local file path, mutually exclusive with `--user-prompt` |
|
||||
| `--system-prompt-path` | string | `None` | - | Local file path, mutually exclusive with `--system-prompt` |
|
||||
|
||||
Available values for `--vlm-type`:
|
||||
|
||||
- `openai-completions`: OpenAI-compatible `/v1/chat/completions` interface
|
||||
- `anthropic-messages`: Anthropic Messages `/v1/messages` interface
|
||||
|
||||
### sn-text-optimize
|
||||
|
||||
Text optimization tool that uses an LLM (Large Language Model) to optimize text content. Does not accept image inputs.
|
||||
|
||||
`--user-prompt` (or `--user-prompt-path`) is required. All other parameters use three-level defaults (CLI > env var > built-in default):
|
||||
|
||||
| Parameter | Type | Built-in Default | Env Var | Description |
|
||||
|------|------|-----------|---------|------|
|
||||
| `--api-key` | string | No hardcoded default | `SN_TEXT_API_KEY` -> `SN_CHAT_API_KEY` -> `SN_API_KEY` | Chat runtime API key; raises `MissingApiKeyError` when all are unset |
|
||||
| `--base-url` | string | `https://token.sensenova.cn/v1` | `SN_TEXT_BASE_URL` -> `SN_CHAT_BASE_URL` -> `SN_BASE_URL` | Text provider base URL; falls back to the shared chat/global provider, then the built-in default |
|
||||
| `--model` | string | `sensenova-6.7-flash-lite` | `SN_TEXT_MODEL` -> `SN_CHAT_MODEL` | Text model name |
|
||||
| `--llm-type` | string | `openai-completions` | `SN_TEXT_TYPE` -> `SN_CHAT_TYPE` | Chat protocol type override |
|
||||
| `--user-prompt-path` | string | `None` | - | Local file path, mutually exclusive with `--user-prompt` |
|
||||
| `--system-prompt-path` | string | `None` | - | Local file path, mutually exclusive with `--system-prompt` |
|
||||
|
||||
Available values for `--llm-type`:
|
||||
|
||||
- `openai-completions`: OpenAI-compatible `/v1/chat/completions` interface
|
||||
- `anthropic-messages`: Anthropic Messages `/v1/messages` interface
|
||||
|
||||
## VLM vs LLM
|
||||
|
||||
| Tool | Model Type | Image Input | Interface Type Parameter |
|
||||
|------|----------|-----------------|-------------|
|
||||
| `sn-image-recognize` | VLM (Vision Language Model) | Yes, supports multiple images | `--vlm-type` |
|
||||
| `sn-text-optimize` | LLM (Large Language Model) | No, text only | `--llm-type` |
|
||||
|
||||
## Usage
|
||||
|
||||
All tools are called through the unified `sn_agent_runner.py` entrypoint:
|
||||
|
||||
```bash
|
||||
# Image generation (only prompt required; api-key/base-url have defaults)
|
||||
python scripts/sn_agent_runner.py sn-image-generate \
|
||||
--prompt "..."
|
||||
|
||||
# Image generation (override base-url)
|
||||
python scripts/sn_agent_runner.py sn-image-generate \
|
||||
--prompt "..." \
|
||||
--base-url "https://custom-endpoint.com/v1"
|
||||
|
||||
# Image generation (explicitly override api-key)
|
||||
python scripts/sn_agent_runner.py sn-image-generate \
|
||||
--prompt "..." \
|
||||
--api-key "sk-xxx"
|
||||
|
||||
# Image recognition (VLM) - minimal call (uses built-in SenseNova defaults)
|
||||
python scripts/sn_agent_runner.py sn-image-recognize \
|
||||
--user-prompt "Describe the image" \
|
||||
--images "path/to/image.png"
|
||||
|
||||
# Image recognition (VLM) - override to Anthropic Claude API compatible (messages interface)
|
||||
python scripts/sn_agent_runner.py sn-image-recognize \
|
||||
--user-prompt "Describe the image" \
|
||||
--images "path/to/image.png" \
|
||||
--api-key "sk-ant-xxx" \
|
||||
--base-url "https://api.anthropic.com" \
|
||||
--model "claude-sonnet-4-6" \
|
||||
--vlm-type "anthropic-messages"
|
||||
|
||||
# Text optimization (LLM) - minimal call (uses built-in SenseNova defaults)
|
||||
python scripts/sn_agent_runner.py sn-text-optimize \
|
||||
--user-prompt "Optimize the text: ..."
|
||||
|
||||
# Text optimization (LLM) - override to Anthropic Claude API compatible (messages interface)
|
||||
python scripts/sn_agent_runner.py sn-text-optimize \
|
||||
--user-prompt "Optimize the text: ..." \
|
||||
--api-key "sk-ant-xxx" \
|
||||
--base-url "https://api.anthropic.com" \
|
||||
--model "claude-sonnet-4-6" \
|
||||
--llm-type "anthropic-messages"
|
||||
```
|
||||
|
||||
### Default Parameter Behavior
|
||||
|
||||
Authentication parameters for `sn-image-generate` have the following default behavior:
|
||||
|
||||
| Parameter | Default | Override | Description |
|
||||
|------|--------|----------|------|
|
||||
| `--base-url` | `SN_IMAGE_GEN_BASE_URL` -> `SN_BASE_URL` | `--base-url "..."` | CLI argument has priority |
|
||||
| `--api-key` | `SN_IMAGE_GEN_API_KEY` -> `SN_API_KEY` | `--api-key "..."` | CLI argument has priority; raises `MissingApiKeyError` if all values are empty |
|
||||
|
||||
`sn-image-recognize` and `sn-text-optimize` use priority: **CLI argument > command-specific env var > shared `SN_CHAT_*` env var > global `SN_*` env var > built-in default**.
|
||||
|
||||
| Parameter | Built-in Default | Vision Env Var | Text Env Var |
|
||||
|------|-----------|-------------|-------------|
|
||||
| `--api-key` | None (must be provided) | `SN_VISION_API_KEY` -> `SN_CHAT_API_KEY` -> `SN_API_KEY` | `SN_TEXT_API_KEY` -> `SN_CHAT_API_KEY` -> `SN_API_KEY` |
|
||||
| `--base-url` | `https://token.sensenova.cn/v1` | `SN_VISION_BASE_URL` -> `SN_CHAT_BASE_URL` -> `SN_BASE_URL` | `SN_TEXT_BASE_URL` -> `SN_CHAT_BASE_URL` -> `SN_BASE_URL` |
|
||||
| `--model` | `sensenova-6.7-flash-lite` | `SN_VISION_MODEL` -> `SN_CHAT_MODEL` | `SN_TEXT_MODEL` -> `SN_CHAT_MODEL` |
|
||||
| `--vlm-type` / `--llm-type` | `openai-completions` | `SN_VISION_TYPE` -> `SN_CHAT_TYPE` | `SN_TEXT_TYPE` -> `SN_CHAT_TYPE` |
|
||||
|
||||
`api_key` resolution order (high to low): CLI `--api-key` > command-specific key (`SN_VISION_API_KEY`/`SN_TEXT_API_KEY`) > `SN_CHAT_API_KEY` > `SN_API_KEY`. If all are unset, `MissingApiKeyError` is raised.
|
||||
|
||||
Only `--api-key` must be provided via CLI or environment; base URL, model, and interface type have shared chat defaults.
|
||||
|
||||
## Agent Configuration Integration
|
||||
|
||||
The agent can automatically read parameters from `openclaw.json` without manual input:
|
||||
|
||||
| CLI Parameter | openclaw.json Field | Example |
|
||||
|-----------|-------------------|--------|
|
||||
| `--base-url` | `providers.<name>.baseUrl` | `https://api.anthropic.com` |
|
||||
| `--llm-type` | `providers.<name>.api` | `anthropic-messages` / `openai-completions` |
|
||||
| `--vlm-type` | `providers.<name>.api` | `anthropic-messages` / `openai-completions` |
|
||||
| `--model` | `providers.<name>.models[].id` | `claude-sonnet-4-6` |
|
||||
| `--api-key` | `providers.<name>.apiKey` or env var | `sk-cp-...` |
|
||||
|
||||
Note: `--llm-type` and `--vlm-type` share the same `providers.<name>.api` field and are used by LLM and VLM tools respectively.
|
||||
|
||||
Mapping between `provider.api` and interface type:
|
||||
|
||||
| api Value | Corresponding `--llm-type` / `--vlm-type` | Endpoint Path |
|
||||
|--------|----------------------------------|---------------|
|
||||
| `anthropic-messages` | `anthropic-messages` | `/v1/messages` |
|
||||
| `openai-completions` | `openai-completions` | `/v1/chat/completions` |
|
||||
| `openai-responses` | (future extension) | `/responses` |
|
||||
|
||||
## Mapping Between base-url and Interface Type
|
||||
|
||||
Different API types have different requirements for base-url format:
|
||||
|
||||
| Type | `--llm-type` / `--vlm-type` | Recommended base-url | Code Appended Path | Final URL Example |
|
||||
|------|------------------------------|---------------|--------------|---------------|
|
||||
| LLM | `openai-completions` | `https://token.sensenova.cn/v1` | `/chat/completions` | `https://token.sensenova.cn/v1/chat/completions` |
|
||||
| LLM | `anthropic-messages` | `https://api.anthropic.com/v1` | `/messages` | `https://api.anthropic.com/v1/messages` |
|
||||
| VLM | `openai-completions` | `https://token.sensenova.cn/v1` | `/chat/completions` | `https://token.sensenova.cn/v1/chat/completions` |
|
||||
| VLM | `anthropic-messages` | `https://api.anthropic.com/v1` | `/messages` | `https://api.anthropic.com/v1/messages` |
|
||||
|
||||
**Note**:
|
||||
|
||||
- Recommended chat base URLs include the provider API version path, for example `/v1`.
|
||||
- For compatibility, if the configured chat base URL has no path, the runner appends `/v1/chat/completions` or `/v1/messages`.
|
||||
- If the configured chat base URL already has a path such as `/v1`, the runner appends only `/chat/completions` or `/messages`.
|
||||
- Some providers use versioned paths other than `/v1`, such as Gemini's `/v1beta/openai`.
|
||||
|
||||
## Output Format
|
||||
|
||||
All tools support two output formats:
|
||||
|
||||
- `--output-format text` (default): outputs plain text result
|
||||
- `--output-format json`: outputs JSON, including `status` and `elapsed_seconds` (runtime in seconds, rounded to 2 decimals)
|
||||
|
||||
JSON output for `sn-image-recognize` and `sn-text-optimize` also includes `model`, `base_url`, and `interface_type` to verify the effective runtime configuration:
|
||||
|
||||
```json
|
||||
{
|
||||
"status": "ok",
|
||||
"result": "...",
|
||||
"model": "sensenova-6.7-flash-lite",
|
||||
"base_url": "https://token.sensenova.cn/v1",
|
||||
"interface_type": "openai-completions",
|
||||
"elapsed_seconds": 1.23
|
||||
}
|
||||
```
|
||||
|
||||
On failure:
|
||||
|
||||
```json
|
||||
{
|
||||
"status": "failed",
|
||||
"error": "error message",
|
||||
"elapsed_seconds": 0.05
|
||||
}
|
||||
```
|
||||
|
||||
## Input/Output Specification
|
||||
|
||||
See `references/api_spec.md` for details.
|
||||
|
||||
|
||||
---
|
||||
|
||||
> ⚠️ **厂商绑定**:此 skill 绑定 SenseNova 专用 API(图像生成、识别、文本优化),无法替换为其他模型。如果 SenseNova 不再免费或无 plan,此 skill 将不可用。
|
||||
>
|
||||
**依赖**: SN_API_KEY (SenseNova 平台 API key), Pillow (`~/.hermes/hermes-agent/venv/bin/pip3 install Pillow`)
|
||||
**配置参考**: `references/sensenova-config.md`
|
||||
**可替代方案**: comfyui (本地图像生成) + mmx vision (图像理解)
|
||||
|
||||
## Pitfalls
|
||||
|
||||
### Pillow 依赖未安装
|
||||
**Symptom**: `ModuleNotFoundError: No module named 'PIL'`
|
||||
**Root cause**: sn-image-generate 使用 PIL 处理图像,但系统 Python 或 venv 中未安装 Pillow。
|
||||
**Fix**: `pip install Pillow`(如果使用 hermes-agent 的 venv,需要用 `~/.hermes/hermes-agent/venv/bin/pip3 install Pillow`)。
|
||||
**Note**: hermes-agent 的 Python 路径是 `~/.hermes/hermes-agent/venv/bin/python3`,不是系统 python3。
|
||||
|
||||
### API 限流策略
|
||||
SenseNova 的限流是按 **5 小时窗口**计算,不是按分钟:
|
||||
- sensenova-6.7-flash-lite: 1500 次/5小时
|
||||
- sensenova-u1-fast: 1500 次/5小时
|
||||
- deepseek-v4-flash: 150 次/5小时(最严)
|
||||
|
||||
### Base URL
|
||||
所有 SenseNova 模型统一使用: `https://token.sensenova.cn/v1`
|
||||
|
||||
291
sn-image-base/references/api_spec.md
Normal file
291
sn-image-base/references/api_spec.md
Normal file
@@ -0,0 +1,291 @@
|
||||
# sn-image-base API Specification
|
||||
|
||||
## Table of Contents
|
||||
|
||||
- [sn-image-generate](#sn-image-generate)
|
||||
- [sn-image-recognize](#sn-image-recognize)
|
||||
- [sn-text-optimize](#sn-text-optimize)
|
||||
- [Error Handling](#error-handling)
|
||||
|
||||
---
|
||||
|
||||
## sn-image-generate
|
||||
|
||||
Image generation tool that calls the configured image generation backend.
|
||||
|
||||
### Command Format
|
||||
|
||||
```bash
|
||||
python sn_agent_runner.py sn-image-generate \
|
||||
--prompt <string> \
|
||||
[--api-key <string>] \
|
||||
[--base-url <string>] \
|
||||
[--negative-prompt <string>] \
|
||||
[--image-size 2k] \
|
||||
[--aspect-ratio <string>] \
|
||||
[--seed <int>] \
|
||||
[--unet-name <string>] \
|
||||
[--poll-interval <float>] \
|
||||
[--timeout <float>] \
|
||||
[--insecure] \
|
||||
[--output-format text|json] \
|
||||
[--save-path <path>]
|
||||
```
|
||||
|
||||
### Parameters
|
||||
|
||||
| Parameter | Type | Required | Default | Description |
|
||||
|-----------|------|----------|---------|-------------|
|
||||
| `--prompt` | string | **Yes** | - | Text prompt |
|
||||
| `--api-key` | string | No | `SN_IMAGE_GEN_API_KEY` -> `SN_API_KEY` | API Key (CLI takes precedence; raises `MissingApiKeyError` if all are empty) |
|
||||
| `--base-url` | string | No | `SN_IMAGE_GEN_BASE_URL` -> `SN_BASE_URL` | API base URL (CLI takes precedence) |
|
||||
| `--negative-prompt` | string | No | `""` | Negative prompt |
|
||||
| `--image-size` | string | No | `"2k"` | Image size: `2k` only |
|
||||
| `--aspect-ratio` | string | No | `"16:9"` | Aspect ratio |
|
||||
| `--seed` | int | No | `None` | Random seed (for reproducibility) |
|
||||
| `--unet-name` | string | No | `None` | UNet model name |
|
||||
| `--poll-interval` | float | No | `5.0` | Polling interval in seconds |
|
||||
| `--timeout` | float | No | `300.0` | Timeout in seconds |
|
||||
| `--insecure` | flag | No | `False` | Disable TLS verification |
|
||||
| `--output-format` | string | No | `"text"` | Output format: `text` or `json` |
|
||||
| `--save-path` | path | No | Auto-generated | Output image path |
|
||||
|
||||
### Aspect Ratio Options
|
||||
|
||||
`2:3`, `3:2`, `3:4`, `4:3`, `4:5`, `5:4`, `1:1`, `16:9`, `9:16`, `21:9`, `9:21`
|
||||
|
||||
### Output Path
|
||||
|
||||
Default output: `/tmp/openclaw-sn-image/t2i_<timestamp>.png`
|
||||
|
||||
### Response Examples
|
||||
|
||||
**text format**:
|
||||
|
||||
```
|
||||
Image generated successfully
|
||||
/tmp/openclaw-sn-image/t2i_20260414_120000.png
|
||||
```
|
||||
|
||||
**json format**:
|
||||
|
||||
```json
|
||||
{
|
||||
"status": "ok",
|
||||
"output": "/tmp/openclaw-sn-image/t2i_20260414_120000.png",
|
||||
"task_id": "task_xxx",
|
||||
"message": "Image generated successfully",
|
||||
"elapsed_seconds": 1.23
|
||||
}
|
||||
```
|
||||
|
||||
### API Key Notes
|
||||
|
||||
`--api-key` is optional. CLI parameter takes precedence; if not provided, reads `SN_IMAGE_GEN_API_KEY` -> `SN_API_KEY`. If all are empty, raises `MissingApiKeyError`:
|
||||
|
||||
**text format**:
|
||||
|
||||
```
|
||||
Error: API key is required but was not provided. Set SN_API_KEY, or set SN_IMAGE_GEN_API_KEY only for an image-generation-specific override, or pass --api-key explicitly.
|
||||
```
|
||||
|
||||
**json format**:
|
||||
|
||||
```json
|
||||
{"status": "failed", "error": "API key is required but was not provided. Set SN_API_KEY, or set SN_IMAGE_GEN_API_KEY only for an image-generation-specific override, or pass --api-key explicitly.", "elapsed_seconds": 0.05}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## sn-image-recognize
|
||||
|
||||
Image recognition tool that uses a VLM (Vision Language Model) to analyze image content.
|
||||
|
||||
### Command Format
|
||||
|
||||
```bash
|
||||
python sn_agent_runner.py sn-image-recognize \
|
||||
(--user-prompt <string> | --user-prompt-path <path>) \
|
||||
--images <string> [<string> ...] \
|
||||
[--api-key <string>] \
|
||||
[--base-url <string>] \
|
||||
[--model <string>] \
|
||||
[--system-prompt <string>] \
|
||||
[--system-prompt-path <path>] \
|
||||
[--vlm-type openai-completions|anthropic-messages] \
|
||||
[--output-format text|json]
|
||||
```
|
||||
|
||||
### Parameters
|
||||
|
||||
| Parameter | Type | Required | Default | Description |
|
||||
|-----------|------|----------|---------|-------------|
|
||||
| `--user-prompt` | string | One of two | - | User instruction (mutually exclusive with `--user-prompt-path`) |
|
||||
| `--user-prompt-path` | path | One of two | - | Local file path to read user instruction from (mutually exclusive with `--user-prompt`) |
|
||||
| `--images` | string[] | **Yes** | - | List of image paths (supports multiple) |
|
||||
| `--api-key` | string | No | No hardcoded default | CLI > `SN_VISION_API_KEY` > `SN_CHAT_API_KEY` > `SN_API_KEY`; raises `MissingApiKeyError` if all are empty |
|
||||
| `--base-url` | string | No | `https://token.sensenova.cn/v1` | CLI > `SN_VISION_BASE_URL` > `SN_CHAT_BASE_URL` > `SN_BASE_URL` |
|
||||
| `--model` | string | No | `sensenova-6.7-flash-lite` | CLI > `SN_VISION_MODEL` > `SN_CHAT_MODEL` |
|
||||
| `--system-prompt` | string | No | `""` | System instruction (mutually exclusive with `--system-prompt-path`) |
|
||||
| `--system-prompt-path` | path | No | - | Local file path to read system instruction from (mutually exclusive with `--system-prompt`) |
|
||||
| `--vlm-type` | string | No | `openai-completions` | CLI > `SN_VISION_TYPE` > `SN_CHAT_TYPE` |
|
||||
| `--output-format` | string | No | `"text"` | Output format: `text` or `json` |
|
||||
|
||||
`--vlm-type` options:
|
||||
|
||||
- `openai-completions`: OpenAI-compatible `/v1/chat/completions` endpoint
|
||||
- `anthropic-messages`: Anthropic Messages `/v1/messages` endpoint
|
||||
|
||||
### Response Examples
|
||||
|
||||
**text format**:
|
||||
|
||||
```
|
||||
This image shows an adorable orange cat napping in the sunlight.
|
||||
```
|
||||
|
||||
**json format**:
|
||||
|
||||
```json
|
||||
{
|
||||
"status": "ok",
|
||||
"result": "This image shows an adorable orange cat napping in the sunlight.",
|
||||
"model": "sensenova-6.7-flash-lite",
|
||||
"base_url": "https://token.sensenova.cn/v1",
|
||||
"interface_type": "openai-completions",
|
||||
"elapsed_seconds": 2.15
|
||||
}
|
||||
```
|
||||
|
||||
### Parameter Priority
|
||||
|
||||
`--api-key`, `--base-url`, `--model`, and `--vlm-type` use priority: **CLI parameter > command-specific environment variable > shared `SN_CHAT_*` environment variable > global `SN_*` environment variable > built-in default**.
|
||||
|
||||
| Parameter | Built-in Default | Environment Variable |
|
||||
|-----------|-----------------|---------------------|
|
||||
| `--api-key` | None (required) | `SN_VISION_API_KEY` -> `SN_CHAT_API_KEY` -> `SN_API_KEY` |
|
||||
| `--base-url` | `https://token.sensenova.cn/v1` | `SN_VISION_BASE_URL` -> `SN_CHAT_BASE_URL` -> `SN_BASE_URL` |
|
||||
| `--model` | `sensenova-6.7-flash-lite` | `SN_VISION_MODEL` -> `SN_CHAT_MODEL` |
|
||||
| `--vlm-type` | `openai-completions` | `SN_VISION_TYPE` -> `SN_CHAT_TYPE` |
|
||||
|
||||
Compatibility note: host-only chat base URLs such as `https://token.sensenova.cn`
|
||||
are also accepted. If the base URL has no path, the runner inserts `/v1` before
|
||||
the interface endpoint; if it already has a path such as `/v1`, the runner
|
||||
appends only the interface endpoint path.
|
||||
|
||||
---
|
||||
|
||||
## sn-text-optimize
|
||||
|
||||
Text optimization tool that uses an LLM (Large Language Model) to optimize text content.
|
||||
|
||||
### Command Format
|
||||
|
||||
```bash
|
||||
python sn_agent_runner.py sn-text-optimize \
|
||||
(--user-prompt <string> | --user-prompt-path <path>) \
|
||||
[--api-key <string>] \
|
||||
[--base-url <string>] \
|
||||
[--model <string>] \
|
||||
[--system-prompt <string>] \
|
||||
[--system-prompt-path <path>] \
|
||||
[--llm-type openai-completions|anthropic-messages] \
|
||||
[--output-format text|json]
|
||||
```
|
||||
|
||||
### Parameters
|
||||
|
||||
| Parameter | Type | Required | Default | Description |
|
||||
|-----------|------|----------|---------|-------------|
|
||||
| `--user-prompt` | string | One of two | - | User instruction (mutually exclusive with `--user-prompt-path`) |
|
||||
| `--user-prompt-path` | path | One of two | - | Local file path to read user instruction from (mutually exclusive with `--user-prompt`) |
|
||||
| `--api-key` | string | No | No hardcoded default | CLI > `SN_TEXT_API_KEY` > `SN_CHAT_API_KEY` > `SN_API_KEY`; raises `MissingApiKeyError` if all are empty |
|
||||
| `--base-url` | string | No | `https://token.sensenova.cn/v1` | CLI > `SN_TEXT_BASE_URL` > `SN_CHAT_BASE_URL` > `SN_BASE_URL` |
|
||||
| `--model` | string | No | `sensenova-6.7-flash-lite` | CLI > `SN_TEXT_MODEL` > `SN_CHAT_MODEL` |
|
||||
| `--system-prompt` | string | No | `""` | System instruction (mutually exclusive with `--system-prompt-path`) |
|
||||
| `--system-prompt-path` | path | No | - | Local file path to read system instruction from (mutually exclusive with `--system-prompt`) |
|
||||
| `--llm-type` | string | No | `openai-completions` | CLI > `SN_TEXT_TYPE` > `SN_CHAT_TYPE` |
|
||||
| `--output-format` | string | No | `"text"` | Output format: `text` or `json` |
|
||||
|
||||
`--llm-type` options:
|
||||
|
||||
- `openai-completions`: OpenAI-compatible `/v1/chat/completions` endpoint
|
||||
- `anthropic-messages`: Anthropic Messages `/v1/messages` endpoint
|
||||
|
||||
### Response Examples
|
||||
|
||||
**text format**:
|
||||
|
||||
```
|
||||
Optimized text content...
|
||||
```
|
||||
|
||||
**json format**:
|
||||
|
||||
```json
|
||||
{
|
||||
"status": "ok",
|
||||
"result": "Optimized text content...",
|
||||
"model": "sensenova-6.7-flash-lite",
|
||||
"base_url": "https://token.sensenova.cn/v1",
|
||||
"interface_type": "openai-completions",
|
||||
"elapsed_seconds": 0.83
|
||||
}
|
||||
```
|
||||
|
||||
### Parameter Priority
|
||||
|
||||
`--api-key`, `--base-url`, `--model`, and `--llm-type` use priority: **CLI parameter > command-specific environment variable > shared `SN_CHAT_*` environment variable > global `SN_*` environment variable > built-in default**.
|
||||
|
||||
| Parameter | Built-in Default | Environment Variable |
|
||||
|-----------|-----------------|---------------------|
|
||||
| `--api-key` | None (required) | `SN_TEXT_API_KEY` -> `SN_CHAT_API_KEY` -> `SN_API_KEY` |
|
||||
| `--base-url` | `https://token.sensenova.cn/v1` | `SN_TEXT_BASE_URL` -> `SN_CHAT_BASE_URL` -> `SN_BASE_URL` |
|
||||
| `--model` | `sensenova-6.7-flash-lite` | `SN_TEXT_MODEL` -> `SN_CHAT_MODEL` |
|
||||
| `--llm-type` | `openai-completions` | `SN_TEXT_TYPE` -> `SN_CHAT_TYPE` |
|
||||
|
||||
Compatibility note: host-only chat base URLs such as `https://token.sensenova.cn`
|
||||
are also accepted. If the base URL has no path, the runner inserts `/v1` before
|
||||
the interface endpoint; if it already has a path such as `/v1`, the runner
|
||||
appends only the interface endpoint path.
|
||||
|
||||
---
|
||||
|
||||
## Error Handling
|
||||
|
||||
### Error Types
|
||||
|
||||
| Type | Source | Trigger | Output Format |
|
||||
|------|--------|---------|---------------|
|
||||
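| `MissingApiKeyError` | Custom business exception | No API key resolved from `--api-key` or the tool's environment variables (applies to `sn-image-generate`, `sn-image-recognize`, and `sn-text-optimize`) | text: `Error: ...` / json: `{"status": "failed", "error": "..."}` |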
|
||||
| `ValueError` (prompt) | `_resolve_prompt` | `--user-prompt` and `--user-prompt-path` both provided, neither provided, or file read failure | text: `Error: ...` / json: `{"status": "failed", "error": "..."}` |
|
||||
| argparse missing param | argparse standard error | Missing required parameters for `sn-image-recognize`/`sn-text-optimize` | `usage: ...` + exit 2 |
|
||||
| HTTP error | httpx request layer | API returns non-2xx status code | `{"status": "failed", "error": "HTTP NNN", "message": "..."}` |
|
||||
| Request exception | httpx request layer | Network error, timeout, etc. | `{"status": "failed", "error": "<ExceptionType>", "message": "..."}` |
|
||||
|
||||
### text format
|
||||
|
||||
Error messages are written to stderr and do not affect stdout content.
|
||||
|
||||
### json format
|
||||
|
||||
```json
|
||||
{
|
||||
"status": "failed",
|
||||
"error": "error type",
|
||||
"message": "detailed error message",
|
||||
"elapsed_seconds": 0.05
|
||||
}
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## API Key Environment Variables
|
||||
|
||||
| Tool | Environment Variables (high → low priority) | Notes |
|
||||
|------|---------------------------------------------|-------|
|
||||
| `sn-image-generate` | `SN_IMAGE_GEN_API_KEY` -> `SN_API_KEY` | CLI > optional image generation key > global key; raises `MissingApiKeyError` if all are empty |
|
||||
| `sn-image-recognize` | `SN_VISION_API_KEY` -> `SN_CHAT_API_KEY` -> `SN_API_KEY` | CLI > command-specific key > shared chat key > global key; raises `MissingApiKeyError` if all are empty |
|
||||
| `sn-text-optimize` | `SN_TEXT_API_KEY` -> `SN_CHAT_API_KEY` -> `SN_API_KEY` | CLI > command-specific key > shared chat key > global key; raises `MissingApiKeyError` if all are empty |
|
||||
|
||||
`SN_API_KEY` is the global key for all capabilities. `SN_CHAT_API_KEY` is the shared key for both text and vision chat calls. Use command-specific keys only when a command needs a different provider.
|
||||
58
sn-image-base/references/sensenova-config.md
Normal file
58
sn-image-base/references/sensenova-config.md
Normal file
@@ -0,0 +1,58 @@
|
||||
# SenseNova Skills 配置参考
|
||||
|
||||
## 安装日期
|
||||
2026-05-09
|
||||
|
||||
## API 配置
|
||||
|
||||
| 项目 | 值 |
|
||||
|------|-----|
|
||||
| Base URL | `https://token.sensenova.cn/v1` |
|
||||
| API Key | `SN_API_KEY` (保存在 `~/.hermes/.env`) |
|
||||
| 协议 | OpenAI 兼容 |
|
||||
|
||||
## 模型列表
|
||||
|
||||
| 模型 | 用途 | 调用限制 |
|
||||
|------|------|----------|
|
||||
| `sensenova-6.7-flash-lite` | 多模态智能体(文本+图像理解+工具调用) | 每 5 小时 1500 次 |
|
||||
| `sensenova-u1-fast` | 信息图/图像生成专用 | 每 5 小时 1500 次 |
|
||||
| `deepseek-v4-flash` | DeepSeek 高性能对话 | 每 5 小时 150 次 |
|
||||
|
||||
## 已安装 Skills(13个)
|
||||
|
||||
### 🔗 绑定 SenseNova(2个)
|
||||
- `sn-image-base` — 文生图、图像识别、文本优化
|
||||
- `sn-infographic` — 信息图生成(87种布局/66种风格)
|
||||
|
||||
### 🔄 可自由替换模型(11个)
|
||||
- 深度研究系列(6个):`sn-deep-research`, `sn-research-planning`, `sn-dimension-research`, `sn-research-synthesis`, `sn-research-report`, `sn-report-format-discovery`
|
||||
- 搜索系列(4个):`sn-search-academic`, `sn-search-code`, `sn-search-social-cn`, `sn-search-social-en`
|
||||
- `sn-md-to-html-report` — Markdown 转 HTML 阅读视图
|
||||
|
||||
## 依赖安装
|
||||
|
||||
```bash
|
||||
# Pillow (sn-image-base 需要)
|
||||
~/.hermes/hermes-agent/venv/bin/pip3 install Pillow
|
||||
```
|
||||
|
||||
## 测试结果(2026-05-09)
|
||||
|
||||
| Skill | 状态 | 备注 |
|
||||
|-------|------|------|
|
||||
| sensenova-6.7-flash-lite | ✅ | 文本对话正常 |
|
||||
| deepseek-v4-flash | ✅ | 文本对话正常 |
|
||||
| sn-image-generate (u1-fast) | ✅ | 图像生成正常,质量中等偏上 |
|
||||
| sn-search-academic | ✅ | ArXiv 搜索正常 |
|
||||
| sn-search-social-cn | ✅ | B站搜索正常 |
|
||||
| sn-search-code | ✅ | GitHub 搜索正常 |
|
||||
| sn-md-to-html-report | ✅ | HTML 转换正常 |
|
||||
|
||||
## 注意事项
|
||||
|
||||
1. **限流策略**:按 5 小时窗口限流,不是按分钟
|
||||
2. **DeepSeek 限流最严**:只有 150 次/5小时,是其他模型的 1/10
|
||||
3. **图像生成质量**:中等偏上,中文文字是伪汉字(通病),细节不如 Midjourney V6
|
||||
4. **生图依赖**:需要 Pillow 库,安装到 hermes-agent 的 venv
|
||||
5. **Chrome sandbox 问题**:容器/VM 环境中需要 `--no-sandbox` 参数
|
||||
3
sn-image-base/requirements.txt
Normal file
3
sn-image-base/requirements.txt
Normal file
@@ -0,0 +1,3 @@
|
||||
httpx>=0.25.0
|
||||
pillow>=10.0.0
|
||||
python-dotenv>=1.0.0
|
||||
1
sn-image-base/scripts/.python-version
Normal file
1
sn-image-base/scripts/.python-version
Normal file
@@ -0,0 +1 @@
|
||||
3.9
|
||||
14
sn-image-base/scripts/pyproject.toml
Normal file
14
sn-image-base/scripts/pyproject.toml
Normal file
@@ -0,0 +1,14 @@
|
||||
[project]
|
||||
name = "sn-image-base"
|
||||
version = "0.1.0"
|
||||
description = "Base-layer skill for the SenseNova-Skills project, providing low-level APIs."
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.9"
|
||||
dependencies = []
|
||||
|
||||
[build-system]
|
||||
requires = ["uv_build>=0.11.7,<0.12"]
|
||||
build-backend = "uv_build"
|
||||
|
||||
[tool.uv.build-backend]
|
||||
module-root = "scripts"
|
||||
31
sn-image-base/scripts/ruff.toml
Normal file
31
sn-image-base/scripts/ruff.toml
Normal file
@@ -0,0 +1,31 @@
|
||||
line-length = 100
|
||||
# Assume the minimum python version
|
||||
target-version = 'py39'
|
||||
fix = true
|
||||
|
||||
############################
|
||||
# lint rules
|
||||
############################
|
||||
[lint]
|
||||
select = [
|
||||
"E", # pycodestyle: Error
|
||||
"W", # pycodestyle: Warning
|
||||
"F", # Pyflakes
|
||||
"PLE", # pylint: Error
|
||||
"B", # flake8-bugbear
|
||||
"TC", # flake8-type-checking
|
||||
"I", # isort
|
||||
"C4", # flake8-comprehensions
|
||||
"UP", # pyupgrade
|
||||
"ARG001", # unused arguments in functions
|
||||
"PERF", # Perflint
|
||||
"RUF", # Ruff-specific rules
|
||||
]
|
||||
unfixable = [
|
||||
"F401", # unused imports
|
||||
"F841", # unused variables
|
||||
]
|
||||
ignore = ["E501", "RUF067"]
|
||||
|
||||
[lint.per-file-ignores]
|
||||
"sn_agent_runner.py" = ["E402"]
|
||||
579
sn-image-base/scripts/sn_agent_runner.py
Normal file
579
sn-image-base/scripts/sn_agent_runner.py
Normal file
@@ -0,0 +1,579 @@
|
||||
"""OpenClaw unified runner for sn-image-base skills.
|
||||
|
||||
All tools are invoked as async coroutines and executed via asyncio.run().
|
||||
|
||||
Usage:
|
||||
python sn_agent_runner.py sn-image-generate --prompt "..."
|
||||
python sn_agent_runner.py sn-image-recognize --user-prompt "..." --images "..." --api-key "..." --base-url "..." --model "..."
|
||||
python sn_agent_runner.py sn-text-optimize --user-prompt "..." --api-key "..." --base-url "..." --model "..."
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import asyncio
|
||||
import json
|
||||
import sys
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import cast
|
||||
|
||||
SCRIPT_DIR = Path(__file__).resolve().parent
|
||||
if (d := str(SCRIPT_DIR)) not in sys.path:
|
||||
sys.path.insert(0, d)
|
||||
|
||||
from sn_image_base.configs import global_configs, is_valid_base_url, urlparse
|
||||
from sn_image_base.exceptions import (
|
||||
BadConfigurationError,
|
||||
InvalidBaseUrlError,
|
||||
MissingApiKeyError,
|
||||
U1BaseError,
|
||||
)
|
||||
from sn_image_base.generation import (
|
||||
NanoBananaText2ImageClient,
|
||||
OpenAIImageGenerationClient,
|
||||
SensenovaText2ImageClient,
|
||||
)
|
||||
from sn_image_base.llm import AnthropicMessagesAdapter, OpenAIChatAdapter
|
||||
|
||||
|
||||
def _resolve_prompt(
    direct: str | None,
    path: str | None,
    required: bool,
    name: str,
) -> str:
    """Resolve a prompt value from either a direct string or a file path.

    Args:
        direct: Prompt text passed inline on the command line, or None.
        path: Path to a UTF-8 text file containing the prompt, or None.
        required: When True, at least one of ``direct`` / ``path`` must be non-empty.
        name: Option name without leading dashes (e.g. ``"user-prompt"``); only
            used to build error messages.

    Returns:
        The file contents when ``path`` is given, otherwise ``direct``
        (or ``""`` when ``direct`` is None/empty).

    Raises:
        ValueError: If both sources are given, if a required prompt is missing,
            or if the file cannot be read or decoded as UTF-8.
    """
    if direct is not None and path is not None:
        raise ValueError(
            f"Cannot use both --{name} and --{name}-path; they are mutually exclusive."
        )
    if required and not direct and not path:
        raise ValueError(f"--{name} or --{name}-path is required.")
    if path is None:
        return direct or ""
    try:
        with open(path, encoding="utf-8") as f:
            return f.read()
    except (OSError, UnicodeDecodeError) as exc:
        # UnicodeDecodeError is not an OSError; catch it explicitly so that a
        # non-UTF-8 file yields the same contextual message as an unreadable one.
        raise ValueError(f"Failed to read {name} from file {path}: {exc}") from exc
|
||||
|
||||
|
||||
def _add_prompt_options(sub: argparse.ArgumentParser) -> None:
    """Register the inline and file-based prompt options for a chat subcommand."""
    for role, inline_help in (
        ("user", "User-facing text instruction"),
        ("system", "System-level instruction"),
    ):
        sub.add_argument(f"--{role}-prompt", default=None, help=inline_help)
        sub.add_argument(
            f"--{role}-prompt-path",
            default=None,
            help=(
                f"Path to a local file containing the {role} prompt "
                f"(mutually exclusive with --{role}-prompt)"
            ),
        )


def _add_chat_runtime_options(
    sub: argparse.ArgumentParser, *, env_prefix: str, model_label: str, type_flag: str
) -> None:
    """Register the credential, endpoint, model, protocol and output options."""
    sub.add_argument(
        "--api-key",
        default=None,
        help=f"API key (CLI > {env_prefix}_API_KEY > SN_CHAT_API_KEY > SN_API_KEY)",
    )
    sub.add_argument(
        "--base-url",
        default=None,
        help=f"API base URL (CLI > {env_prefix}_BASE_URL > SN_CHAT_BASE_URL > SN_BASE_URL)",
    )
    sub.add_argument(
        "--model",
        default=None,
        help=f"{model_label} model name (CLI > {env_prefix}_MODEL > SN_CHAT_MODEL)",
    )
    sub.add_argument(
        type_flag,
        default=None,
        choices=["openai-completions", "anthropic-messages"],
        help=f"Chat protocol type override (CLI > {env_prefix}_TYPE > SN_CHAT_TYPE)",
    )
    sub.add_argument("-o", "--output-format", choices=["text", "json"], default="text")


def build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for the unified runner.

    Returns:
        argparse.ArgumentParser:
            Parser whose required ``command`` subcommand is one of
            ``sn-image-generate``, ``sn-image-recognize`` or ``sn-text-optimize``.
    """
    root = argparse.ArgumentParser(
        description="sn-image-base unified runner - async tool execution."
    )
    commands = root.add_subparsers(dest="command", required=True)

    # sn-image-generate
    generate = commands.add_parser("sn-image-generate", help="Generate image from text prompt")
    generate.add_argument("--prompt", required=True, help="Text prompt for image generation")
    generate.add_argument("--negative-prompt", default="", help="Negative prompt")
    generate.add_argument("--image-size", default="2k", choices=["2k"], help="Image size preset")
    aspect_ratios = [
        "2:3", "3:2", "3:4", "4:3", "4:5", "5:4",
        "1:1", "16:9", "9:16", "21:9", "9:21",
    ]
    generate.add_argument(
        "--aspect-ratio", default="16:9", choices=aspect_ratios, help="Aspect ratio"
    )
    generate.add_argument("--seed", type=int, default=None, help="Random seed")
    generate.add_argument("--unet-name", dest="unet_name", default=None, help="UNet model name")
    generate.add_argument(
        "--api-key",
        default="",
        help="API key (CLI > SN_IMAGE_GEN_API_KEY > SN_API_KEY)",
    )
    generate.add_argument(
        "--base-url",
        default="",
        help="API base URL (CLI > SN_IMAGE_GEN_BASE_URL > SN_BASE_URL)",
    )
    generate.add_argument("--poll-interval", type=float, default=5.0)
    generate.add_argument("--timeout", type=float, default=300.0)
    generate.add_argument("--insecure", action="store_true", help="Disable TLS verification")
    generate.add_argument("-o", "--output-format", choices=["text", "json"], default="text")
    generate.add_argument("--save-path", type=Path, default=None)

    # sn-image-recognize (VLM)
    recognize = commands.add_parser(
        "sn-image-recognize", help="Recognize image content using VLM"
    )
    _add_prompt_options(recognize)
    recognize.add_argument("--images", required=True, nargs="+", help="Image file paths or URLs")
    _add_chat_runtime_options(
        recognize, env_prefix="SN_VISION", model_label="Vision", type_flag="--vlm-type"
    )

    # sn-text-optimize (LLM)
    optimize = commands.add_parser("sn-text-optimize", help="Optimize text using LLM")
    _add_prompt_options(optimize)
    _add_chat_runtime_options(
        optimize, env_prefix="SN_TEXT", model_label="Text", type_flag="--llm-type"
    )

    return root
|
||||
|
||||
|
||||
async def run_image_generate(args: argparse.Namespace) -> tuple[dict, int]:
    """Run the ``sn-image-generate`` command using the configured image backend.

    The API key and base URL come from the CLI first and fall back to
    ``global_configs``; the backend type and model name are read from
    ``global_configs`` only.

    Args:
        args: Parsed command-line arguments of the ``sn-image-generate`` subcommand.

    Returns:
        tuple[dict, int]:
            A (result_dict, exit_code) pair. ``result_dict`` is whatever the
            client's ``generate`` returns; ``exit_code`` is 0 when its
            ``status`` is ``"ok"`` and 1 otherwise.

    Raises:
        MissingApiKeyError: No API key on the CLI or in the configuration.
        InvalidBaseUrlError: No base URL available, or it lacks a scheme/host.
        BadConfigurationError: Unsupported backend type or missing model name.
    """
    api_key = args.api_key or global_configs.SN_IMAGE_GEN_API_KEY
    if not api_key:
        raise MissingApiKeyError(global_configs.get_env_var_help("SN_IMAGE_GEN_API_KEY"))

    base_url = args.base_url or global_configs.SN_IMAGE_GEN_BASE_URL
    if not base_url:
        raise InvalidBaseUrlError(
            "No base URL provided. "
            f"{global_configs.get_env_var_help('SN_IMAGE_GEN_BASE_URL')} "
            "Or pass --base-url."
        )
    # Same early validation the chat runtimes apply, so a malformed URL is
    # reported as a configuration problem instead of failing inside the client.
    if not is_valid_base_url(base_url):
        raise InvalidBaseUrlError(f"Invalid base URL: {base_url}")

    model_type = global_configs.SN_IMAGE_GEN_MODEL_TYPE
    model = global_configs.SN_IMAGE_GEN_MODEL
    if model_type not in ("sensenova", "nano-banana", "openai-image"):
        supported_types = "sensenova, nano-banana, openai-image"
        raise BadConfigurationError(
            f"Unsupported SN_IMAGE_GEN_MODEL_TYPE {model_type!r}. "
            f"Supported values: {supported_types}."
        )
    if not model:
        env_var_help = global_configs.get_env_var_help("SN_IMAGE_GEN_MODEL")
        raise BadConfigurationError(f"No model provided. {env_var_help}")

    # NOTE(review): --poll-interval is accepted by the parser but is not
    # forwarded to any client here; confirm whether the clients should get it.
    if model_type == "sensenova":
        client = SensenovaText2ImageClient(
            api_key=api_key,
            base_url=base_url,
            model=model,
            timeout=args.timeout,
            ssl_verify=not args.insecure,
        )
        print(f"Using SenseNova model {model!r} for image generation")
    elif model_type == "nano-banana":
        client = NanoBananaText2ImageClient(
            api_key=api_key,
            base_url=base_url,
            model=model,
            timeout=args.timeout,
            ssl_verify=not args.insecure,
        )
        print(f"Using Nano Banana model {model!r} for image generation")
    else:
        # The OpenAI-compatible client is built without timeout / ssl_verify,
        # exactly as before.
        client = OpenAIImageGenerationClient(
            api_key=api_key,
            base_url=base_url,
            model=model,
        )
        print(f"Using OpenAI-compatible model {model!r} for image generation")

    try:
        result = await client.generate(
            prompt=args.prompt,
            negative_prompt=args.negative_prompt,
            image_size=args.image_size,
            aspect_ratio=args.aspect_ratio,
            seed=args.seed,
            unet_name=args.unet_name,
            output_path=args.save_path,
        )
        return result, 0 if result["status"] == "ok" else 1
    finally:
        # Always release the client's resources, even when generation fails.
        await client.aclose()
|
||||
|
||||
|
||||
async def run_image_recognize(args: argparse.Namespace) -> tuple[dict, int]:
    """Run the ``sn-image-recognize`` command through a vision-capable chat adapter.

    Args:
        args: Parsed command-line arguments of the ``sn-image-recognize`` subcommand.

    Returns:
        tuple[dict, int]:
            ``({"status": "ok", "result", "model", "base_url", "interface_type"}, 0)``
            on success, or ``({"status": "failed", "error"}, 1)`` when the
            adapter call raises.
    """
    # The user prompt is resolved first so that its errors are reported first.
    user_prompt = _resolve_prompt(
        args.user_prompt, args.user_prompt_path, required=True, name="user-prompt"
    )
    system_prompt = _resolve_prompt(
        args.system_prompt, args.system_prompt_path, required=False, name="system-prompt"
    )

    interface, endpoint_base, model_name, key = _resolve_model_runtime("vlm", args)
    vlm = cast(
        "AnthropicMessagesAdapter | OpenAIChatAdapter",
        _build_endpoint_and_adapter("vlm", interface, endpoint_base, model_name, key),
    )
    try:
        answer = await vlm.vision_completion(
            user_prompt=user_prompt,
            images=args.images,
            system_prompt=system_prompt,
            model=model_name,
        )
    except Exception as exc:
        # Any adapter failure is turned into a structured "failed" result.
        return {"status": "failed", "error": str(exc)}, 1
    else:
        payload = {
            "status": "ok",
            "result": answer,
            "model": model_name,
            "base_url": endpoint_base,
            "interface_type": interface,
        }
        return payload, 0
    finally:
        await vlm.aclose()
|
||||
|
||||
|
||||
async def run_text_optimize(args: argparse.Namespace) -> tuple[dict, int]:
    """Run the ``sn-text-optimize`` command through a text chat adapter.

    Args:
        args: Parsed command-line arguments of the ``sn-text-optimize`` subcommand.

    Returns:
        tuple[dict, int]:
            ``({"status": "ok", "result", "model", "base_url", "interface_type"}, 0)``
            on success, or ``({"status": "failed", "error"}, 1)`` when the
            adapter call raises.
    """
    # The user prompt is resolved first so that its errors are reported first.
    user_prompt = _resolve_prompt(
        args.user_prompt, args.user_prompt_path, required=True, name="user-prompt"
    )
    system_prompt = _resolve_prompt(
        args.system_prompt, args.system_prompt_path, required=False, name="system-prompt"
    )

    interface, endpoint_base, model_name, key = _resolve_model_runtime("llm", args)
    llm = cast(
        "AnthropicMessagesAdapter | OpenAIChatAdapter",
        _build_endpoint_and_adapter("llm", interface, endpoint_base, model_name, key),
    )
    try:
        answer = await llm.text_completion(
            user_prompt=user_prompt,
            system_prompt=system_prompt,
            model=model_name,
        )
    except Exception as exc:
        # Any adapter failure is turned into a structured "failed" result.
        return {"status": "failed", "error": str(exc)}, 1
    else:
        payload = {
            "status": "ok",
            "result": answer,
            "model": model_name,
            "base_url": endpoint_base,
            "interface_type": interface,
        }
        return payload, 0
    finally:
        await llm.aclose()
|
||||
|
||||
|
||||
def _runtime_profile(type_arg: str, prefix: str, label: str) -> dict[str, str]:
    """Build the lookup table for one chat runtime.

    ``prefix`` is the command-specific config prefix (``SN_VISION`` / ``SN_TEXT``);
    the ``*_config`` entries name attributes of ``global_configs`` and the
    ``*_env`` entries are the human-readable hints used in error messages.
    """
    return {
        "type_arg": type_arg,
        "type_config": f"{prefix}_TYPE",
        "base_url_config": f"{prefix}_BASE_URL",
        "model_config": f"{prefix}_MODEL",
        "api_key_config": f"{prefix}_API_KEY",
        "label": label,
        "key_env": f"{prefix}_API_KEY, SN_CHAT_API_KEY, or SN_API_KEY",
        "url_env": f"{prefix}_BASE_URL, SN_CHAT_BASE_URL, or SN_BASE_URL",
        "model_env": f"{prefix}_MODEL or SN_CHAT_MODEL",
        "type_env": f"{prefix}_TYPE or SN_CHAT_TYPE",
    }


# Per-runtime settings consumed by _resolve_model_runtime, keyed by runtime kind.
RUNTIME_PROFILES = {
    "vlm": _runtime_profile("vlm_type", "SN_VISION", "vision"),
    "llm": _runtime_profile("llm_type", "SN_TEXT", "text"),
}
|
||||
|
||||
|
||||
def _first_non_empty(*values: str | None) -> str:
    """Return the first truthy value among ``values``, or ``""`` if there is none."""
    for candidate in values:
        if candidate:
            return candidate
    return ""
|
||||
|
||||
|
||||
def _resolve_model_runtime(kind: str, args: argparse.Namespace) -> tuple[str, str, str, str]:
    """Resolve and validate model runtime settings for a text or vision command.

    Each setting takes the first non-empty value of: the CLI argument, the
    command-specific configuration (``SN_VISION_*`` / ``SN_TEXT_*``), then the
    shared ``SN_CHAT_*`` configuration. The interface type finally defaults to
    ``"openai-completions"``.

    Args:
        kind: Runtime kind, a key of ``RUNTIME_PROFILES`` (``"vlm"`` or ``"llm"``).
        args: Parsed arguments providing ``api_key``, ``base_url``, ``model`` and
            the profile's type attribute (``vlm_type`` / ``llm_type``).

    Returns:
        tuple[str, str, str, str]:
            (interface_type, base_url, model, api_key).

    Raises:
        ValueError: ``kind`` is not a known runtime.
        MissingApiKeyError: No API key could be resolved.
        InvalidBaseUrlError: No base URL could be resolved, or it is malformed.
        BadConfigurationError: No model name could be resolved.
    """
    profile = RUNTIME_PROFILES.get(kind)
    if profile is None:
        raise ValueError(f"Unsupported runtime kind: {kind}")

    iface_type = _first_non_empty(
        getattr(args, profile["type_arg"]),
        getattr(global_configs, profile["type_config"]),
        global_configs.SN_CHAT_TYPE,
        "openai-completions",
    )
    base_url = _first_non_empty(
        args.base_url,
        getattr(global_configs, profile["base_url_config"]),
        global_configs.SN_CHAT_BASE_URL,
    )
    model = _first_non_empty(
        args.model,
        getattr(global_configs, profile["model_config"]),
        # Shared chat model as the last resort, matching the documented
        # "CLI > SN_*_MODEL > SN_CHAT_MODEL" order and the other settings.
        global_configs.SN_CHAT_MODEL,
    )
    api_key = _first_non_empty(
        args.api_key,
        getattr(global_configs, profile["api_key_config"]),
        global_configs.SN_CHAT_API_KEY,
    )
    label = profile["label"]

    if not api_key:
        raise MissingApiKeyError(
            f"No API key provided for {label} chat runtime. Set {profile['key_env']}, or pass --api-key."
        )
    if not base_url:
        raise InvalidBaseUrlError(
            f"No base URL provided for {label} chat runtime. Set {profile['url_env']}, or pass --base-url."
        )
    if not is_valid_base_url(base_url):
        raise InvalidBaseUrlError(f"Invalid base URL: {base_url}")
    if not model:
        raise BadConfigurationError(
            f"No model provided for {label} chat runtime. Set {profile['model_env']} or pass --model."
        )
    return iface_type, base_url, model, api_key
|
||||
|
||||
|
||||
def _build_endpoint_and_adapter(
    kind: str, iface_type: str, base_url: str, model: str, api_key: str
):
    """Derive the endpoint URL and create the adapter for ``iface_type``.

    ``"anthropic-messages"`` selects ``AnthropicMessagesAdapter``; every other
    value selects ``OpenAIChatAdapter``. When the base URL (trailing slashes
    removed) has no path, ``/v1`` is inserted before the endpoint suffix.

    Raises:
        ValueError: ``kind`` is neither ``"vlm"`` nor ``"llm"``.
    """
    parsed = urlparse(base_url.rstrip("/"))
    use_anthropic = iface_type == "anthropic-messages"

    version_prefix = "" if parsed.path else "/v1"
    suffix = "/messages" if use_anthropic else "/chat/completions"
    endpoint_url = f"{parsed.geturl()}{version_prefix}{suffix}"

    if kind not in {"vlm", "llm"}:
        raise ValueError(f"Unsupported runtime kind: {kind}")

    if use_anthropic:
        adapter_cls, title = AnthropicMessagesAdapter, "Anthropic Messages"
    else:
        adapter_cls, title = OpenAIChatAdapter, "OpenAI Chat"
    adapter = adapter_cls(endpoint_url=endpoint_url, api_key=api_key, model=model)
    print(f"Using {title} adapter for {kind.upper()} {model!r} on {endpoint_url!r}")
    return adapter
|
||||
|
||||
|
||||
def _output_result(output_format: str, result: dict, elapsed: float | None = None) -> int:
    """Print the result in the specified format and return the matching exit code.

    Args:
        output_format: ``"json"`` prints the whole dict as one JSON line on
            stdout; any other value uses the text layout.
        result: Result dictionary, normally carrying a ``status`` key
            ("ok" or "failed"). It is modified in place when ``elapsed`` is given.
        elapsed: Optional elapsed time in seconds; stored in ``result`` as
            ``elapsed_seconds`` when provided.

    Returns:
        int: Exit code (0 if status is "ok", 1 otherwise).
    """
    if elapsed is not None:
        result["elapsed_seconds"] = elapsed

    succeeded = result.get("status") == "ok"
    if output_format == "json":
        print(json.dumps(result, ensure_ascii=False))
    elif succeeded:
        if result.get("message"):
            print(result["message"])
        # text-optimize/image-recognize use "result", image-generate uses "output"
        print(result.get("result") or result.get("output") or "")
    else:
        # A failed result may carry "message", "error" or neither; never let
        # error reporting itself crash with a KeyError.
        print(
            result.get("message") or result.get("error") or "Unknown error",
            file=sys.stderr,
        )
    return 0 if succeeded else 1
|
||||
|
||||
|
||||
async def main_async(args: argparse.Namespace) -> int:
    """Dispatch to the appropriate command handler and report its result.

    Args:
        args: Parsed command-line arguments from any subcommand; ``command``
            selects the handler and ``output_format`` the reporting style.

    Returns:
        int: Exit code (0 on success, 1 on failure).
    """
    handlers = {
        "sn-image-generate": run_image_generate,
        "sn-image-recognize": run_image_recognize,
        "sn-text-optimize": run_text_optimize,
    }
    # perf_counter is monotonic, so the reported duration cannot be skewed by
    # wall-clock adjustments while the command runs.
    started = time.perf_counter()
    try:
        handler = handlers.get(args.command)
        if handler is None:
            print(f"Unknown command: {args.command}", file=sys.stderr)
            return 1

        # The handler's own exit code is ignored; _output_result derives it
        # from the result's status.
        result, _ = await handler(args)
        elapsed = round(time.perf_counter() - started, 2)
        return _output_result(args.output_format, result, elapsed)

    except (U1BaseError, ValueError) as exc:
        # Configuration errors and prompt-resolution errors are reported the same way.
        elapsed = round(time.perf_counter() - started, 2)
        if args.output_format == "json":
            print(
                json.dumps(
                    {"status": "failed", "error": str(exc), "elapsed_seconds": elapsed},
                    ensure_ascii=False,
                )
            )
        else:
            print(f"Error: {exc}", file=sys.stderr)
        return 1
|
||||
|
||||
|
||||
def main() -> int:
    """Parse the command line and run the selected command to completion.

    Returns:
        int: Exit code produced by ``main_async``.
    """
    cli_args = build_parser().parse_args()
    return asyncio.run(main_async(cli_args))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
raise SystemExit(main())
|
||||
1
sn-image-base/scripts/sn_image_base/__init__.py
Normal file
1
sn-image-base/scripts/sn_image_base/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
# sn-image-base scripts
|
||||
313
sn-image-base/scripts/sn_image_base/configs.py
Normal file
313
sn-image-base/scripts/sn_image_base/configs.py
Normal file
@@ -0,0 +1,313 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import contextlib
|
||||
import os
|
||||
import warnings
|
||||
from pathlib import Path
|
||||
from typing import Annotated, Literal, get_args, get_origin, get_type_hints
|
||||
from urllib.parse import urlparse
|
||||
|
||||
SCRIPT_DIR = Path(__file__).absolute().parent
|
||||
# "skills" directory that contains "sn-*" skills (e.g. "sn-image-base", "sn-infographic", etc.)
|
||||
SKILLS_DIR = SCRIPT_DIR.parents[1]
|
||||
|
||||
|
||||
def prepare_env() -> None:
    """Load ``.env`` files into the process environment.

    Precedence, highest first:

    1. ``.env`` in the agent's config directory
       (``~/.openclaw/.env`` when ``OPENCLAW_SHELL`` is set, otherwise
       ``~/.hermes/.env``);
    2. ``.env`` found from the current working directory (depends on how the
       agent runs the skill);
    3. variables already present in the environment.

    If python-dotenv is not installed a warning is issued and nothing is loaded.
    """
    try:
        from dotenv import load_dotenv
    except ImportError:
        warnings.warn("python-dotenv is not installed, `.env` files will be ignored", stacklevel=2)
        return

    # Files are loaded from lowest to highest priority; each load uses
    # override=True so later files win over earlier ones and over pre-existing
    # environment variables.
    load_dotenv(override=True)

    config_dir = "~/.openclaw" if "OPENCLAW_SHELL" in os.environ else "~/.hermes"
    agent_env_file = Path(config_dir).expanduser() / ".env"
    if agent_env_file.exists():
        load_dotenv(agent_env_file, override=True)
|
||||
|
||||
|
||||
prepare_env()
|
||||
|
||||
|
||||
class Field:
    """Annotation metadata tying a config attribute to its environment variables.

    The variable names are consulted in the order given; the first one that is
    present in ``os.environ`` (even with an empty value) supplies the value.
    """

    __slots__ = ("env_names", "required", "secret")

    def __init__(self, *env_names: str, required: bool = False, secret: bool = False) -> None:
        # None (rather than an empty tuple) marks a field with no env vars.
        self.env_names: tuple[str, ...] | None = tuple(env_names) if env_names else None
        self.required = required
        self.secret = secret

    def resolve(self, target_type: type | None = None) -> str | int | float | None:
        """Look up the field's value in the environment.

        Args:
            target_type: ``int`` or ``float`` to convert the raw string;
                anything else (``str``, a ``Literal``, None) returns it as-is.

        Returns:
            The value of the first env var that exists, converted as requested,
            or None when the field has no env vars or none of them exists.
        """
        if not self.env_names:
            return None
        raw = next((os.environ[name] for name in self.env_names if name in os.environ), None)
        if raw is None:
            return None
        if target_type is int:
            return int(raw)
        if target_type is float:
            return float(raw)
        # Other types (Literal, etc.) keep the raw string.
        return raw
|
||||
|
||||
|
||||
class Configs:
|
||||
"""Central registry of env var names and built-in defaults.
|
||||
|
||||
Fields annotated with ``Annotated[str, EnvVar(...)]`` are resolved in
|
||||
``__init__``: env vars are tried in order; if none is set, the class-level
|
||||
default is kept.
|
||||
"""
|
||||
|
||||
# global defaults shared by all SN capabilities.
|
||||
SN_API_KEY: Annotated[str, Field("SN_API_KEY", secret=True)] = ""
|
||||
SN_BASE_URL: Annotated[str, Field("SN_BASE_URL")] = ""
|
||||
|
||||
# image-generate
|
||||
SN_IMAGE_GEN_API_KEY: Annotated[
|
||||
str, Field("SN_IMAGE_GEN_API_KEY", "SN_API_KEY", required=True, secret=True)
|
||||
] = ""
|
||||
SN_IMAGE_GEN_BASE_URL: Annotated[
|
||||
str, Field("SN_IMAGE_GEN_BASE_URL", "SN_BASE_URL", required=True)
|
||||
] = "https://token.sensenova.cn/v1"
|
||||
SN_IMAGE_GEN_MODEL_TYPE: Annotated[
|
||||
Literal["sensenova", "nano-banana", "openai-image"], Field("SN_IMAGE_GEN_MODEL_TYPE")
|
||||
] = "sensenova"
|
||||
SN_IMAGE_GEN_MODEL: Annotated[str, Field("SN_IMAGE_GEN_MODEL")] = "sensenova-u1-fast"
|
||||
|
||||
# chat runtime shared by text and vision commands; command-specific
|
||||
# SN_TEXT_* / SN_VISION_* values override these defaults.
|
||||
SN_CHAT_API_KEY: Annotated[str, Field("SN_CHAT_API_KEY", "SN_API_KEY", secret=True)] = ""
|
||||
SN_CHAT_BASE_URL: Annotated[str, Field("SN_CHAT_BASE_URL", "SN_BASE_URL")] = (
|
||||
"https://token.sensenova.cn/v1"
|
||||
)
|
||||
SN_CHAT_TYPE: Annotated[
|
||||
Literal["anthropic-messages", "openai-completions"], Field("SN_CHAT_TYPE")
|
||||
] = "openai-completions"
|
||||
SN_CHAT_MODEL: Annotated[str, Field("SN_CHAT_MODEL")] = "sensenova-6.7-flash-lite"
|
||||
SN_TEXT_API_KEY: Annotated[
|
||||
str, Field("SN_TEXT_API_KEY", "SN_CHAT_API_KEY", "SN_API_KEY", secret=True)
|
||||
] = ""
|
||||
SN_TEXT_BASE_URL: Annotated[
|
||||
str, Field("SN_TEXT_BASE_URL", "SN_CHAT_BASE_URL", "SN_BASE_URL")
|
||||
] = ""
|
||||
SN_TEXT_TYPE: Annotated[
|
||||
Literal["anthropic-messages", "openai-completions"],
|
||||
Field("SN_TEXT_TYPE", "SN_CHAT_TYPE"),
|
||||
] = ""
|
||||
SN_TEXT_MODEL: Annotated[str, Field("SN_TEXT_MODEL", "SN_CHAT_MODEL")] = (
|
||||
"sensenova-6.7-flash-lite"
|
||||
)
|
||||
SN_VISION_API_KEY: Annotated[
|
||||
str, Field("SN_VISION_API_KEY", "SN_CHAT_API_KEY", "SN_API_KEY", secret=True)
|
||||
] = ""
|
||||
SN_VISION_BASE_URL: Annotated[
|
||||
str, Field("SN_VISION_BASE_URL", "SN_CHAT_BASE_URL", "SN_BASE_URL")
|
||||
] = ""
|
||||
SN_VISION_TYPE: Annotated[
|
||||
Literal["anthropic-messages", "openai-completions"],
|
||||
Field("SN_VISION_TYPE", "SN_CHAT_TYPE"),
|
||||
] = ""
|
||||
SN_VISION_MODEL: Annotated[str, Field("SN_VISION_MODEL", "SN_CHAT_MODEL")] = (
|
||||
"sensenova-6.7-flash-lite"
|
||||
)
|
||||
|
||||
def __init__(self) -> None:
|
||||
for field, hint in get_type_hints(type(self), include_extras=True).items():
|
||||
env_var = next((a for a in get_args(hint) if isinstance(a, Field)), None)
|
||||
if env_var is None:
|
||||
continue
|
||||
# Extract the actual type (unwrap Annotated, handle Literal)
|
||||
origin = get_origin(hint)
|
||||
actual_type = get_args(hint)[0] if origin is Annotated else hint
|
||||
if (val := env_var.resolve(actual_type)) is not None:
|
||||
setattr(self, field, val)
|
||||
|
||||
def to_string(self, mask_secrets: bool = True) -> str:
|
||||
rows = []
|
||||
for field_name, hint in get_type_hints(type(self), include_extras=True).items():
|
||||
field = next((a for a in get_args(hint) if isinstance(a, Field)), None)
|
||||
value = getattr(self, field_name, None)
|
||||
v = str(value)
|
||||
if mask_secrets and v and field and field.secret:
|
||||
if len(v) > 10:
|
||||
v = f"{v[:6]}{'*' * (len(v) - 10)}{v[-4:]}"
|
||||
elif len(v) > 4:
|
||||
v = f"{v[:4]}{'*' * (len(v) - 4)}"
|
||||
else:
|
||||
v = "*" * len(v)
|
||||
rows.append(f"{field_name}: {v}")
|
||||
return "\n".join(rows)
|
||||
|
||||
def validate_configs(self) -> tuple[list[tuple[str, str]], list[tuple[str, str]]]:
|
||||
field_env_names: dict[str, tuple[str, ...] | str] = {}
|
||||
errors: list[tuple[str, str]] = []
|
||||
for field_name, hint in get_type_hints(type(self), include_extras=True).items():
|
||||
field = next((a for a in get_args(hint) if isinstance(a, Field)), None)
|
||||
if field is None:
|
||||
continue
|
||||
if env_names := field.env_names:
|
||||
if len(env_names) > 1:
|
||||
field_env_names[field_name] = env_names
|
||||
elif len(env_names) == 1:
|
||||
field_env_names[field_name] = env_names[0]
|
||||
value = getattr(self, field_name, None)
|
||||
if not value:
|
||||
if field.required:
|
||||
if field_name == "SN_IMAGE_GEN_API_KEY":
|
||||
msg = (
|
||||
"Image generation API key is not set; configure SN_API_KEY, "
|
||||
"or configure SN_IMAGE_GEN_API_KEY only for an image-generation-specific override"
|
||||
)
|
||||
else:
|
||||
msg = f"Field '{field_name}' is required but not set; try setting the environment variable(s) {field.env_names}"
|
||||
errors.append((field_name, msg))
|
||||
continue
|
||||
|
||||
# Check fields combination rules:
|
||||
if not self.SN_IMAGE_GEN_MODEL:
|
||||
errors.append((
|
||||
"SN_IMAGE_GEN_MODEL",
|
||||
f"SN_IMAGE_GEN_MODEL is required when SN_IMAGE_GEN_MODEL_TYPE is {self.SN_IMAGE_GEN_MODEL_TYPE!r}",
|
||||
))
|
||||
|
||||
warnings: list[tuple[str, str]] = []
|
||||
runtime_checks = {
|
||||
"text": {
|
||||
"api_key": ("SN_TEXT_API_KEY",),
|
||||
"base_url": ("SN_TEXT_BASE_URL", "SN_CHAT_BASE_URL"),
|
||||
"model": ("SN_TEXT_MODEL",),
|
||||
"type": ("SN_TEXT_TYPE", "SN_CHAT_TYPE"),
|
||||
},
|
||||
"vision": {
|
||||
"api_key": ("SN_VISION_API_KEY",),
|
||||
"base_url": ("SN_VISION_BASE_URL", "SN_CHAT_BASE_URL"),
|
||||
"model": ("SN_VISION_MODEL",),
|
||||
"type": ("SN_VISION_TYPE", "SN_CHAT_TYPE"),
|
||||
},
|
||||
}
|
||||
for runtime, checks in runtime_checks.items():
|
||||
for field_kind, keys in checks.items():
|
||||
if any(getattr(self, key) for key in keys):
|
||||
continue
|
||||
env_help = " / ".join(
|
||||
", ".join(field_env_names[key])
|
||||
if isinstance(field_env_names.get(key), tuple)
|
||||
else str(field_env_names.get(key, key))
|
||||
for key in keys
|
||||
)
|
||||
warnings.append((
|
||||
keys[0],
|
||||
f"{keys[0]} is not set; {runtime} {field_kind} may be unavailable. Try setting: {env_help}",
|
||||
))
|
||||
|
||||
# check urls
|
||||
errors.extend(
|
||||
(
|
||||
key,
|
||||
f"{key} is not a valid base URL: {getattr(self, key)}",
|
||||
)
|
||||
for key in ("SN_CHAT_BASE_URL", "SN_TEXT_BASE_URL", "SN_VISION_BASE_URL")
|
||||
if getattr(self, key) and not is_valid_base_url(getattr(self, key))
|
||||
)
|
||||
errors.extend(
|
||||
(
|
||||
key,
|
||||
f"{key} is not a valid base URL: {getattr(self, key)}",
|
||||
)
|
||||
for key in (
|
||||
"SN_BASE_URL",
|
||||
"SN_IMAGE_GEN_BASE_URL",
|
||||
)
|
||||
if getattr(self, key) and not is_valid_base_url(getattr(self, key))
|
||||
)
|
||||
return errors, warnings
|
||||
|
||||
def get_annotated_field(self, field_name: str) -> Field | None:
|
||||
hints = get_type_hints(type(self), include_extras=True)
|
||||
if field_name not in hints:
|
||||
return None
|
||||
hint = hints[field_name]
|
||||
field_inst = next((a for a in get_args(hint) if isinstance(a, Field)), None)
|
||||
return field_inst
|
||||
|
||||
def get_env_var_help(self, field_name: str) -> str:
    """Return a help string describing which environment variables can be used
    to set the specified configuration field.

    Args:
        field_name: The name of the configuration field (e.g., "SN_CHAT_API_KEY").

    Returns:
        A string describing the environment variable(s) that control this field,
        followed by the field's current value rendered with ``repr``.
        Returns an error message if the field does not exist, has no ``Field``
        annotation, or its annotation lists no environment variable names.
    """
    if not hasattr(type(self), field_name):
        return f"Field '{field_name}' does not exist in Configs."

    field_inst = self.get_annotated_field(field_name)
    env_names = (
        list(field_inst.env_names)
        if field_inst is not None and field_inst.env_names
        else []
    )
    # A Field without any env names is just as unconfigurable as a missing
    # Field; without this guard the message would end in an empty name list.
    if not env_names:
        return f"Field '{field_name}' is not configurable via environment variables."

    current_value = getattr(self, field_name)
    if len(env_names) == 1:
        return (
            f"To set '{field_name}', configure the environment variable: {env_names[0]}\n"
            f"Current value: {current_value!r}"
        )

    env_list = ", ".join(env_names)
    return (
        f"To set '{field_name}', configure one of these environment variables: {env_list}\n"
        f"They are tried in order; the first set value is used.\n"
        f"Current value: {current_value!r}"
    )
|
||||
|
||||
|
||||
def is_valid_base_url(url: str) -> bool:
    """Report whether *url* parses with both a scheme and a network location.

    Any scheme is accepted; only the presence of ``scheme`` and ``netloc`` is
    checked. Strings that ``urlparse`` rejects with ``ValueError`` count as
    invalid.

    Args:
        url: Candidate base URL.

    Returns:
        bool: True when both components are non-empty, False otherwise.
    """
    try:
        parts = urlparse(url)
    except ValueError:
        return False
    return bool(parts.scheme and parts.netloc)
|
||||
|
||||
|
||||
def reload_env() -> None:
    """Re-run environment preparation and rebuild the module-level ``global_configs``.

    Calls ``prepare_env()`` and then replaces ``global_configs`` with a fresh
    ``Configs()``. Any exception raised while building the new object (or while
    printing the confirmation) is reported through ``warnings.warn`` instead of
    being propagated.
    """
    global global_configs

    prepare_env()
    try:
        refreshed = Configs()
        global_configs = refreshed
        print("✅ Reloaded global_configs")
    except Exception as exc:
        warnings.warn(f"Failed to reload global_configs: {exc}", stacklevel=2)
|
||||
|
||||
|
||||
# Module-level configuration object, built once at import time; reload_env() rebinds it.
global_configs = Configs()
|
||||
39
sn-image-base/scripts/sn_image_base/exceptions.py
Normal file
39
sn-image-base/scripts/sn_image_base/exceptions.py
Normal file
@@ -0,0 +1,39 @@
|
||||
"""Shared exceptions for sn-image-base."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
|
||||
class U1BaseError(Exception):
    """Root of the sn-image-base exception hierarchy.

    Subclasses customise the fallback text by overriding ``DEFAULT_MESSAGE``.
    """

    DEFAULT_MESSAGE = "An error occurred in the sn-image-base skill."

    def __init__(self, message: str | None = None) -> None:
        """Create the exception.

        Args:
            message: Text for the exception. ``None`` (and only ``None``)
                selects the class's ``DEFAULT_MESSAGE``; an empty string is
                kept as-is.
        """
        super().__init__(self.DEFAULT_MESSAGE if message is None else message)
|
||||
|
||||
|
||||
class BadConfigurationError(U1BaseError):
    """Signals that the configuration is invalid.

    Base class for the more specific configuration errors in this module.
    """

    DEFAULT_MESSAGE = "The configuration is invalid."
|
||||
|
||||
|
||||
class MissingApiKeyError(BadConfigurationError):
    """Signals that no API key was supplied, neither on the CLI nor via the environment."""

    DEFAULT_MESSAGE = (
        "API key is required but was not provided. Set SN_API_KEY, "
        "or set SN_IMAGE_GEN_API_KEY only for an image-generation-specific override, "
        "or pass --api-key explicitly."
    )
|
||||
|
||||
|
||||
class InvalidBaseUrlError(BadConfigurationError):
    """Signals that the base URL is missing or is not a usable base URL.

    The default message covers the "not provided" case; callers raising this
    for a malformed URL are expected to pass their own message.
    """

    DEFAULT_MESSAGE = (
        "Base URL is required but was not provided. Set SN_IMAGE_GEN_BASE_URL "
        "or SN_BASE_URL, or pass --base-url explicitly."
    )
|
||||
@@ -0,0 +1,9 @@
|
||||
from .nano_banana import NanoBananaText2ImageClient
|
||||
from .openai_image import OpenAIImageGenerationClient
|
||||
from .sensenova import SensenovaText2ImageClient
|
||||
|
||||
# Public API of this package: the concrete client classes imported above.
__all__ = [
    "NanoBananaText2ImageClient",
    "OpenAIImageGenerationClient",
    "SensenovaText2ImageClient",
]
|
||||
@@ -0,0 +1,18 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def ensure_output_path(path: Path) -> Path:
    """Create the directory that will contain ``path`` if it is missing.

    Intermediate directories are created as needed and an already existing
    directory is not an error. The file itself is not created.

    Args:
        path (Path):
            The file path whose parent directory should be created.

    Returns:
        Path:
            The same ``path`` object that was passed in.
    """
    parent_dir = path.parent
    parent_dir.mkdir(parents=True, exist_ok=True)
    return path
|
||||
@@ -0,0 +1,86 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import typing
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Any
|
||||
|
||||
from sn_image_base.utils.error_utils import U1HttpResponseParseError
|
||||
from sn_image_base.utils.httpx_client import (
|
||||
create_async_httpx_client,
|
||||
httpx_response_raise_for_status_code,
|
||||
)
|
||||
|
||||
if typing.TYPE_CHECKING:
|
||||
import httpx
|
||||
|
||||
# Not referenced by T2IBaseClient itself.
# NOTE(review): presumably a polling interval in seconds for clients that poll — confirm.
DEFAULT_POLL_INTERVAL = 5.0
# Default for the `timeout` argument of T2IBaseClient.__init__.
DEFAULT_HTTP_REQUEST_TIMEOUT = 300.0
# Default for the `max_connections` argument of T2IBaseClient.__init__.
DEFAULT_MAX_CONNECTIONS = 100
|
||||
|
||||
|
||||
class T2IBaseClient(ABC):
    """Abstract base for asynchronous text-to-image API clients.

    Stores credentials and transport settings, lazily builds one shared
    ``httpx.AsyncClient`` and provides the common JSON response parsing.
    Concrete clients implement :meth:`generate`, :meth:`get_api_url`,
    :meth:`build_payload` and the :attr:`headers` property.
    """

    def __init__(
        self,
        api_key: str | None = None,
        base_url: str | None = None,
        *,
        model: str | None = None,
        max_connections: int = DEFAULT_MAX_CONNECTIONS,
        timeout: float = DEFAULT_HTTP_REQUEST_TIMEOUT,
        ssl_verify: bool = True,
        **kwargs: Any,
    ) -> None:
        """Record the client settings; no network activity happens here.

        Args:
            api_key: API key, exposed through :attr:`api_key`.
            base_url: API base URL, exposed through :attr:`base_url`.
            model: Default model name, stored as the public ``model`` attribute.
            max_connections: Forwarded as ``max_connections`` when the HTTP client is built.
            timeout: Forwarded as ``timeout`` when the HTTP client is built.
            ssl_verify: Forwarded as ``verify`` when the HTTP client is built.
            **kwargs: Accepted and ignored by the base class.
        """
        # Credentials and endpoint (read through the properties below).
        self._api_key, self._base_url = api_key, base_url
        self.model = model
        # Transport settings, consumed when the HTTP client is first created.
        self._timeout = timeout
        self._max_connections = max_connections
        self._ssl_verify = ssl_verify
        # Built on first use by _get_client().
        self._client: httpx.AsyncClient | None = None

    async def _get_client(self) -> httpx.AsyncClient:
        """Return the shared HTTP client, creating it on first use."""
        cached = self._client
        if cached is not None:
            return cached
        self._client = create_async_httpx_client(
            self.headers,
            timeout=self._timeout,
            max_connections=self._max_connections,
            verify=self._ssl_verify,
        )
        return self._client

    async def aclose(self) -> None:
        """Close the shared HTTP client if one was created and forget it."""
        if self._client is None:
            return
        await self._client.aclose()
        self._client = None

    @property
    def api_key(self) -> str | None:
        """API key given at construction time (may be ``None``)."""
        return self._api_key

    @property
    def base_url(self) -> str | None:
        """Base URL given at construction time (may be ``None``)."""
        return self._base_url

    @abstractmethod
    async def generate(self, prompt: str, *args: Any, **kwargs: Any) -> Any: ...

    @abstractmethod
    def get_api_url(self, *args: Any, **kwargs: Any) -> str: ...

    @abstractmethod
    def build_payload(self, *args: Any, **kwargs: Any) -> Any: ...

    @property
    @abstractmethod
    def headers(self) -> dict[str, str]: ...

    def parse_response(self, response: httpx.Response) -> dict:
        """Check the response status and decode its JSON body.

        Args:
            response: The HTTP response to decode.

        Returns:
            dict: The value returned by ``response.json()``.

        Raises:
            U1HttpResponseParseError: If decoding the body raises ``ValueError``.
                Whatever ``httpx_response_raise_for_status_code`` raises for the
                response also propagates (presumably for error status codes —
                confirm against that helper).
        """
        httpx_response_raise_for_status_code(response)
        try:
            return response.json()
        except ValueError as exc:
            raise U1HttpResponseParseError(
                detail=f"Failed to parse HTTP response. {response.request.url}. Response content: {response.content}",
                code=response.status_code,
            ) from exc
|
||||
306
sn-image-base/scripts/sn_image_base/generation/nano_banana.py
Normal file
306
sn-image-base/scripts/sn_image_base/generation/nano_banana.py
Normal file
@@ -0,0 +1,306 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import Any, Literal
|
||||
|
||||
import httpx
|
||||
from typing_extensions import override
|
||||
|
||||
from sn_image_base.configs import global_configs, is_valid_base_url
|
||||
from sn_image_base.utils.error_utils import U1HttpErrorBase
|
||||
|
||||
from .core import ensure_output_path
|
||||
from .core.client_base import (
|
||||
DEFAULT_HTTP_REQUEST_TIMEOUT,
|
||||
DEFAULT_MAX_CONNECTIONS,
|
||||
T2IBaseClient,
|
||||
)
|
||||
|
||||
# Default `image_size` preset for NanoBananaText2ImageClient.generate / build_payload.
DEFAULT_MODEL_SIZE: Literal["1K", "2K", "4K"] = "2K"
# Default `aspect_ratio`, sent as generationConfig.imageConfig.aspectRatio.
DEFAULT_ASPECT_RATIO = "16:9"
# NOTE(review): not referenced by the code in this module — confirm it is still needed.
DEFAULT_POLL_INTERVAL = 5.0
# Directory for auto-named output files when generate() receives no output_path.
OUTPUT_DIR = Path("/tmp/openclaw-sn-image")
|
||||
|
||||
|
||||
class NanoBananaText2ImageClient(T2IBaseClient):
    """Async client for Google Nano Banana API.

    Each :meth:`generate` call sends one ``generateContent`` POST, collects the
    inline base64 image parts of the response and writes the last one to disk.
    Request and decoding failures are reported as a ``{"status": "failed", ...}``
    dictionary; a missing API key, base URL or model raises ``ValueError``.
    """

    # requires `{model}` placeholder for format string
    DEFAULT_API_PATH = "/v1beta/models/{model}:generateContent"

    def __init__(
        self,
        api_key: str,
        base_url: str | None = None,
        *,
        model: str | None = None,
        max_connections: int = DEFAULT_MAX_CONNECTIONS,
        timeout: float = DEFAULT_HTTP_REQUEST_TIMEOUT,
        ssl_verify: bool = True,
        **kwargs: Any,
    ) -> None:
        """Initialize the NanoBananaText2ImageClient.

        Args:
            api_key (str):
                API key for authentication. If empty, ``SN_IMAGE_GEN_API_KEY``
                from the global configuration is used when the key is read.
            base_url (str | None, optional):
                API base URL. If None, ``SN_IMAGE_GEN_BASE_URL`` from the global
                configuration is used when the URL is read.
            model (str | None, optional):
                Model name. If None, ``SN_IMAGE_GEN_MODEL`` from the global
                configuration is used when a request is built.
            max_connections (int, optional):
                Maximum number of connections. Defaults to 100.
            timeout (float, optional):
                Total timeout in seconds for HTTP requests.
                Defaults to DEFAULT_HTTP_REQUEST_TIMEOUT.
            ssl_verify (bool, optional):
                If True, enable TLS verification. Defaults to True.
            **kwargs:
                Forwarded to the base class.
        """
        super().__init__(
            api_key=api_key,
            base_url=base_url,
            model=model,
            max_connections=max_connections,
            timeout=timeout,
            ssl_verify=ssl_verify,
            **kwargs,
        )

    @staticmethod
    def _http_failure_result(exc: U1HttpErrorBase) -> dict:
        """Build the ``failed`` result for a backend HTTP error, hinting at env vars for 401/404."""
        details = exc.detail or ""
        field_name = None
        if exc.code == 404:
            field_name = "SN_IMAGE_GEN_BASE_URL"
        elif exc.code == 401:
            field_name = "SN_IMAGE_GEN_API_KEY"

        field_hint = (
            global_configs.get_annotated_field(field_name) if field_name is not None else None
        )
        env_names = (
            list(field_hint.env_names) if field_hint is not None and field_hint.env_names else []
        )
        if len(env_names) == 1:
            details += f"\nIs the environment variable `{env_names[0]}` set correctly?"
        elif env_names:
            env_names_str = ", ".join([f"`{n}`" for n in env_names])
            details += f"\nIs any of the following environment variable(s) set correctly: {env_names_str}?"
        return {
            "status": "failed",
            "error": f"HTTP {exc.code}: {exc.message}",
            "message": details,
        }

    @override
    async def generate(
        self,
        prompt: str,
        negative_prompt: str = "",
        *,
        model: str | None = None,
        image_size: Literal["1K", "2K", "4K"] = DEFAULT_MODEL_SIZE,
        aspect_ratio: str = DEFAULT_ASPECT_RATIO,
        output_path: Path | None = None,
        **kwargs: Any,
    ) -> dict:
        """Generate an image from text prompt.

        Args:
            prompt (str):
                Text prompt for image generation.
            negative_prompt (str, optional):
                Negative prompt. Accepted for interface compatibility; it is
                passed to :meth:`build_payload`, which does not include it in
                the request. Defaults to "".
            model (str | None, optional):
                Model name override. Falls back to the client's model and then
                to ``SN_IMAGE_GEN_MODEL``. Defaults to None.
            image_size (str, optional):
                Image size preset ("1K", "2K", "4K"). Defaults to DEFAULT_MODEL_SIZE.
            aspect_ratio (str, optional):
                Aspect ratio (e.g. "16:9", "1:1"). Defaults to DEFAULT_ASPECT_RATIO.
            output_path (Path | None, optional):
                Output path for the generated image. When None, a timestamped
                file under ``OUTPUT_DIR`` is used. The suffix is replaced
                according to the MIME type of the returned image.
            **kwargs:
                Additional arguments reserved for backend compatibility.

        Returns:
            dict:
                On success: ``status`` ("ok"), ``output`` (saved path), ``message``.
                On failure: ``status`` ("failed"), ``error`` and usually ``message``.

        Raises:
            ValueError: If the API key, base URL or model cannot be resolved.
        """
        model = model or self.model or global_configs.SN_IMAGE_GEN_MODEL
        # Normalize image_size to uppercase for NanoBanana API
        image_size = image_size.upper()  # type: ignore[assignment]
        payload = self.build_payload(
            prompt=prompt,
            negative_prompt=negative_prompt,
            image_size=image_size,
            aspect_ratio=aspect_ratio,
        )
        headers = self.headers
        api_url = self.get_api_url(model)

        if output_path is None:
            OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
            timestamp = time.strftime("%Y%m%d_%H%M%S")
            output_path = OUTPUT_DIR / f"t2i_{timestamp}.png"
        output_path = ensure_output_path(output_path)

        client = await self._get_client()

        # The HTTP round trip: every request-level failure is turned into a
        # "failed" result here, because this is the only place HTTP happens.
        try:
            create_response = await client.post(
                api_url,
                json=payload,
                headers=headers,
            )
            data = self.parse_response(create_response)
        except U1HttpErrorBase as exc:
            return self._http_failure_result(exc)
        except httpx.HTTPStatusError as exc:
            return {
                "status": "failed",
                "error": f"HTTP {exc.response.status_code}",
                "message": f"http error: {exc.response.status_code} {exc.response.text}",
            }
        except httpx.HTTPError as exc:
            return {
                "status": "failed",
                "error": type(exc).__name__,
                "message": f"request error: {exc}",
            }

        # Decoding and saving: base64 errors are ValueError subclasses and
        # file-system problems are OSError.
        try:
            images = data["images"]
            if not images:
                failure = {
                    "status": "failed",
                    "error": "No image generated from the model",
                }
                if finish_reasons := data.get("finish_reasons"):
                    failure["message"] = "Finish reason(s): " + ", ".join(
                        str(reason) for reason in finish_reasons
                    )
                return failure
            image, mime_type = images[-1]
            image_bytes = base64.b64decode(image)
            suffix = mime_type_to_suffix(mime_type)
            saved_path = output_path.with_suffix(suffix)
            saved_path.write_bytes(image_bytes)
            return {
                "status": "ok",
                "output": str(saved_path),
                "message": "Image generated successfully",
            }
        except (OSError, ValueError) as exc:
            return {
                "status": "failed",
                "error": type(exc).__name__,
                "message": f"request error: {exc}",
            }

    @property
    @override
    def api_key(self) -> str:
        """API key from the constructor, else ``SN_IMAGE_GEN_API_KEY``.

        Raises:
            ValueError: If neither source provides a key.
        """
        api_key = self._api_key or global_configs.SN_IMAGE_GEN_API_KEY
        if not api_key:
            raise ValueError(
                "API key is missing: {}".format(
                    global_configs.get_env_var_help("SN_IMAGE_GEN_API_KEY")
                )
            )
        return api_key

    @property
    @override
    def base_url(self) -> str:
        """Base URL from the constructor, else ``SN_IMAGE_GEN_BASE_URL``.

        Raises:
            ValueError: If no URL is available or it fails ``is_valid_base_url``.
        """
        base_url = self._base_url or global_configs.SN_IMAGE_GEN_BASE_URL
        if not base_url:
            raise ValueError(
                "Base URL is missing: {}".format(
                    global_configs.get_env_var_help("SN_IMAGE_GEN_BASE_URL")
                )
            )
        if not is_valid_base_url(base_url):
            raise ValueError(
                f"Base URL is not a valid base URL: {base_url}. "
                f"Try setting environment variable(s): {global_configs.get_env_var_help('SN_IMAGE_GEN_BASE_URL')}"
            )
        return base_url

    @override
    def get_api_url(self, model: str | None = None) -> str:
        """Build the ``generateContent`` URL for ``model``.

        Args:
            model: Model name; falls back to the client's model and then to
                ``SN_IMAGE_GEN_MODEL``.

        Returns:
            str: Base URL joined with ``DEFAULT_API_PATH`` for the model.

        Raises:
            ValueError: If no model name can be resolved (previously this
                silently produced a ``/models/None:generateContent`` URL), or
                if the base URL is missing or invalid.
        """
        model = model or self.model or global_configs.SN_IMAGE_GEN_MODEL
        if not model:
            raise ValueError(
                f"Model is not set. {global_configs.get_env_var_help('SN_IMAGE_GEN_MODEL')}"
            )
        path = self.DEFAULT_API_PATH.format(model=model).lstrip("/")
        api_url = f"{self.base_url.rstrip('/')}/{path}"
        return api_url

    @override
    def build_payload(
        self,
        prompt: str,
        negative_prompt: str = "",
        *,
        image_size: Literal["1K", "2K", "4K"] = DEFAULT_MODEL_SIZE,
        aspect_ratio: str = DEFAULT_ASPECT_RATIO,
        max_output_tokens: int = 8192,
        **kwargs: Any,
    ) -> dict:
        """Build the JSON body of the ``generateContent`` request.

        Args:
            prompt: Text prompt; becomes the first content part.
            negative_prompt: Accepted for interface compatibility; not included
                in the payload.
            image_size: Sent as ``generationConfig.imageConfig.imageSize``.
            aspect_ratio: Sent as ``generationConfig.imageConfig.aspectRatio``.
            max_output_tokens: Sent as ``generationConfig.maxOutputTokens``.
            **kwargs: ``image_b64`` together with ``image_mime_type`` add an
                inline image part; other keys are ignored.

        Returns:
            dict: The request payload.

        Raises:
            ValueError: If an inline image is given with a MIME type other than
                ``image/jpeg`` or ``image/png``.
        """
        parts: list[dict] = [{"text": prompt}]
        if (image_b64 := kwargs.get("image_b64")) and (
            image_mime_type := kwargs.get("image_mime_type")
        ):
            if image_mime_type not in ["image/jpeg", "image/png"]:
                msg = (
                    f"Unsupported image MIME type: {image_mime_type}. "
                    "Supported types: image/jpeg, image/png"
                )
                raise ValueError(msg)
            parts.append({"inline_data": {"mime_type": image_mime_type, "data": image_b64}})
        return {
            "contents": [{"role": "USER", "parts": parts}],
            "generationConfig": {
                "imageConfig": {"aspectRatio": aspect_ratio, "imageSize": image_size},
                "maxOutputTokens": max_output_tokens,
            },
            # Every listed safety category is sent with its threshold set to "OFF".
            "safetySettings": [
                {"category": "HARM_CATEGORY_HARASSMENT", "threshold": "OFF"},
                {"category": "HARM_CATEGORY_HATE_SPEECH", "threshold": "OFF"},
                {"category": "HARM_CATEGORY_SEXUALLY_EXPLICIT", "threshold": "OFF"},
                {"category": "HARM_CATEGORY_DANGEROUS_CONTENT", "threshold": "OFF"},
            ],
        }

    @property
    @override
    def headers(self) -> dict[str, str]:
        """Request headers carrying the API key in ``x-goog-api-key``."""
        return {
            "x-goog-api-key": self.api_key,
            "Content-Type": "application/json",
        }

    @override
    def parse_response(self, response: httpx.Response) -> dict:
        """Extract inline images and finish reasons from a ``generateContent`` response.

        Args:
            response: The HTTP response of the generation request.

        Returns:
            dict: ``images`` is a list of ``(base64_data, mime_type)`` tuples in
            response order; ``finish_reasons`` lists the finish reason of each
            candidate that reports one.
        """
        raw_data = super().parse_response(response)

        images: list[tuple[str, str]] = []
        finish_reasons: list[str] = []
        candidates: list[dict] = raw_data.get("candidates") or []
        for candidate in candidates:
            content: dict[str, Any] = candidate.get("content") or {}
            # finishReason is a field of the candidate itself; the lookup on
            # `content` is kept only as a fallback for the previous behaviour.
            if f_reason := candidate.get("finishReason") or content.get("finishReason"):
                finish_reasons.append(f_reason)
            for part in content.get("parts") or []:
                inline_data: dict[str, Any] = part.get("inlineData") or {}
                data = inline_data.get("data")
                if not data:
                    # Text parts and empty inline parts carry no image; keeping
                    # them would later feed None into base64.b64decode.
                    continue
                images.append((data, inline_data.get("mimeType") or "image/png"))
        return {
            "images": images,
            "finish_reasons": finish_reasons,
        }
|
||||
|
||||
|
||||
def mime_type_to_suffix(mime_type: str) -> str:
    """Map an image MIME type to the file suffix used when saving it.

    Args:
        mime_type: MIME type.

    Returns:
        str: ".jpg", ".png" or ".webp" for the matching image type, and
        ".png" for any other value.
    """
    known_suffixes = (
        ("image/jpeg", ".jpg"),
        ("image/png", ".png"),
        ("image/webp", ".webp"),
    )
    for known_type, suffix in known_suffixes:
        if mime_type == known_type:
            return suffix
    # Unrecognised types are saved with the PNG suffix.
    return ".png"
|
||||
366
sn-image-base/scripts/sn_image_base/generation/openai_image.py
Normal file
366
sn-image-base/scripts/sn_image_base/generation/openai_image.py
Normal file
@@ -0,0 +1,366 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
import math
|
||||
import re
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import Any, Literal
|
||||
|
||||
import httpx
|
||||
from typing_extensions import override
|
||||
|
||||
from sn_image_base.configs import global_configs, is_valid_base_url
|
||||
from sn_image_base.exceptions import BadConfigurationError
|
||||
from sn_image_base.utils.error_utils import U1HttpErrorBase
|
||||
|
||||
from .core import ensure_output_path
|
||||
from .core.client_base import (
|
||||
DEFAULT_HTTP_REQUEST_TIMEOUT,
|
||||
DEFAULT_MAX_CONNECTIONS,
|
||||
T2IBaseClient,
|
||||
)
|
||||
|
||||
# Resolution preset used by OpenAIImageGenerationClient.generate when image_size is not given.
DEFAULT_RESOLUTION: Literal["1K", "2K"] = "2K"
# Default `aspect_ratio` for OpenAIImageGenerationClient.generate.
DEFAULT_ASPECT_RATIO = "16:9"
# NOTE(review): not referenced by the code in this module — confirm it is still needed.
DEFAULT_POLL_INTERVAL = 5.0
# Directory for auto-named output files when generate() receives no output_path.
OUTPUT_DIR = Path("/tmp/openclaw-sn-image")
|
||||
|
||||
# Matches a base64 data URI "data:<mime>;base64,<payload>": group 1 is the MIME
# type, group 2 the payload including padding. The hyphen is placed at the edge
# of each character class so it is a literal; written as "+-/" it formed a range
# that also admitted "," and ".". The MIME group allows ".", "+" and "-" so that
# types such as "image/svg+xml" are recognised.
B64_PARSE_PATTERN = re.compile(r"^data:([a-zA-Z0-9/.+-]+?);base64,([-+/_A-Za-z0-9]+=*)$")
|
||||
|
||||
|
||||
class OpenAIImageGenerationClient(T2IBaseClient):
    """Async client for OpenAI Image Generation API.

    Each :meth:`generate` call posts one request to ``/images/generations``
    asking for ``b64_json`` output, decodes the returned images and writes the
    last one to disk. Request, parsing and decoding failures are reported as a
    ``{"status": "failed", ...}`` dictionary.
    """

    DEFAULT_API_PATH = "/images/generations"

    def __init__(
        self,
        api_key: str,
        base_url: str | None = None,
        *,
        model: str | None = None,
        max_connections: int = DEFAULT_MAX_CONNECTIONS,
        timeout: float = DEFAULT_HTTP_REQUEST_TIMEOUT,
        ssl_verify: bool = True,
        **kwargs: Any,
    ) -> None:
        """Initialize the OpenAIImageGenerationClient.

        Args:
            api_key (str):
                API key for authentication. If empty, ``SN_IMAGE_GEN_API_KEY``
                from the global configuration is used when the key is read.
            base_url (str | None, optional):
                API base URL. If None, ``SN_IMAGE_GEN_BASE_URL`` from the global
                configuration is used when the URL is read.
            model (str | None, optional):
                Model name. If None, ``SN_IMAGE_GEN_MODEL`` from the global
                configuration is used by :meth:`generate`.
            max_connections (int, optional):
                Maximum number of connections. Defaults to 100.
            timeout (float, optional):
                Total timeout in seconds for HTTP requests.
                Defaults to DEFAULT_HTTP_REQUEST_TIMEOUT.
            ssl_verify (bool, optional):
                If True, enable TLS verification. Defaults to True.
            **kwargs:
                Forwarded to the base class.
        """
        super().__init__(
            api_key=api_key,
            base_url=base_url,
            model=model,
            max_connections=max_connections,
            timeout=timeout,
            ssl_verify=ssl_verify,
            **kwargs,
        )

    @staticmethod
    def _http_failure_result(exc: U1HttpErrorBase) -> dict:
        """Build the ``failed`` result for a backend HTTP error, hinting at env vars for 401/404."""
        details = exc.detail or ""
        field_name = None
        if exc.code == 404:
            field_name = "SN_IMAGE_GEN_BASE_URL"
        elif exc.code == 401:
            field_name = "SN_IMAGE_GEN_API_KEY"

        field_hint = (
            global_configs.get_annotated_field(field_name) if field_name is not None else None
        )
        env_names = (
            list(field_hint.env_names) if field_hint is not None and field_hint.env_names else []
        )
        if len(env_names) == 1:
            details += f"\nIs the environment variable `{env_names[0]}` set correctly?"
        elif env_names:
            env_names_str = ", ".join([f"`{n}`" for n in env_names])
            details += f"\nIs any of the following environment variable(s) set correctly: {env_names_str}?"
        return {
            "status": "failed",
            "error": f"HTTP {exc.code}: {exc.message}",
            "message": details,
        }

    @override
    async def generate(
        self,
        prompt: str,
        *,
        model: str | None = None,
        image_size: Literal["1K", "2K", "1k", "2k"] | None = None,
        aspect_ratio: str | None = DEFAULT_ASPECT_RATIO,
        output_path: Path | None = None,
        **kwargs: Any,
    ) -> dict:
        """Generate an image from text prompt.

        Args:
            prompt (str):
                Text prompt for image generation.
            model (str | None, optional):
                Model name override. Falls back to the client's model and then
                to ``SN_IMAGE_GEN_MODEL``. Defaults to None.
            image_size (str, optional):
                Image size preset ("1K", "2K"). Defaults to DEFAULT_RESOLUTION.
                Ignored when ``aspect_ratio`` is None.
            aspect_ratio (str, optional):
                Aspect ratio as "W:H" with integer parts (e.g. "16:9", "1:1").
                When None, no pixel size is computed and the request uses
                ``size="auto"``. Defaults to DEFAULT_ASPECT_RATIO.
            output_path (Path | None, optional):
                Output path for the generated image. When None, a timestamped
                file under ``OUTPUT_DIR`` is used. The suffix is replaced
                according to the MIME type of the returned image.
            **kwargs:
                Additional arguments reserved for backend compatibility.

        Returns:
            dict:
                On success: ``status`` ("ok"), ``output`` (saved path), ``message``.
                On failure: ``status`` ("failed"), ``error`` and usually ``message``.

        Raises:
            BadConfigurationError: If no model name can be resolved.
            ValueError: If ``aspect_ratio`` or ``image_size`` is invalid, or the
                API key / base URL cannot be resolved.
        """
        model = model or self.model or global_configs.SN_IMAGE_GEN_MODEL
        if not model:
            raise BadConfigurationError(
                f"Model is not set. {global_configs.get_env_var_help('SN_IMAGE_GEN_MODEL')}"
            )
        image_size = image_size or DEFAULT_RESOLUTION
        if aspect_ratio is None:
            size = None
        else:
            rw, _, rh = aspect_ratio.partition(":")
            try:
                aspect_ratio_val: float = int(rw) / int(rh)
            except (ValueError, ZeroDivisionError) as e:
                raise ValueError(f"Invalid aspect ratio: {aspect_ratio}") from e
            size = self._resolve_size(
                resolution=image_size,
                aspect_ratio_val=aspect_ratio_val,
            )
        payload = self.build_payload(
            prompt=prompt,
            model=model,
            size=size,
        )
        headers = self.headers
        api_url = self.get_api_url(model)

        if output_path is None:
            OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
            timestamp = time.strftime("%Y%m%d_%H%M%S")
            output_path = OUTPUT_DIR / f"t2i_{timestamp}.png"
        output_path = ensure_output_path(output_path)

        client = await self._get_client()

        # The HTTP round trip and response parsing. parse_response() raises
        # ValueError for malformed base64 and httpx raises HTTPError for
        # transport problems; both happen here, so both are handled here.
        try:
            create_response = await client.post(
                api_url,
                json=payload,
                headers=headers,
            )
            data = self.parse_response(create_response)
        except U1HttpErrorBase as exc:
            return self._http_failure_result(exc)
        except httpx.HTTPStatusError as exc:
            return {
                "status": "failed",
                "error": f"HTTP {exc.response.status_code}",
                "message": f"http error: {exc.response.status_code} {exc.response.text}",
            }
        except (httpx.HTTPError, ValueError) as exc:
            return {
                "status": "failed",
                "error": type(exc).__name__,
                "message": f"request error: {exc}",
            }

        # Saving the already-decoded bytes; only file-system errors (OSError)
        # and path errors (ValueError from with_suffix) are expected here.
        try:
            images = data["images"]
            if not images:
                return {
                    "status": "failed",
                    "error": "No image generated from the model",
                }
            image_bytes, mime_type = images[-1]
            suffix = mime_type_to_suffix(mime_type)
            saved_path = output_path.with_suffix(suffix)
            saved_path.write_bytes(image_bytes)
            return {
                "status": "ok",
                "output": str(saved_path),
                "message": "Image generated successfully",
            }
        except (OSError, ValueError) as exc:
            return {
                "status": "failed",
                "error": type(exc).__name__,
                "message": f"request error: {exc}",
            }

    @property
    @override
    def api_key(self) -> str:
        """API key from the constructor, else ``SN_IMAGE_GEN_API_KEY``.

        Raises:
            ValueError: If neither source provides a key.
        """
        api_key = self._api_key or global_configs.SN_IMAGE_GEN_API_KEY
        if not api_key:
            raise ValueError(
                "API key is missing: {}".format(
                    global_configs.get_env_var_help("SN_IMAGE_GEN_API_KEY")
                )
            )
        return api_key

    @property
    @override
    def base_url(self) -> str:
        """Base URL from the constructor, else ``SN_IMAGE_GEN_BASE_URL``.

        Raises:
            ValueError: If no URL is available or it fails ``is_valid_base_url``.
        """
        base_url = self._base_url or global_configs.SN_IMAGE_GEN_BASE_URL
        if not base_url:
            raise ValueError(
                "Base URL is missing: {}".format(
                    global_configs.get_env_var_help("SN_IMAGE_GEN_BASE_URL")
                )
            )
        if not is_valid_base_url(base_url):
            raise ValueError(
                f"Base URL is not a valid base URL: {base_url}. "
                f"Try setting environment variable(s): {global_configs.get_env_var_help('SN_IMAGE_GEN_BASE_URL')}"
            )
        return base_url

    @override
    def get_api_url(self, model: str | None = None) -> str:
        """Join the base URL with ``DEFAULT_API_PATH``.

        Args:
            model: Accepted for interface compatibility; the path has no
                ``{model}`` placeholder, so the value does not affect the URL.

        Returns:
            str: The request URL.
        """
        model = model or self.model
        path = self.DEFAULT_API_PATH.format(model=model).lstrip("/")
        api_url = f"{self.base_url.rstrip('/')}/{path}"
        return api_url

    @override
    def build_payload(
        self,
        prompt: str,
        model: str,
        *,
        n: int = 1,
        size: str | None = None,
        **kwargs: Any,
    ) -> dict:
        """Build the JSON body of the image generation request.

        Args:
            prompt: Text prompt.
            model: Model name.
            n: Number of images to request.
            size: Pixel size such as "1024x1024"; ``None`` or "" becomes "auto".
            **kwargs: Extra payload entries; merged last, so they override the
                keys above when names collide.

        Returns:
            dict: The request payload.

        Example:
            {
                "model": "dall-e-3",
                "prompt": "A cat wearing sunglasses drinking coffee on a cyberpunk city street, cel-shaded style",
                "n": 1,
                "size": "1024x1024",
                "response_format": "b64_json",
            }
        """
        size = size or "auto"
        payload = {
            "model": model,
            "prompt": prompt,
            "n": n,
            "size": size,
            "response_format": "b64_json",
            **kwargs,
        }
        return payload

    @property
    @override
    def headers(self) -> dict[str, str]:
        """Request headers carrying the API key as a Bearer token."""
        return {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }

    @override
    def parse_response(self, response: httpx.Response) -> dict:
        """Decode the images contained in an image generation response.

        Each ``data[].b64_json`` entry may be a bare base64 string (treated as
        PNG) or a ``data:<mime>;base64,<payload>`` URI. Entries without
        ``b64_json`` are skipped.

        Args:
            response: The HTTP response of the generation request.

        Returns:
            dict: ``images`` is a list of ``(image_bytes, mime_type)`` tuples in
            response order.

        Raises:
            ValueError: If a data URI does not match ``B64_PARSE_PATTERN`` or a
                payload cannot be base64-decoded.

        Example:
            {
                "data": [{
                    "b64_json": "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAABOYA3Q..."
                }],
                "created": 1776789055,
                "usage": {
                    "input_tokens":773,
                    "output_tokens":765,
                    "total_tokens":1538,
                    "input_tokens_details": {
                        "text_tokens":8,
                        "image_tokens":765
                    }
                }
            }
        """
        raw_data = super().parse_response(response)

        images: list[tuple[bytes, str]] = []
        data_items: list[dict] = raw_data.get("data") or []
        for item in data_items:
            encoded: str = item.get("b64_json") or ""
            if not encoded:
                continue

            if encoded.startswith("data:"):
                match = B64_PARSE_PATTERN.match(encoded)
                if match is None:
                    raise ValueError(
                        f"Invalid base64 data in response: {encoded[:100]}... (truncated)"
                    )
                mime_type = match.group(1)
                b64_data = match.group(2)
            else:
                mime_type = "image/png"  # fallback to png
                b64_data = encoded
            try:
                decoded = base64.b64decode(b64_data)
            except ValueError as e:
                # binascii.Error (bad padding / characters) is a ValueError subclass.
                raise ValueError(
                    f"Failed to decode base64 data in response: {e}. b64_json: {encoded[:100]}... (truncated)"
                ) from e
            images.append((decoded, mime_type))
        return {
            "images": images,
        }

    @classmethod
    def _resolve_size(
        cls,
        resolution: str,
        aspect_ratio_val: float | None,
    ) -> str:
        """Convert (resolution, aspect_ratio) to a pixel size string.

        The preset fixes a pixel budget (1024**2 for "1K", 2048**2 for "2K");
        width and height are chosen so that ``width / height`` approximates the
        ratio and ``width * height`` approximates the budget.

        Args:
            resolution: "1K" or "2K", case-insensitive.
            aspect_ratio_val: Width divided by height; ``None`` or 0 means 1.

        Returns:
            str: "<width>x<height>" with both values rounded to integers.

        Raises:
            ValueError: For an unknown preset or a ratio outside [1/3, 3].
        """
        resolution = resolution.upper()
        if resolution == "1K":
            max_pixel = 1024**2
        elif resolution == "2K":
            max_pixel = 2048**2
        else:
            raise ValueError(f"Unsupported resolution: {resolution}")
        aspect_ratio_val = aspect_ratio_val or 1
        if aspect_ratio_val < 1 / 3 or aspect_ratio_val > 3:
            raise ValueError(f"Aspect ratio value must be between [1/3, 3], got {aspect_ratio_val}")

        width: int = round(math.sqrt(max_pixel * aspect_ratio_val))
        height: int = round(math.sqrt(max_pixel / aspect_ratio_val))
        return f"{width}x{height}"
|
||||
|
||||
|
||||
def mime_type_to_suffix(mime_type: str) -> str:
    """Pick the file suffix for an image of the given MIME type.

    Args:
        mime_type: MIME type.

    Returns:
        str: ".jpg" for "image/jpeg", ".png" for "image/png", ".webp" for
        "image/webp"; every other value also yields ".png".
    """
    suffix_pairs = (
        ("image/jpeg", ".jpg"),
        ("image/png", ".png"),
        ("image/webp", ".webp"),
    )
    # First exact match wins; the default covers unknown types.
    return next(
        (suffix for known_type, suffix in suffix_pairs if mime_type == known_type),
        ".png",
    )
|
||||
508
sn-image-base/scripts/sn_image_base/generation/sensenova.py
Normal file
508
sn-image-base/scripts/sn_image_base/generation/sensenova.py
Normal file
@@ -0,0 +1,508 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import tempfile
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import TYPE_CHECKING, Any, Literal
|
||||
|
||||
import httpx
|
||||
from PIL import Image
|
||||
from typing_extensions import override
|
||||
|
||||
from sn_image_base.configs import global_configs, is_valid_base_url
|
||||
from sn_image_base.exceptions import InvalidBaseUrlError, MissingApiKeyError
|
||||
from sn_image_base.generation.core import ensure_output_path
|
||||
from sn_image_base.generation.core.client_base import (
|
||||
DEFAULT_HTTP_REQUEST_TIMEOUT,
|
||||
DEFAULT_MAX_CONNECTIONS,
|
||||
T2IBaseClient,
|
||||
)
|
||||
from sn_image_base.utils.error_utils import U1HttpErrorBase
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from collections.abc import Sequence
|
||||
|
||||
# Default `image_size` preset for SensenovaText2ImageClient.generate.
DEFAULT_RESOLUTION: Literal["1K", "2K", "4K"] = "2K"
# Default `aspect_ratio` for SensenovaText2ImageClient.generate.
DEFAULT_ASPECT_RATIO = "16:9"
# NOTE(review): presumably a polling interval in seconds — confirm where it is used.
DEFAULT_POLL_INTERVAL = 5.0
# Directory for auto-named output files when generate() receives no output_path.
OUTPUT_DIR = Path("/tmp/openclaw-sn-image")


# NOTE(review): presumably the path appended to the base URL for generation requests — confirm.
IMAGE_GEN_ENDPOINT = "/images/generations"
|
||||
|
||||
|
||||
class SensenovaText2ImageClient(T2IBaseClient):
|
||||
"""Async client for Sensenova text-to-image API."""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
api_key: str,
|
||||
base_url: str | None = None,
|
||||
*,
|
||||
model: str | None = None,
|
||||
max_connections: int = DEFAULT_MAX_CONNECTIONS,
|
||||
timeout: float = DEFAULT_HTTP_REQUEST_TIMEOUT,
|
||||
ssl_verify: bool = True,
|
||||
**kwargs: Any,
|
||||
) -> None:
|
||||
"""Initialize the SensenovaText2ImageClient.
|
||||
|
||||
Args:
|
||||
api_key (str):
|
||||
API key for authentication.
|
||||
base_url (str | None, optional):
|
||||
API base URL. If None, reads from SN_IMAGE_GEN_BASE_URL env var.
|
||||
model (str | None, optional):
|
||||
Model name. If None, reads from SN_IMAGE_GEN_MODEL env var.
|
||||
max_connections (int, optional):
|
||||
Maximum number of connections. Defaults to 100.
|
||||
timeout (float, optional):
|
||||
Total timeout in seconds for HTTP requests.
|
||||
Defaults to DEFAULT_HTTP_REQUEST_TIMEOUT.
|
||||
ssl_verify (bool, optional):
|
||||
If True, enable TLS verification. Defaults to True.
|
||||
"""
|
||||
api_key = api_key or global_configs.SN_IMAGE_GEN_API_KEY
|
||||
if not api_key:
|
||||
raise MissingApiKeyError(
|
||||
"API key is missing: {}".format(
|
||||
global_configs.get_env_var_help("SN_IMAGE_GEN_API_KEY")
|
||||
)
|
||||
)
|
||||
base_url = base_url or global_configs.SN_IMAGE_GEN_BASE_URL
|
||||
if not base_url:
|
||||
raise InvalidBaseUrlError(
|
||||
"Base URL is missing: {}".format(
|
||||
global_configs.get_env_var_help("SN_IMAGE_GEN_BASE_URL")
|
||||
)
|
||||
)
|
||||
if not is_valid_base_url(base_url):
|
||||
raise InvalidBaseUrlError(
|
||||
f"Base URL is not a valid base URL: {base_url}. "
|
||||
f"Try setting environment variable(s): {global_configs.get_env_var_help('SN_IMAGE_GEN_BASE_URL')}"
|
||||
)
|
||||
super().__init__(
|
||||
api_key=api_key,
|
||||
base_url=base_url,
|
||||
model=model,
|
||||
max_connections=max_connections,
|
||||
timeout=timeout,
|
||||
ssl_verify=ssl_verify,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
@override
async def generate(
    self,
    prompt: str,
    negative_prompt: str = "",
    *,
    model: str | None = None,
    image_size: Literal["1K", "2K", "4K"] = DEFAULT_RESOLUTION,
    aspect_ratio: str = DEFAULT_ASPECT_RATIO,
    output_path: Path | None = None,
    **kwargs: Any,
) -> dict:
    """Generate an image from a text prompt and save it to disk.

    Failures that this method handles are reported through the returned
    dictionary (``status == "failed"``) rather than raised.

    Args:
        prompt (str):
            Text prompt for image generation.
        negative_prompt (str, optional):
            Accepted for interface compatibility; it is not included in
            the request payload built here. Defaults to "".
        model (str | None, optional):
            Model name override. Falls back to ``self.model`` and then to
            ``global_configs.SN_IMAGE_GEN_MODEL``.
        image_size (str, optional):
            Image size preset; upper-cased before use. Presets rejected by
            ``_resolve_size`` produce a failed result.
            Defaults to DEFAULT_RESOLUTION.
        aspect_ratio (str, optional):
            Aspect ratio (e.g. "16:9", "1:1"). Defaults to DEFAULT_ASPECT_RATIO.
        output_path (Path | None, optional):
            Output path for the generated image. When None, a timestamped
            file under ``OUTPUT_DIR`` is used. The suffix is always
            replaced with ``.png``.
        **kwargs:
            Additional arguments reserved for backend compatibility; unused.

    Returns:
        dict:
            On success: ``status`` ("ok"), ``output`` (saved path) and
            ``message``. On failure: ``status`` ("failed"), ``error`` and
            ``message``.
    """
    model = model or self.model or global_configs.SN_IMAGE_GEN_MODEL
    # _resolve_size matches presets case-sensitively, so normalise first.
    image_size = image_size.upper()  # type: ignore[assignment]
    output_format = "png"

    # An unsupported preset or malformed aspect ratio is a caller error;
    # report it like every other failure instead of letting it escape.
    try:
        size = self._resolve_size(image_size, aspect_ratio)
    except ValueError as exc:
        return {
            "status": "failed",
            "error": type(exc).__name__,
            "message": f"invalid image size or aspect ratio: {exc}",
        }

    payload = self.build_payload(
        prompt=prompt,
        model=model,
        size=size,
        aspect_ratio=aspect_ratio,
        output_format=output_format,
    )
    headers = self.headers
    api_url = self.get_api_url(model)

    if output_path is None:
        OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
        timestamp = time.strftime("%Y%m%d_%H%M%S")
        output_path = OUTPUT_DIR / f"t2i_{timestamp}.png"
    output_path = ensure_output_path(output_path)

    client = await self._get_client()

    try:
        create_response = await client.post(
            api_url,
            json=payload,
            headers=headers,
        )
        data = self.parse_response(create_response)
    except U1HttpErrorBase as exc:
        details = exc.detail or ""
        # 404 points at a wrong base URL, 401 at a wrong API key; for those
        # two codes append a hint naming the environment variable(s).
        field_name = None
        if exc.code == 404:
            field_name = "SN_IMAGE_GEN_BASE_URL"
        elif exc.code == 401:
            field_name = "SN_IMAGE_GEN_API_KEY"
        if field_name is not None:
            field_hint = global_configs.get_annotated_field(field_name)
            if field_hint is not None:
                env_names = list(field_hint.env_names) if field_hint.env_names else []
                if len(env_names) == 1:
                    details += f"\nIs the environment variable `{env_names[0]}` set correctly?"
                elif env_names:
                    env_names_str = ", ".join([f"`{n}`" for n in env_names])
                    details += f"\nIs any of the following environment variable(s) set correctly: {env_names_str}?"
        return {
            "status": "failed",
            "error": f"HTTP {exc.code}: {exc.message}",
            "message": details,
        }

    try:
        images_urls: list[str] = data["images_urls"]
        if not images_urls:
            return {
                "status": "failed",
                "error": "No image generated from the model",
                # Same keys as every other failure result.
                "message": "The response contained no image URL.",
            }
        # Only the last returned URL is downloaded.
        url = images_urls[-1]
        save_path = output_path.with_suffix(f".{output_format}")
        saved_path = await download_image(url, save_path)
        return {
            "status": "ok",
            "output": str(saved_path),
            "message": "Image generated successfully",
        }
    except httpx.HTTPStatusError as exc:
        return {
            "status": "failed",
            "error": f"HTTP {exc.response.status_code}",
            "message": f"http error: {exc.response.status_code} {exc.response.text}",
        }
    except (httpx.HTTPError, OSError, ValueError) as exc:
        return {
            "status": "failed",
            "error": type(exc).__name__,
            "message": f"request error: {exc}",
        }
|
||||
|
||||
@property
|
||||
@override
|
||||
def api_key(self) -> str:
|
||||
api_key = self._api_key or global_configs.SN_IMAGE_GEN_API_KEY
|
||||
if not api_key:
|
||||
raise ValueError(
|
||||
"API key is missing: {}".format(
|
||||
global_configs.get_env_var_help("SN_IMAGE_GEN_API_KEY")
|
||||
)
|
||||
)
|
||||
return api_key
|
||||
|
||||
@property
@override
def base_url(self) -> str:
    """Return the instance base URL, else the globally configured one.

    Raises:
        ValueError: No base URL is available, or the resolved value fails
            ``is_valid_base_url``.
    """
    resolved = self._base_url or global_configs.SN_IMAGE_GEN_BASE_URL
    if not resolved:
        env_help = global_configs.get_env_var_help("SN_IMAGE_GEN_BASE_URL")
        raise ValueError(f"Base URL is missing: {env_help}")
    if is_valid_base_url(resolved):
        return resolved
    env_help = global_configs.get_env_var_help("SN_IMAGE_GEN_BASE_URL")
    raise ValueError(
        f"Base URL is not a valid base URL: {resolved}. "
        f"Try setting environment variable(s): {env_help}"
    )
|
||||
|
||||
@override
def get_api_url(self, _model: str | None = None) -> str:
    """Join the base URL and ``IMAGE_GEN_ENDPOINT`` with exactly one slash.

    Args:
        _model: Unused here; the URL does not depend on the model.

    Returns:
        str: The full endpoint URL.
    """
    root = self.base_url.rstrip("/")
    endpoint = IMAGE_GEN_ENDPOINT.lstrip("/")
    return "/".join((root, endpoint))
|
||||
|
||||
@override
|
||||
def build_payload(
|
||||
self,
|
||||
prompt: str,
|
||||
model: str,
|
||||
*,
|
||||
size: str | None = None,
|
||||
modalities: Sequence[str] = ("text", "image"),
|
||||
output_format: Literal["png"] = "png",
|
||||
response_format: Literal["url"] = "url",
|
||||
**kwargs: Any,
|
||||
) -> dict[str, Any]:
|
||||
"""Build the payload for the SenseNova image-generation endpoint.
|
||||
|
||||
Args:
|
||||
prompt (str): The prompt to generate an image for.
|
||||
model (str): The model to use for generation.
|
||||
size (str | None): Pixel size string (for example, "1920x1920").
|
||||
modalities (Sequence[str]): Reserved for compatibility; currently not sent.
|
||||
output_format (Literal["png"]): The output format of the image. Defaults to "png".
|
||||
response_format (Literal["url"]): The response format of the image. Defaults to "url".
|
||||
**kwargs (Any, optional): Additional parameters to pass to the API.
|
||||
|
||||
Example:
|
||||
{
|
||||
"model": "sensenova-u1-fast",
|
||||
"prompt": "A cat wearing a hat",
|
||||
"size": "1024x1024",
|
||||
"response_format": "url",
|
||||
"output_format": "png",
|
||||
}
|
||||
"""
|
||||
payload = {
|
||||
"model": model,
|
||||
"prompt": prompt,
|
||||
# "modalities": modalities,
|
||||
"size": size,
|
||||
# "n": 1,
|
||||
"response_format": response_format,
|
||||
"output_format": output_format,
|
||||
**kwargs,
|
||||
}
|
||||
return payload
|
||||
|
||||
@property
|
||||
@override
|
||||
def headers(self) -> dict[str, str]:
|
||||
if not self.api_key:
|
||||
raise MissingApiKeyError(
|
||||
"API key is missing: {}".format(
|
||||
global_configs.get_env_var_help("SN_IMAGE_GEN_API_KEY")
|
||||
)
|
||||
)
|
||||
return {
|
||||
"Authorization": f"Bearer {self.api_key}",
|
||||
"Content-Type": "application/json",
|
||||
}
|
||||
|
||||
@classmethod
def _resolve_size(
    cls,
    resolution: Literal["1K", "2K"] | str | None = None,
    aspect_ratio: ASPECT_RATIO_LITERALS | str | None = None,
) -> str | None:
    """Convert (resolution, aspect_ratio) to a "WIDTHxHEIGHT" pixel string.

    The requested ratio is snapped to the closest entry of the bucket table
    for the chosen resolution.

    Args:
        resolution: "1K" or "2K". Defaults to "2K" when only an aspect
            ratio is given.
        aspect_ratio: "W:H" with positive integers. Defaults to "1:1" when
            only a resolution is given.

    Returns:
        str | None: The pixel size string, or None when both arguments are
        empty.

    Raises:
        ValueError: The resolution is unsupported, the aspect ratio cannot
            be parsed as two positive integers, or the ratio lies outside
            the 9:21 to 16:9 range.
    """
    if not resolution and not aspect_ratio:
        return None
    resolution = resolution or "2K"
    aspect_ratio = aspect_ratio or "1:1"

    if resolution == "1K":
        buckets = BUCKETS_1K
    elif resolution == "2K":
        buckets = BUCKETS_2K
    else:
        raise ValueError(f"Unsupported resolution: {resolution!r}. Must be '1K' or '2K'.")

    try:
        ws, _, hs = aspect_ratio.strip().partition(":")
        width = int(ws)
        height = int(hs)
    except (AttributeError, TypeError, ValueError) as e:
        raise ValueError(f"Invalid aspect ratio: {aspect_ratio!r}") from e
    # Reject zero and negative terms explicitly: "-16:-9" would otherwise
    # yield a positive ratio and be accepted.
    if width <= 0 or height <= 0:
        raise ValueError(f"Invalid aspect ratio: {aspect_ratio!r}")

    ratio = width / height
    if ratio > 16 / 9:
        raise ValueError(f"Aspect ratio {aspect_ratio!r} is too wide. Maximum is 16:9")
    if ratio < 9 / 21:
        raise ValueError(f"Aspect ratio {aspect_ratio!r} is too high. Maximum is 9:21")

    w, h = _find_nearest_aspect_ratio(ratio, buckets)
    return f"{w}x{h}"
|
||||
|
||||
@override
def parse_response(self, response: httpx.Response) -> dict:
    """Extract image URLs from a SenseNova image-generation response.

    Example response data:

    ```json
    {
        "data": [{
            "url": "https://cdn.sensenova.dev/gen/..."
        }]
    }
    ```

    Args:
        response: The HTTP response from the SenseNova image-generation endpoint.

    Returns:
        dict: Parsed data with key ``images_urls`` (possibly an empty list).
    """
    raw_data = super().parse_response(response)

    # `or []` also covers an explicit `"data": null`; entries that are not
    # objects or carry no URL are skipped instead of raising.
    items = raw_data.get("data") or []
    images_urls: list[str] = [
        item["url"] for item in items if isinstance(item, dict) and item.get("url")
    ]
    return {"images_urls": images_urls}
|
||||
|
||||
|
||||
async def download_image(
    url: str,
    save_path: Path,
    timeout: float = DEFAULT_HTTP_REQUEST_TIMEOUT,
) -> Path:
    """Stream an image from a URL into ``save_path``.

    The body is written to a temporary file next to ``save_path``, checked
    against Content-Length, validated as an image, and only then moved over
    the final path. The temporary file is removed on any failure.

    Args:
        url: The URL of the image to download.
        save_path: Destination file; its parent directory is created if needed.
        timeout: The timeout for the request.

    Returns:
        Path: The path to the downloaded image file (``save_path``).

    Raises:
        httpx.HTTPStatusError: The server answered with an error status.
        OSError: The number of bytes received differs from Content-Length.
    """
    save_path.parent.mkdir(parents=True, exist_ok=True)
    temp_path: Path | None = None
    bytes_received = 0
    expected_length: int | None = None
    try:
        with tempfile.NamedTemporaryFile(
            dir=save_path.parent,
            prefix=f".{save_path.name}.",
            suffix=".tmp",
            delete=False,
        ) as temp_file:
            temp_path = Path(temp_file.name)
            async with httpx.AsyncClient(timeout=timeout) as client:
                async with client.stream("GET", url) as response:
                    response.raise_for_status()
                    content_length = response.headers.get("content-length")
                    if content_length is not None:
                        expected_length = int(content_length)
                    async for chunk in response.aiter_bytes():
                        temp_file.write(chunk)
                    # Content-Length describes the bytes on the wire, while
                    # aiter_bytes() yields decoded content. Compare against
                    # the raw byte count so a compressed response is not
                    # misreported as incomplete.
                    bytes_received = response.num_bytes_downloaded
            temp_file.flush()
            os.fsync(temp_file.fileno())

        if expected_length is not None and bytes_received != expected_length:
            raise OSError(
                f"Downloaded image is incomplete: got {bytes_received} bytes, "
                f"expected {expected_length} bytes"
            )

        # Always true here; kept only to narrow the type for checkers.
        assert temp_path is not None
        _validate_image_file(temp_path)
        temp_path.replace(save_path)
        return save_path
    except BaseException:
        # BaseException so that task cancellation (asyncio.CancelledError)
        # also removes the partial file; the error is always re-raised.
        if temp_path is not None:
            temp_path.unlink(missing_ok=True)
        raise
|
||||
|
||||
|
||||
def _validate_image_file(image_path: Path) -> None:
    """Verify that the downloaded image can be decoded completely.

    Any exception raised by Pillow while opening, verifying or loading the
    file propagates to the caller.

    Args:
        image_path: Path of the image file to check.
    """
    # First pass: integrity check.
    with Image.open(image_path) as image:
        image.verify()
    # Second pass on a fresh handle: Pillow documents that an image object
    # must be reopened after verify() before it can be loaded.
    with Image.open(image_path) as image:
        image.load()
|
||||
|
||||
|
||||
def mime_type_to_suffix(mime_type: str) -> str:
    """Map an image MIME type to a file suffix.

    The match is exact and case-sensitive.

    Args:
        mime_type: MIME type, for example "image/jpeg".

    Returns:
        str: The suffix including the leading dot; ".png" for any
        unrecognised MIME type.
    """
    known_suffixes = {
        "image/jpeg": ".jpg",
        "image/png": ".png",
        "image/webp": ".webp",
    }
    return known_suffixes.get(mime_type, ".png")
|
||||
|
||||
|
||||
# Closed set of aspect-ratio labels used as keys of the bucket tables below.
ASPECT_RATIO_LITERALS = Literal[
    "2:3", "3:2", "3:4", "4:3", "4:5", "5:4", "1:1", "16:9", "9:16", "9:21"
]
# (width, height) in pixels for each aspect-ratio label at the "1K" preset.
BUCKETS_1K: dict[ASPECT_RATIO_LITERALS, tuple[int, int]] = {
    "2:3": (1088, 1632),
    "3:2": (1632, 1088),
    "3:4": (1152, 1536),
    "4:3": (1536, 1152),
    "4:5": (1184, 1472),
    "5:4": (1472, 1184),
    "1:1": (1344, 1344),
    "16:9": (1792, 992),
    "9:16": (992, 1792),
    "9:21": (864, 2048),
}
# (width, height) in pixels for each aspect-ratio label at the "2K" preset.
BUCKETS_2K: dict[ASPECT_RATIO_LITERALS, tuple[int, int]] = {
    "2:3": (1664, 2496),
    "3:2": (2496, 1664),
    "3:4": (1760, 2368),
    "4:3": (2368, 1760),
    "4:5": (1824, 2272),
    "5:4": (2272, 1824),
    "1:1": (2048, 2048),
    "16:9": (2752, 1536),
    "9:16": (1536, 2752),
    "9:21": (1344, 3136),
}
|
||||
|
||||
|
||||
def _find_nearest_aspect_ratio(
|
||||
ratio: float,
|
||||
buckets: dict[ASPECT_RATIO_LITERALS, tuple[int, int]],
|
||||
) -> tuple[int, int]:
|
||||
wh_pairs = sorted(
|
||||
buckets.values(),
|
||||
key=lambda wh: abs(wh[0] / wh[1] - ratio),
|
||||
)
|
||||
return wh_pairs[0]
|
||||
|
||||
|
||||
if __name__ == "__main__":
    import asyncio

    async def main_async():
        """Generate one sample image using the globally configured credentials."""
        demo_client = SensenovaText2ImageClient(
            api_key=global_configs.SN_IMAGE_GEN_API_KEY,
            base_url=global_configs.SN_IMAGE_GEN_BASE_URL,
        )
        request = {
            "prompt": "A cat wearing a hat",
            "image_size": "1K",
            "aspect_ratio": "16:9",
        }
        outcome = await demo_client.generate(**request)
        print(outcome)

    asyncio.run(main_async())
|
||||
5
sn-image-base/scripts/sn_image_base/llm/__init__.py
Normal file
5
sn-image-base/scripts/sn_image_base/llm/__init__.py
Normal file
@@ -0,0 +1,5 @@
|
||||
# llm module - Language Model (text only)
|
||||
from .anthropic_adapter import AnthropicMessagesAdapter
|
||||
from .chat_completions_adapter import OpenAIChatAdapter
|
||||
|
||||
__all__ = ["AnthropicMessagesAdapter", "OpenAIChatAdapter"]
|
||||
161
sn-image-base/scripts/sn_image_base/llm/anthropic_adapter.py
Normal file
161
sn-image-base/scripts/sn_image_base/llm/anthropic_adapter.py
Normal file
@@ -0,0 +1,161 @@
|
||||
"""Anthropic Messages API adapter for text and vision."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from typing import Any
|
||||
|
||||
import httpx
|
||||
|
||||
from sn_image_base.utils.error_utils import U1HttpResponseParseError
|
||||
from sn_image_base.utils.httpx_client import httpx_response_raise_for_status_code
|
||||
from sn_image_base.vlm.utils import image_to_base64
|
||||
from sn_image_base.vlm.vlm_adapter import VlmAdapter
|
||||
|
||||
from .llm_adapter import LlmAdapter
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
DEFAULT_REQUEST_TIMEOUT = 150.0
|
||||
DEFAULT_MAX_TOKENS = 4096
|
||||
|
||||
|
||||
class AnthropicMessagesAdapter(LlmAdapter, VlmAdapter):
    """Anthropic Messages API adapter for text-only and vision calls.

    An ``httpx.AsyncClient`` passed at construction is reused and never
    closed by this adapter; otherwise a client is created on first use and
    closed by :meth:`aclose`.
    """

    def __init__(
        self,
        endpoint_url: str,
        api_key: str,
        model: str,
        *,
        max_tokens: int = DEFAULT_MAX_TOKENS,
        timeout: float = DEFAULT_REQUEST_TIMEOUT,
        async_client: httpx.AsyncClient | None = None,
    ) -> None:
        """Store connection settings.

        Args:
            endpoint_url: Full URL that requests are POSTed to.
            api_key: Credential sent in the request headers.
            model: Default model name, used when a call passes none.
            max_tokens: ``max_tokens`` value sent with every request.
            timeout: Timeout for the internally created client.
            async_client: Optional shared client owned by the caller.
        """
        self._url = endpoint_url
        self._api_key = api_key
        self._default_model = model
        self._max_tokens = max_tokens
        self._timeout = timeout
        # Remembered separately so aclose() can tell owned from borrowed.
        self._external_client = async_client
        self._client: httpx.AsyncClient | None = async_client
        logger.info(
            "AnthropicMessagesAdapter: endpoint=%s model=%s max_tokens=%s",
            self._url,
            self._default_model,
            self._max_tokens,
        )

    def _get_client(self) -> httpx.AsyncClient:
        """Return the active client, creating an owned one on first use."""
        if self._client is None:
            self._client = httpx.AsyncClient(timeout=self._timeout)
        return self._client

    @property
    def _headers(self) -> dict[str, str]:
        """Request headers; the key is sent both as a bearer token and as ``x-api-key``."""
        return {
            "Authorization": f"Bearer {self._api_key}",
            "Content-Type": "application/json",
            "x-api-key": self._api_key,
        }

    @staticmethod
    def _build_vision_content(
        user_prompt: str,
        images: list[str | bytes],
    ) -> list[dict[str, Any]]:
        """Build a content list: one text block followed by one base64 image block per image."""
        blocks: list[dict[str, Any]] = [{"type": "text", "text": user_prompt}]
        for image in images:
            mime, b64 = image_to_base64(image)
            blocks.append(
                {
                    "type": "image",
                    "source": {
                        "type": "base64",
                        "media_type": mime,
                        "data": b64,
                    },
                }
            )
        return blocks

    def _build_payload(
        self,
        user_prompt: str,
        system_prompt: str,
        model: str | None,
        *,
        images: list[str | bytes] | None = None,
    ) -> dict[str, Any]:
        """Build the request body for a single-turn request.

        Args:
            user_prompt: Text of the user turn.
            system_prompt: System instruction; omitted from the body when empty.
            model: Model override; falls back to the default model.
            images: Optional images to attach to the user turn.

        Returns:
            dict[str, Any]: The JSON-serialisable request body.
        """
        user_content: str | list[dict[str, Any]]
        if images:
            user_content = self._build_vision_content(user_prompt, images)
        else:
            user_content = user_prompt

        payload: dict[str, Any] = {
            "model": model or self._default_model,
            "messages": [{"role": "user", "content": user_content}],
            "max_tokens": self._max_tokens,
        }
        # The Messages API takes the system prompt as a top-level field.
        # Sending it as an extra "user" message would demote it to ordinary
        # user text and produce two consecutive user turns.
        if system_prompt:
            payload["system"] = system_prompt
        return payload

    @staticmethod
    def _parse_response(data: dict[str, Any]) -> str:
        """Return the text of the first text block in the response.

        Falls back to a top-level ``thinking`` value, prefixed with
        "[Think] ", when no text block exists.

        Raises:
            RuntimeError: Neither a text block nor ``thinking`` is present.
        """
        for block in data.get("content") or []:
            if isinstance(block, dict) and block.get("type") == "text":
                return block.get("text", "")

        thinking = data.get("thinking")
        if thinking:
            return f"[Think] {thinking}"

        raise RuntimeError("Anthropic Messages response has no extractable content.")

    async def _post_payload(self, payload: dict[str, Any]) -> str:
        """POST ``payload`` and return the extracted reply text.

        Raises:
            U1HttpResponseParseError: The response body is not valid JSON.
        """
        resp = await self._get_client().post(self._url, json=payload, headers=self._headers)
        httpx_response_raise_for_status_code(resp)
        try:
            data = resp.json()
        except ValueError as exc:
            raise U1HttpResponseParseError(
                detail=f"Failed to parse HTTP response. {resp.request.url}. Response content: {resp.content}",
                code=resp.status_code,
            ) from exc
        return self._parse_response(data)

    async def text_completion(
        self,
        user_prompt: str,
        system_prompt: str = "",
        model: str | None = None,
    ) -> str:
        """Send a text-only prompt and return the model's reply."""
        payload = self._build_payload(user_prompt, system_prompt, model)
        return await self._post_payload(payload)

    async def vision_completion(
        self,
        user_prompt: str,
        images: list[str | bytes],
        system_prompt: str = "",
        model: str | None = None,
    ) -> str:
        """Send a prompt with images and return the model's reply."""
        payload = self._build_payload(
            user_prompt,
            system_prompt,
            model,
            images=images,
        )
        return await self._post_payload(payload)

    async def aclose(self) -> None:
        """Close the internally created client; a borrowed client is left open."""
        if self._external_client is None and self._client is not None:
            await self._client.aclose()
            self._client = None
|
||||
@@ -0,0 +1,276 @@
|
||||
"""OpenAI-compatible chat/completions adapter for text and vision."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from typing import Any
|
||||
|
||||
import httpx
|
||||
|
||||
from sn_image_base.configs import is_valid_base_url
|
||||
from sn_image_base.exceptions import InvalidBaseUrlError, MissingApiKeyError
|
||||
from sn_image_base.utils.error_utils import (
|
||||
U1HttpBadResponseError,
|
||||
U1HttpNotFoundError,
|
||||
U1HttpResponseParseError,
|
||||
error_type_to_error_class,
|
||||
finish_reason_to_error_class,
|
||||
sanitize_base64_in_data,
|
||||
)
|
||||
from sn_image_base.utils.httpx_client import httpx_response_raise_for_status_code
|
||||
from sn_image_base.vlm.utils import image_to_data_url
|
||||
from sn_image_base.vlm.vlm_adapter import VlmAdapter
|
||||
|
||||
from .llm_adapter import LlmAdapter
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
DEFAULT_REQUEST_TIMEOUT = 600.0
|
||||
DEFAULT_MAX_COMPLETION_TOKENS = 8192
|
||||
|
||||
|
||||
class OpenAIChatAdapter(LlmAdapter, VlmAdapter):
    """OpenAI-compatible ``/chat/completions`` adapter for text and vision.

    An ``httpx.AsyncClient`` passed at construction is reused and never
    closed by this adapter; otherwise a client is created on first use and
    closed by :meth:`aclose`.
    """

    def __init__(
        self,
        endpoint_url: str,
        api_key: str,
        model: str,
        *,
        timeout: float = DEFAULT_REQUEST_TIMEOUT,
        async_client: httpx.AsyncClient | None = None,
        reasoning_effort: str | None = None,
    ) -> None:
        """Store connection settings.

        Args:
            endpoint_url: Full URL that requests are POSTed to.
            api_key: Bearer token sent in the ``Authorization`` header.
            model: Default model name, used when a call passes none.
            timeout: Timeout for the internally created client.
            async_client: Optional shared client owned by the caller.
            reasoning_effort: Optional ``reasoning_effort`` value added to
                every payload; an empty string is treated as None.
        """
        self._url = endpoint_url
        self._api_key = api_key
        self._default_model = model
        self._timeout = timeout
        self._reasoning_effort = reasoning_effort or None
        # Remembered separately so aclose() can tell owned from borrowed.
        self._external_client = async_client
        self._client: httpx.AsyncClient | None = async_client
        logger.info(
            "OpenAIChatAdapter: endpoint=%s model=%s reasoning_effort=%s",
            self._url,
            self._default_model,
            self._reasoning_effort,
        )

    def _get_client(self) -> httpx.AsyncClient:
        """Return the active client, creating an owned one on first use."""
        if self._client is None:
            self._client = httpx.AsyncClient(timeout=self._timeout)
        return self._client

    @property
    def _headers(self) -> dict[str, str]:
        """Bearer-auth and JSON content-type request headers."""
        return {
            "Authorization": f"Bearer {self._api_key}",
            "Content-Type": "application/json",
        }

    @staticmethod
    def _build_vision_content(
        user_prompt: str,
        images: list[str | bytes],
    ) -> list[dict[str, Any]]:
        """Build a content list: one text part followed by one ``image_url`` part per image."""
        content: list[dict[str, Any]] = [{"type": "text", "text": user_prompt}]
        content.extend(
            {"type": "image_url", "image_url": {"url": image_to_data_url(img)}} for img in images
        )
        return content

    def _build_payload(
        self,
        user_prompt: str,
        system_prompt: str,
        model: str,
        *,
        images: list[str | bytes] | None = None,
        max_completion_tokens: int | None = DEFAULT_MAX_COMPLETION_TOKENS,
    ) -> dict[str, Any]:
        """Build the chat/completions request body.

        Args:
            user_prompt: Text of the user message.
            system_prompt: System message; omitted when empty.
            model: Model name to request.
            images: Optional images to attach to the user message.
            max_completion_tokens: Token limit; omitted when falsy.

        Returns:
            dict[str, Any]: The JSON-serialisable request body.
        """
        messages: list[dict[str, Any]] = []
        if system_prompt:
            messages.append({"role": "system", "content": system_prompt})

        user_content: str | list[dict[str, Any]]
        if images:
            user_content = self._build_vision_content(user_prompt, images)
        else:
            user_content = user_prompt
        messages.append({"role": "user", "content": user_content})

        payload: dict[str, Any] = {
            "model": model,
            "messages": messages,
        }
        if self._reasoning_effort:
            payload["reasoning_effort"] = self._reasoning_effort
        if max_completion_tokens:
            payload["max_completion_tokens"] = max_completion_tokens
        return payload

    @staticmethod
    def _content_to_text(content_val: Any) -> str | None:
        """Flatten a message ``content`` value to text; None for unsupported shapes."""
        if isinstance(content_val, str):
            return content_val
        if isinstance(content_val, list):
            return "".join(
                block["text"]
                for block in content_val
                if isinstance(block, dict)
                and block.get("type") == "text"
                and isinstance(block.get("text"), str)
            )
        return None

    @staticmethod
    def _parse_response(data: dict[str, Any]) -> str:
        """Extract the concatenated text of all choices.

        Raises:
            U1HttpErrorBase: A subclass chosen from the response's ``error``
                type or from the finish reason when no text was returned;
                ``U1HttpBadResponseError`` when there are no choices or the
                content is empty.
        """
        error = data.get("error")
        if error:
            if isinstance(error, dict):
                error_message = error.get("message")
                error_type = error.get("type")
                error_code = error.get("code")
            else:
                # Tolerate a bare (non-object) error value rather than
                # failing with AttributeError.
                error_message, error_type, error_code = str(error), None, None
            error_class, explanation = error_type_to_error_class(error_type)
            raise error_class(
                explanation,
                detail=f"chat/completions response has error. Error: {error_message}",
                code=error_code,
            )

        choices = data.get("choices") or []
        if not choices:
            sanitized_data = sanitize_base64_in_data(data)
            dumped = json.dumps(sanitized_data, ensure_ascii=False)
            raise U1HttpBadResponseError(
                detail=f"chat/completions response has no choices. Response: {dumped}",
            )

        contents: list[str] = []
        finish_reason: str | None = None
        for choice in choices:
            # `or {}` also covers an explicit `"message": null`.
            msg = choice.get("message") or {}
            # Keep the last non-empty finish reason seen across choices.
            finish_reason = choice.get("finish_reason") or finish_reason
            text = OpenAIChatAdapter._content_to_text(msg.get("content"))
            if text is not None:
                contents.append(text)

        final_content = "".join(contents)
        if final_content:
            return final_content

        # No text at all: build a diagnostic and pick the error class.
        sanitized_data = sanitize_base64_in_data(data)
        dumped = json.dumps(sanitized_data, ensure_ascii=False)
        detail_msg = ""
        if finish_reason:
            detail_msg += f"\n^ Finish reason: {finish_reason}"
        detail_msg += f"\n^ Response: {dumped}"
        if finish_reason == "stop":
            raise U1HttpBadResponseError(
                "chat/completions response with empty content.",
                detail=detail_msg,
            )
        if finish_reason:
            error_class, explanation = finish_reason_to_error_class(finish_reason)
            raise error_class(explanation, detail=detail_msg)
        raise U1HttpBadResponseError(
            "chat/completions response has no content. No finish reason provided.",
            detail=detail_msg,
        )

    async def _post_payload(self, payload: dict[str, Any], model: str) -> str:
        """POST ``payload`` and return the extracted reply text.

        Args:
            payload: Request body.
            model: Model name, added to the detail of a not-found error.

        Raises:
            U1HttpNotFoundError: Re-raised with the model name appended.
            U1HttpResponseParseError: The response body is not valid JSON.
        """
        resp = await self._get_client().post(self._url, json=payload, headers=self._headers)
        try:
            httpx_response_raise_for_status_code(resp)
            data = resp.json()
        except U1HttpNotFoundError as exc:
            raise U1HttpNotFoundError(
                detail=f"{exc.detail} model={model!r}",
                code=resp.status_code,
            ) from exc
        except ValueError as exc:
            raise U1HttpResponseParseError(
                detail=f"Failed to parse HTTP response. {resp.request.url}. Response content: {resp.content}",
                code=resp.status_code,
            ) from exc
        return self._parse_response(data)

    async def text_completion(
        self,
        user_prompt: str,
        system_prompt: str = "",
        model: str | None = None,
    ) -> str:
        """Send a text-only prompt and return the model's reply."""
        resolved_model = model or self._default_model
        payload = self._build_payload(user_prompt, system_prompt, resolved_model)
        return await self._post_payload(payload, resolved_model)

    async def vision_completion(
        self,
        user_prompt: str,
        images: list[str | bytes],
        system_prompt: str = "",
        model: str | None = None,
    ) -> str:
        """Send a prompt with images and return the model's reply."""
        resolved_model = model or self._default_model
        payload = self._build_payload(
            user_prompt,
            system_prompt,
            resolved_model,
            images=images,
        )
        return await self._post_payload(payload, resolved_model)

    async def aclose(self) -> None:
        """Close the internally created client; a borrowed client is left open."""
        if self._external_client is None and self._client is not None:
            await self._client.aclose()
            self._client = None
|
||||
|
||||
|
||||
if __name__ == "__main__":
    import argparse
    import asyncio

    from sn_image_base.configs import global_configs

    parser = argparse.ArgumentParser(description="Async OpenAI-compatible chat adapter.")
    parser.add_argument("--prompt", default=None, help="Prompt to use for the model")
    parser.add_argument("--system-prompt", default=None, help="System prompt to use")
    parser.add_argument("--image", default=os.environ.get("IMAGE_PATH"), help="Optional image path")
    args = parser.parse_args()

    async def main() -> None:
        """Run one text or vision completion against the configured chat endpoint."""
        user_text = args.prompt or "Write a poem about the topic: 'Hello world'"
        system_text = args.system_prompt or ""

        chat_base = global_configs.SN_CHAT_BASE_URL
        if not chat_base:
            raise InvalidBaseUrlError(
                f"No base URL provided for chat runtime. {global_configs.get_env_var_help('SN_CHAT_BASE_URL')}"
            )
        if not is_valid_base_url(chat_base):
            raise InvalidBaseUrlError(
                f"Invalid base URL for chat runtime: {chat_base}. {global_configs.get_env_var_help('SN_CHAT_BASE_URL')}"
            )

        chat_key = global_configs.SN_CHAT_API_KEY
        if not chat_key:
            raise MissingApiKeyError(
                f"No API key provided for chat runtime. {global_configs.get_env_var_help('SN_CHAT_API_KEY')}"
            )

        adapter = OpenAIChatAdapter(
            endpoint_url=f"{chat_base.rstrip('/')}/chat/completions",
            api_key=chat_key,
            model=global_configs.SN_TEXT_MODEL,
        )
        try:
            # An image path (flag or IMAGE_PATH env var) selects the vision call.
            if args.image:
                result = await adapter.vision_completion(
                    user_prompt=user_text,
                    images=[args.image],
                    system_prompt=system_text,
                )
            else:
                result = await adapter.text_completion(
                    user_prompt=user_text,
                    system_prompt=system_text,
                )
            print(result)
        finally:
            await adapter.aclose()

    asyncio.run(main())
|
||||
51
sn-image-base/scripts/sn_image_base/llm/llm_adapter.py
Normal file
51
sn-image-base/scripts/sn_image_base/llm/llm_adapter.py
Normal file
@@ -0,0 +1,51 @@
|
||||
"""Abstract base class for LLM (Language Model) adapters."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
|
||||
|
||||
class LlmAdapter(ABC):
    """Uniform async interface for a single Language Model backend.

    Each concrete adapter wraps one LLM endpoint + model combination and
    exposes a single :meth:`text_completion` coroutine. Synchronous
    calling is intentionally **not** supported; callers must run inside an
    asyncio event loop.

    **Client ownership contract**: when a shared
    :class:`httpx.AsyncClient` is supplied at construction time, the adapter
    *reuses* it and must **not** close it; the caller retains full ownership
    of the client's lifecycle. When no external client is provided, the
    adapter creates and owns an internal client and must close it in
    :meth:`aclose`.
    """

    @abstractmethod
    async def text_completion(
        self,
        user_prompt: str,
        system_prompt: str = "",
        model: str | None = None,
    ) -> str:
        """Send a text-only prompt to the model and return the reply.

        Args:
            user_prompt: User-facing text instruction.
            system_prompt: System-level instruction for the conversation.
                Defaults to ''.
            model: Model name to use. If None, uses the default set at
                initialization.

        Returns:
            str: Raw text response from the model.
        """

    @abstractmethod
    async def aclose(self) -> None:
        """Release async resources owned by this adapter.

        Must be called when the adapter is no longer needed. Adapters that
        were given an external shared client must leave that client open;
        adapters that created their own internal client must close it here.
        """
|
||||
231
sn-image-base/scripts/sn_image_base/utils/error_utils.py
Normal file
231
sn-image-base/scripts/sn_image_base/utils/error_utils.py
Normal file
@@ -0,0 +1,231 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
import contextlib
|
||||
import json
|
||||
from collections.abc import Iterable, Mapping
|
||||
from typing import Any
|
||||
|
||||
|
||||
class U1BaseError(Exception):
    """Root exception carrying a message, an optional detail and an optional code."""

    # Used when no explicit message is supplied; subclasses override it.
    MESSAGE = "Base error"

    def __init__(
        self,
        message: str | None = None,
        detail: str | None = None,
        code: int | None = None,
        **kwargs: Any,
    ) -> None:
        """Store the error fields.

        Args:
            message: Short description; defaults to the class ``MESSAGE``.
            detail: Longer diagnostic text.
            code: Numeric code associated with the error.
            **kwargs: Accepted and ignored.
        """
        resolved_message = self.MESSAGE if message is None else message
        super().__init__(resolved_message)
        self.message = resolved_message
        self.code = code
        self.detail = detail

    def __str__(self) -> str:
        """Render as ``Name[code](message='...') <detail>...</detail>``, omitting empty parts."""
        pieces = [type(self).__name__]
        if self.code:
            pieces.append(f"[{self.code}]")
        if self.message:
            pieces.append(f"(message={self.message!r})")
        if self.detail:
            pieces.append(f" <detail>{self.detail}</detail>")
        return "".join(pieces)
|
||||
|
||||
|
||||
# ----------------------
|
||||
# HTTP Errors
|
||||
# ----------------------
|
||||
|
||||
|
||||
# Each class below only overrides the default MESSAGE; construction and
# string rendering are inherited from U1BaseError.
class U1HttpErrorBase(U1BaseError):
    """Common base class of all HTTP-related errors."""

    MESSAGE = "Base HTTP Error"


class U1HttpAuthError(U1HttpErrorBase):
    """Authentication or authorization failed."""

    MESSAGE = "Authentication or Authorization Failed"


class U1HttpNotFoundError(U1HttpErrorBase):
    """The requested resource was not found."""

    MESSAGE = "Resource Not Found"


class U1HttpTooManyRequestsError(U1HttpErrorBase):
    """Too many requests were sent."""

    MESSAGE = "Too Many Requests"


class U1HttpServerError(U1HttpErrorBase):
    """The server reported an error."""

    MESSAGE = "Server Error"


class U1HttpBadRequestError(U1HttpErrorBase):
    """The request was rejected as bad."""

    MESSAGE = "Bad Request"


class U1HttpPermissionError(U1HttpErrorBase):
    """The request failed with a permission error."""

    MESSAGE = "Permission Error"


class U1HttpResponseParseError(U1HttpErrorBase):
    """The HTTP response body could not be parsed."""

    MESSAGE = "Failed to parse HTTP response"


class U1HttpTimeoutError(U1HttpErrorBase):
    """The request timed out."""

    MESSAGE = "Timeout Error"


class U1HttpNetworkError(U1HttpErrorBase):
    """A network-level error occurred."""

    MESSAGE = "Network Error"


class U1HttpUnknownError(U1HttpErrorBase):
    """An error that fits no more specific class."""

    MESSAGE = "Unknown Error"


class U1HttpForbiddenContentError(U1HttpErrorBase):
    """The content was filtered as forbidden."""

    MESSAGE = "Forbidden Content Filtered"


class U1HttpTruncatedResponseError(U1HttpErrorBase):
    """The response was truncated."""

    MESSAGE = "Truncated Response"


class U1HttpBadResponseError(U1HttpErrorBase):
    """The response was malformed or unusable."""

    MESSAGE = "Bad Response"
|
||||
|
||||
|
||||
def finish_reason_to_error_class(finish_reason: str) -> tuple[type[U1HttpErrorBase], str]:
    """Translate a completion ``finish_reason`` into an error class.

    Args:
        finish_reason: The finish reason reported for a response.

    Returns:
        A ``(error_class, explanation)`` pair. Unrecognised reasons map to
        ``U1HttpBadRequestError`` with an explanation that quotes the value.
    """
    tool_explanation = "Response was halted due to tool calls or function calls."
    known_reasons = (
        ("length", U1HttpTruncatedResponseError, "Response was truncated due to length limit."),
        ("content_filter", U1HttpForbiddenContentError, "Response was filtered due to content policy."),
        ("tool_calls", U1HttpBadRequestError, tool_explanation),
        ("function_call", U1HttpBadRequestError, tool_explanation),
        ("stop", U1HttpBadResponseError, "Response was completed normally."),
    )
    for reason, error_class, explanation in known_reasons:
        if finish_reason == reason:
            return error_class, explanation
    return U1HttpBadRequestError, f"Unknown finish reason: {finish_reason!r}."
|
||||
|
||||
|
||||
def error_type_to_error_class(error_type: str) -> tuple[type[U1HttpErrorBase], str]:
    """Translate an API ``error_type`` string into an error class.

    Args:
        error_type: The error type reported by the API.

    Returns:
        A ``(error_class, explanation)`` pair. Unrecognised types map to
        ``U1HttpBadRequestError`` with an explanation that quotes the value.
    """
    known_types = (
        ("invalid_request_error", U1HttpBadRequestError, "Invalid request error."),
        ("rate_limit_error", U1HttpTooManyRequestsError, "Rate limit exceeded."),
        ("authentication_error", U1HttpAuthError, "Authentication error."),
        ("api_error", U1HttpServerError, "API service internal error."),
        ("permission_error", U1HttpPermissionError, "You are not authorized to access this resource."),
    )
    for type_name, error_class, explanation in known_types:
        if error_type == type_name:
            return error_class, explanation
    return U1HttpBadRequestError, f"Unknown error type: {error_type!r}."
|
||||
|
||||
|
||||
def sanitize_base64_in_data(data: Any, *, truncate_length: int = 200) -> Any:
|
||||
"""Recursively replace base64-encoded strings in data structure.
|
||||
|
||||
Args:
|
||||
data: Data to sanitize (dict, list, str, or other)
|
||||
truncate_length: Maximum length of base64-encoded string to truncate
|
||||
|
||||
Returns:
|
||||
Sanitized data with base64 strings replaced by placeholders
|
||||
|
||||
Example:
|
||||
>>> _sanitize_base64_in_data({"image": "iVBORw0KG..." * 100})
|
||||
{"image": "<base64-data: 1200 bytes>"}
|
||||
"""
|
||||
# Handle binary data first (bytes, bytearray, memoryview)
|
||||
if isinstance(data, (bytes, bytearray)):
|
||||
# Try: bytes -> str
|
||||
with contextlib.suppress(Exception):
|
||||
data = data.decode("utf-8")
|
||||
if isinstance(data, (bytes, bytearray, memoryview)):
|
||||
return f'<binary-data len="{len(data)}bytes"/>'
|
||||
if isinstance(data, str):
|
||||
# Try: str -> dict | list
|
||||
with contextlib.suppress(Exception):
|
||||
data = json.loads(data)
|
||||
|
||||
seen_ids: set[int] = set() # Prevent circular references
|
||||
|
||||
def __recursive_sanitize_base64_in_data(
|
||||
data: Mapping | Iterable | str | Any,
|
||||
) -> dict | list | str | Any:
|
||||
if isinstance(data, str):
|
||||
if _is_base64_string(data) and len(data) > truncate_length:
|
||||
# Truncate base64-encoded string, replace it with placeholder
|
||||
len_str = f"{len(data):,d}bytes"
|
||||
return f'<base64-data len="{len_str}">{data[:truncate_length]}...{TRUNCATED_MARKER}...{data[-truncate_length:]}</base64-data>'
|
||||
return data
|
||||
elif isinstance(data, Mapping):
|
||||
obj_id = id(data)
|
||||
if obj_id in seen_ids:
|
||||
return "<circular-reference:mapping/>"
|
||||
seen_ids.add(obj_id)
|
||||
result = {
|
||||
key: __recursive_sanitize_base64_in_data(value) for key, value in data.items()
|
||||
}
|
||||
seen_ids.remove(obj_id)
|
||||
return result
|
||||
elif isinstance(data, Iterable):
|
||||
obj_id = id(data)
|
||||
if obj_id in seen_ids:
|
||||
return "<circular-reference:iterable/>"
|
||||
seen_ids.add(obj_id)
|
||||
result = [__recursive_sanitize_base64_in_data(item) for item in data]
|
||||
seen_ids.remove(obj_id)
|
||||
return result
|
||||
return data
|
||||
|
||||
return __recursive_sanitize_base64_in_data(data)
|
||||
|
||||
|
||||
TRUNCATED_MARKER = "<<<///TRUNCATED///>>>"
|
||||
BASE64_DETECTION_MIN_LENGTH = 200 # Minimum length to consider as potential base64
|
||||
BASE64_CHARS = set("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=")
|
||||
|
||||
|
||||
def _is_base64_string(value: str) -> bool:
|
||||
"""Check if a string looks like base64-encoded data.
|
||||
|
||||
Args:
|
||||
value: String to check
|
||||
|
||||
Returns:
|
||||
True if the string appears to be base64-encoded data
|
||||
|
||||
Heuristics:
|
||||
- Length >= BASE64_DETECTION_MIN_LENGTH (200 chars)
|
||||
- At least 80% of characters are valid base64 chars (A-Za-z0-9+/=)
|
||||
- No whitespace or newlines (valid base64 is continuous)
|
||||
"""
|
||||
if not isinstance(value, str) or len(value) < BASE64_DETECTION_MIN_LENGTH:
|
||||
return False
|
||||
|
||||
# Check if mostly base64 characters (allow some tolerance)
|
||||
if value.startswith("data:"):
|
||||
# Remove the prefix like "data:image/jpeg;base64,"
|
||||
index = value.find(";base64,")
|
||||
if index != -1:
|
||||
value = value[index + len(";base64,") :]
|
||||
valid_count = sum(1 for c in value if c in BASE64_CHARS)
|
||||
ratio = valid_count / len(value)
|
||||
|
||||
if ratio >= 0.98:
|
||||
with contextlib.suppress(Exception):
|
||||
base64.b64decode(value)
|
||||
return True
|
||||
|
||||
return False
|
||||
180
sn-image-base/scripts/sn_image_base/utils/httpx_client.py
Normal file
180
sn-image-base/scripts/sn_image_base/utils/httpx_client.py
Normal file
@@ -0,0 +1,180 @@
|
||||
"""Shared httpx async client factory for vigeneval evaluators.
|
||||
|
||||
Centralizes connection pool limits, pool timeout, and optional file descriptor
|
||||
limit check to avoid PoolTimeout and 'Too many open files' under high concurrency.
|
||||
"""
|
||||
|
||||
import contextlib
|
||||
import json
|
||||
import resource
|
||||
from typing import Any
|
||||
|
||||
import httpx
|
||||
|
||||
from .error_utils import (
|
||||
U1HttpAuthError,
|
||||
U1HttpBadRequestError,
|
||||
U1HttpNotFoundError,
|
||||
U1HttpServerError,
|
||||
U1HttpTooManyRequestsError,
|
||||
)
|
||||
|
||||
|
||||
def check_file_descriptor_limit(max_connections: int, margin: int = 200) -> None:
    """Raise if process file descriptor limit is too low for max_connections.

    Avoids 'Too many open files' mid-run when using a large httpx connection pool.
    Does nothing when the ``resource`` module or ``RLIMIT_NOFILE`` is
    unavailable, when the limit cannot be read, or when the soft limit is
    unlimited.

    Args:
        max_connections: Intended httpx pool max_connections.
        margin: Extra FDs to reserve for app (logs, other files). Default 200.

    Raises:
        RuntimeError: If soft limit < max_connections + margin.
    """
    try:
        # Imported here so the ImportError guard below is actually reachable;
        # the `resource` module is Unix-only.
        # NOTE(review): this file also imports `resource` at module level;
        # that import needs the same guard for the module to load elsewhere.
        import resource

        soft, _hard = resource.getrlimit(resource.RLIMIT_NOFILE)
        unlimited = resource.RLIM_INFINITY
    except (ImportError, AttributeError, OSError):
        return
    if soft == unlimited:
        # RLIM_INFINITY may be -1, which would otherwise compare as "too low".
        return
    required = max_connections + margin
    if soft < required:
        raise RuntimeError(
            f"File descriptor limit too low for max_connections={max_connections}. "
            f"Current soft limit: {soft}, need at least {required}. "
            "Raise the limit before running, e.g.: ulimit -n 2048  # or higher, then re-run."
        )
|
||||
|
||||
|
||||
def create_async_httpx_client(
    headers: dict[str, str],
    *,
    timeout: float = 600.0,
    max_connections: int = 500,
    pool_timeout: float = 60.0,
    check_fd_limit: bool = False,
    verify: bool = True,
    **client_kwargs: Any,
) -> httpx.AsyncClient:
    """Build an ``httpx.AsyncClient`` with the shared pool and timeout defaults.

    The client is created on top of an explicit ``httpx.AsyncHTTPTransport``
    bound to local address ``0.0.0.0`` and carrying the connection-pool
    limits. Both the transport and the client are created with
    ``trust_env=True``, with the intent that proxy settings (including
    ``http://username:password@proxy_host:port`` credentials) are picked up
    from ``HTTPS_PROXY`` / ``HTTP_PROXY``.

    NOTE(review): confirm against the installed httpx version that
    environment proxies are still honoured when an explicit ``transport`` is
    passed to the client.

    Args:
        headers: Request headers (e.g. Content-Type, Authorization).
        timeout: Request timeout in seconds. Default 600.
        max_connections: Connection pool size. Default 500. Keep-alive
            connections are capped at ``min(400, max_connections)`` and
            expire after 30 seconds.
        pool_timeout: Max seconds to wait for a connection from the pool.
            Default 60.
        check_fd_limit: If True, call
            ``check_file_descriptor_limit(max_connections)`` first, which
            raises before any client is created when the limit is too low.
        verify: If False, disable SSL certificate verification. Use only for
            dev/testing or trusted networks.
        **client_kwargs: Passed through to ``httpx.AsyncClient``
            (e.g. base_url).

    Returns:
        A new ``httpx.AsyncClient``. Caller must ``aclose()`` it when done.

    Example:
        client = create_async_httpx_client(
            headers={"Authorization": "Bearer token"},
            max_connections=100,
        )
    """
    if check_fd_limit:
        check_file_descriptor_limit(max_connections)

    pool_limits = httpx.Limits(
        max_connections=max_connections,
        max_keepalive_connections=min(400, max_connections),
        keepalive_expiry=30.0,
    )

    # No explicit proxy argument is given; trust_env=True is relied upon.
    pooled_transport = httpx.AsyncHTTPTransport(
        verify=verify,
        trust_env=True,
        local_address="0.0.0.0",
        limits=pool_limits,
    )

    request_timeout = httpx.Timeout(timeout, pool=pool_timeout)
    return httpx.AsyncClient(
        transport=pooled_transport,
        headers=headers,
        timeout=request_timeout,
        verify=verify,
        trust_env=True,
        **client_kwargs,
    )
|
||||
|
||||
|
||||
def httpx_response_raise_for_status_code(response: httpx.Response) -> None:
    """Check httpx response status code and raise appropriate exceptions.

    Responses with a status code below 400 return normally. The request
    method/URL and the response body are extracted on a best-effort basis
    for the error detail; anything that cannot be read is shown as ``[N/A]``.

    Args:
        response: The httpx response object.

    Raises:
        U1HttpNotFoundError: If response status is 404.
        U1HttpAuthError: If response status is 401 or 403.
        U1HttpTooManyRequestsError: If response status is 429 or 503.
        U1HttpServerError: If response status is any other 5xx.
        U1HttpBadRequestError: If response status is any other 4xx.
    """
    response_content = "[N/A]"  # Not available
    request_url = "[N/A]"
    request_method = "[N/A]"
    # Deliberately broad: building the error detail must never mask the
    # status-code error itself. Each step keeps the last value that worked
    # (raw bytes -> decoded text -> parsed JSON).
    with contextlib.suppress(Exception):
        response_content = response.content
        response_content = response_content.decode("utf-8")
        response_content = json.loads(response_content)
    with contextlib.suppress(Exception):
        request_method = response.request.method
        request_method = request_method.upper()
        request_url = str(response.request.url)

    status = response.status_code
    if status == 404:
        raise U1HttpNotFoundError(
            detail=f"{request_method} {request_url!r} not found. Please check the URL and the model name.",
            code=status,
        )
    if status in (401, 403):
        raise U1HttpAuthError(
            detail=f"Authentication or authorization failed. {request_method} {request_url!r}. Response content: {response_content}",
            code=status,
        )
    if status in (429, 503):
        raise U1HttpTooManyRequestsError(
            detail=f"Service temporarily unavailable. Please try again later. {request_method} {request_url!r}. Response content: {response_content}",
            code=status,
        )
    if 500 <= status <= 599:
        raise U1HttpServerError(
            detail=f"Request failed. {request_method} {request_url!r}. Response content: {response_content}",
            code=status,
        )
    if 400 <= status <= 499:
        raise U1HttpBadRequestError(
            detail=f"Bad request. {request_method} {request_url!r}. Response content: {response_content}",
            code=status,
        )
|
||||
5
sn-image-base/scripts/sn_image_base/vlm/__init__.py
Normal file
5
sn-image-base/scripts/sn_image_base/vlm/__init__.py
Normal file
@@ -0,0 +1,5 @@
|
||||
# vlm module - Vision Language Model
|
||||
|
||||
from .vlm_adapter import VlmAdapter
|
||||
|
||||
__all__ = ["VlmAdapter"]
|
||||
120
sn-image-base/scripts/sn_image_base/vlm/utils.py
Normal file
120
sn-image-base/scripts/sn_image_base/vlm/utils.py
Normal file
@@ -0,0 +1,120 @@
|
||||
"""Image encoding / decoding utilities for VLM."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
import io
|
||||
from pathlib import Path
|
||||
|
||||
from PIL import Image
|
||||
|
||||
|
||||
def read_image_bytes(image: str | bytes) -> bytes:
    """Read raw image bytes from a path or return bytes unchanged.

    Args:
        image: File path to an image, or raw image bytes. ``bytearray`` and
            ``memoryview`` are also accepted and copied into ``bytes``.

    Returns:
        bytes: Raw image bytes.

    Raises:
        FileNotFoundError: If image is a path that is not an existing
            regular file.
    """
    if isinstance(image, bytes):
        return image
    if isinstance(image, (bytearray, memoryview)):
        # Other byte buffers are image data too, never a path.
        return bytes(image)
    path = Path(image)
    if not path.is_file():
        raise FileNotFoundError(f"Image file not found: {image}")
    return path.read_bytes()
|
||||
|
||||
|
||||
def detect_mime(data: bytes) -> str:
    """Guess the MIME type of an image from its leading magic bytes.

    Args:
        data: Raw image bytes; the first 8 bytes are inspected for PNG and
            the first 3 for JPEG.

    Returns:
        str: 'image/jpeg' for a JPEG signature, otherwise 'image/png'. Note
        that data with no recognised signature is also reported as
        'image/png'.
    """
    signatures = (
        (b"\x89PNG\r\n\x1a\n", "image/png"),
        (b"\xff\xd8\xff", "image/jpeg"),
    )
    for magic, mime_type in signatures:
        if data[: len(magic)] == magic:
            return mime_type
    return "image/png"
|
||||
|
||||
|
||||
def detect_suffix(data: bytes) -> str:
    """Guess a file suffix for an image from its leading magic bytes.

    Args:
        data: Raw image bytes.

    Returns:
        str: '.png' for a PNG signature, '.jpg' for a JPEG signature, and
        '.bin' when neither signature matches.
    """
    signatures = (
        (b"\x89PNG\r\n\x1a\n", ".png"),
        (b"\xff\xd8\xff", ".jpg"),
    )
    for magic, suffix in signatures:
        if data[: len(magic)] == magic:
            return suffix
    return ".bin"
|
||||
|
||||
|
||||
def image_to_mime_and_bytes(image: str | bytes) -> tuple[str, bytes]:
    """Get MIME type and raw bytes; convert to PNG if format is not PNG/JPEG.

    The PNG/JPEG signatures are checked here directly. ``detect_mime``
    reports unknown data as 'image/png', so relying on it would make the
    conversion below unreachable and label other formats as PNG unconverted.

    Args:
        image: File path or raw image bytes.

    Returns:
        tuple[str, bytes]: (mime_type, raw_bytes). PNG and JPEG data is
        returned untouched. Any other data is re-encoded as PNG; if it
        cannot be decoded as an image, the original bytes are returned
        labelled 'image/png', as before.

    Raises:
        FileNotFoundError: If image is a path and the file does not exist.
    """
    raw = read_image_bytes(image)
    if raw[:8] == b"\x89PNG\r\n\x1a\n":
        return "image/png", raw
    if raw[:3] == b"\xff\xd8\xff":
        return "image/jpeg", raw

    try:
        with Image.open(io.BytesIO(raw)) as opened:
            converted = opened.convert("RGBA")
    except (OSError, ValueError):
        # Not a decodable image: keep the legacy pass-through instead of
        # introducing a new exception for existing callers.
        return "image/png", raw

    buf = io.BytesIO()
    converted.save(buf, format="PNG")
    return "image/png", buf.getvalue()
|
||||
|
||||
|
||||
def image_to_base64(image: str | bytes) -> tuple[str, str]:
    """Return the image's MIME type together with its base64 text.

    Args:
        image: File path or raw image bytes.

    Returns:
        tuple[str, str]: (mime_type, base64_encoded_string), where the bytes
        and MIME type come from ``image_to_mime_and_bytes``.
    """
    mime_type, payload = image_to_mime_and_bytes(image)
    encoded = base64.b64encode(payload)
    return mime_type, encoded.decode("utf-8")
|
||||
|
||||
|
||||
def image_to_data_url(image: str | bytes) -> str:
    """Render the image as a ``data:<mime>;base64,<payload>`` URL.

    Args:
        image: File path or raw image bytes.

    Returns:
        str: Data URL string built from the result of ``image_to_base64``.
    """
    mime_type, encoded = image_to_base64(image)
    return "data:{};base64,{}".format(mime_type, encoded)
|
||||
|
||||
|
||||
def mask_secret(secret: str) -> str:
    """Mask a secret for logging (show first 6 and last 4 chars).

    Secrets shorter than 20 characters are masked completely. Showing 10
    characters of such a secret would reveal half of it or more, and for a
    9- or 10-character secret the head and tail overlap and reveal all of it.

    Args:
        secret: Raw secret string.

    Returns:
        str: Masked string (e.g. 'abcdef...ghij'), or all '*' with the same
        length as the input if it is shorter than 20 characters.
    """
    head_len, tail_len = 6, 4
    if len(secret) < 2 * (head_len + tail_len):
        return "*" * len(secret)
    return f"{secret[:head_len]}...{secret[-tail_len:]}"
|
||||
55
sn-image-base/scripts/sn_image_base/vlm/vlm_adapter.py
Normal file
55
sn-image-base/scripts/sn_image_base/vlm/vlm_adapter.py
Normal file
@@ -0,0 +1,55 @@
|
||||
"""Abstract base class for VLM (Vision Language Model) adapters."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
|
||||
|
||||
class VlmAdapter(ABC):
    """Async-only interface implemented by every Vision Language Model backend.

    A concrete adapter represents one LLM endpoint + model pair and offers a
    single :meth:`vision_completion` coroutine. There is intentionally **no**
    synchronous entry point; callers have to be running inside an asyncio
    event loop.

    **Client ownership contract**: if a shared :class:`httpx.AsyncClient` is
    passed in at construction time, the adapter *reuses* it and must **not**
    close it, because its lifecycle stays with the caller. If no external
    client is passed in, the adapter creates an internal client, owns it, and
    must close it in :meth:`aclose`.
    """

    @abstractmethod
    async def vision_completion(
        self,
        user_prompt: str,
        images: list[str | bytes],
        system_prompt: str = "",
        model: str | None = None,
    ) -> str:
        """Submit a text prompt plus image(s) to the model and return its reply.

        Args:
            user_prompt: User-facing text instruction.
            images: One or more images for the model; each item is either a
                file-path string or raw image bytes.
            system_prompt: System-level instruction placed ahead of the
                conversation. Defaults to ''.
            model: Name of the model to use. None selects the default chosen
                at initialization.

        Returns:
            str: The model's raw text response (may be JSON or
            markdown-wrapped JSON, depending on the model and prompt).
        """

    @abstractmethod
    async def aclose(self) -> None:
        """Free the asynchronous resources this adapter is responsible for.

        Call this once the adapter will not be used again. An adapter that
        was handed an external, shared client implements this as a no-op; an
        adapter that built its own internal client closes that client here.
        """
|
||||
Reference in New Issue
Block a user