From 29c89b7b6deefb4b34c9c700a0b6dc322bcfac8e Mon Sep 17 00:00:00 2001 From: D1m7asis Date: Fri, 20 Jun 2025 00:21:31 +0200 Subject: [PATCH 1/2] Add AI/ML API integration --- docs/en/Quickstart.md | 2 + docs/zh-CN/Quickstart.md | 2 + vlmeval/api/__init__.py | 3 +- vlmeval/api/aimlapi.py | 102 +++++++++++++++++ vlmeval/config.py | 236 ++++++++++++++++++++++++++++++++++++++- 5 files changed, 343 insertions(+), 2 deletions(-) create mode 100644 vlmeval/api/aimlapi.py diff --git a/docs/en/Quickstart.md b/docs/en/Quickstart.md index 807d59b0a..a580d03d8 100644 --- a/docs/en/Quickstart.md +++ b/docs/en/Quickstart.md @@ -31,6 +31,8 @@ To infer with API models (GPT-4v, Gemini-Pro-V, etc.) or use LLM APIs as the **j OPENAI_API_BASE= # StepAI API STEPAI_API_KEY= + # AI/ML API + AIML_API_KEY= # REKA API REKA_API_KEY= # GLMV API diff --git a/docs/zh-CN/Quickstart.md b/docs/zh-CN/Quickstart.md index afc5cea19..a5436f9f7 100644 --- a/docs/zh-CN/Quickstart.md +++ b/docs/zh-CN/Quickstart.md @@ -30,6 +30,8 @@ pip install -e . 
OPENAI_API_BASE= # StepAI API STEPAI_API_KEY= + # AI/ML API + AIML_API_KEY= # REKA API REKA_API_KEY= # GLMV API diff --git a/vlmeval/api/__init__.py b/vlmeval/api/__init__.py index 10b2902e3..ac7d1327f 100644 --- a/vlmeval/api/__init__.py +++ b/vlmeval/api/__init__.py @@ -18,6 +18,7 @@ from .taichu import TaichuVLAPI, TaichuVLRAPI from .doubao_vl_api import DoubaoVL from .mug_u import MUGUAPI +from .aimlapi import AIMLAPI __all__ = [ 'OpenAIWrapper', 'HFChatModel', 'GeminiWrapper', 'GPT4V', 'Gemini', @@ -25,5 +26,5 @@ 'Reka', 'GLMVisionAPI', 'CWWrapper', 'SenseChatVisionAPI', 'HunyuanVision', 'Qwen2VLAPI', 'BlueLMWrapper', 'BlueLM_V_API', 'JTVLChatAPI', 'bailingMMAPI', 'TaiyiAPI', 'TeleMMAPI', 'SiliconFlowAPI', 'LMDeployAPI', - 'TaichuVLAPI', 'TaichuVLRAPI', 'DoubaoVL', "MUGUAPI" + 'TaichuVLAPI', 'TaichuVLRAPI', 'DoubaoVL', "MUGUAPI", "AIMLAPI", ] diff --git a/vlmeval/api/aimlapi.py b/vlmeval/api/aimlapi.py new file mode 100644 index 000000000..41258c01c --- /dev/null +++ b/vlmeval/api/aimlapi.py @@ -0,0 +1,102 @@ +from vlmeval.smp import * +from vlmeval.api.base import BaseAPI + +url = 'https://api.aimlapi.com/v1/chat/completions' +headers = { + 'Content-Type': 'application/json', + 'Authorization': 'Bearer {}', + "HTTP-Referer": "https://github.com/open-compass/VLMEvalKit", + "X-Title": "VLMEvalKit", +} + + +class AIMLAPI_INT(BaseAPI): + + is_api: bool = True + + def __init__(self, + model: str = 'gpt-4-turbo', + retry: int = 10, + wait: int = 3, + key: str = None, + temperature: float = 0, + max_tokens: int = 300, + verbose: bool = True, + system_prompt: str = None, + **kwargs): + self.model = model + self.fail_msg = 'Fail to obtain answer via API.' 
+ self.headers = dict(headers) + self.temperature = temperature + self.max_tokens = max_tokens + self.system_prompt = system_prompt + self.key = key or os.environ.get('AIML_API_KEY', '') + self.headers['Authorization'] = self.headers['Authorization'].format(self.key) + + super().__init__(retry=retry, wait=wait, verbose=verbose, system_prompt=system_prompt, **kwargs) + + @staticmethod + def build_msgs(msgs_raw): + messages = [] + content = [] + + for msg in msgs_raw: + if msg['type'] == 'text': + content.append({ + "type": "text", + "text": msg['value'] + }) + elif msg['type'] == 'image': + image_b64 = encode_image_file_to_base64(msg['value']) + content.append({ + "type": "image_url", + "image_url": { + "url": f"data:image/webp;base64,{image_b64}" + } + }) + + messages.append({ + "role": "user", + "content": content + }) + return messages + + def generate_inner(self, inputs, **kwargs) -> str: + payload = dict( + model=self.model, + max_tokens=self.max_tokens, + temperature=self.temperature, + messages=self.build_msgs(inputs), + **kwargs + ) + response = requests.post(url, headers=self.headers, data=json.dumps(payload)) + ret_code = response.status_code + ret_code = 0 if (200 <= int(ret_code) < 300) else ret_code + + answer = self.fail_msg + try: + resp_struct = json.loads(response.text) + answer = resp_struct['choices'][0]['message']['content'].strip() + except Exception as err: + if self.verbose: + self.logger.error(f'{type(err)}: {err}') + self.logger.error(response.text if hasattr(response, 'text') else response) + + return ret_code, answer, response + + +class AIMLAPI(AIMLAPI_INT): + + def generate(self, message, dataset=None): + return super(AIMLAPI_INT, self).generate(message) + + +if __name__ == '__main__': + # export AIML_API_KEY='' + model = AIMLAPI_INT(verbose=True) + inputs = [ + {'type': 'image', 'value': '../../assets/apple.jpg'}, + {'type': 'text', 'value': 'Please describe this image in detail.'}, + ] + code, answer, resp = 
model.generate_inner(inputs) + print(code, answer, resp) diff --git a/vlmeval/config.py b/vlmeval/config.py index e9221326e..1eea71b9b 100644 --- a/vlmeval/config.py +++ b/vlmeval/config.py @@ -8,6 +8,7 @@ TransCore_ROOT = None Yi_ROOT = None OmniLMM_ROOT = None +AIMLAPI_ROOT = None Mini_Gemini_ROOT = None VXVERSE_ROOT = None VideoChat2_ROOT = None @@ -95,6 +96,239 @@ ), } +ai_ml_api_models = { + # Default + "aimlapi": partial( + AIMLAPI, + model="gpt-4-turbo", + root=AIMLAPI_ROOT, + temperature=0, + ), + + # OpenAI + "aimlapi_gpt_4o": partial( + AIMLAPI, + model="openai/gpt-4o", + root=AIMLAPI_ROOT, + temperature=0, + ), + "aimlapi_gpt_4o_mini": partial( + AIMLAPI, + model="gpt-4o-mini", + root=AIMLAPI_ROOT, + temperature=0, + ), + "aimlapi_gpt_4": partial( + AIMLAPI, + model="gpt-4", + root=AIMLAPI_ROOT, + temperature=0, + ), + "aimlapi_gpt_3_5_turbo": partial( + AIMLAPI, + model="gpt-3.5-turbo", + root=AIMLAPI_ROOT, + temperature=0, + ), + "aimlapi_gpt_3_5_turbo_instruct": partial( + AIMLAPI, + model="gpt-3.5-turbo-instruct", + root=AIMLAPI_ROOT, + temperature=0, + ), + "aimlapi_o1": partial( + AIMLAPI, + model="o1", + root=AIMLAPI_ROOT, + temperature=0, + ), + "aimlapi_o1_mini": partial( + AIMLAPI, + model="o1-mini", + root=AIMLAPI_ROOT, + temperature=0, + ), + "aimlapi_o3_mini": partial( + AIMLAPI, + model="o3-mini", + root=AIMLAPI_ROOT, + temperature=0, + ), + "aimlapi_text_embedding_3_large": partial( + AIMLAPI, + model="text-embedding-3-large", + root=AIMLAPI_ROOT, + temperature=0, + ), + + # Meta + "aimlapi_llama_3_3_70b": partial( + AIMLAPI, + model="meta-llama/Llama-3.3-70B-Instruct-Turbo", + root=AIMLAPI_ROOT, + temperature=0, + ), + "aimlapi_llama_3_2_3b": partial( + AIMLAPI, + model="meta-llama/Llama-3.2-3B-Instruct-Turbo", + root=AIMLAPI_ROOT, + temperature=0, + ), + "aimlapi_llama_3_8b_lite": partial( + AIMLAPI, + model="meta-llama/Meta-Llama-3-8B-Instruct-Lite", + root=AIMLAPI_ROOT, + temperature=0, + ), + "aimlapi_llama_3_8b_chat_hf": partial( + 
AIMLAPI, + model="meta-llama/Llama-3-8b-chat-hf", + root=AIMLAPI_ROOT, + temperature=0, + ), + "aimlapi_llama_3_1_8b": partial( + AIMLAPI, + model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", + root=AIMLAPI_ROOT, + temperature=0, + ), + "aimlapi_llama_guard_3_8b": partial( + AIMLAPI, + model="meta-llama/Meta-Llama-Guard-3-8B", + root=AIMLAPI_ROOT, + temperature=0, + ), + + # Google + "aimlapi_gemma_2_27b": partial( + AIMLAPI, + model="google/gemma-2-27b-it", + root=AIMLAPI_ROOT, + temperature=0, + ), + "aimlapi_gemini_1_5_flash": partial( + AIMLAPI, + model="google/gemini-1.5-flash", + root=AIMLAPI_ROOT, + temperature=0, + ), + "aimlapi_gemini_1_5_pro": partial( + AIMLAPI, + model="google/gemini-1.5-pro", + root=AIMLAPI_ROOT, + temperature=0, + ), + "aimlapi_textembedding_gecko": partial( + AIMLAPI, + model="textembedding-gecko-multilingual@001", + root=AIMLAPI_ROOT, + temperature=0, + ), + "aimlapi_text_multilingual_emb_002": partial( + AIMLAPI, + model="text-multilingual-embedding-002", + root=AIMLAPI_ROOT, + temperature=0, + ), + + # Mistral + "aimlapi_mixtral_8x22b": partial( + AIMLAPI, + model="mistralai/Mixtral-8x22B-Instruct-v0.1", + root=AIMLAPI_ROOT, + temperature=0, + ), + "aimlapi_mistral_7b": partial( + AIMLAPI, + model="mistralai/Mistral-7B-Instruct-v0.3", + root=AIMLAPI_ROOT, + temperature=0, + ), + "aimlapi_mistral_tiny": partial( + AIMLAPI, + model="mistralai/mistral-tiny", + root=AIMLAPI_ROOT, + temperature=0, + ), + + # Qwen + "aimlapi_qwen_2_5_7b": partial( + AIMLAPI, + model="Qwen/Qwen2.5-7B-Instruct-Turbo", + root=AIMLAPI_ROOT, + temperature=0, + ), + + # Anthropic + "aimlapi_claude_3_haiku": partial( + AIMLAPI, + model="claude-3-haiku-20240307", + root=AIMLAPI_ROOT, + temperature=0, + ), + "aimlapi_claude_3_5_sonnet_0620": partial( + AIMLAPI, + model="claude-3-5-sonnet-20240620", + root=AIMLAPI_ROOT, + temperature=0, + ), + "aimlapi_claude_3_5_sonnet_1022": partial( + AIMLAPI, + model="claude-3-5-sonnet-20241022", + root=AIMLAPI_ROOT, + 
temperature=0, + ), + + # DeepSeek + "aimlapi_deepseek_v3": partial( + AIMLAPI, + model="deepseek-chat", + root=AIMLAPI_ROOT, + temperature=0, + ), + "aimlapi_deepseek_r1": partial( + AIMLAPI, + model="deepseek-reasoner", + root=AIMLAPI_ROOT, + temperature=0, + ), + "aimlapi_deepseek_prover_v2": partial( + AIMLAPI, + model="deepseek/deepseek-prover-v2", + root=AIMLAPI_ROOT, + temperature=0, + ), + + # X AI + "aimlapi_grok_beta": partial( + AIMLAPI, + model="x-ai/grok-beta", + root=AIMLAPI_ROOT, + temperature=0, + ), + "aimlapi_grok_3_beta": partial( + AIMLAPI, + model="x-ai/grok-3-beta", + root=AIMLAPI_ROOT, + temperature=0, + ), + "aimlapi_grok_3_mini_beta": partial( + AIMLAPI, + model="x-ai/grok-3-mini-beta", + root=AIMLAPI_ROOT, + temperature=0, + ), + # You can use any other model from https://aimlapi.com/models + # Just copy the pattern below and insert the desired model name in place of `"***"`: + # + # "aimlapi_***": partial( + # AIMLAPI, + # model="***", # Replace with model name from the model catalog + # root=AIMLAPI_ROOT, + # temperature=0, + # ), +} + api_models = { # GPT "GPT4V": partial( @@ -1433,7 +1667,7 @@ kosmos_series, points_series, nvlm_series, vintern_series, h2ovl_series, aria_series, smolvlm_series, sail_series, valley_series, vita_series, ross_series, emu_series, ola_series, ursa_series, gemma_series, - long_vita_series, ristretto_series, kimi_series, aguvis_series, hawkvl_series, flash_vl + long_vita_series, ristretto_series, kimi_series, aguvis_series, hawkvl_series, flash_vl, ai_ml_api_models ] for grp in model_groups: From 6559b4944a6e2c7348bfa15ce2098fd909feba5c Mon Sep 17 00:00:00 2001 From: D1m7asis Date: Fri, 20 Jun 2025 00:30:13 +0200 Subject: [PATCH 2/2] docs: AI/ML API --- docs/en/Quickstart.md | 32 ++++++++++++++++++++++++++++++++ docs/zh-CN/Quickstart.md | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+) diff --git a/docs/en/Quickstart.md b/docs/en/Quickstart.md index a580d03d8..382ee1839 100644 --- 
a/docs/en/Quickstart.md +++ b/docs/en/Quickstart.md @@ -52,6 +52,38 @@ To infer with API models (GPT-4v, Gemini-Pro-V, etc.) or use LLM APIs as the **j ``` - Fill the blanks with your API keys (if necessary). Those API keys will be automatically loaded when doing the inference and evaluation. + +## 🌐 Supported Model Provider: AI/ML API Integration + +VLMEvalKit now includes first-class support for [AI/ML API](https://aimlapi.com/app/?utm_source=VLMEvalKit&utm_medium=github&utm_campaign=integration) — a unified provider of 300+ popular models, including: + +* 🧠 OpenAI (GPT-4o, GPT-3.5) +* 🐑 Claude 3.5 Series +* 🔹 Gemini 1.5 Pro / Flash +* 📚 DeepSeek, LLaMA3, Mistral and more + +**Key Highlights of AI/ML API:** + +* ⚡ High rate limits +* 💼 Enterprise-grade uptime +* ♻ Fully OpenAI-compatible interface + +📘 [Documentation](https://docs.aimlapi.com/?utm_source=VLMEvalKit&utm_medium=github&utm_campaign=integration) + +🔗 [Explore Models](https://aimlapi.com/models/?utm_source=VLMEvalKit&utm_medium=github&utm_campaign=integration) + +To enable AI/ML API, set your key: + +```bash +AIML_API_KEY=your_api_key +``` + +Then use any supported AIMLAPI model in the `--model` argument. For example: + +```bash +python run.py --data MMBench_DEV_EN --model aimlapi_gpt_4o +``` + ## Step 1. Configuration **VLM Configuration**: All VLMs are configured in `vlmeval/config.py`. Few legacy VLMs (like MiniGPT-4, LLaVA-v1-7B) requires additional configuration (configuring the code / model_weight root in the config file). During evaluation, you should use the model name specified in `supported_VLM` in `vlmeval/config.py` to select the VLM. Make sure you can successfully infer with the VLM before starting the evaluation with the following command `vlmutil check {MODEL_NAME}`. diff --git a/docs/zh-CN/Quickstart.md b/docs/zh-CN/Quickstart.md index a5436f9f7..71b4ffeb2 100644 --- a/docs/zh-CN/Quickstart.md +++ b/docs/zh-CN/Quickstart.md @@ -51,6 +51,38 @@ pip install -e . 
``` - 如果需要使用 API 在对应键值空白处填写上你的密钥。这些 API 密钥将在进行推理和评估时自动加载。 + +## 🌐 支持的模型服务商：AI/ML API 接入 + +VLMEvalKit 现已原生支持 [AI/ML API](https://aimlapi.com/app/?utm_source=VLMEvalKit&utm_medium=github&utm_campaign=integration) —— 一个统一的接口平台，整合了 300+ 主流模型，包括： + +* 🧠 OpenAI（GPT-4o, GPT-3.5） +* 🐑 Claude 3.5 系列 +* 🔹 Gemini 1.5 Pro / Flash +* 📚 DeepSeek，LLaMA3，Mistral 等 + +**AI/ML API 特点：** + +* ⚡ 高请求速率限制 +* 💼 企业级稳定性 +* ♻ 完全兼容 OpenAI API 接口 + +📘 [使用文档](https://docs.aimlapi.com/?utm_source=VLMEvalKit&utm_medium=github&utm_campaign=integration) + +🔗 [模型目录](https://aimlapi.com/models/?utm_source=VLMEvalKit&utm_medium=github&utm_campaign=integration) + +要启用 AI/ML API，请设置环境变量： + +```bash +AIML_API_KEY=你的密钥 +``` + +随后在 `--model` 参数中使用相应的 AIMLAPI 模型，例如： + +```bash +python run.py --data MMBench_DEV_EN --model aimlapi_gpt_4o +``` + ## 第1步 配置 **VLM 配置**：所有 VLMs 都在 `vlmeval/config.py` 中配置。对于某些 VLMs（如 MiniGPT-4、LLaVA-v1-7B），需要额外的配置（在配置文件中配置代码 / 模型权重根目录）。在评估时，你应该使用 `vlmeval/config.py` 中 `supported_VLM` 指定的模型名称来选择 VLM。确保在开始评估之前，你可以成功使用 VLM 进行推理，使用以下命令 `vlmutil check {MODEL_NAME}`。