From 29c89b7b6deefb4b34c9c700a0b6dc322bcfac8e Mon Sep 17 00:00:00 2001 From: D1m7asis Date: Fri, 20 Jun 2025 00:21:31 +0200 Subject: [PATCH 1/2] Add AI/ML API integration --- docs/en/Quickstart.md | 2 + docs/zh-CN/Quickstart.md | 2 + vlmeval/api/__init__.py | 3 +- vlmeval/api/aimlapi.py | 102 +++++++++++++++++ vlmeval/config.py | 236 ++++++++++++++++++++++++++++++++++++++- 5 files changed, 343 insertions(+), 2 deletions(-) create mode 100644 vlmeval/api/aimlapi.py diff --git a/docs/en/Quickstart.md b/docs/en/Quickstart.md index 807d59b0a..a580d03d8 100644 --- a/docs/en/Quickstart.md +++ b/docs/en/Quickstart.md @@ -31,6 +31,8 @@ To infer with API models (GPT-4v, Gemini-Pro-V, etc.) or use LLM APIs as the **j OPENAI_API_BASE= # StepAI API STEPAI_API_KEY= + # AI/ML API + AIML_API_KEY= # REKA API REKA_API_KEY= # GLMV API diff --git a/docs/zh-CN/Quickstart.md b/docs/zh-CN/Quickstart.md index afc5cea19..a5436f9f7 100644 --- a/docs/zh-CN/Quickstart.md +++ b/docs/zh-CN/Quickstart.md @@ -30,6 +30,8 @@ pip install -e . 
OPENAI_API_BASE= # StepAI API STEPAI_API_KEY= + # AI/ML API + AIML_API_KEY= # REKA API REKA_API_KEY= # GLMV API diff --git a/vlmeval/api/__init__.py b/vlmeval/api/__init__.py index 10b2902e3..ac7d1327f 100644 --- a/vlmeval/api/__init__.py +++ b/vlmeval/api/__init__.py @@ -18,6 +18,7 @@ from .taichu import TaichuVLAPI, TaichuVLRAPI from .doubao_vl_api import DoubaoVL from .mug_u import MUGUAPI +from .aimlapi import AIMLAPI __all__ = [ 'OpenAIWrapper', 'HFChatModel', 'GeminiWrapper', 'GPT4V', 'Gemini', @@ -25,5 +26,5 @@ 'Reka', 'GLMVisionAPI', 'CWWrapper', 'SenseChatVisionAPI', 'HunyuanVision', 'Qwen2VLAPI', 'BlueLMWrapper', 'BlueLM_V_API', 'JTVLChatAPI', 'bailingMMAPI', 'TaiyiAPI', 'TeleMMAPI', 'SiliconFlowAPI', 'LMDeployAPI', - 'TaichuVLAPI', 'TaichuVLRAPI', 'DoubaoVL', "MUGUAPI" + 'TaichuVLAPI', 'TaichuVLRAPI', 'DoubaoVL', "MUGUAPI", "AIMLAPI", ] diff --git a/vlmeval/api/aimlapi.py b/vlmeval/api/aimlapi.py new file mode 100644 index 000000000..41258c01c --- /dev/null +++ b/vlmeval/api/aimlapi.py @@ -0,0 +1,102 @@ +from vlmeval.smp import * +from vlmeval.api.base import BaseAPI + +url = 'https://api.aimlapi.com/v1/chat/completions' +headers = { + 'Content-Type': 'application/json', + 'Authorization': 'Bearer {}', + "HTTP-Referer": "https://github.com/open-compass/VLMEvalKit", + "X-Title": "VLMEvalKit", +} + + +class AIMLAPI_INT(BaseAPI): + + is_api: bool = True + + def __init__(self, + model: str = 'gpt-4-turbo', + retry: int = 10, + wait: int = 3, + key: str = None, + temperature: float = 0, + max_tokens: int = 300, + verbose: bool = True, + system_prompt: str = None, + **kwargs): + self.model = model + self.fail_msg = 'Fail to obtain answer via API.' 
+ self.headers = dict(headers) + self.temperature = temperature + self.max_tokens = max_tokens + self.system_prompt = system_prompt + self.key = key or os.environ.get('AIML_API_KEY', '') + self.headers['Authorization'] = self.headers['Authorization'].format(self.key) + + super().__init__(retry=retry, wait=wait, verbose=verbose, system_prompt=system_prompt, **kwargs) + + @staticmethod + def build_msgs(msgs_raw): + messages = [] + content = [] + + for msg in msgs_raw: + if msg['type'] == 'text': + content.append({ + "type": "text", + "text": msg['value'] + }) + elif msg['type'] == 'image': + image_b64 = encode_image_file_to_base64(msg['value']) + content.append({ + "type": "image_url", + "image_url": { + "url": f"data:image/webp;base64,{image_b64}" + } + }) + + messages.append({ + "role": "user", + "content": content + }) + return messages + + def generate_inner(self, inputs, **kwargs) -> str: + payload = dict( + model=self.model, + max_tokens=self.max_tokens, + temperature=self.temperature, + messages=self.build_msgs(inputs), + **kwargs + ) + response = requests.post(url, headers=self.headers, data=json.dumps(payload)) + ret_code = response.status_code + ret_code = 0 if (200 <= int(ret_code) < 300) else ret_code + + answer = self.fail_msg + try: + resp_struct = json.loads(response.text) + answer = resp_struct['choices'][0]['message']['content'].strip() + except Exception as err: + if self.verbose: + self.logger.error(f'{type(err)}: {err}') + self.logger.error(response.text if hasattr(response, 'text') else response) + + return ret_code, answer, response + + +class AIMLAPI(AIMLAPI_INT): + + def generate(self, message, dataset=None): + return super(AIMLAPI_INT, self).generate(message) + + +if __name__ == '__main__': + # export AIML_API_KEY='' + model = AIMLAPI_INT(verbose=True) + inputs = [ + {'type': 'image', 'value': '../../assets/apple.jpg'}, + {'type': 'text', 'value': 'Please describe this image in detail.'}, + ] + code, answer, resp = 
model.generate_inner(inputs) + print(code, answer, resp) diff --git a/vlmeval/config.py b/vlmeval/config.py index e9221326e..1eea71b9b 100644 --- a/vlmeval/config.py +++ b/vlmeval/config.py @@ -8,6 +8,7 @@ TransCore_ROOT = None Yi_ROOT = None OmniLMM_ROOT = None +AIMLAPI_ROOT = None Mini_Gemini_ROOT = None VXVERSE_ROOT = None VideoChat2_ROOT = None @@ -95,6 +96,239 @@ ), } +ai_ml_api_models = { + # Default + "aimlapi": partial( + AIMLAPI, + model="gpt-4-turbo", + root=AIMLAPI_ROOT, + temperature=0, + ), + + # OpenAI + "aimlapi_gpt_4o": partial( + AIMLAPI, + model="openai/gpt-4o", + root=AIMLAPI_ROOT, + temperature=0, + ), + "aimlapi_gpt_4o_mini": partial( + AIMLAPI, + model="gpt-4o-mini", + root=AIMLAPI_ROOT, + temperature=0, + ), + "aimlapi_gpt_4": partial( + AIMLAPI, + model="gpt-4", + root=AIMLAPI_ROOT, + temperature=0, + ), + "aimlapi_gpt_3_5_turbo": partial( + AIMLAPI, + model="gpt-3.5-turbo", + root=AIMLAPI_ROOT, + temperature=0, + ), + "aimlapi_gpt_3_5_turbo_instruct": partial( + AIMLAPI, + model="gpt-3.5-turbo-instruct", + root=AIMLAPI_ROOT, + temperature=0, + ), + "aimlapi_o1": partial( + AIMLAPI, + model="o1", + root=AIMLAPI_ROOT, + temperature=0, + ), + "aimlapi_o1_mini": partial( + AIMLAPI, + model="o1-mini", + root=AIMLAPI_ROOT, + temperature=0, + ), + "aimlapi_o3_mini": partial( + AIMLAPI, + model="o3-mini", + root=AIMLAPI_ROOT, + temperature=0, + ), + "aimlapi_text_embedding_3_large": partial( + AIMLAPI, + model="text-embedding-3-large", + root=AIMLAPI_ROOT, + temperature=0, + ), + + # Meta + "aimlapi_llama_3_3_70b": partial( + AIMLAPI, + model="meta-llama/Llama-3.3-70B-Instruct-Turbo", + root=AIMLAPI_ROOT, + temperature=0, + ), + "aimlapi_llama_3_2_3b": partial( + AIMLAPI, + model="meta-llama/Llama-3.2-3B-Instruct-Turbo", + root=AIMLAPI_ROOT, + temperature=0, + ), + "aimlapi_llama_3_8b_lite": partial( + AIMLAPI, + model="meta-llama/Meta-Llama-3-8B-Instruct-Lite", + root=AIMLAPI_ROOT, + temperature=0, + ), + "aimlapi_llama_3_8b_chat_hf": partial( + 
AIMLAPI, + model="meta-llama/Llama-3-8b-chat-hf", + root=AIMLAPI_ROOT, + temperature=0, + ), + "aimlapi_llama_3_1_8b": partial( + AIMLAPI, + model="meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo", + root=AIMLAPI_ROOT, + temperature=0, + ), + "aimlapi_llama_guard_3_8b": partial( + AIMLAPI, + model="meta-llama/Meta-Llama-Guard-3-8B", + root=AIMLAPI_ROOT, + temperature=0, + ), + + # Google + "aimlapi_gemma_2_27b": partial( + AIMLAPI, + model="google/gemma-2-27b-it", + root=AIMLAPI_ROOT, + temperature=0, + ), + "aimlapi_gemini_1_5_flash": partial( + AIMLAPI, + model="google/gemini-1.5-flash", + root=AIMLAPI_ROOT, + temperature=0, + ), + "aimlapi_gemini_1_5_pro": partial( + AIMLAPI, + model="google/gemini-1.5-pro", + root=AIMLAPI_ROOT, + temperature=0, + ), + "aimlapi_textembedding_gecko": partial( + AIMLAPI, + model="textembedding-gecko-multilingual@001", + root=AIMLAPI_ROOT, + temperature=0, + ), + "aimlapi_text_multilingual_emb_002": partial( + AIMLAPI, + model="text-multilingual-embedding-002", + root=AIMLAPI_ROOT, + temperature=0, + ), + + # Mistral + "aimlapi_mixtral_8x22b": partial( + AIMLAPI, + model="mistralai/Mixtral-8x22B-Instruct-v0.1", + root=AIMLAPI_ROOT, + temperature=0, + ), + "aimlapi_mistral_7b": partial( + AIMLAPI, + model="mistralai/Mistral-7B-Instruct-v0.3", + root=AIMLAPI_ROOT, + temperature=0, + ), + "aimlapi_mistral_tiny": partial( + AIMLAPI, + model="mistralai/mistral-tiny", + root=AIMLAPI_ROOT, + temperature=0, + ), + + # Qwen + "aimlapi_qwen_2_5_7b": partial( + AIMLAPI, + model="Qwen/Qwen2.5-7B-Instruct-Turbo", + root=AIMLAPI_ROOT, + temperature=0, + ), + + # Anthropic + "aimlapi_claude_3_haiku": partial( + AIMLAPI, + model="claude-3-haiku-20240307", + root=AIMLAPI_ROOT, + temperature=0, + ), + "aimlapi_claude_3_5_sonnet_0620": partial( + AIMLAPI, + model="claude-3-5-sonnet-20240620", + root=AIMLAPI_ROOT, + temperature=0, + ), + "aimlapi_claude_3_5_sonnet_1022": partial( + AIMLAPI, + model="claude-3-5-sonnet-20241022", + root=AIMLAPI_ROOT, + 
temperature=0, + ), + + # DeepSeek + "aimlapi_deepseek_v3": partial( + AIMLAPI, + model="deepseek-chat", + root=AIMLAPI_ROOT, + temperature=0, + ), + "aimlapi_deepseek_r1": partial( + AIMLAPI, + model="deepseek-reasoner", + root=AIMLAPI_ROOT, + temperature=0, + ), + "aimlapi_deepseek_prover_v2": partial( + AIMLAPI, + model="deepseek/deepseek-prover-v2", + root=AIMLAPI_ROOT, + temperature=0, + ), + + # X AI + "aimlapi_grok_beta": partial( + AIMLAPI, + model="x-ai/grok-beta", + root=AIMLAPI_ROOT, + temperature=0, + ), + "aimlapi_grok_3_beta": partial( + AIMLAPI, + model="x-ai/grok-3-beta", + root=AIMLAPI_ROOT, + temperature=0, + ), + "aimlapi_grok_3_mini_beta": partial( + AIMLAPI, + model="x-ai/grok-3-mini-beta", + root=AIMLAPI_ROOT, + temperature=0, + ), + # You can use any other model from https://aimlapi.com/models + # Just copy the pattern below and insert the desired model name in place of `"***"`: + # + # "aimlapi_***": partial( + # AIMLAPI, + # model="***", # Replace with model name from the model catalog + # root=AIMLAPI_ROOT, + # temperature=0, + # ), +} + api_models = { # GPT "GPT4V": partial( @@ -1433,7 +1667,7 @@ kosmos_series, points_series, nvlm_series, vintern_series, h2ovl_series, aria_series, smolvlm_series, sail_series, valley_series, vita_series, ross_series, emu_series, ola_series, ursa_series, gemma_series, - long_vita_series, ristretto_series, kimi_series, aguvis_series, hawkvl_series, flash_vl + long_vita_series, ristretto_series, kimi_series, aguvis_series, hawkvl_series, flash_vl, ai_ml_api_models ] for grp in model_groups: From 6559b4944a6e2c7348bfa15ce2098fd909feba5c Mon Sep 17 00:00:00 2001 From: D1m7asis Date: Fri, 20 Jun 2025 00:30:13 +0200 Subject: [PATCH 2/2] docs: AI/ML API --- docs/en/Quickstart.md | 32 ++++++++++++++++++++++++++++++++ docs/zh-CN/Quickstart.md | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+) diff --git a/docs/en/Quickstart.md b/docs/en/Quickstart.md index a580d03d8..382ee1839 100644 --- 
a/docs/en/Quickstart.md +++ b/docs/en/Quickstart.md @@ -52,6 +52,38 @@ To infer with API models (GPT-4v, Gemini-Pro-V, etc.) or use LLM APIs as the **j ``` - Fill the blanks with your API keys (if necessary). Those API keys will be automatically loaded when doing the inference and evaluation. + +## 🌐 Supported Model Provider: AI/ML API Integration + +VLMEvalKit now includes first-class support for [AI/ML API](https://aimlapi.com/app/?utm_source=VLMEvalKit&utm_medium=github&utm_campaign=integration) — a unified provider of 300+ popular models, including: + +* 🧠 OpenAI (GPT-4o, GPT-3.5) +* 🐑 Claude 3.5 Series +* 🔹 Gemini 1.5 Pro / Flash +* 📚 DeepSeek, LLaMA3, Mistral and more + +**Key Highlights of AI/ML API:** + +* ⚡ High rate limits +* 💼 Enterprise-grade uptime +* ♻ Fully OpenAI-compatible interface + +📘 [Documentation](https://docs.aimlapi.com/?utm_source=VLMEvalKit&utm_medium=github&utm_campaign=integration) + +🔗 [Explore Models](https://aimlapi.com/models/?utm_source=VLMEvalKit&utm_medium=github&utm_campaign=integration) + +To enable AI/ML API, set your key: + +```bash +AIML_API_KEY=your_api_key +``` + +Then use any supported AIMLAPI model in the `--model` argument. For example: + +```bash +python run.py --data MMBench_DEV_EN --model aimlapi_gpt_4o +``` + ## Step 1. Configuration **VLM Configuration**: All VLMs are configured in `vlmeval/config.py`. Few legacy VLMs (like MiniGPT-4, LLaVA-v1-7B) requires additional configuration (configuring the code / model_weight root in the config file). During evaluation, you should use the model name specified in `supported_VLM` in `vlmeval/config.py` to select the VLM. Make sure you can successfully infer with the VLM before starting the evaluation with the following command `vlmutil check {MODEL_NAME}`. diff --git a/docs/zh-CN/Quickstart.md b/docs/zh-CN/Quickstart.md index a5436f9f7..71b4ffeb2 100644 --- a/docs/zh-CN/Quickstart.md +++ b/docs/zh-CN/Quickstart.md @@ -51,6 +51,38 @@ pip install -e . 
``` - 如果需要使用 API 在对应键值空白处填写上你的密钥。这些 API 密钥将在进行推理和评估时自动加载。 + +## 🌐 支持的模型服务商：AI/ML API 接入 + +VLMEvalKit 现已原生支持 [AI/ML API](https://aimlapi.com/app/?utm_source=VLMEvalKit&utm_medium=github&utm_campaign=integration) —— 一个统一的接口平台，整合了 300+ 主流模型，包括： + +* 🧠 OpenAI（GPT-4o, GPT-3.5） +* 🐑 Claude 3.5 系列 +* 🔹 Gemini 1.5 Pro / Flash +* 📚 DeepSeek，LLaMA3，Mistral 等 + +**AI/ML API 特点：** + +* ⚡ 高请求速率限制 +* 💼 企业级稳定性 +* ♻ 完全兼容 OpenAI API 接口 + +📘 [使用文档](https://docs.aimlapi.com/?utm_source=VLMEvalKit&utm_medium=github&utm_campaign=integration) + +🔗 [模型目录](https://aimlapi.com/models/?utm_source=VLMEvalKit&utm_medium=github&utm_campaign=integration) + +要启用 AI/ML API，请设置环境变量： + +```bash +AIML_API_KEY=你的密钥 +``` + +随后在 `--model` 参数中使用相应的 AIMLAPI 模型，例如： + +```bash +python run.py --data MMBench_DEV_EN --model aimlapi_gpt_4o +``` + ## 第1步 配置 **VLM 配置**：所有 VLMs 都在 `vlmeval/config.py` 中配置。对于某些 VLMs（如 MiniGPT-4、LLaVA-v1-7B），需要额外的配置（在配置文件中配置代码 / 模型权重根目录）。在评估时，你应该使用 `vlmeval/config.py` 中 `supported_VLM` 指定的模型名称来选择 VLM。确保在开始评估之前，你可以成功使用 VLM 进行推理，使用以下命令 `vlmutil check {MODEL_NAME}`。