diff --git a/vlmeval/api/__init__.py b/vlmeval/api/__init__.py
old mode 100644
new mode 100755
index 82ae29d50..e82cce0c2
--- a/vlmeval/api/__init__.py
+++ b/vlmeval/api/__init__.py
@@ -1,43 +1,44 @@
-from .gpt import OpenAIWrapper, GPT4V
-from .hf_chat_model import HFChatModel
-from .gemini import GeminiWrapper, Gemini
-from .qwen_vl_api import QwenVLWrapper, QwenVLAPI, Qwen2VLAPI
-from .qwen_api import QwenAPI
-from .claude import Claude_Wrapper, Claude3V
-from .reka import Reka
-from .glm_vision import GLMVisionAPI
-from .cloudwalk import CWWrapper
-from .sensechat_vision import SenseChatVisionAPI
-from .siliconflow import SiliconFlowAPI, TeleMMAPI
-from .telemm import TeleMM2_API
-from .telemm_thinking import TeleMM2Thinking_API
-from .hunyuan import HunyuanVision
-from .bailingmm import bailingMMAPI
-from .bluelm_api import BlueLMWrapper, BlueLM_API
-from .jt_vl_chat import JTVLChatAPI
-from .jt_vl_chat_mini import JTVLChatAPI_Mini, JTVLChatAPI_2B
-from .video_chat_online_v2 import VideoChatOnlineV2API
-from .taiyi import TaiyiAPI
-from .lmdeploy import LMDeployAPI
-from .arm_thinker import ARM_thinker
-from .taichu import TaichuVLAPI, TaichuVLRAPI
-from .doubao_vl_api import DoubaoVL
-from .mug_u import MUGUAPI
-from .kimivl_api import KimiVLAPIWrapper, KimiVLAPI
-from .rbdashmm_chat3_api import RBdashMMChat3_API, RBdashChat3_5_API
-from .rbdashmm_chat3_5_api import RBdashMMChat3_78B_API, RBdashMMChat3_5_38B_API
-from .together import TogetherAPI
-from .gcp_vertex import GCPVertexAPI
-from .bedrock import BedrockAPI
-
-__all__ = [
-    'OpenAIWrapper', 'HFChatModel', 'GeminiWrapper', 'GPT4V', 'Gemini',
-    'QwenVLWrapper', 'QwenVLAPI', 'QwenAPI', 'Claude3V', 'Claude_Wrapper',
-    'Reka', 'GLMVisionAPI', 'CWWrapper', 'SenseChatVisionAPI', 'HunyuanVision',
-    'Qwen2VLAPI', 'BlueLMWrapper', 'BlueLM_API', 'JTVLChatAPI', 'JTVLChatAPI_Mini', 'JTVLChatAPI_2B',
-    'bailingMMAPI', 'TaiyiAPI', 'TeleMMAPI', 'SiliconFlowAPI', 'LMDeployAPI', 'ARM_thinker',
-    'TaichuVLAPI', 'TaichuVLRAPI', 'DoubaoVL', "MUGUAPI", 'KimiVLAPIWrapper', 'KimiVLAPI',
-    'RBdashMMChat3_API', 'RBdashChat3_5_API', 'RBdashMMChat3_78B_API', 'RBdashMMChat3_5_38B_API',
-    'VideoChatOnlineV2API', 'TeleMM2_API', 'TeleMM2Thinking_API',
-    'TogetherAPI', 'GCPVertexAPI', 'BedrockAPI',
-]
+from .gpt import OpenAIWrapper, GPT4V
+from .hf_chat_model import HFChatModel
+from .gemini import GeminiWrapper, Gemini
+from .qwen_vl_api import QwenVLWrapper, QwenVLAPI, Qwen2VLAPI
+from .qwen_api import QwenAPI
+from .claude import Claude_Wrapper, Claude3V
+from .reka import Reka
+from .glm_vision import GLMVisionAPI
+from .cloudwalk import CWWrapper
+from .sensechat_vision import SenseChatVisionAPI
+from .siliconflow import SiliconFlowAPI, TeleMMAPI
+from .telemm import TeleMM2_API
+from .telemm_thinking import TeleMM2Thinking_API
+from .hunyuan import HunyuanVision
+from .bailingmm import bailingMMAPI
+from .bluelm_api import BlueLMWrapper, BlueLM_API
+from .jt_vl_chat import JTVLChatAPI
+from .jt_vl_chat_mini import JTVLChatAPI_Mini, JTVLChatAPI_2B
+from .video_chat_online_v2 import VideoChatOnlineV2API
+from .taiyi import TaiyiAPI
+from .lmdeploy import LMDeployAPI
+from .arm_thinker import ARM_thinker
+from .taichu import TaichuVLAPI, TaichuVLRAPI
+from .doubao_vl_api import DoubaoVL
+from .mug_u import MUGUAPI
+from .kimivl_api import KimiVLAPIWrapper, KimiVLAPI
+from .rbdashmm_chat3_api import RBdashMMChat3_API, RBdashChat3_5_API
+from .rbdashmm_chat3_5_api import RBdashMMChat3_78B_API, RBdashMMChat3_5_38B_API
+from .together import TogetherAPI
+from .gcp_vertex import GCPVertexAPI
+from .bedrock import BedrockAPI
+from .video_chat_online_v3 import VideoChatOnlineV3API
+
+__all__ = [
+    'OpenAIWrapper', 'HFChatModel', 'GeminiWrapper', 'GPT4V', 'Gemini',
+    'QwenVLWrapper', 'QwenVLAPI', 'QwenAPI', 'Claude3V', 'Claude_Wrapper',
+    'Reka', 'GLMVisionAPI', 'CWWrapper', 'SenseChatVisionAPI', 'HunyuanVision',
+    'Qwen2VLAPI', 'BlueLMWrapper', 'BlueLM_API', 'JTVLChatAPI', 'JTVLChatAPI_Mini', 'JTVLChatAPI_2B',
+    'bailingMMAPI', 'TaiyiAPI', 'TeleMMAPI', 'SiliconFlowAPI', 'LMDeployAPI', 'ARM_thinker',
+    'TaichuVLAPI', 'TaichuVLRAPI', 'DoubaoVL', 'MUGUAPI', 'KimiVLAPIWrapper', 'KimiVLAPI',
+    'RBdashMMChat3_API', 'RBdashChat3_5_API', 'RBdashMMChat3_78B_API', 'RBdashMMChat3_5_38B_API',
+    'VideoChatOnlineV2API', 'TeleMM2_API', 'TeleMM2Thinking_API',
+    'TogetherAPI', 'GCPVertexAPI', 'BedrockAPI', 'VideoChatOnlineV3API',
+]
diff --git a/vlmeval/api/base_client.py b/vlmeval/api/base_client.py
new file mode 100755
index 000000000..5647c2b18
--- /dev/null
+++ b/vlmeval/api/base_client.py
@@ -0,0 +1,250 @@
+import requests
+import json
+import base64
+import os
+from typing import List, Dict, Any
+
+
+class VLLMClient:
+    def __init__(self, base_url: str = "http://localhost:9100/v1/chat/completions", app_code: str = os.getenv('VLLM_APP_CODE', '')):  # SECURITY fix: token came from source, now from env
+        """
+        Initialize the vLLM client.
+
+        Args:
+            base_url: vLLM server endpoint; app_code: auth token for the Authorization header.
+        """
+        self.base_url = base_url
+        self.app_code = app_code
+
+    def encode_image_to_base64(self, image_path: str) -> str:
+        """
+        Encode an image file as a base64 string.
+
+        Args:
+            image_path: path of the image file.
+
+        Returns:
+            Base64-encoded image content.
+        """
+        with open(image_path, "rb") as image_file:
+            encoded_string = base64.b64encode(image_file.read()).decode('utf-8')
+        return encoded_string
+
+    def create_messages_with_images(
+        self,
+        prompt: str,
+        image_paths: List[str],
+        image_format: str = "base64"
+    ) -> List[Dict]:
+        """
+        Build an OpenAI-style message list that mixes text and images.
+
+        Args:
+            prompt: text prompt.
+            image_paths: list of image paths.
+            image_format: "base64" (embed file content) or "url" (pass path through).
+
+        Returns:
+            Message list.
+        """
+        messages = [
+            {
+                "role": "user",
+                "content": []
+            }
+        ]
+
+        # Text part first.
+        messages[0]["content"].append({
+            "type": "text",
+            "text": prompt
+        })
+
+        # Then one entry per image.
+        for image_path in image_paths:
+            if image_format == "base64":
+                # Read and base64-encode the file.
+                base64_image = self.encode_image_to_base64(image_path)
+                image_content = {
+                    "type": "image_url",
+                    "image_url": {
+                        "url": f"data:image/jpeg;base64,{base64_image}"
+                    }
+                }
+            elif image_format == "url":
+                image_content = {
+                    "type": "image_url",
+                    "image_url": {
+                        "url": image_path
+                    }
+                }
+            else:
+                raise ValueError(f"不支持的图片格式: {image_format}")
+
+            messages[0]["content"].append(image_content)
+
+        return messages
+
+    def stream_completion(
+        self,
+        prompt: str = None,
+        messages: List[Dict] = None,
+        image_paths: List[str] = None,
+        model: str = None,
+        max_tokens: int = None,
+        temperature: float = 0.7,
+        top_p: float = None,
+        stream: bool = True,
+        **kwargs
+    ):
+        """
+        Streaming chat-completion request.
+
+        Args:
+            prompt: text prompt (ignored when `messages` is given).
+            messages: message list (multi-modal supported).
+            image_paths: list of image paths.
+            model: model name.
+            max_tokens: max tokens to generate.
+            temperature: sampling temperature.
+            top_p: top-p sampling parameter.
+            stream: whether to request a streamed response.
+            **kwargs: extra request-body fields.
+
+        Yields:
+            Generated text fragments.
+        """
+        # Build the request body.
+
+        request_data = {
+            "model": model,
+            "stream": stream,
+            **kwargs
+        }
+        if temperature is not None:
+            request_data["temperature"] = temperature
+        if max_tokens is not None:
+            request_data["max_tokens"] = max_tokens
+        if top_p is not None:
+            request_data["top_p"] = top_p  # fix: was a duplicated max_tokens block; top_p was silently dropped
+        # print("request_data:", request_data)  # debug only
+        # Resolve the message payload.
+        if messages is not None:
+            request_data["messages"] = messages
+        elif image_paths is not None:
+            # Image paths given: build a multi-modal message.
+            if prompt is None:
+                prompt = "请描述图片内容"
+            request_data["messages"] = self.create_messages_with_images(prompt, image_paths)
+        elif prompt is not None:
+            # Plain-text message.
+            request_data["messages"] = [
+                {
+                    "role": "user",
+                    "content": prompt
+                }
+            ]
+        else:
+            raise ValueError("必须提供prompt、messages或image_paths中的一个")
+
+        # Send the request.
+        response = requests.post(
+            self.base_url,
+            json=request_data,
+            stream=True,
+            headers={"Content-Type": "application/json", "Authorization": self.app_code}
+        )
+
+        if response.status_code != 200:
+            raise RuntimeError(f"请求失败,状态码: {response.status_code}, 响应: {response.text}")
+
+        # Consume the SSE stream.
+        full_response = ""
+        for line in response.iter_lines():
+            if line:
+                line = line.decode('utf-8')
+
+                # Only SSE data lines carry payload.
+                if line.startswith('data: '):
+                    data = line[6:]  # strip the "data: " prefix
+
+                    # End-of-stream marker.
+                    if data == '[DONE]':
+                        break
+
+                    try:
+                        # Parse the JSON chunk.
+                        json_data = json.loads(data)
+
+                        # Extract the delta content.
+                        if 'choices' in json_data and len(json_data['choices']) > 0:
+                            delta = json_data['choices'][0].get('delta', {})
+                            content = delta.get('content', '')
+
+                            if content:
+                                full_response += content
+                                yield content
+                    except json.JSONDecodeError as e:
+                        print(f"JSON解析错误: {e}, 原始数据: {data}")
+
+        # Generator return value (available via StopIteration.value).
+        return full_response
+
+    def non_stream_completion(
+        self,
+        prompt: str = None,
+        messages: List[Dict] = None,
+        image_paths: List[str] = None,
+        model: str = None,
+        max_tokens: int = 512,
+        temperature: float = 0.7,
+        top_p: float = 0.9,
+        **kwargs
+    ) -> Dict[str, Any]:
+        """
+        Non-streaming chat-completion request.
+
+        Args:
+            Same as stream_completion.
+
+        Returns:
+            The full JSON response.
+        """
+        # Build the request body.
+        request_data = {
+            "model": model,
+            "max_tokens": max_tokens,
+            "temperature": temperature,
+            "top_p": top_p,
+            "stream": False,
+            **kwargs
+        }
+
+        # Resolve the message payload (same as the streaming path).
+        if messages is not None:
+            request_data["messages"] = messages
+        elif image_paths is not None:
+            if prompt is None:
+                prompt = "请描述图片内容"
+            request_data["messages"] = self.create_messages_with_images(prompt, image_paths)
+        elif prompt is not None:
+            request_data["messages"] = [
+                {
+                    "role": "user",
+                    "content": prompt
+                }
+            ]
+        else:
+            raise ValueError("必须提供prompt、messages或image_paths中的一个")
+
+        # Send the request (fix: Authorization header was missing here, unlike the streaming path).
+        response = requests.post(
+            self.base_url,
+            json=request_data,
+            headers={"Content-Type": "application/json", "Authorization": self.app_code}
+        )
+
+        if response.status_code != 200:
+            raise RuntimeError(f"请求失败,状态码: {response.status_code}, 响应: {response.text}")
+
+        return response.json()
diff --git a/vlmeval/api/video_chat_online_v3.py b/vlmeval/api/video_chat_online_v3.py
new file mode 100755
index 000000000..7c42cc909
--- /dev/null
+++ b/vlmeval/api/video_chat_online_v3.py
@@ -0,0 +1,208 @@
+import pandas as pd
+import requests
+import json
+import os
+import base64
+from vlmeval.smp import *
+from vlmeval.api.base import BaseAPI
+from vlmeval.dataset import DATASET_TYPE
+from vlmeval.dataset import img_root_map
+from vlmeval.api.base_client import VLLMClient
+
+API_ENDPOINT = ''
+APP_CODE = ''
+
+class VideoChatOnlineV3Wrapper(BaseAPI):
+    is_api: bool = True
+    INTERLEAVE = False
+
+    def __init__(self,
+                 model: str = 'jtchat',
+                 retry: int = 5,
+                 wait: int = 5,
+                 api_base: str = '',
+                 app_code: str = '',
+                 verbose: bool = True,
+                 system_prompt: str = None,
+                 temperature: float = 0.7,
+                 max_tokens: int = 2048,
+                 proxy: str = None,
+                 **kwargs):
+        self.model = model
+
+        self.temperature = temperature
+        self.max_tokens = max_tokens
+        self.api_base = api_base
+        self.app_code = app_code
+
+
+        super().__init__(wait=wait, retry=retry, system_prompt=system_prompt, verbose=verbose, **kwargs)
+
+    def dump_image(self, line, dataset):
+        """Dump the image(s) of the input line to the corresponding dataset folder.
+
+        Args:
+            line (line of pd.DataFrame): The raw input line.
+            dataset (str): The name of the dataset.
+
+        Returns:
+            str | list[str]: The paths of the dumped images.
+        """
+        ROOT = LMUDataRoot()
+        assert isinstance(dataset, str)
+
+        img_root = os.path.join(ROOT, 'images', img_root_map(dataset))  # fix: old `dataset in img_root_map(dataset)` was a substring test on the mapped name
+        os.makedirs(img_root, exist_ok=True)
+        if 'image' in line:
+            if isinstance(line['image'], list):
+                tgt_path = []
+                assert 'image_path' in line
+                for img, im_name in zip(line['image'], line['image_path']):
+                    path = osp.join(img_root, im_name)
+                    if not read_ok(path):
+                        decode_base64_to_image_file(img, path)
+                    tgt_path.append(path)
+            else:
+                tgt_path = osp.join(img_root, f"{line['index']}.jpg")
+                if not read_ok(tgt_path):
+                    decode_base64_to_image_file(line['image'], tgt_path)
+                tgt_path = [tgt_path]
+        else:
+            assert 'image_path' in line
+            tgt_path = toliststr(line['image_path'])
+
+        return tgt_path
+
+    def use_custom_prompt(self, dataset):
+        assert dataset is not None
+        if listinstr(['MMMU_DEV_VAL','MMMU_TEST'], dataset):
+            return False
+        else:
+            return True
+
+    def build_multi_choice_prompt(self, line, dataset=None):
+        question = line['question']
+        hint = line['hint'] if ('hint' in line and not pd.isna(line['hint'])) else None
+        if hint is not None:
+            question = hint + '\n' + question
+
+        # options = {
+        #     cand: line[cand]
+        #     for cand in string.ascii_uppercase
+        #     if cand in line and not pd.isna(line[cand])
+        # }
+        options = {
+            cand.upper(): line[cand]
+            for cand in string.ascii_letters  # string.ascii_letters covers both lower- and upper-case letters
+            if cand in line and not pd.isna(line[cand])
+        }
+        for key, item in options.items():
+            question += f'\n{key}. {item}'
+        prompt = question
+
+        if len(options):
+            prompt += '\n请直接回答选项字母。' if cn_string(
+                prompt) else "\nAnswer with the option's letter from the given choices directly."
+        else:
+            prompt += '\n请直接回答问题。' if cn_string(prompt) else '\nAnswer the question directly.'
+
+        return prompt
+
+    def build_prompt(self, line, dataset=None):
+        assert self.use_custom_prompt(dataset)
+        assert dataset is None or isinstance(dataset, str)
+
+        tgt_path = self.dump_image(line, dataset)
+
+        if dataset is not None and listinstr(['MME'], dataset):
+            question = line['question']
+            prompt = question + ' Answer the question using a single word or phrase.'
+        elif dataset is not None and listinstr(['HallusionBench'], dataset):
+            question = line['question']
+            prompt = question + ' Please answer yes or no. Answer the question using a single word or phrase.'
+        elif dataset is not None and DATASET_TYPE(dataset) == 'MCQ':
+            prompt = self.build_multi_choice_prompt(line, dataset)
+        elif dataset is not None and DATASET_TYPE(dataset) == 'VQA':
+            if listinstr(['MathVista', 'MathVision','LogicVista','MultimodalCreation','QA_CN',"VQU","Perception_ZJ","OCRBench_v2"], dataset):
+                prompt = line['question']
+            elif listinstr(['LLaVABench'], dataset):
+                question = line['question']
+                prompt = question + '\nAnswer this question in detail.'
+            elif listinstr(['MMVet'], dataset):
+                prompt = line['question']
+            else:
+                question = line['question']
+                prompt = question + '\nAnswer the question using a single word or phrase.'
+        else:
+            prompt = line['question']
+        message = [dict(type='text', value=prompt)]
+        message.extend([dict(type='image', value=s) for s in tgt_path])
+        return message
+
+    def message_to_promptimg(self, message, dataset=None):
+        assert not self.INTERLEAVE
+        model_name = self.__class__.__name__
+        import warnings
+        warnings.warn(
+            f'Model {model_name} does not support interleaved input. '
+            'Will use the first image and aggregated texts as prompt. ')
+        num_images = len([x for x in message if x['type'] == 'image'])
+        if num_images == 0:
+            prompt = '\n'.join([x['value'] for x in message if x['type'] == 'text'])
+            image = None
+        else:
+            prompt = '\n'.join([x['value'] for x in message if x['type'] == 'text'])
+            if dataset == 'BLINK':
+                image = concat_images_vlmeval(
+                    [x['value'] for x in message if x['type'] == 'image'],
+                    target_size=512)
+            else:
+                image = [x['value'] for x in message if x['type'] == 'image'][0]
+        return prompt, image
+
+    def generate_inner(self, inputs, **kwargs):
+        assert isinstance(inputs, str) or isinstance(inputs, list)
+        inputs = [inputs] if isinstance(inputs, str) else inputs
+        dataset = kwargs.get('dataset', None)
+        prompt, image_path = self.message_to_promptimg(message=inputs, dataset=dataset)
+
+        client = VLLMClient(base_url=API_ENDPOINT, app_code=self.app_code)  # pass the wrapper's credential through
+        print("\n=== 示例: 多图输入流式输出 ===")
+        print(API_ENDPOINT)
+        image_paths = [image_path]
+        full_response = ""  # fix: init before the branch so the final print/return cannot hit a NameError
+        for img_path in image_paths:
+            if img_path is None or not os.path.exists(img_path):
+                print(f"警告: 图片文件不存在: {img_path}")
+                # Fall back to a text-only request.
+                image_paths = []
+
+        if image_paths:
+            # prompt = "请描述这些图片的内容"
+            print(f"提示: {prompt}")
+            print(f"图片数量: {len(image_paths)}")
+            print("响应:", end=" ", flush=True)
+
+            # Accumulate the streamed chunks below.
+
+            try:
+                for chunk in client.stream_completion(
+                    prompt=prompt,
+                    image_paths=image_paths,
+                    model=self.model
+                ):
+                    print(chunk, end="", flush=True)
+                    full_response += chunk
+            except Exception as e:
+                print(f"错误: {e}")
+        else:
+            print("无有效图片,跳过示例2")
+
+        print("\n" + "="*50)
+        print("完整输出:",full_response)
+        return 0, full_response, 'Succeeded! '
+
+class VideoChatOnlineV3API(VideoChatOnlineV3Wrapper):
+
+    def generate(self, message, dataset=None):
+        return super(VideoChatOnlineV3API, self).generate(message, dataset=dataset)