open-compass
diff --git a/‎.pre-commit-config.yaml‎
Lines changed: 6 additions & 2 deletions b/‎.pre-commit-config.yaml‎
Lines changed: 6 additions & 2 deletions
diff --git a/‎docs/en/conf.py‎
Lines changed: 2 additions & 2 deletions b/‎docs/en/conf.py‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎docs/zh-CN/conf.py‎
Lines changed: 2 additions & 2 deletions b/‎docs/zh-CN/conf.py‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎requirements.txt‎
Lines changed: 1 addition & 0 deletions b/‎requirements.txt‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎run.py‎
Lines changed: 26 additions & 12 deletions b/‎run.py‎
Lines changed: 26 additions & 12 deletions
diff --git a/‎run_api.py‎
Lines changed: 6 additions & 7 deletions b/‎run_api.py‎
Lines changed: 6 additions & 7 deletions
diff --git a/‎scripts/apires_scan.py‎
Lines changed: 4 additions & 1 deletion b/‎scripts/apires_scan.py‎
Lines changed: 4 additions & 1 deletion
diff --git a/‎scripts/auto_run.py‎
Lines changed: 5 additions & 1 deletion b/‎scripts/auto_run.py‎
Lines changed: 5 additions & 1 deletion
diff --git a/‎scripts/data_browser.py‎
Lines changed: 13 additions & 8 deletions b/‎scripts/data_browser.py‎
Lines changed: 13 additions & 8 deletions
diff --git a/‎scripts/mmb_eval_gradio.py‎
Lines changed: 11 additions & 3 deletions b/‎scripts/mmb_eval_gradio.py‎
Lines changed: 11 additions & 3 deletions
@@ -15,15 +15,19 @@ exclude: |
   )
 repos:
   - repo: https://github.com/PyCQA/flake8
-    rev: 6.1.0
+    rev: 7.1.2
     hooks:
       - id: flake8
         args:
           [
             "--max-line-length=120",
-            "--ignore=F401,F403,F405,E402,E722,E741,W503,E231,E702",
+            "--ignore=W503",
           ]
         exclude: ^configs/
+  - repo: https://github.com/PyCQA/isort
+    rev: 6.0.1
+    hooks:
+      - id: isort
   - repo: https://github.com/google/yapf
     rev: v0.43.0
     hooks:
 
@@ -10,9 +10,9 @@
 # If extensions (or modules to document with autodoc) are in another directory,
 # add these directories to sys.path here. If the directory is relative to the
 # documentation root, use os.path.abspath to make it absolute, like shown here.
-#
-import os
+
 import ast
+import os
 import subprocess
 import sys
 
 
@@ -10,9 +10,9 @@
 # If extensions (or modules to document with autodoc) are in another directory,
 # add these directories to sys.path here. If the directory is relative to the
 # documentation root, use os.path.abspath to make it absolute, like shown here.
-#
-import os
+
 import ast
+import os
 import subprocess
 import sys
 
 
@@ -52,6 +52,7 @@ setuptools
 sty
 sympy
 tabulate
+termcolor
 tiktoken
 timeout-decorator
 timm
 
@@ -1,8 +1,15 @@
+import argparse
+import copy as cp
+import datetime
 import json
 import os
+import os.path as osp
 import subprocess
 from functools import partial
 
+import pandas as pd
+from tabulate import tabulate
+
 
 # GET the number of GPUs on the node without importing libs like torch
 def get_gpu_list():
@@ -14,14 +21,14 @@ def get_gpu_list():
         ps = subprocess.Popen(('nvidia-smi', '--list-gpus'), stdout=subprocess.PIPE)
         output = subprocess.check_output(('wc', '-l'), stdin=ps.stdout)
         return list(range(int(output)))
-    except:
+    except Exception:
         return []
 
 
 RANK = int(os.environ.get('RANK', 0))
 WORLD_SIZE = int(os.environ.get('WORLD_SIZE', 1))
-LOCAL_WORLD_SIZE = int(os.environ.get("LOCAL_WORLD_SIZE",1))
-LOCAL_RANK = int(os.environ.get("LOCAL_RANK",1))
+LOCAL_WORLD_SIZE = int(os.environ.get("LOCAL_WORLD_SIZE", 1))
+LOCAL_RANK = int(os.environ.get("LOCAL_RANK", 1))
 
 GPU_LIST = get_gpu_list()
 if LOCAL_WORLD_SIZE > 1 and len(GPU_LIST):
@@ -40,12 +47,13 @@ def get_gpu_list():
 
 
 from vlmeval.config import supported_VLM
-from vlmeval.dataset.video_dataset_config import supported_video_datasets
 from vlmeval.dataset import build_dataset
+from vlmeval.dataset.video_dataset_config import supported_video_datasets
 from vlmeval.inference import infer_data_job
-from vlmeval.inference_video import infer_data_job_video
 from vlmeval.inference_mt import infer_data_job_mt
-from vlmeval.smp import *
+from vlmeval.inference_video import infer_data_job_video
+from vlmeval.smp import (MMBenchOfficialServer, get_pred_file_format, githash, listinstr, load,
+                         load_env, ls, prepare_reuse_files, proxy_set, setup_logger, timestr)
 from vlmeval.utils.result_transfer import MMMU_result_transfer, MMTBench_result_transfer
 
 
@@ -74,8 +82,9 @@ def build_model_from_config(cfg, model_name, use_vllm=False):
 
 
 def build_dataset_from_config(cfg, dataset_name):
-    import vlmeval.dataset
     import inspect
+
+    import vlmeval.dataset
     config = cp.deepcopy(cfg[dataset_name])
     if config == {}:
         return supported_video_datasets[dataset_name]()
@@ -201,7 +210,6 @@ def parse_args():
 
 
 def main():
-    logger = get_logger('RUN')
     args = parse_args()
     use_config, cfg = False, None
     if args.config is not None:
@@ -212,15 +220,19 @@ def main():
     else:
         assert len(args.data), '--data should be a list of data files'
 
+    if 'MMEVAL_ROOT' in os.environ:
+        args.work_dir = os.environ['MMEVAL_ROOT']
+
+    date, commit_id = timestr('day'), githash(digits=8)
+    eval_id = f"T{date}_G{commit_id}"
+    logger = setup_logger(log_file=os.path.join(args.work_dir, 'logs', f'{eval_id}_{timestr()}.log'))
+
     if RANK == 0:
         if not args.reuse:
             logger.warning('--reuse is not set, will not reuse previous (before one day) temporary files')
         else:
             logger.warning('--reuse is set, will reuse the latest prediction & temporary pickle files')
 
-    if 'MMEVAL_ROOT' in os.environ:
-        args.work_dir = os.environ['MMEVAL_ROOT']
-
     if not use_config:
         for k, v in supported_VLM.items():
             if hasattr(v, 'keywords') and 'retry' in v.keywords and args.retry is not None:
@@ -232,8 +244,8 @@ def main():
 
         # If FWD_API is set, will use class `GPT4V` for all API models in the config
         if os.environ.get('FWD_API', None) == '1':
-            from vlmeval.config import api_models as supported_APIs
             from vlmeval.api import GPT4V
+            from vlmeval.config import api_models as supported_APIs
             for m in args.model:
                 if m in supported_APIs:
                     kws = supported_VLM[m].keywords
@@ -248,6 +260,7 @@ def main():
         )
 
     for _, model_name in enumerate(args.model):
+        logger.info(f'=========== {model_name} ===========')
         model = None
         date, commit_id = timestr('day'), githash(digits=8)
         eval_id = f"T{date}_G{commit_id}"
@@ -267,6 +280,7 @@ def main():
             model = build_model_from_config(cfg['model'], model_name, args.use_vllm)
 
         for _, dataset_name in enumerate(args.data):
+            logger.info(f'----------- {dataset_name} -----------')
             if WORLD_SIZE > 1:
                 dist.barrier()
 
 
@@ -1,20 +1,19 @@
+import argparse
 import asyncio
+import datetime
 import json
 import os
-import argparse
-import datetime
 from functools import partial
 from pathlib import Path
 from typing import List
 
-from vlmeval.config import supported_VLM
-from vlmeval.dataset import build_dataset
-from vlmeval.smp import *
 from vlmeval.api import LMDeployAPI
 from vlmeval.api.adapters import get_adapter_registry
-
+from vlmeval.config import supported_VLM
+from vlmeval.dataset import build_dataset
 from vlmeval.inference_api import APIEvalPipeline, DatasetConfig
-
+from vlmeval.smp import (get_pred_file_format, githash, listinstr, load_env, prepare_reuse_files,
+                         setup_logger, timestr)
 
 group_dic = {
     'general-mini': ['MMMU_Pro_10c'],
 
@@ -1,6 +1,9 @@
+import os.path as osp
 import sys
-from vlmeval import *
+
 from vlmeval.dataset import SUPPORTED_DATASETS
+from vlmeval.smp import listinstr, load, ls
+
 FAIL_MSG = 'Failed to obtain answer via API.'
 
 root = sys.argv[1]
 
@@ -1,6 +1,10 @@
 import argparse
-from vlmeval.smp import *
+import os
+import os.path as osp
+
 from vlmeval.config import supported_VLM
+from vlmeval.smp import listinstr
+
 
 def is_api(x):
     return getattr(supported_VLM[x].func, 'is_api', False)
 
@@ -4,20 +4,25 @@
 # browse data in http://127.0.0.1:10064
 """
 
-import os
+import argparse
+import base64
+import copy
 import io
 import json
-import copy
+import os
+import os.path as osp
+import string
 import time
-import gradio as gr
-import base64
-from PIL import Image
 from io import BytesIO
-from argparse import Namespace
-# from llava import conversation as conversation_lib
 from typing import Sequence
-from vlmeval import *
+
+import gradio as gr
+import pandas as pd
+from PIL import Image
+
+from vlmeval.api import OpenAIWrapper
 from vlmeval.dataset import SUPPORTED_DATASETS, build_dataset
+from vlmeval.smp import LMUDataRoot, encode_image_file_to_base64, load
 
 SYS = "You are a helpful assistant. Your job is to faithfully translate all provided text into Chinese faithfully. "
 
 
@@ -1,7 +1,15 @@
-from vlmeval.smp import *
-from vlmeval.tools import EVAL
-from vlmeval.dataset import build_dataset
+import datetime
+import os
+import os.path as osp
+import shutil
+
 import gradio as gr
+import numpy as np
+import pandas as pd
+
+from vlmeval.dataset import build_dataset
+from vlmeval.smp import LMUDataRoot, cn_string, dump, load, md5
+from vlmeval.tools import EVAL
 
 HEADER = """
 # Welcome to MMBench👏👏