-
Notifications
You must be signed in to change notification settings - Fork 211
Expand file tree
/
Copy pathconfig.yaml
More file actions
65 lines (64 loc) · 4.5 KB
/
config.yaml
File metadata and controls
65 lines (64 loc) · 4.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
# config.yaml
env:
  OPENAI_API_KEY: "YOUR_OPENAI_API_KEY" # Not strictly necessary.
  CUDA_VISIBLE_DEVICES: '0,1,2,3,4,5,6,7'
  HYDRA_FULL_ERROR: 1
  N_GPUS: 8
  BASE_MODEL: 'Qwen/Qwen2.5-7B-Instruct' # This model will be served in vllm, used by `agentflow.port`, and serve as the base model of rollout in the training process.
  ROLLOUT_TP_SIZE: 1
  EXPERIMENT_NAME: 'AgentFlow_pro'
  PROJECT_NAME: 'AgentFlow_pro'
  BASE_DATA_DIR: 'data' # This is where to find training and eval data.
  VERBOSITY: 'DEBUG'
  N_WORKERS: 16
  ENABLE_TOOLS: ["Base_Generator_Tool","Python_Coder_Tool","Google_Search_Tool","Wikipedia_Search_Tool"] # If OpenAI API is on, then it can add tools.
  # TOOL_ENGINE: ["gpt-4o-mini","dashscope-qwen2.5-7b-coder-instruct","Default","Default"] # Default means use tool.py's default params. You can set "dashscope" as qwen7B, "gpt-4o-mini" for gpt, or "self" as the training BASE_MODEL.
  # TOOL_ENGINE: ["vllm-Qwen/Qwen2.5-7B-Instruct","vllm-Qwen/Qwen2.5-7B-Instruct","Default","Default"] # If you are not using the Dashscope API, you can use VLLM to serve qwen2.5-7b-instruct on your own server and redesign the llm_engine port.
  MODEL_ENGINE: ["trainable","gpt-4o-mini","gpt-4o-mini","gpt-4o-mini"]
  # MODEL_ENGINE: ["trainable","dashscope","dashscope","dashscope"] # [planner_main, planner_fixed, verifier, executor]. "trainable" means use the BASE_MODEL with different temperatures for train/test. You can also specify specific models like "dashscope", "gpt-4o-mini", etc.
  TOOL_STEPS: 3 # Do not increase beyond 5. It may overflow context.
  TEST_TEMPERATURE: 0.0
  TRAIN_TEMPERATURE: 0.7 # 0.7 - 0.5 is good. Please check that every planner and the executor have this parameter correctly transferred.
  OUTPUT_TYPE: "direct" # Different output mode in the rollout's last output; not strictly necessary for search and math reasoning because the answer should be short.
  AGENT_MAX_TIMEOUT: 500 # 300-500 is good. When steps extend beyond this limit, there may be errors.
python_args:
  agentflow.port: 9999 # This will be sent to agent serving and training.
  algorithm.adv_estimator: 'grpo'
  data.train_files: '${BASE_DATA_DIR}/train/combined_train.parquet' # Mixed NQ search and MathHard, shuffled.
  data.val_files: '${BASE_DATA_DIR}/val/aime24.parquet' # AIME24 for a fast check; the first epoch may fail due to the async start and file lock.
  actor_rollout_ref.rollout.tensor_model_parallel_size: '${ROLLOUT_TP_SIZE}'
  trainer.n_gpus_per_node: '${N_GPUS}'
  data.train_batch_size: 32
  actor_rollout_ref.rollout.n: 8
  actor_rollout_ref.actor.ppo_mini_batch_size: 8
  actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu: 4
  actor_rollout_ref.rollout.log_prob_micro_batch_size_per_gpu: 4
  actor_rollout_ref.rollout.multi_turn.format: 'hermes'
  actor_rollout_ref.model.path: '${BASE_MODEL}'
  data.max_prompt_length: 18432 # This is safe here because the qwen planner & executor and dashscope/4o-mini's tools will never prematurely end the output at this length.
  data.max_response_length: 2048 # Make sure this is transferred all the way down.
  data.truncation: 'truncate' # If this is set to "error", then the process will be shut down when the input length exceeds max_prompt_length + max_response_length.
  trainer.val_before_train: true # If you restart from a CKPT, you can disable this to save time.
  actor_rollout_ref.actor.optim.lr: 1.0e-6 # Decimal point required: YAML 1.1 loaders (e.g. PyYAML) parse bare "1e-6" as a string, not a float.
  actor_rollout_ref.model.use_remove_padding: true
  actor_rollout_ref.actor.use_kl_loss: true
  actor_rollout_ref.actor.kl_loss_coef: 0.001
  actor_rollout_ref.actor.entropy_coeff: 0.0 # Maybe this can somehow prevent model repetition?
  actor_rollout_ref.actor.clip_ratio_low: 0.2
  actor_rollout_ref.actor.clip_ratio_high: 0.3
  actor_rollout_ref.model.enable_gradient_checkpointing: true
  actor_rollout_ref.actor.fsdp_config.param_offload: false
  actor_rollout_ref.actor.fsdp_config.optimizer_offload: false
  actor_rollout_ref.rollout.name: 'vllm'
  actor_rollout_ref.rollout.gpu_memory_utilization: 0.6 # 0.55-0.65 is fine. If it's too small, the BASE_MODEL inference will be slow. If it's too large, the KV cache and other extra saved logic will cause OOM.
  actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu: 4
  actor_rollout_ref.ref.fsdp_config.param_offload: false
  algorithm.use_kl_in_reward: false
  trainer.critic_warmup: 0
  trainer.logger: ['console', 'wandb']
  trainer.project_name: '${PROJECT_NAME}'
  trainer.experiment_name: '${EXPERIMENT_NAME}'
  trainer.nnodes: 1
  trainer.save_freq: 2 # This is to ensure the CKPT exists.
  trainer.test_freq: 2
  trainer.total_epochs: 5