vllm自动化压测脚本

生成结果benchmark_summary.txt如下所示:

生成的 CSV 内容示例如下:
input_len,output_len,num_prompts,concurrency,output_throughput,total_throughput,duration_sec,mean_ttft_ms,mean_tpot_ms,mean_itl_ms
2048,2048,4,1,523.4,1046.8,12.3,21.5,11.2,9.7
32768,2048,16,4,487.1,982.6,35.6,48.3,13.9,10.4

日志保存在 logs/ 下,每次压测的结果 JSON 文件(由 --result-filename 指定)保存在 results/ 下,自动化压测脚本如下:

bash 复制代码
#!/usr/bin/env bash

set -euo pipefail

###############################
# ===== Benchmark configuration =====
###############################
BACKEND="vllm"
BASE_URL="http://127.0.0.1:30734"
ENDPOINT="/v1/completions"
DATASET_NAME="random"
MODEL="qwen3.5"
TOKENIZER="/model/Qwen3.5-27B"
SEED="12345"

# Sweep dimensions: one run per (input_len x output_len x concurrency) combo.
RANDOM_INPUT_LENS=(2048 32768 65536 131072)
RANDOM_OUTPUT_LENS=(2048)
MAX_CONCURRENCY_LIST=(1 4 8 16)

# Output locations (created below if absent).
LOG_DIR="logs"
RESULT_DIR="results"
SUMMARY_TXT="benchmark_summary.txt"

mkdir -p "${LOG_DIR}" "${RESULT_DIR}"

# Seed the summary CSV with its header row on the first run only;
# subsequent runs append data rows below the existing header.
if [[ ! -f "${SUMMARY_TXT}" ]]; then
  printf '%s\n' \
    "input_len,output_len,num_prompts,concurrency,output_throughput,total_throughput,duration_sec,mean_ttft_ms,mean_tpot_ms,mean_itl_ms" \
    > "${SUMMARY_TXT}"
fi

###############################
# ===== Helper functions =====
###############################

# parse_metric FILE KEY
# Prints the value of top-level KEY in the JSON file FILE, or "0" when the
# key is absent / null / false (jq's // alternative operator).
# KEY is passed via --arg so it is treated as data, never spliced into the
# jq program text — robust against dots or special characters in the key,
# unlike the interpolated ".${key}" form.
parse_metric() {
  local json_file="$1"
  local key="$2"
  jq -r --arg key "$key" '.[$key] // "0"' "$json_file"
}

###############################
# ===== Main sweep loop =====
###############################
# One benchmark run per (input_len, output_len, max_concurrency) combination.
for input_len in "${RANDOM_INPUT_LENS[@]}"; do
  for output_len in "${RANDOM_OUTPUT_LENS[@]}"; do
    for max_concurrency in "${MAX_CONCURRENCY_LIST[@]}"; do

      # Queue ~4 requests per concurrency slot so the server stays saturated.
      num_prompts=$((max_concurrency * 4))

      base_name="input=${input_len},output=${output_len},num_prompts=${num_prompts},concurrency=${max_concurrency}"
      log_path="${LOG_DIR}/${base_name}.log"
      result_json="${RESULT_DIR}/${base_name}.json"

      echo "=========================================="
      echo "RUNNING:"
      echo "  input_len=${input_len}"
      echo "  output_len=${output_len}"
      echo "  max_concurrency=${max_concurrency}"
      echo "  num_prompts=${num_prompts}"
      echo "  log=${log_path}"
      echo "  result=${result_json}"
      echo "=========================================="

      # Run the benchmark; stdout+stderr go to the per-run log file.
      # A single failed run must NOT abort the whole sweep (under `set -e`
      # an unchecked failure would kill the script), so trap the failure,
      # report it on stderr, and move on to the next combination.
      if ! vllm bench serve \
        --backend "${BACKEND}" \
        --base-url "${BASE_URL}" \
        --endpoint "${ENDPOINT}" \
        --dataset-name "${DATASET_NAME}" \
        --model "${MODEL}" \
        --tokenizer "${TOKENIZER}" \
        --seed "${SEED}" \
        --random-input-len "${input_len}" \
        --random-output-len "${output_len}" \
        --num-prompts "${num_prompts}" \
        --max-concurrency "${max_concurrency}" \
        --save-result \
        --result-dir "${RESULT_DIR}" \
        --result-filename "$(basename "${result_json}")" \
        > "${log_path}" 2>&1; then
        echo "ERROR: benchmark run failed (see ${log_path}); skipping this combination" >&2
        continue
      fi

      echo "DONE: ${log_path}"
      echo

      # =========================
      # Parse metrics from the result JSON and append one CSV row
      # =========================
      if [[ -f "${result_json}" ]]; then
        output_throughput=$(parse_metric "${result_json}" "output_throughput")
        total_throughput=$(parse_metric "${result_json}" "total_token_throughput")
        duration=$(parse_metric "${result_json}" "duration")
        mean_ttft=$(parse_metric "${result_json}" "mean_ttft_ms")
        mean_tpot=$(parse_metric "${result_json}" "mean_tpot_ms")
        mean_itl=$(parse_metric "${result_json}" "mean_itl_ms")

        # printf normalizes every float metric to one decimal place.
        printf "%d,%d,%d,%d,%.1f,%.1f,%.1f,%.1f,%.1f,%.1f\n" \
          "${input_len}" \
          "${output_len}" \
          "${num_prompts}" \
          "${max_concurrency}" \
          "${output_throughput}" \
          "${total_throughput}" \
          "${duration}" \
          "${mean_ttft}" \
          "${mean_tpot}" \
          "${mean_itl}" \
          >> "${SUMMARY_TXT}"
      else
        # Diagnostics belong on stderr so the summary pipeline stays clean.
        echo "WARNING: Result JSON not found: ${result_json}" >&2
      fi

    done
  done
done

# Final summary banner: where to find logs, per-run JSON results, and the CSV.
printf '%s\n' \
  "==========================================" \
  "All benchmarks finished." \
  "Logs are stored in: ${LOG_DIR}" \
  "Results are stored in: ${RESULT_DIR}" \
  "Summary TXT: ${SUMMARY_TXT}" \
  "=========================================="
相关推荐
YoanAILab3 小时前
AI 推理系统架构怎么选?图像生成与文本生成的分层选型思路(ComfyUI / Dify / vLLM / Triton)
人工智能·系统架构·comfyui·dify·vllm·ai工程
NeilNiu17 小时前
本地部署Ollama及部署模型
vllm
Forrit1 天前
关于vLLM框架
vllm
SunnyRivers1 天前
快速理解vLLM命令行工具serve
命令行·serve·vllm
田井中律.2 天前
linux部署VLLM、大模型(详细图文)
vllm
陈 洪 伟2 天前
大模型推理引擎vLLM(19): vLLM中的DBO(Dual Batch Overlap)功能代码实现分析
vllm
AIGC_北苏3 天前
Qwen3.5开源模型实测
vllm
npupengsir3 天前
nano vllm代码详解
人工智能·算法·vllm
冰封剑心3 天前
容器参数错误,更换参数
人工智能·计算机视觉·vllm