生成的汇总结果文件 benchmark_summary.txt 内容如下所示:
bash
input_len,output_len,num_prompts,concurrency,output_throughput,total_throughput,duration_sec,mean_ttft_ms,mean_tpot_ms,mean_itl_ms
2048,2048,4,1,523.4,1046.8,12.3,21.5,11.2,9.7
32768,2048,16,4,487.1,982.6,35.6,48.3,13.9,10.4
每次压测的日志保存在 logs/ 目录下,结果 JSON 文件(文件名由 --result-filename 指定)保存在 results/ 目录下。自动化压测脚本如下:
bash
#!/usr/bin/env bash
set -euo pipefail

###############################
# ===== Configuration =====
###############################

# Server under test and the benchmark client settings.
BACKEND='vllm'
BASE_URL='http://127.0.0.1:30734'
ENDPOINT='/v1/completions'
DATASET_NAME='random'
MODEL='qwen3.5'
TOKENIZER='/model/Qwen3.5-27B'
SEED='12345'

# Sweep dimensions: every combination of these values is benchmarked.
RANDOM_INPUT_LENS=(2048 32768 65536 131072)
RANDOM_OUTPUT_LENS=(2048)
MAX_CONCURRENCY_LIST=(1 4 8 16)

# Output locations, relative to the current working directory.
LOG_DIR='logs'
RESULT_DIR='results'
SUMMARY_TXT='benchmark_summary.txt'

mkdir -p "${LOG_DIR}" "${RESULT_DIR}"

# Write the CSV header only when the summary file does not exist yet, so rows
# from earlier invocations are kept and new rows are appended after them.
[[ -f "${SUMMARY_TXT}" ]] || printf '%s\n' \
  'input_len,output_len,num_prompts,concurrency,output_throughput,total_throughput,duration_sec,mean_ttft_ms,mean_tpot_ms,mean_itl_ms' \
  > "${SUMMARY_TXT}"
###############################
# ===== Utility functions =====
###############################
#######################################
# Print one numeric metric from a benchmark result JSON file.
# Arguments:
#   $1 - path to the JSON file
#   $2 - name of a top-level field in that file
# Outputs:
#   The field's value on stdout, or 0 when the field is missing, null, or
#   cannot be converted to a number.
# Returns:
#   jq's exit status (non-zero if the file is unreadable or not valid JSON).
#######################################
parse_metric() {
  local json_file=$1
  local key=$2

  # The key is passed with --arg instead of being spliced into the jq program,
  # so names that are not plain identifiers (e.g. containing '-') still work.
  # `tonumber?` discards anything non-numeric so that callers formatting the
  # result with printf %f never receive a value printf would reject.
  jq -r --arg key "$key" '(.[$key]? | tonumber?) // 0' "$json_file"
}
###############################
# ===== Main loop =====
###############################
# Sweep every (input length, output length, concurrency) combination: run one
# `vllm bench serve` per combination, keep its console output under LOG_DIR,
# and append one CSV row built from the saved result JSON to SUMMARY_TXT.
for input_len in "${RANDOM_INPUT_LENS[@]}"; do
  for output_len in "${RANDOM_OUTPUT_LENS[@]}"; do
    for max_concurrency in "${MAX_CONCURRENCY_LIST[@]}"; do
      # Four prompts per unit of concurrency.
      num_prompts=$((max_concurrency * 4))

      # The run parameters are encoded in the file name, so the log and the
      # result JSON of one run share the same stem.
      base_name="input=${input_len},output=${output_len},num_prompts=${num_prompts},concurrency=${max_concurrency}"
      log_path="${LOG_DIR}/${base_name}.log"
      result_json="${RESULT_DIR}/${base_name}.json"

      echo "=========================================="
      echo "RUNNING:"
      echo " input_len=${input_len}"
      echo " output_len=${output_len}"
      echo " max_concurrency=${max_concurrency}"
      echo " num_prompts=${num_prompts}"
      echo " log=${log_path}"
      echo " result=${result_json}"
      echo "=========================================="

      # Run the benchmark as an `if` condition: with `set -e` active, a bare
      # failing command would terminate the script and drop every remaining
      # combination. A failed run is reported and skipped instead, which also
      # avoids summarising a stale result file left by an earlier invocation.
      if ! vllm bench serve \
        --backend "${BACKEND}" \
        --base-url "${BASE_URL}" \
        --endpoint "${ENDPOINT}" \
        --dataset-name "${DATASET_NAME}" \
        --model "${MODEL}" \
        --tokenizer "${TOKENIZER}" \
        --seed "${SEED}" \
        --random-input-len "${input_len}" \
        --random-output-len "${output_len}" \
        --num-prompts "${num_prompts}" \
        --max-concurrency "${max_concurrency}" \
        --save-result \
        --result-dir "${RESULT_DIR}" \
        --result-filename "${base_name}.json" \
        > "${log_path}" 2>&1; then
        echo "WARNING: Benchmark failed, no summary row written (see ${log_path})" >&2
        echo
        continue
      fi
      echo "DONE: ${log_path}"
      echo

      # =========================
      # Extract metrics
      # =========================
      if [[ -f "${result_json}" ]]; then
        output_throughput=$(parse_metric "${result_json}" "output_throughput")
        total_throughput=$(parse_metric "${result_json}" "total_token_throughput")
        duration=$(parse_metric "${result_json}" "duration")
        mean_ttft=$(parse_metric "${result_json}" "mean_ttft_ms")
        mean_tpot=$(parse_metric "${result_json}" "mean_tpot_ms")
        mean_itl=$(parse_metric "${result_json}" "mean_itl_ms")

        # Format every metric with one decimal place. LC_ALL=C pins the
        # decimal separator to '.', since a locale that uses ',' would both
        # reject the dotted input values and corrupt the comma-separated row.
        LC_ALL=C printf "%d,%d,%d,%d,%.1f,%.1f,%.1f,%.1f,%.1f,%.1f\n" \
          "${input_len}" \
          "${output_len}" \
          "${num_prompts}" \
          "${max_concurrency}" \
          "${output_throughput}" \
          "${total_throughput}" \
          "${duration}" \
          "${mean_ttft}" \
          "${mean_tpot}" \
          "${mean_itl}" \
          >> "${SUMMARY_TXT}"
      else
        echo "WARNING: Result JSON not found: ${result_json}"
      fi
    done
  done
done
# Closing banner: tell the user where the logs, raw results and the CSV
# summary were written.
printf '%s\n' \
  "==========================================" \
  "All benchmarks finished." \
  "Logs are stored in: ${LOG_DIR}" \
  "Results are stored in: ${RESULT_DIR}" \
  "Summary TXT: ${SUMMARY_TXT}" \
  "=========================================="