BenchmarkSQL is an open-source tool for evaluating database performance. It implements the TPC-C benchmark defined by the Transaction Processing Performance Council (TPC), which measures how well a database handles large numbers of concurrent transactions. The TPC-C scenario models the business of a typical wholesale distributor, with operations such as entering new orders, recording payments, and checking stock levels.
openEuler version: openEuler 22.03 (LTS-SP4)
Dependencies:
JDK, ANT, and R.
Installing the JDK is not covered here; there are plenty of tutorials online.
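For completeness, on openEuler the JDK can usually be installed straight from yum (a sketch; the exact package name is an assumption and may differ by repository):
#yum install -y java-1.8.0-openjdk-devel
#java -version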
Install ANT as follows:
Upload the ANT package to the server, then extract it:
#tar -xvzf apache-ant-1.10.15-bin.tar.gz
Set the environment variables:
Edit /etc/profile and append the following:
export ANT_HOME=/opt/software/ant/apache-ant-1.10.15
export PATH=$PATH:$ANT_HOME/bin
Then execute:
#source /etc/profile
to make the environment variables take effect.
Verify the installation:
#ant -version

Install R as follows:
1. Upload the archive (R-4.4.2.tar.gz) to the server.
2. Extract R-4.4.2.tar.gz:
#tar -xvf R-4.4.2.tar.gz
3. Enter the extracted directory, then configure, build, and install (this takes a while, so be patient):
#./configure && make && make install
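If you want R under its own prefix and a faster parallel build, the same step can be varied like this (a sketch; the prefix path is an assumption):
#./configure --prefix=/usr/local/R-4.4.2 && make -j$(nproc) && make install
With a custom prefix, remember to add /usr/local/R-4.4.2/bin to PATH so that the R executable can be found later by generateReport.sh.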

4. Verify the installation:
#R --version

The following three errors may come up while configuring and building R:
Error 1: configure: error: --with-x=yes (default) and X11 headers/libs are not available
Fix: yum -y install xorg-x11-server-devel libX11-devel libXt-devel
Error 2: configure: error: "liblzma library and headers are required"
Fix: yum install xz-devel.x86_64
Error 3: configure: error: libcurl >= 7.28.0 library and headers are required with support for https
Fix: yum install libcurl-devel
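To avoid hitting these one at a time, the same packages can be installed up front before running ./configure (a sketch, assuming the default openEuler repositories):
#yum install -y xorg-x11-server-devel libX11-devel libXt-devel xz-devel libcurl-devel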
Install BenchmarkSQL:
Create the database and user:
create user benchmarksql with password 'Benchmark123';
create database benchmarksql owner benchmarksql;
Grant the user the required privileges (creating tables, creating indexes, and so on). For convenience, simply make the user an administrator:
alter user benchmarksql with SYSADMIN;
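For reference, these statements are run from a superuser session. On an openGauss-style deployment (suggested by the omm user and port 15000 in the logs below) that would look something like this sketch (the port and database are assumptions; adjust them to your environment, and note that on vanilla PostgreSQL the last statement would be alter user benchmarksql with SUPERUSER instead):
#gsql -d postgres -p 15000 -r
postgres=# create user benchmarksql with password 'Benchmark123';
postgres=# create database benchmarksql owner benchmarksql;
postgres=# alter user benchmarksql with SYSADMIN;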
Extract BenchmarkSQL:
Upload the BenchmarkSQL archive to the server, then extract it:
#unzip benchmarksql-5.0.zip
Build BenchmarkSQL:
Enter the extracted directory and run:
#ant

Create the configuration file:
Enter the run directory:
#cd run
#cp props.pg my_postgres.properties
Edit the configuration file:
#vi my_postgres.properties
Set the following entries:
db=postgres
driver=org.postgresql.Driver
conn=jdbc:postgresql://localhost:15000/benchmarksql
user=benchmarksql
password=Benchmark123
osCollectorDevices=net_ens33 blk_sda
- Configuration reference:
- db=postgres //database type: postgres or mysql
- driver=org.postgresql.Driver //JDBC driver; for MySQL 8.0+ use com.mysql.cj.jdbc.Driver
- conn=jdbc:postgresql://localhost:5432/postgres //PostgreSQL connection string; normally change localhost to the PG server's IP, 5432 to the PG server's port, and postgres to the name of the test database
- user=benchmarksql //database user; the default is usually kept, which is why the benchmarksql user must be created in the database beforehand
- password=PWbmsql //password of the above user
- warehouses=1 //number of warehouses; size it to the server's memory. Each warehouse takes about 100 MB and represents an independent business unit (per the TPC-C standard)
- loadWorkers=4 //number of worker processes used to load the initial data; defaults to 4 and can be tuned, loading gets faster as the worker count grows
- terminals=1 //number of terminals, i.e. concurrent clients; typically 2-6x the total number of CPU threads
- runTxnsPerTerminal=10 //fixed number of transactions each terminal runs. For example, a value of 10 with 32 terminals means the test ends after 320 transactions in total. When this is non-zero, runMins below must be set to 0
- runMins=0 //total test duration in minutes; with runMins=60 the test stops after one hour. When this is non-zero, runTxnsPerTerminal must be 0. The two must never both be positive: runMins bounds the test by elapsed time, while runTxnsPerTerminal bounds it by total transaction count (a combined example follows this list).
- limitTxnsPerMin=300 //cap on the total number of transactions per minute. The cap interacts with terminals: if the terminal count exceeds limitTxnsPerMin, the cap cannot take effect; with, say, 1000 concurrent terminals all firing at once, a 300-per-minute cap is meaningless. For the cap to matter, set it above the terminal count, or disable it by making it very large. The tests here keep the default of 300.
- terminalWarehouseFixed=true //binding between terminals and warehouses. true runs the 4.x-compatible mode in which every terminal is pinned to one fixed warehouse; false spreads the load evenly across the whole database. TPC-C requires each terminal to be bound to a warehouse, so the default true is normally used.
- //The following five values must sum to 100. The defaults (45, 43, 4, 4 & 4) match the mix defined by TPC-C; in practice the weights can be adjusted to model other scenarios.
newOrderWeight=45
paymentWeight=43
orderStatusWeight=4
deliveryWeight=4
stockLevelWeight=4
- resultDirectory=my_result_%tY-%tm-%td_%tH%tM%tS //directory for test results; normally left unchanged. A folder named my_result_xxxx is created under the run directory.
- osCollectorScript=./misc/os_collector_linux.py //OS statistics collection script; normally left unchanged. Requires a Python environment on the OS.
- osCollectorInterval=1 //collection interval, default 1 second
- //osCollectorSSHAddr=user@dbhost //host from which OS statistics are collected. Leave it commented out when testing a local database; for a remote server, fill in the user and host name.
- osCollectorDevices=net_ens33 blk_sda //network interface and disk of the monitored server. For example: use ifconfig to find the NIC the test traffic goes through, say ens33, and set net_ens33 (the net_ prefix is fixed); use df -h to find the device backing the database data directory, say /dev/sdb (as in: /dev/sdb 33T 18T 16T 54% /hgdata), and set blk_sdb (the blk_ prefix is fixed).
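As a concrete example, a one-hour timed run at moderate concurrency would combine the run-control parameters like this (a sketch; the warehouse, terminal, and cap values are assumptions to be sized to your hardware):
warehouses=10
loadWorkers=4
terminals=16
runTxnsPerTerminal=0
runMins=60
limitTxnsPerMin=1000000
Setting limitTxnsPerMin far above any achievable rate effectively disables the cap, so throughput is bounded by the system under test rather than by the pacing logic.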
Create the database schema and load the initial data:
From the run directory, execute:
./runDatabaseBuild.sh my_postgres.properties
On success, the output looks like this:
[omm@hostName1 run]$ ./runDatabaseBuild.sh my_postgres.properties
# ------------------------------------------------------------
# Loading SQL file ./sql.common/tableCreates.sql
# ------------------------------------------------------------
create table bmsql_config (
cfg_name varchar(30) primary key,
cfg_value varchar(50)
);
create table bmsql_warehouse (
w_id integer not null,
w_ytd decimal(12,2),
w_tax decimal(4,4),
w_name varchar(10),
w_street_1 varchar(20),
w_street_2 varchar(20),
w_city varchar(20),
w_state char(2),
w_zip char(9)
);
create table bmsql_district (
d_w_id integer not null,
d_id integer not null,
d_ytd decimal(12,2),
d_tax decimal(4,4),
d_next_o_id integer,
d_name varchar(10),
d_street_1 varchar(20),
d_street_2 varchar(20),
d_city varchar(20),
d_state char(2),
d_zip char(9)
);
create table bmsql_customer (
c_w_id integer not null,
c_d_id integer not null,
c_id integer not null,
c_discount decimal(4,4),
c_credit char(2),
c_last varchar(16),
c_first varchar(16),
c_credit_lim decimal(12,2),
c_balance decimal(12,2),
c_ytd_payment decimal(12,2),
c_payment_cnt integer,
c_delivery_cnt integer,
c_street_1 varchar(20),
c_street_2 varchar(20),
c_city varchar(20),
c_state char(2),
c_zip char(9),
c_phone char(16),
c_since timestamp,
c_middle char(2),
c_data varchar(500)
);
create sequence bmsql_hist_id_seq;
create table bmsql_history (
hist_id integer,
h_c_id integer,
h_c_d_id integer,
h_c_w_id integer,
h_d_id integer,
h_w_id integer,
h_date timestamp,
h_amount decimal(6,2),
h_data varchar(24)
);
create table bmsql_new_order (
no_w_id integer not null,
no_d_id integer not null,
no_o_id integer not null
);
create table bmsql_oorder (
o_w_id integer not null,
o_d_id integer not null,
o_id integer not null,
o_c_id integer,
o_carrier_id integer,
o_ol_cnt integer,
o_all_local integer,
o_entry_d timestamp
);
create table bmsql_order_line (
ol_w_id integer not null,
ol_d_id integer not null,
ol_o_id integer not null,
ol_number integer not null,
ol_i_id integer not null,
ol_delivery_d timestamp,
ol_amount decimal(6,2),
ol_supply_w_id integer,
ol_quantity integer,
ol_dist_info char(24)
);
create table bmsql_item (
i_id integer not null,
i_name varchar(24),
i_price decimal(5,2),
i_data varchar(50),
i_im_id integer
);
create table bmsql_stock (
s_w_id integer not null,
s_i_id integer not null,
s_quantity integer,
s_ytd integer,
s_order_cnt integer,
s_remote_cnt integer,
s_data varchar(50),
s_dist_01 char(24),
s_dist_02 char(24),
s_dist_03 char(24),
s_dist_04 char(24),
s_dist_05 char(24),
s_dist_06 char(24),
s_dist_07 char(24),
s_dist_08 char(24),
s_dist_09 char(24),
s_dist_10 char(24)
);
Starting BenchmarkSQL LoadData
driver=org.postgresql.Driver
conn=jdbc:postgresql://localhost:15000/benchmarksql
user=benchmarksql
password=***********
warehouses=10
loadWorkers=4
fileLocation (not defined)
csvNullValue (not defined - using default 'NULL')
Worker 000: Loading ITEM
Worker 001: Loading Warehouse 1
Worker 002: Loading Warehouse 2
Worker 003: Loading Warehouse 3
Worker 000: Loading ITEM done
Worker 000: Loading Warehouse 4
Worker 002: Loading Warehouse 2 done
Worker 002: Loading Warehouse 5
Worker 001: Loading Warehouse 1 done
Worker 001: Loading Warehouse 6
Worker 003: Loading Warehouse 3 done
Worker 003: Loading Warehouse 7
Worker 000: Loading Warehouse 4 done
Worker 000: Loading Warehouse 8
Worker 002: Loading Warehouse 5 done
Worker 002: Loading Warehouse 9
Worker 001: Loading Warehouse 6 done
Worker 001: Loading Warehouse 10
Worker 003: Loading Warehouse 7 done
Worker 000: Loading Warehouse 8 done
Worker 002: Loading Warehouse 9 done
Worker 001: Loading Warehouse 10 done
# ------------------------------------------------------------
# Loading SQL file ./sql.common/indexCreates.sql
# ------------------------------------------------------------
alter table bmsql_warehouse add constraint bmsql_warehouse_pkey
primary key (w_id);
alter table bmsql_district add constraint bmsql_district_pkey
primary key (d_w_id, d_id);
alter table bmsql_customer add constraint bmsql_customer_pkey
primary key (c_w_id, c_d_id, c_id);
create index bmsql_customer_idx1
on bmsql_customer (c_w_id, c_d_id, c_last, c_first);
alter table bmsql_oorder add constraint bmsql_oorder_pkey
primary key (o_w_id, o_d_id, o_id);
create unique index bmsql_oorder_idx1
on bmsql_oorder (o_w_id, o_d_id, o_carrier_id, o_id);
alter table bmsql_new_order add constraint bmsql_new_order_pkey
primary key (no_w_id, no_d_id, no_o_id);
alter table bmsql_order_line add constraint bmsql_order_line_pkey
primary key (ol_w_id, ol_d_id, ol_o_id, ol_number);
alter table bmsql_stock add constraint bmsql_stock_pkey
primary key (s_w_id, s_i_id);
alter table bmsql_item add constraint bmsql_item_pkey
primary key (i_id);
# ------------------------------------------------------------
# Loading SQL file ./sql.common/foreignKeys.sql
# ------------------------------------------------------------
alter table bmsql_district add constraint d_warehouse_fkey
foreign key (d_w_id)
references bmsql_warehouse (w_id);
alter table bmsql_customer add constraint c_district_fkey
foreign key (c_w_id, c_d_id)
references bmsql_district (d_w_id, d_id);
alter table bmsql_history add constraint h_customer_fkey
foreign key (h_c_w_id, h_c_d_id, h_c_id)
references bmsql_customer (c_w_id, c_d_id, c_id);
alter table bmsql_history add constraint h_district_fkey
foreign key (h_w_id, h_d_id)
references bmsql_district (d_w_id, d_id);
alter table bmsql_new_order add constraint no_order_fkey
foreign key (no_w_id, no_d_id, no_o_id)
references bmsql_oorder (o_w_id, o_d_id, o_id);
alter table bmsql_oorder add constraint o_customer_fkey
foreign key (o_w_id, o_d_id, o_c_id)
references bmsql_customer (c_w_id, c_d_id, c_id);
alter table bmsql_order_line add constraint ol_order_fkey
foreign key (ol_w_id, ol_d_id, ol_o_id)
references bmsql_oorder (o_w_id, o_d_id, o_id);
alter table bmsql_order_line add constraint ol_stock_fkey
foreign key (ol_supply_w_id, ol_i_id)
references bmsql_stock (s_w_id, s_i_id);
alter table bmsql_stock add constraint s_warehouse_fkey
foreign key (s_w_id)
references bmsql_warehouse (w_id);
alter table bmsql_stock add constraint s_item_fkey
foreign key (s_i_id)
references bmsql_item (i_id);
# ------------------------------------------------------------
# Loading SQL file ./sql.postgres/extraHistID.sql
# ------------------------------------------------------------
-- ----
-- Extra Schema objects/definitions for history.hist_id in PostgreSQL
-- ----
-- ----
-- This is an extra column not present in the TPC-C
-- specs. It is useful for replication systems like
-- Bucardo and Slony-I, which like to have a primary
-- key on a table. It is an auto-increment or serial
-- column type. The definition below is compatible
-- with Oracle 11g, using a sequence and a trigger.
-- ----
-- Adjust the sequence above the current max(hist_id)
select setval('bmsql_hist_id_seq', (select max(hist_id) from bmsql_history));
-- Make nextval(seq) the default value of the hist_id column.
alter table bmsql_history
alter column hist_id set default nextval('bmsql_hist_id_seq');
-- Add a primary key history(hist_id)
alter table bmsql_history add primary key (hist_id);
# ------------------------------------------------------------
# Loading SQL file ./sql.postgres/buildFinish.sql
# ------------------------------------------------------------
-- ----
-- Extra commands to run after the tables are created, loaded,
-- indexes built and extra's created.
-- PostgreSQL version.
-- ----
vacuum analyze;
Tables created and initial data volumes:
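With warehouses=10 (as in the build log above), TPC-C scaling implies 10 rows in bmsql_warehouse, 100 in bmsql_district, 300,000 in bmsql_customer, 1,000,000 in bmsql_stock, and a fixed 100,000 in bmsql_item (see the table overview near the end of this article). A quick sanity check after the load (a sketch):
select count(*) from bmsql_warehouse;
select count(*) from bmsql_district;
select count(*) from bmsql_customer;
select count(*) from bmsql_stock;
select count(*) from bmsql_item;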

Run the benchmark
From the run directory, execute:
#./runBenchmark.sh my_postgres.properties
If it fails at: print ",".join([str(x) for x in sysInfo])
this is because BenchmarkSQL's collector script was written for Python 2, while openEuler ships Python 3, whose print() function is not compatible with Python 2's print statement. Fix the print usage in run/misc/os_collector_linux.py under the BenchmarkSQL directory; the fully corrected script is in the appendix at the end of this article.
If it fails with: ValueError: can't have unbuffered text I/O
the cause is the same Python 2/3 gap: Python 3's open() no longer allows unbuffered text mode. Fix the open usage in run/misc/os_collector_linux.py; see the corrected script in the appendix.
If it fails with: NameError: name 'lastStatData' is not defined
this too is a Python 2/3 incompatibility: the script shares state through module-level variables, which must be declared with explicit global statements in the functions that assign them. See the corrected script in the appendix, and the sketch of the three fixes below.
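The three fixes reduce to the following pattern (a sketch; the appendix script applies them throughout):
# Python 2 (original)
print ",".join([str(x) for x in sysInfo])        # print statement
procStatFD = open("/proc/stat", "r", 0)          # unbuffered text mode, rejected by Python 3
def initSystemUsage():
    lastStatData = [0] * 10                      # creates a local, leaving the module global undefined
# Python 3 (fixed, as in the appendix script)
print(",".join([str(x) for x in sysInfo]))       # print is a function
procStatFD = open("/proc/stat", "rb")            # binary mode instead; decode() each line on read
def initSystemUsage():
    global lastStatData                          # declare the global before assigning it
    lastStatData = [0] * 10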
On success, the output looks like this:
[root@hostName1 run]# ./runBenchmark.sh my_postgres.properties
03:49:54,400 [main] INFO jTPCC : Term-00,
03:49:54,411 [main] INFO jTPCC : Term-00, +-------------------------------------------------------------+
03:49:54,412 [main] INFO jTPCC : Term-00, BenchmarkSQL v5.0
03:49:54,413 [main] INFO jTPCC : Term-00, +-------------------------------------------------------------+
03:49:54,414 [main] INFO jTPCC : Term-00, (c) 2003, Raul Barbosa
03:49:54,416 [main] INFO jTPCC : Term-00, (c) 2004-2016, Denis Lussier
03:49:54,426 [main] INFO jTPCC : Term-00, (c) 2016, Jan Wieck
03:49:54,427 [main] INFO jTPCC : Term-00, +-------------------------------------------------------------+
03:49:54,428 [main] INFO jTPCC : Term-00,
03:49:54,468 [main] INFO jTPCC : Term-00, db=postgres
03:49:54,468 [main] INFO jTPCC : Term-00, driver=org.postgresql.Driver
03:49:54,468 [main] INFO jTPCC : Term-00, conn=jdbc:postgresql://localhost:15000/benchmarksql
03:49:54,469 [main] INFO jTPCC : Term-00, user=benchmarksql
03:49:54,471 [main] INFO jTPCC : Term-00,
03:49:54,471 [main] INFO jTPCC : Term-00, warehouses=10
03:49:54,472 [main] INFO jTPCC : Term-00, terminals=1
03:49:54,481 [main] INFO jTPCC : Term-00, runTxnsPerTerminal=10
03:49:54,481 [main] INFO jTPCC : Term-00, limitTxnsPerMin=300
03:49:54,484 [main] INFO jTPCC : Term-00, terminalWarehouseFixed=true
03:49:54,484 [main] INFO jTPCC : Term-00,
03:49:54,484 [main] INFO jTPCC : Term-00, newOrderWeight=45
03:49:54,485 [main] INFO jTPCC : Term-00, paymentWeight=43
03:49:54,487 [main] INFO jTPCC : Term-00, orderStatusWeight=4
03:49:54,487 [main] INFO jTPCC : Term-00, deliveryWeight=4
03:49:54,487 [main] INFO jTPCC : Term-00, stockLevelWeight=4
03:49:54,487 [main] INFO jTPCC : Term-00,
03:49:54,488 [main] INFO jTPCC : Term-00, resultDirectory=my_result_%tY-%tm-%td_%tH%tM%tS
03:49:54,488 [main] INFO jTPCC : Term-00, osCollectorScript=./misc/os_collector_linux.py
03:49:54,489 [main] INFO jTPCC : Term-00,
03:49:54,559 [main] INFO jTPCC : Term-00, copied my_postgres.properties to my_result_2025-06-19_034954/run.properties
03:49:54,581 [main] INFO jTPCC : Term-00, created my_result_2025-06-19_034954/data/runInfo.csv for runID 11
03:49:54,582 [main] INFO jTPCC : Term-00, writing per transaction results to my_result_2025-06-19_034954/data/result.csv
03:49:54,589 [main] INFO jTPCC : Term-00, osCollectorScript=./misc/os_collector_linux.py
03:49:54,589 [main] INFO jTPCC : Term-00, osCollectorInterval=1
03:49:54,589 [main] INFO jTPCC : Term-00, osCollectorSSHAddr=null
03:49:54,590 [main] INFO jTPCC : Term-00, osCollectorDevices=net_ens33 blk_sda
03:49:54,693 [main] INFO jTPCC : Term-00,
03:49:55,172 [main] INFO jTPCC : Term-00, C value for C_LAST during load: 118
03:49:55,173 [main] INFO jTPCC : Term-00, C value for C_LAST this run: 221
03:49:55,173 [main] INFO jTPCC : Term-00, Ter03:49:58,483 [Thread-1] INFO jTPCC : Term-00, t tpmTOTAL: 96 Memory Usage: 12MB / 118MB
03:49:58,490 [Thread-1] INFO jTPCC : Term-00,
03:49:58,503 [Thread-1] INFO jTPCC : Term-00, Measured tpmC (NewOrders) = 20.24
03:49:58,507 [Thread-1] INFO jTPCC : Term-00, Measured tpmTOTAL = 222.74
03:49:58,510 [Thread-1] INFO jTPCC : Term-00, Session Start = 2025-06-19 03:49:55
03:49:58,512 [Thread-1] INFO jTPCC : Term-00, Session End = 2025-06-19 03:49:58
03:49:58,514 [Thread-1] INFO jTPCC : Term-00, Transaction Count = 10
[root@hostName1 run]#
When the test finishes, a new directory named in the form my_result_%tY-%tm-%td_%tH%tM%tS is created under the run directory.

Use the generateReport.sh my_result_* script to create an HTML report with charts:
For example:
#./generateReport.sh my_result_2025-06-19_034954
generateReport.sh requires R to be installed.
Note: my openEuler server has no graphical desktop installed, so the chart images cannot be generated and an error like the following appears: Generating my_result_2025-06-19_034954/tpm_nopm.png ... Error in .External2(C_X11, paste0("png::", filename), gwidth, gheight, :
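One possible workaround on a headless server (an assumption, not part of the original setup: it relies on the xorg-x11-server-Xvfb package providing a virtual X display and the xvfb-run wrapper) is:
#yum install -y xorg-x11-server-Xvfb
#xvfb-run ./generateReport.sh my_result_2025-06-19_034954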

Package the my_result_2025-06-19_034954 directory for download:
#zip -r my_result_2025-06-19_034954.zip my_result_2025-06-19_034954/

Open the report file, report.html; it looks something like this:


How to read the generated report:
(1) Overall performance:
Overall tpmC: 124.67
Overall tpmTotal: 280.67
tpmC is the number of core (NEW_ORDER) transactions processed per minute.
tpmTotal is the per-minute total across all transaction types (NEW_ORDER, Payment, Order Status, Delivery, and Stock Level).
Here tpmC is 124.67, meaning roughly 124.67 NEW_ORDER transactions are processed per minute; this is the single most important metric in a TPC-C test. Since NEW_ORDER makes up 45% of the mix, tpmTotal should be roughly tpmC / 0.45 when the mix is on target (124.67 / 0.45 ≈ 277, close to the measured 280.67).
(2) Theoretical maximum:
Theoretical maximum: 12.86 NEW_ORDER transactions per minute per warehouse.
The TPC-C specification puts the ideal per-warehouse NEW_ORDER ceiling at 12.86 transactions per minute. Reaching it would require:
a perfect transaction mix (45% of transactions being NEW_ORDER), and
zero response time.
In practice, latency and load imbalance keep real systems below this ceiling.
(3) Actual vs. theoretical performance:
124.67 tpmC is 96.94% of the theoretical maximum.
With 10 warehouses the ceiling is 12.86 × 10 = 128.6 tpmC, and 124.67 / 128.6 ≈ 96.9%. In other words, this run achieved about 97% of the ideal per-warehouse rate, which indicates that the transaction mix stayed close to the specified 45% NEW_ORDER share and that, at this small, rate-limited load, the database itself was not the bottleneck.
(4) The two generated charts:


Transactions per minute.

Transaction latency.
TPC-C table structure
The TPC-C database consists of nine tables, related as shown in the diagram below. The number in each table's box indicates how many rows the table holds; adjusting the warehouse count W in a test shows how large a data set the database can support:

Warehouse (warehouse table; W is the number of warehouses and sets the overall data scale)
District (district table; W*10: each warehouse supplies 10 sales districts)
Customer (customer table; W*30k: each district serves 3,000 customers)
Stock (stock table; W*100k: each warehouse keeps stock records for 100,000 items)
Order (order table)
New-Order (new-order table)
Item (item table; fixed at 100,000 rows)
Order-Line (order-line table)
History (history table)
TPC-C transaction model
TPC-C exercises five types of transactions:
New-Order (45%): a customer enters a new order;
Payment (43%): update a customer's account balance to record a payment;
Delivery (4%): deliver orders (a simulated batch transaction);
Order-Status (4%): query the status of a customer's most recent order;
Stock-Level (4%): query warehouse stock levels to support timely restocking.
Interpreting the core metrics:

Appendix:
The corrected run/misc/os_collector_linux.py script:
#!/usr/bin/env python
# ----------------------------------------------------------------------
# os_collector_linux.py -
#
# Script used to collect OS level resource utilization data like
# CPU usage and disk IO.
#
# This code is used in the jTPCCOSCollect class. It is launched as
# a separate process, possibly via ssh(1) on the remote database
# server. The ability of Python to receive a script to execute on
# stdin allows us to execute this script via ssh(1) on the database
# server without installing any programs/scripts there.
#
# The command line arguments for this script are the runID, the
# interval in seconds at which to collect information and a variable
# number of devices in the form "blk_<devname>" "net_<devname>",
# for example "blk_sda" for the first SCSI disk or "net_eth0".
#
# The output on stdout is one line for CPU/VM info, followed by a
# line for each of the specified devices in CSV format. The first
# set of lines are the CSV headers. The output is prefixed with the
# runID, elapsed_ms and for the devices the blk_ or net_ name that
# was specified on the command line. This format makes it easy to
# load the data into a result database where it can be analyzed
# together with the BenchmarkSQL per transaction results and compared
# to other benchmark runs.
#
# It is the caller's responsibility to split the output lines into
# separate result CSV files.
# ----------------------------------------------------------------------
import errno
import math
import os
import sys
import time
# ----
# main
# ----
def main(argv):
    global deviceFDs
    global lastDeviceData

    # ----
    # Get the runID and collection interval from the command line
    # ----
    runID = int(argv[0])
    interval = float(argv[1])

    # ----
    # Our start time is now. Since most of the information is deltas
    # we can only produce the first data after the first interval.
    # ----
    startTime = time.time()
    nextDue = startTime + interval

    # ----
    # Initialize CPU and vmstat collection and output the CSV header.
    # ----
    sysInfo = ['run', 'elapsed', ]
    sysInfo += initSystemUsage()
    print(",".join([str(x) for x in sysInfo]))

    # ----
    # Get all the devices from the command line.
    # ----
    devices = []
    deviceFDs = {}
    lastDeviceData = {}
    for dev in argv[2:]:
        if dev.startswith('blk_'):
            devices.append(dev)
        elif dev.startswith('net_'):
            devices.append(dev)
        else:
            raise Exception("unknown device type '" + dev + "'")

    # ----
    # Initialize usage collection per device depending on the type.
    # Output all the headers in the order, the devices are given.
    # ----
    for dev in devices:
        if dev.startswith('blk_'):
            devInfo = ['run', 'elapsed', 'device', ]
            devInfo += initBlockDevice(dev)
            print(",".join([str(x) for x in devInfo]))
        elif dev.startswith('net_'):
            devInfo = ['run', 'elapsed', 'device', ]
            devInfo += initNetDevice(dev)
            print(",".join([str(x) for x in devInfo]))

    # ----
    # Flush all header lines.
    # ----
    sys.stdout.flush()

    try:
        while True:
            # ----
            # Wait until our next collection interval and calculate the
            # elapsed time in milliseconds.
            # ----
            now = time.time()
            if nextDue > now:
                time.sleep(nextDue - now)
            elapsed = int((nextDue - startTime) * 1000.0)

            sysInfo = [runID, elapsed, ]
            sysInfo += getSystemUsage()
            print(",".join([str(x) for x in sysInfo]))

            # ----
            # Collect all device utilization data.
            # ----
            for dev in devices:
                if dev.startswith('blk_'):
                    devInfo = [runID, elapsed, dev, ]
                    devInfo += getBlockUsage(dev, interval)
                    print(",".join([str(x) for x in devInfo]))
                elif dev.startswith('net_'):
                    devInfo = [runID, elapsed, dev, ]
                    devInfo += getNetUsage(dev, interval)
                    print(",".join([str(x) for x in devInfo]))

            # ----
            # Bump the time when we are next due.
            # ----
            nextDue += interval
            sys.stdout.flush()

    # ----
    # Running on the command line for test purposes?
    # ----
    except KeyboardInterrupt:
        print("")
        return 0

    # ----
    # The OSCollector class will just close our stdout on the other
    # side, so this is expected.
    # ----
    except IOError as e:
        if e.errno == errno.EPIPE:
            return 0
        else:
            raise e
def initSystemUsage():
    global procStatFD
    global procVMStatFD
    global lastStatData
    global lastVMStatData

    procStatFD = open("/proc/stat", "rb")
    for line in procStatFD:
        line = line.decode().split()
        if line[0] == "cpu":
            lastStatData = [int(x) for x in line[1:]]
            break
    if len(lastStatData) != 10:
        raise Exception("cpu line in /proc/stat too short")

    procVMStatFD = open("/proc/vmstat", "rb")
    lastVMStatData = {}
    for line in procVMStatFD:
        line = line.decode().split()
        if line[0] in ['nr_dirty', ]:
            lastVMStatData['vm_' + line[0]] = int(line[1])
    if len(lastVMStatData.keys()) != 1:
        raise Exception("not all elements found in /proc/vmstat")

    return [
        'cpu_user', 'cpu_nice', 'cpu_system',
        'cpu_idle', 'cpu_iowait', 'cpu_irq',
        'cpu_softirq', 'cpu_steal',
        'cpu_guest', 'cpu_guest_nice',
        'vm_nr_dirty',
    ]
def getSystemUsage():
    global procStatFD
    global procVMStatFD
    global lastStatData
    global lastVMStatData

    procStatFD.seek(0, 0)
    for line in procStatFD:
        line = line.decode().split()
        if line[0] != "cpu":
            continue
        statData = [int(x) for x in line[1:]]
        deltaTotal = float(sum(statData) - sum(lastStatData))
        if deltaTotal == 0:
            result = [0.0 for x in statData]
        else:
            result = []
            for old, new in zip(lastStatData, statData):
                result.append(float(new - old) / deltaTotal)
        lastStatData = statData
        break

    procVMStatFD.seek(0, 0)
    newVMStatData = {}
    for line in procVMStatFD:
        line = line.decode().split()
        if line[0] in ['nr_dirty', ]:
            newVMStatData['vm_' + line[0]] = int(line[1])
    for key in ['vm_nr_dirty', ]:
        result.append(newVMStatData[key])

    return result
def initBlockDevice(dev):
    global deviceFDs
    global lastDeviceData

    devPath = os.path.join("/sys/block", dev[4:], "stat")
    deviceFDs[dev] = open(devPath, "rb")
    line = deviceFDs[dev].readline().decode().split()
    newData = []
    for idx, mult in [
        (0, 1.0), (1, 1.0), (2, 0.5),
        (4, 1.0), (5, 1.0), (6, 0.5),
    ]:
        newData.append(int(line[idx]))
    lastDeviceData[dev] = newData

    return ['rdiops', 'rdmerges', 'rdkbps', 'wriops', 'wrmerges', 'wrkbps', ]
def getBlockUsage(dev, interval):
    global deviceFDs
    global lastDeviceData

    deviceFDs[dev].seek(0, 0)
    line = deviceFDs[dev].readline().decode().split()
    oldData = lastDeviceData[dev]
    newData = []
    result = []
    ridx = 0
    for idx, mult in [
        (0, 1.0), (1, 1.0), (2, 0.5),
        (4, 1.0), (5, 1.0), (6, 0.5),
    ]:
        newData.append(int(line[idx]))
        result.append(float(newData[ridx] - oldData[ridx]) * mult / interval)
        ridx += 1
    lastDeviceData[dev] = newData

    return result
def initNetDevice(dev):
    global deviceFDs
    global lastDeviceData

    devPath = os.path.join("/sys/class/net", dev[4:], "statistics")
    deviceData = []
    for fname in ['rx_packets', 'rx_bytes', 'tx_packets', 'tx_bytes', ]:
        key = dev + "." + fname
        deviceFDs[key] = open(os.path.join(devPath, fname), "rb")
        deviceData.append(int(deviceFDs[key].read()))
    lastDeviceData[dev] = deviceData

    return ['rxpktsps', 'rxkbps', 'txpktsps', 'txkbps', ]
def getNetUsage(dev, interval):
    global deviceFDs
    global lastDeviceData

    oldData = lastDeviceData[dev]
    newData = []
    for fname in ['rx_packets', 'rx_bytes', 'tx_packets', 'tx_bytes', ]:
        key = dev + "." + fname
        deviceFDs[key].seek(0, 0)
        newData.append(int(deviceFDs[key].read()))

    result = [
        float(newData[0] - oldData[0]) / interval,
        float(newData[1] - oldData[1]) / interval / 1024.0,
        float(newData[2] - oldData[2]) / interval,
        float(newData[3] - oldData[3]) / interval / 1024.0,
    ]
    lastDeviceData[dev] = newData

    return result
if __name__ == '__main__':
    sys.exit(main(sys.argv[1:]))