# Runtime settings for the mlx5 debug output. MLX5_DEBUG_FILE names the log
# file that is tailed later in these notes.
# NOTE(review): 0xff presumably enables every debug category — confirm against
# the mlx5 provider sources.
export MLX5_DEBUG_MASK=0xff
export MLX5_DEBUG_FILE=/tmp/mlx5.txt

# Fetch and build rdma-core. The steps are chained with && so that a failed
# clone or cd never runs ./build.sh from an unrelated directory.
git clone https://github.com/linux-rdma/rdma-core.git &&
  cd rdma-core &&
  ./build.sh
修改build/CMakeCache.txt,将下面的选项设为TRUE,然后重新编译:
MLX5_DEBUG:BOOL=TRUE
#######################################
# Install the locally built rdma-core libraries (librdmacm, libibverbs,
# libmlx5) and repoint the unversioned / soname symlinks at them.
#
# Nothing in the destination is modified unless every source library and the
# destination provider directory exist, so a missing build artifact can no
# longer leave the system with dangling library links. The caller's working
# directory is not changed.
#
# Arguments:
#   $1 - directory holding the built libraries
#        (default: /swgwork/cmi/rdma-core/build/lib)
#   $2 - library directory to install into (default: /lib64)
# Outputs:
#   Diagnostics on stderr when a precondition is not met.
# Returns:
#   0 on success, non-zero if a precondition or any copy/link step fails.
#######################################
function install_rdma_core
{
  local dir=${1:-/swgwork/cmi/rdma-core/build/lib}
  local dest=${2:-/lib64}
  local rdmacm=librdmacm.so.1.3.56.0
  local ibverbs=libibverbs.so.1.14.56.0
  local mlx5=libmlx5.so.1.25.56.0
  local lib

  # Validate everything up front, before any existing link is replaced.
  for lib in "$rdmacm" "$ibverbs" "$mlx5"; do
    if [[ ! -f "$dir/$lib" ]]; then
      printf 'install_rdma_core: missing %s\n' "$dir/$lib" >&2
      return 1
    fi
  done
  if [[ ! -d "$dest/libibverbs" ]]; then
    printf 'install_rdma_core: missing directory %s\n' "$dest/libibverbs" >&2
    return 1
  fi

  # /bin/cp is called by full path so a shell alias (e.g. cp -i) cannot
  # interfere.
  /bin/cp -- "$dir/$rdmacm" "$dir/$ibverbs" "$dir/$mlx5" "$dest/" || return

  # -f replaces an existing link; -n keeps ln from descending into a link that
  # happens to point at a directory.
  ln -sfn -- "$rdmacm" "$dest/librdmacm.so" || return
  ln -sfn -- "$rdmacm" "$dest/librdmacm.so.1" || return
  ln -sfn -- "$ibverbs" "$dest/libibverbs.so" || return
  ln -sfn -- "$ibverbs" "$dest/libibverbs.so.1" || return
  ln -sfn -- "$mlx5" "$dest/libmlx5.so.1" || return

  # The provider link lives one level down, hence the ../ in its target.
  ln -sfn -- "../$mlx5" "$dest/libibverbs/libmlx5-rdmav34.so" || return
}
运行ib_send_bw:
# Both commands run ib_send_bw inside network namespace n11 on device mlx5_3.
# First command: no peer address is given — presumably the server side, which
# waits for a client; run it in its own terminal.
ip netns exec n11 /swgwork/cmi/perftest/ib_send_bw -d mlx5_3 -z
# Second command: passes peer address 1.1.1.1 — presumably the client side.
# NOTE(review): -D 10000 is presumably the run duration in seconds and -z
# presumably selects rdma_cm for connection setup — confirm with
# ib_send_bw --help.
ip netns exec n11 /swgwork/cmi/perftest/ib_send_bw -d mlx5_3 1.1.1.1 -D 10000 -z
server端收包的时候得到下面的log:
# Follow the debug log; this is the path exported as MLX5_DEBUG_FILE.
tail -f /tmp/mlx5.txt
mlx5_get_next_cqe:564: dump cqe for cqn 0x1a9e:
00000000 00000000 00000000 00000000
00000000 00000000 00000000 00000000
00000000 00000000 00000000 00010000
0001d00e 98d17383 0000024d 03e3f720
mlx5_get_next_cqe:564: dump cqe for cqn 0x1a9e:
00000000 00000000 00000000 00000000
00000000 00000000 00000000 00000000
00000000 00000000 00000000 00010000
0001d00e 98d17f13 0000024d 03e46b20
mlx5_get_next_cqe:564: dump cqe for cqn 0x1a9e:
00000000 00000000 00000000 00000000
00000000 00000000 00000000 00000000
00000000 00000000 00000000 00010000
0001d00e 98d18aa3 0000024d 03e52e20
上面的输出对应下面的数据结构:
/*
 * Completion queue entry layout used to decode the CQE hex dumps in these
 * notes. The __be16/__be32/__be64 type names indicate big-endian storage, so
 * the dumped words can be read left to right as field values.
 *
 * NOTE(review): the offsets quoted below assume the union occupies 32 bytes,
 * i.e. that struct mlx5_tm_cqe and struct ibv_tmh (not shown here) are no
 * larger than the anonymous struct — confirm against the rdma-core headers.
 * Under that assumption the whole entry is 64 bytes, matching the sixteen
 * 32-bit words printed per dump.
 */
struct mlx5_cqe64 {
union {
/* Field sizes below sum to 32 bytes (2+2+13+1+4+2+4+1+1+2). */
struct {
uint8_t rsvd0[2];
__be16 wqe_id;
uint8_t rsvd4[13];
uint8_t ml_path;
uint8_t rsvd20[4];
__be16 slid;
__be32 flags_rqpn;
uint8_t hds_ip_ext;
uint8_t l4_hdr_type_etc;
__be16 vlan_info;
};
struct mlx5_tm_cqe tm_cqe;
/* TMH is scattered to CQE upon match */
struct ibv_tmh tmh;
};
__be32 srqn_uidx;
__be32 imm_inval_pkey;
uint8_t app;
uint8_t app_op;
__be16 app_info;
/* Byte offset 44: last word of the third dump row (0x00010000 in the dumps). */
__be32 byte_cnt;
/* Byte offset 48: first two words of the fourth dump row. */
__be64 timestamp;
/* Byte offset 56: third word of the fourth dump row. */
__be32 sop_drop_qpn;
/* Byte offsets 60-63: wqe_counter, signature and op_own share the last word. */
__be16 wqe_counter;
uint8_t signature;
uint8_t op_own;
};
比如上面的例子中,byte_cnt就是0x10000(65536);三个CQE的wqe_counter分别是03e3、03e4、03e5,timestamp分别是0001d00e 98d17383、0001d00e 98d17f13、0001d00e 98d18aa3。
crash> ps | grep ib_send_bw
> 685270 2581 23 ffff90c5b0c28000 RU 0.0 6716 4832 ib_send_bw
crash> files ffff90c5b0c28000
PID: 685270 TASK: ffff90c5b0c28000 CPU: 23 COMMAND: "ib_send_bw"
ROOT: / CWD: /labhome/cmi/mi/drgn/ib
FD FILE DENTRY INODE TYPE PATH
0 ffff90c3cda4eb80 ffff90c3cc6399a0 ffff90c3d1325a40 CHR /dev/pts/9
1 ffff90c3cda4eb80 ffff90c3cc6399a0 ffff90c3d1325a40 CHR /dev/pts/9
2 ffff90c3cda4eb80 ffff90c3cc6399a0 ffff90c3d1325a40 CHR /dev/pts/9
3 ffff90c4de695200 ffff90c5504eccd0 ffff90c3cf2dd978 CHR /dev/infiniband/uverbs3
4 ffff90c4de695980 ffff90c4c814c148 ffff90c4f9ca6380 REG /tmp/mlx5.txt
5 ffff90c4de695f80 ffff90c4c81c8000 ffff90c3c05ed580 UNKN [infinibandevent]
6 ffff90c4de694c00 ffff90c47ffcf710 ffff90c3e222a0f0 CHR /dev/infiniband/rdma_cm
7 ffff90c4de697780 ffff90c5504eccd0 ffff90c3cf2dd978 CHR /dev/infiniband/uverbs3
8 ffff90c4de695b00 ffff90c4c814c148 ffff90c4f9ca6380 REG /tmp/mlx5.txt
9 ffff90c4de696e80 ffff90c4c81c83d8 ffff90c3c05ed580 UNKN [infinibandevent]