libpcap之数据分流

当前系统都是多核系统,为了充分利用多核优势,数据包可以在底层就进行分流,可以通过多线程/多进程对同一个接口的数据进行并行的处理。

一、实验

  • 一个server/client程序
  • 一个简单的抓包程序,抓取server/client之间的通信数据

1.1 server.c

c 复制代码
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <unistd.h>
#include <string.h>
#include <netinet/in.h>
#include <arpa/inet.h>

void
Usage() {
    printf("ip port\n");
}

int
start_srv(const char *ip, int port) {
    int ret = 0;
    int sockfd;

    sockfd = socket(AF_INET, SOCK_STREAM, 0);
    if (sockfd < 0) {
        perror("socket:");
        ret = -1;
        goto out;
    }

    struct sockaddr_in srv;
    srv.sin_family = AF_INET;
    srv.sin_port = htons(port);
    inet_pton(AF_INET, ip, &srv.sin_addr.s_addr);

    socklen_t len = sizeof(srv);
    if (bind(sockfd, (const struct sockaddr *) &srv, len)) {
        perror("bind:");
        ret = -2;
        goto out;
    }

    if (listen(sockfd, 5)) {
        perror("listen:");
        ret = -3;
        goto out;
    }

    int cli_fd = accept(sockfd, NULL, NULL);

    if (cli_fd < 0) {
        perror("accept:");
        ret = -4;
        goto out;
    }

    char buf[128] = {0};
    read(cli_fd, buf, sizeof(buf)-1);
    sleep(1);
    memcpy(buf, "hello world", sizeof("hello world"));
    write(cli_fd, buf, strlen(buf));

out:
    if (sockfd > 0) {
        close(sockfd);
    }

    return ret;
}

int 
main(int argc, char **argv) {
    if (argc < 3) {
        Usage();
        exit(1);
    }

    start_srv(argv[1], atoi(argv[2]));

    return 0;
}

1.2 client.c

c 复制代码
#include <stdio.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <unistd.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <string.h>
#include <stdlib.h>

int
start_cli(const char *remote_ip, int port) {
    int ret = 0;
    int sockfd;

    sockfd = socket(AF_INET, SOCK_STREAM, 0);
    if (sockfd < 0) {
        perror("socket:");
        ret = -1;
        goto out;
    }

    struct sockaddr_in srv;
    srv.sin_family = AF_INET;
    srv.sin_port = htons(port);
    inet_pton(AF_INET, remote_ip, &srv.sin_addr.s_addr);

    socklen_t len = sizeof(srv);
    if (connect(sockfd, (const struct sockaddr *) &srv, len)) {
        perror("connect:");
        ret = -2;
        goto out;
    }

    char buf[128];
    snprintf(buf, sizeof(buf), "I'm client.");
    write(sockfd, buf, strlen(buf));
    read(sockfd, buf, sizeof(buf));

out:
    if (sockfd > 0) {
        close(sockfd);
    }

    return ret;
}

void
Usage() {
    printf("remote_ip port\n");
}

int
main(int argc, char **argv) {
    if (argc < 3) {
        Usage();
        exit(1);
    }

    start_cli(argv[1], atoi(argv[2]));
    
    return 0;
}

1.3 fanouttest.c

c 复制代码
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <pcap.h>
#include <pcap-int.h>
#include <pcap/bpf.h>

void 
Usage() {
    printf("-i device\n");
    printf("-w filename\n");
    printf("-f filter\n");
}

int recv_pkt = 0;

void 
handler_cb(u_char *priv, const struct pcap_pkthdr *h, const u_char *pkt) {
    if (priv) {
        pcap_dump(priv, h, pkt);
    } else {
        printf("recv pkt length %u\n", h->caplen);
    }

    ++recv_pkt;
}

pcap_t *handle = NULL;

void
sig_handler(int num) {
    if (handle) {
        pcap_breakloop(handle);
    }
}

int
fanout_test(const char *device, const char *filter, const char *filename) {
   
    signal(SIGUSR1, sig_handler);

    pcap_dumper_t *dumper = NULL;
    struct bpf_program bpf = {0};
    int ret = 0;
    char errbuf[PCAP_ERRBUF_SIZE];

    handle = pcap_open_live(device, 65535, 1, 1000, errbuf);
    if (!handle) {
        printf("open %s failed. %s\n", device, errbuf);
        ret = -1;
        goto out;
    }

    if (filter) {
        bpf_u_int32 localnet, netmask;

        if (pcap_lookupnet(device, &localnet, &netmask, errbuf)) {
            localnet = 0;
            netmask = 0;
            printf("lookupnet err: %s\n", errbuf);
        }
        if (pcap_compile(handle, &bpf, filter, 1, netmask)) {
            printf("compile %s failed. %s\n", filter, pcap_geterr(handle));

            ret = -2;
            goto out;
        }

        if (pcap_setfilter(handle, &bpf)) {
            printf("set filter failed. %s\n", pcap_geterr(handle));

            ret = -3;
            goto out;
        }
    }

    if (filename) {
        dumper = pcap_dump_open(handle, filename);
        if (!dumper) {
            printf("dump open %s failed. %s", filename, pcap_geterr(handle));
           
            ret = -4;
            goto out;
        }
    }

    pcap_loop(handle, -1, handler_cb, (u_char *)dumper);
    printf("recv %d packet\n", recv_pkt);

out:
    if (dumper) {
        pcap_dump_close(dumper);
    }
    if (handle) {
        pcap_close(handle);
    }
    pcap_freecode(&bpf);

    return ret;
}

int 
main(int argc, char **argv) {
    char *device = NULL;
    char *save_file = NULL;
    char *filter = NULL;
    int opt;

    while ((opt = getopt(argc, argv, "i:w:f:")) != -1) {
        switch (opt) {
        case 'i':
            device = optarg;
            break;
        case 'w':
            save_file = optarg;
            break;
        case 'f':
            filter = optarg;
            break;
        default:
            Usage();
            exit(1);
            break;
        }
    }

    if (!device) {
        Usage();
        exit (2);
    }

    fanout_test(device, filter, save_file);

    return 0;
}

1.4 实验过程

复用libpcap编译结构,直接在testprogs目录增加源码文件。

testprogs/Makefile

testprogs/fanouttest.c

拷贝获取数据


bash 复制代码
gcc server.c -o srv
gcc client.c -o cli



可以从结果看,数据确实拷贝了两份,每个文件都是完整的数据。

分流获取数据

在函数中添加setsockopt设置相关属性。

c 复制代码
...
#include <errno.h>
#include <string.h>
#include <linux/if_packet.h>

...

int
fanout_test(const char *device, const char *filter, const char *filename) {
   
  ...

    int val;
    int id = 0;
    val = (id << 16) | PACKET_FANOUT_QM;
    if (setsockopt(handle->fd, SOL_PACKET, PACKET_FANOUT, &val, sizeof(val))) {
        printf("set fanout failed. %s \n", strerror(errno));
    }

    pcap_loop(handle, -1, handler_cb, (u_char *)dumper);
    printf("recv %d packet\n", recv_pkt);
...
}

从结果看,左边抓取到20个包,右边抓取到10个包,数据确实被分流了,没有完全复制。

二、原理

c 复制代码
static int
packet_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	int ret;

	if (level != SOL_PACKET)
		return -ENOPROTOOPT;

	switch (optname) {
	...
	case PACKET_FANOUT:
	{
		int val;

		if (optlen != sizeof(val))
			return -EINVAL;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;

		return fanout_add(sk, val & 0xffff, val >> 16);
	}
	...
	default:
		return -ENOPROTOOPT;
	}
}


c 复制代码
static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
{
	struct packet_rollover *rollover = NULL;
	struct packet_sock *po = pkt_sk(sk);
	struct packet_fanout *f, *match;
	u8 type = type_flags & 0xff;
	u8 flags = type_flags >> 8;
	int err;

	switch (type) {
	case PACKET_FANOUT_ROLLOVER:
		if (type_flags & PACKET_FANOUT_FLAG_ROLLOVER)
			return -EINVAL;
	case PACKET_FANOUT_HASH:
	case PACKET_FANOUT_LB:
	case PACKET_FANOUT_CPU:
	case PACKET_FANOUT_RND:
	case PACKET_FANOUT_QM:
	case PACKET_FANOUT_CBPF:
	case PACKET_FANOUT_EBPF:
		break;
	default:
		return -EINVAL;
	}

	mutex_lock(&fanout_mutex);

	err = -EALREADY;
	if (po->fanout)
		goto out;

	if (type == PACKET_FANOUT_ROLLOVER ||
	    (type_flags & PACKET_FANOUT_FLAG_ROLLOVER)) {
		err = -ENOMEM;
		rollover = kzalloc(sizeof(*rollover), GFP_KERNEL);
		if (!rollover)
			goto out;
		atomic_long_set(&rollover->num, 0);
		atomic_long_set(&rollover->num_huge, 0);
		atomic_long_set(&rollover->num_failed, 0);
	}

	if (type_flags & PACKET_FANOUT_FLAG_UNIQUEID) {
		if (id != 0) {
			err = -EINVAL;
			goto out;
		}
		if (!fanout_find_new_id(sk, &id)) {
			err = -ENOMEM;
			goto out;
		}
		/* ephemeral flag for the first socket in the group: drop it */
		flags &= ~(PACKET_FANOUT_FLAG_UNIQUEID >> 8);
	}

	match = NULL;
	list_for_each_entry(f, &fanout_list, list) {
		if (f->id == id &&
		    read_pnet(&f->net) == sock_net(sk)) {
			match = f;
			break;
		}
	}
	err = -EINVAL;
	if (match && match->flags != flags)
		goto out;
	if (!match) {
		err = -ENOMEM;
		match = kzalloc(sizeof(*match), GFP_KERNEL);
		if (!match)
			goto out;
		write_pnet(&match->net, sock_net(sk));
		match->id = id;
		match->type = type;
		match->flags = flags;
		INIT_LIST_HEAD(&match->list);
		spin_lock_init(&match->lock);
		refcount_set(&match->sk_ref, 0);
		fanout_init_data(match);
		match->prot_hook.type = po->prot_hook.type;
		match->prot_hook.dev = po->prot_hook.dev;
		match->prot_hook.func = packet_rcv_fanout;
		match->prot_hook.af_packet_priv = match;
		match->prot_hook.id_match = match_fanout_group;
		list_add(&match->list, &fanout_list);
	}
	err = -EINVAL;

	spin_lock(&po->bind_lock);
	if (po->running &&
	    match->type == type &&
	    match->prot_hook.type == po->prot_hook.type &&
	    match->prot_hook.dev == po->prot_hook.dev) {
		err = -ENOSPC;
		if (refcount_read(&match->sk_ref) < PACKET_FANOUT_MAX) {
			__dev_remove_pack(&po->prot_hook);
			po->fanout = match;
			po->rollover = rollover;
			rollover = NULL;
			refcount_set(&match->sk_ref, refcount_read(&match->sk_ref) + 1);
			__fanout_link(sk, po);
			err = 0;
		}
	}
	spin_unlock(&po->bind_lock);

	if (err && !refcount_read(&match->sk_ref)) {
		list_del(&match->list);
		kfree(match);
	}

out:
	kfree(rollover);
	mutex_unlock(&fanout_mutex);
	return err;
}

通过将入口函数tpacket_rcv替换成packet_rcv_fanout,并且将相同的抓包方式放在fanout中的一个数据中,最后将此fanout挂在一个全局列表fanout_list中。

并且将po从ptype_all列表中剔除掉,最后在ptype_all中只保留一个即可,这样数据就只有一个入口,然后在到packet_rcv_fanout中进行分流处理。

c 复制代码
static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev,
			     struct packet_type *pt, struct net_device *orig_dev)
{
	struct packet_fanout *f = pt->af_packet_priv;
	unsigned int num = READ_ONCE(f->num_members);
	struct net *net = read_pnet(&f->net);
	struct packet_sock *po;
	unsigned int idx;

	if (!net_eq(dev_net(dev), net) || !num) {
		kfree_skb(skb);
		return 0;
	}

	if (fanout_has_flag(f, PACKET_FANOUT_FLAG_DEFRAG)) {
		skb = ip_check_defrag(net, skb, IP_DEFRAG_AF_PACKET);
		if (!skb)
			return 0;
	}
	switch (f->type) {
	case PACKET_FANOUT_HASH:
	default:
		idx = fanout_demux_hash(f, skb, num);
		break;
	case PACKET_FANOUT_LB:
		idx = fanout_demux_lb(f, skb, num);
		break;
	case PACKET_FANOUT_CPU:
		idx = fanout_demux_cpu(f, skb, num);
		break;
	case PACKET_FANOUT_RND:
		idx = fanout_demux_rnd(f, skb, num);
		break;
	case PACKET_FANOUT_QM:
		idx = fanout_demux_qm(f, skb, num);
		break;
	case PACKET_FANOUT_ROLLOVER:
		idx = fanout_demux_rollover(f, skb, 0, false, num);
		break;
	case PACKET_FANOUT_CBPF:
	case PACKET_FANOUT_EBPF:
		idx = fanout_demux_bpf(f, skb, num);
		break;
	}

	if (fanout_has_flag(f, PACKET_FANOUT_FLAG_ROLLOVER))
		idx = fanout_demux_rollover(f, skb, idx, true, num);

	po = pkt_sk(f->arr[idx]);
	return po->prot_hook.func(skb, dev, &po->prot_hook, orig_dev);
}

packet_rcv_fanout 进入后,根据不同的算法进行选择一个po,然后调用相应的func(tpacket_rcv)

相关推荐
橘色的喵5 个月前
探索PcapPlusPlus开源库:网络数据包处理与性能优化
网络·性能优化·libpcap·pcap·网络捕获·winpcap·pcapplusplus
adgentleman10 个月前
arping交叉编译
交叉编译·libpcap·arping·libnet