我在 Ubuntu 16.04 LTS 上实现了 packet_send 例程,使用 tx_ring 发送数据包。有时它可以达到线速,但有时只能达到线速的一半。
有时 20 秒后会恢复,有时要几分钟才恢复,没有观察到明显的规律。
请有人帮我找出原因!
下面贴出测试数据。
sar结果,线速=1Gbps
16:34:08:830 PM IFACE rxpck/s txpck/s rxkB/s txkB/s rxcmp/s txcmp/s rxmcst/s %ifutil
16:34:08:830 PM ens33 31.00 82718.00 2.20 119249.11 0.00 0.00 0.00 97.69
16:34:09:830 PM IFACE rxpck/s txpck/s rxkB/s txkB/s rxcmp/s txcmp/s rxmcst/s %ifutil
16:34:09:830 PM ens33 30.00 83301.00 2.14 120092.89 0.00 0.00 0.00 98.38
16:34:10:831
16:34:10:831 PM IFACE rxpck/s txpck/s rxkB/s txkB/s rxcmp/s txcmp/s rxmcst/s %ifutil
16:34:11:028 PM ens33 29.00 84618.00 2.08 121982.30 0.00 0.00 0.00 99.93
16:34:11:831
16:34:11:831 PM IFACE rxpck/s txpck/s rxkB/s txkB/s rxcmp/s txcmp/s rxmcst/s %ifutil
16:34:12:118 PM ens33 29.00 84285.00 2.08 121509.26 0.00 0.00 0.00 99.54
16:34:12:831
16:34:12:832 PM IFACE rxpck/s txpck/s rxkB/s txkB/s rxcmp/s txcmp/s rxmcst/s %ifutil
16:34:12:832 PM ens33 30.00 83709.00 2.19 120664.26 0.00 0.00 0.00 98.85
16:34:13:831
16:34:13:832 PM IFACE rxpck/s txpck/s rxkB/s txkB/s rxcmp/s txcmp/s rxmcst/s %ifutil
16:34:13:832 PM ens33 30.00 85626.00 2.14 123425.94 0.00 0.00 0.00 101.11
16:34:14:831
16:34:14:831 PM IFACE rxpck/s txpck/s rxkB/s txkB/s rxcmp/s txcmp/s rxmcst/s %ifutil
16:34:14:831 PM ens33 29.00 85910.00 2.08 123829.60 0.00 0.00 0.00 101.44
16:34:15:831
16:34:15:831 PM IFACE rxpck/s txpck/s rxkB/s txkB/s rxcmp/s txcmp/s rxmcst/s %ifutil
16:34:15:832 PM ens33 32.00 84821.00 2.26 122267.27 0.00 0.00 0.00 100.16
16:34:16:831
16:34:16:831 PM IFACE rxpck/s txpck/s rxkB/s txkB/s rxcmp/s txcmp/s rxmcst/s %ifutil
16:34:16:832 PM ens33 31.00 86557.00 2.20 124772.29 0.00 0.00 0.00 102.21
16:34:17:831
16:34:17:831 PM IFACE rxpck/s txpck/s rxkB/s txkB/s rxcmp/s txcmp/s rxmcst/s %ifutil
16:34:17:832 PM ens33 29.00 81088.00 2.08 116886.07 0.00 0.00 0.00 95.75
16:34:18:832
从这里开始性能下降
16:34:18:832 PM IFACE rxpck/s txpck/s rxkB/s txkB/s rxcmp/s txcmp/s rxmcst/s %ifutil
16:34:18:832 PM ens33 29.00 54067.00 2.09 77973.99 0.00 0.00 0.00 63.88
16:34:19:832
16:34:19:832 PM IFACE rxpck/s txpck/s rxkB/s txkB/s rxcmp/s txcmp/s rxmcst/s %ifutil
16:34:19:833 PM ens33 29.00 40137.00 2.08 57876.41 0.00 0.00 0.00 47.41
16:34:20:832
16:34:20:832 PM IFACE rxpck/s txpck/s rxkB/s txkB/s rxcmp/s txcmp/s rxmcst/s %ifutil
16:34:20:832 PM ens33 29.00 39912.00 2.08 57556.60 0.00 0.00 0.00 47.15
16:34:21:832
16:34:21:832 PM IFACE rxpck/s txpck/s rxkB/s txkB/s rxcmp/s txcmp/s rxmcst/s %ifutil
16:34:21:832 PM ens33 31.00 40215.00 2.20 57971.92 0.00 0.00 0.00 47.49
16:34:22:832
16:34:22:832 PM IFACE rxpck/s txpck/s rxkB/s txkB/s rxcmp/s txcmp/s rxmcst/s %ifutil
16:34:22:832 PM ens33 29.00 40254.00 2.08 58033.12 0.00 0.00 0.00 47.54
16:34:23:833
16:34:23:833 PM IFACE rxpck/s txpck/s rxkB/s txkB/s rxcmp/s txcmp/s rxmcst/s %ifutil
16:34:23:833 PM ens33 31.00 40245.00 2.20 58015.83 0.00 0.00 0.00 47.53
16:34:24:833
16:34:24:833 PM IFACE rxpck/s txpck/s rxkB/s txkB/s rxcmp/s txcmp/s rxmcst/s %ifutil
16:34:24:833 PM ens33 31.00 40173.00 2.20 57945.90 0.00 0.00 0.00 47.47
16:34:25:833
16:34:25:833 PM IFACE rxpck/s txpck/s rxkB/s txkB/s rxcmp/s txcmp/s rxmcst/s %ifutil
16:34:25:833 PM ens33 28.00 40478.00 2.02 58379.48 0.00 0.00 0.00 47.82
16:34:26:833
16:34:26:833 PM IFACE rxpck/s txpck/s rxkB/s txkB/s rxcmp/s txcmp/s rxmcst/s %ifutil
16:34:26:833 PM ens33 32.00 40350.00 2.41 58168.46 0.00 0.00 0.00 47.65
16:34:27:833
16:34:27:833 PM IFACE rxpck/s txpck/s rxkB/s txkB/s rxcmp/s txcmp/s rxmcst/s %ifutil
16:34:27:834 PM ens33 33.00 40399.00 2.47 58248.12 0.00 0.00 0.00 47.72
在这里恢复
16:34:41:834 PM IFACE rxpck/s txpck/s rxkB/s txkB/s rxcmp/s txcmp/s rxmcst/s %ifutil
16:34:41:835 PM ens33 31.00 76530.00 2.20 110334.69 0.00 0.00 0.00 90.39
16:34:42:836
16:34:42:836 PM IFACE rxpck/s txpck/s rxkB/s txkB/s rxcmp/s txcmp/s rxmcst/s %ifutil
16:34:42:836 PM ens33 26.00 85416.00 1.91 123126.53 0.00 0.00 0.00 100.87
最佳结果
16:34:08:611top - 14:40:50 up 10 days, 21:13, 3 users, load average: 3.26, 2.72, 1.62
16:34:08:611Tasks: 207 total, 2 running, 205 sleeping, 0 stopped, 0 zombie
16:34:08:611%Cpu(s): 0.3 us, 27.4 sy, 0.0 ni, 45.4 id, 0.0 wa, 0.0 hi, 26.9 si, 0.0 st
16:34:08:611KiB Mem : 4037752 total, 151772 free, 2171892 used, 1714088 buff/cache
16:34:08:611KiB Swap: 4191228 total, 4191072 free, 156 used. 1487920 avail Mem
16:34:08:611
16:34:08:611 PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND
16:34:08:61161597 root 20 0 4817224 591332 81796 S 134.7 14.6 127:48.11 packet_send
16:34:08:611 18 root 20 0 0 0 0 R 79.2 0.0 57:36.18 ksoftirqd/2
16:34:08:61145073 root 20 0 0 0 0 S 1.0 0.0 0:00.37 kworker/u128:1
16:34:08:61249928 telnetd 20 0 19168 2116 1936 S 1.0 0.1 0:00.07 in.telnetd
16:34:08:612 1 root 20 0 37736 5804 4020 S 0.0 0.1 0:10.96 systemd
16:34:08:613 2 root 20 0 0 0 0 S 0.0 0.0 0:00.29 kthreadd
16:34:08:613 3 root 20 0 0 0 0 S 0.0 0.0 0:05.73 ksoftirqd/0
16:34:08:613 5 root 0 -20 0 0 0 S 0.0 0.0 0:00.00 kworker/0:0H
16:34:08:613 7 root 20 0 0 0 0 S 0.0 0.0 25:54.40 rcu_sched
16:34:08:613 8 root 20 0 0 0 0 S 0.0 0.0 0:00.05 rcu_bh
16:34:08:613 9 root rt 0 0 0 0 S 0.0 0.0 1:30.57 migration/0
16:34:08:614 10 root rt 0 0 0 0 S 0.0 0.0 0:08.82 watchdog/0
16:34:08:614 11 root rt 0 0 0 0 S 0.0 0.0 0:11.23 watchdog/1
16:34:08:614 12 root rt 0 0 0 0 S 0.0 0.0 2:00.00 migration/1
16:34:08:614 13 root 20 0 0 0 0 S 0.0 0.0 0:06.68 ksoftirqd/1
16:34:08:614 15 root 0 -20 0 0 0 S 0.0 0.0 0:00.00 kworker/1:0H
16:34:08:614 16 root rt 0 0 0 0 S 0.0 0.0 0:11.27 watchdog/2
16:34:08:614 17 root rt 0 0 0 0 S 0.0 0.0 1:21.77 migration/2
16:34:08:614 20 root 0 -20 0 0 0 S 0.0 0.0 0:00.00 kworker/2:0H
16:34:08:614 21 root rt 0 0 0 0 S 0.0 0.0 0:10.96 watchdog/3
16:34:08:615 22 root rt 0 0 0 0 S 0.0 0.0 1:21.40 migration/3
16:34:08:615 23 root 20 0 0 0 0 S 0.0 0.0 0:06.05 ksoftirqd/3
16:34:19:657top - 14:41:01 up 10 days, 21:14, 3 users, load average: 3.44, 2.78, 1.65
16:34:19:657Tasks: 207 total, 1 running, 206 sleeping, 0 stopped, 0 zombie
16:34:19:657%Cpu(s): 0.0 us, 21.9 sy, 0.0 ni, 72.9 id, 0.0 wa, 0.0 hi, 5.2 si, 0.0 st
16:34:19:657KiB Mem : 4037752 total, 145764 free, 2177852 used, 1714136 buff/cache
16:34:19:657KiB Swap: 4191228 total, 4191072 free, 156 used. 1481976 avail Mem
16:34:19:657
16:34:19:657 PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND
16:34:19:65761597 root 20 0 4817224 591332 81796 S 115.0 14.6 128:02.40 packet_send
16:34:19:65847844 root 20 0 40516 3812 3172 R 1.0 0.1 0:01.62 top
16:34:19:65850061 xiejie 20 0 6096 776 708 S 1.0 0.0 0:00.03 sar
16:34:19:658 1 root 20 0 37736 5804 4020 S 0.0 0.1 0:10.96 systemd
16:34:19:658 2 root 20 0 0 0 0 S 0.0 0.0 0:00.29 kthreadd
16:34:19:658 3 root 20 0 0 0 0 S 0.0 0.0 0:05.73 ksoftirqd/0
16:34:19:658 5 root 0 -20 0 0 0 S 0.0 0.0 0:00.00 kworker/0:0H
16:34:19:658 7 root 20 0 0 0 0 S 0.0 0.0 25:54.45 rcu_sched
16:34:19:658 8 root 20 0 0 0 0 S 0.0 0.0 0:00.05 rcu_bh
16:34:19:658 9 root rt 0 0 0 0 S 0.0 0.0 1:30.57 migration/0
16:34:19:658 10 root rt 0 0 0 0 S 0.0 0.0 0:08.82 watchdog/0
16:34:19:658 11 root rt 0 0 0 0 S 0.0 0.0 0:11.23 watchdog/1
16:34:19:658 12 root rt 0 0 0 0 S 0.0 0.0 2:00.00 migration/1
16:34:19:658 13 root 20 0 0 0 0 S 0.0 0.0 0:06.69 ksoftirqd/1
16:34:19:658 15 root 0 -20 0 0 0 S 0.0 0.0 0:00.00 kworker/1:0H
16:34:19:658 16 root rt 0 0 0 0 S 0.0 0.0 0:11.27 watchdog/2
16:34:19:658 17 root rt 0 0 0 0 S 0.0 0.0 1:21.77 migration/2
16:34:19:659 18 root 20 0 0 0 0 S 0.0 0.0 57:43.62 ksoftirqd/2
16:34:19:659 20 root 0 -20 0 0 0 S 0.0 0.0 0:00.00 kworker/2:0H
16:34:19:659 21 root rt 0 0 0 0 S 0.0 0.0 0:10.96 watchdog/3
16:34:19:659 22 root rt 0 0 0 0 S 0.0 0.0 1:21.40 migration/3
16:34:19:659 23 root 20 0 0 0 0 S 0.0 0.0 0:06.05 ksoftirqd/3
实现大致如下:
/* TX ring bookkeeping: one mmap()ed region shared with the kernel for
 * PACKET_TX_RING, plus an iovec table addressing each frame slot in it. */
struct ring {
struct iovec *frames; /* one entry per frame: slot base pointer + frame size */
uint8_t *mm_space; /* start of the mmap()ed ring area */
size_t mm_len; /* total mapped length = tp_block_size * tp_block_nr */
struct sockaddr_ll s_ll; /* link-layer address passed to bind() */
union { /* ring geometry request, by TPACKET version */
struct tpacket_req layout; /* TPACKET_V1/V2 layout */
#ifdef HAVE_TPACKET3
struct tpacket_req3 layout3; /* TPACKET_V3 layout */
#endif
uint8_t raw; /* raw byte view of the request */
};
};
struct ring tx_ring;
/* tx_ring setup (fragment: sock/discard/val/len/num/size are declared elsewhere) */
sock = socket(PF_PACKET, SOCK_RAW, 0); /* protocol 0: TX-only, no RX delivery to this socket */
ret = setsockopt(sock, SOL_PACKET, PACKET_LOSS, (void *) &discard, sizeof(discard)); /* PACKET_LOSS: on error, mark frame TP_STATUS_AVAILABLE instead of WRONG_FORMAT */
tx_ring.layout.tp_block_size = PAGE_SIZE << 2; /* PAGE_SIZE=4096, so 16 KiB blocks */
tx_ring.layout.tp_frame_size = TPACKET_ALIGNMENT << 7; /* presumably 16<<7 = 2048 B/frame — TODO confirm TPACKET_ALIGNMENT */
tx_ring.layout.tp_block_nr = 256; /* 256 * 16 KiB = 4 MiB of ring space */
tx_ring.layout.tp_frame_nr = tx_ring.layout.tp_block_nr*(tx_ring.layout.tp_block_size/tx_ring.layout.tp_frame_size); /* frames must exactly tile the blocks */
setsockopt(sock, SOL_PACKET, PACKET_VERSION, &val, sizeof(val)); /* set version TPACKET_V2 */
setsockopt(sock, SOL_PACKET, PACKET_TX_RING, &tx_ring.layout, sizeof(tx_ring.layout)); /* NOTE(review): return value unchecked — verify it succeeds */
tx_ring.mm_len = (size_t) tx_ring.layout.tp_block_size * tx_ring.layout.tp_block_nr;
tx_ring.mm_space = mmap(NULL, tx_ring.mm_len, PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_LOCKED | MAP_POPULATE, sock, 0); /* map the whole ring; LOCKED avoids page faults on the hot path */
/* map frames: build an iovec per frame slot pointing into mm_space */
tx_ring.frames = xzmalloc_aligned(len, CO_CACHE_LINE_SIZE); /* assumes len = tp_frame_nr * sizeof(struct iovec) — TODO confirm */
for (i = 0; i < num; ++i) { /* assumes num == tp_frame_nr and size == tp_frame_size — TODO confirm */
tx_ring.frames[i].iov_len = size;
tx_ring.frames[i].iov_base = tx_ring.mm_space + (i * size);
}
/* bind the ring socket to the egress interface */
bind(sock, (struct sockaddr *) &tx_ring.s_ll, sizeof(tx_ring.s_ll)); /*sll_family = AF_PACKET; sll_protocol=htons(ETH_P_ALL)*/
/* implementation of the xmit routine */
/* Fill free TX-ring frames with packet data and hand them to the kernel,
 * flushing (sendto) whenever the next slot is still owned by the kernel.
 * Loops until packet_num frames are queued or sigint is raised.
 * NOTE: 'it' is static, so the ring position persists across calls. */
static void xmit_fastpath(struct ctx *ctx, unsigned int packet_num)
{
uint8_t *out = NULL;
static unsigned int it = 0; /* current ring slot; survives between calls */
unsigned long num = packet_num; /* frames still to queue */
struct frame_map *hdr;
while (likely(sigint==0 && num > 0 && plen > 0)) { /* plen is a file-scope packet length; loop is a no-op if it is 0 */
/* user_may_pull_from_tx -> return !(tp_status & (TP_STATUS_SEND_REQUEST | TP_STATUS_SENDING)); */
if (!user_may_pull_from_tx(tx_ring.frames[it].iov_base)) {
/* Slot still owned by the kernel: kick transmission and retry.
 * NOTE(review): this busy-spins on the same slot; when the NIC
 * drains slowly this burns CPU in lockstep with ksoftirqd, which
 * matches the throughput collapse seen in the sar data — consider
 * poll(POLLOUT) here instead of a tight sendto loop. */
int ret = pull_and_flush_tx_ring(sock); /* sendto(sock, NULL, 0, MSG_DONTWAIT, NULL, 0); */
if (unlikely(ret < 0)) {
if (errno != EBADF && errno != ENOBUFS)
panic(); /* any other errno is fatal */
}
continue;
}
hdr = tx_ring.frames[it].iov_base;
/* Payload starts after the tpacket2 header area (no sockaddr_ll on TX). */
out = ((uint8_t *) hdr) + TPACKET2_HDRLEN - sizeof(struct sockaddr_ll);
/* NOTE(review): 'i' is not declared in this function — presumably a
 * file-scope index into packets[] set by load_packets(); confirm it is
 * advanced as intended, otherwise every frame resends packets[i]. */
hdr->tp_h.tp_snaplen = packets[i].len;
hdr->tp_h.tp_len = packets[i].len;
memcpy(out, packets[i].payload, packets[i].len);
num--;
/* Hand the frame to the kernel. NOTE(review): no memory barrier is
 * visible before the status store — verify kernel_may_pull_from_tx
 * orders the payload write before tp_status is set. */
kernel_may_pull_from_tx(&hdr->tp_h); /*tp_status = TP_STATUS_SEND_REQUEST;*/
it = (it + 1) % tx_ring.layout.tp_frame_nr; /* advance, wrapping around the ring */
}
}
/*
 * Stage pkt_len bytes of packetData into the shared packet table, then
 * queue frameTotalNum frames through the TX-ring fast path.
 */
void send_packets(u_int frameTotalNum, const u_char *packetData, u_int pkt_len)
{
	/* Copy the caller's template into the global packets[] table. */
	load_packets(packetData, pkt_len);

	/* Drive the ring until the requested number of frames is queued. */
	xmit_fastpath(ctx, frameTotalNum);
}
/*
 * Drain the TX ring and release the socket.
 * Retries the blocking flush for up to ~1 s (100 * 10 ms) while the
 * kernel keeps reporting ENOBUFS (frames still in flight).
 */
void xmit_finish()
{
	int retry = 100; /* original declared an unused 'i' as well — removed */
	int sock = dev_io_fd_get(ctx.dev_out);

	/* Keep kicking the ring until it drains or we give up. */
	while (pull_and_flush_tx_ring_wait(sock) < 0 && errno == ENOBUFS && retry-- > 0)
		usleep(10000);

	close(sock); /* also unmaps nothing by itself; ring memory is freed with the socket */
	/* NOTE: ctx is a plain struct (accessed as ctx.dev_out above), so the
	 * original free(ctx) was invalid — there is no heap object to free. */
}
/*
 * Test driver: replays one template frame through the TX ring in
 * 10,000,000 batches of 1000 frames each, then drains and closes it.
 */
int main(int argc, char *argv[]){
	/* Template packet (L2+L3 headers followed by payload).
	 * The original 'char rawData[] = {}' is a zero-length array: not valid
	 * standard C, and sizeof(rawData) == 0 means plen stays 0 and
	 * xmit_fastpath's send loop never runs. Reserve a real buffer. */
	char rawData[1400] = { 0 }; /* TODO: fill with real L2/L3 header + test payload */

	/* The original do/while with i-- ran 10,000,001 iterations; use the
	 * intended count explicitly. */
	for (int i = 0; i < 10000000; i++)
		send_packets(1000, rawData, sizeof(rawData));

	xmit_finish(); /* drain remaining frames and close the socket */
	return 0;
}