plumgrid@yhs-plumgrid:~/iovisor/bcc/tests/jit$ cat bpfdev1.b
// NOTE(review): presumably tells the compiler not to pack the structs declared
// below -- confirm against the B language documentation.
#packed "false"
// hash
// fwd_map: keyed by an IP address (dip). The leaf carries fwd_idx, which the
// udp state of main() returns directly when a lookup on the packet's
// destination IP succeeds.
struct FwdKey {
u32 dip:32;
};
struct FwdLeaf {
u32 fwd_idx:32;
};
// NOTE(review): the trailing (1) is presumably the table capacity -- confirm.
Table<FwdKey, FwdLeaf, FIXED_MATCH, NONE> fwd_map(1);
// array
// config_map: per-device configuration. main() only ever reads the entry at
// index 0, which holds the bpfdev's own IP and the IP of its slave.
struct ConfigKey {
u32 index:32;
};
struct ConfigLeaf {
u32 bpfdev_ip:32;
u32 slave_ip:32;
};
// NOTE(review): the trailing (1) is presumably the table capacity -- confirm.
Table<ConfigKey, ConfigLeaf, INDEXED, AUTO> config_map(1);
// hash
// macaddr_map: IP address -> MAC address. The arp state of main() writes
// entries from ARP packets with oper == 1 (spa -> sha); the udp state reads
// the entries for bpfdev_ip and slave_ip to rewrite the ethernet header.
struct MacaddrKey {
u32 ip:32;
};
struct MacaddrLeaf {
// declared as a 48-bit wide field inside a u64
u64 mac:48;
};
Table<MacaddrKey, MacaddrLeaf, FIXED_MATCH, AUTO> macaddr_map(11);
// hash
// slave_map: slave IP -> slave_ifindex. The pkt_type == 0 ("tx") path at the
// top of main() looks up the configured slave_ip here and uses the
// slave_ifindex it finds as the function's return value.
struct SlaveKey {
u32 slave_ip:32;
};
struct SlaveLeaf {
u32 slave_ifindex:32;
};
Table<SlaveKey, SlaveLeaf, FIXED_MATCH, NONE> slave_map(10);
// Entry point of the bpfdev program.
//
// The value returned at EOP is `ret`:
//   - pkt_type == 0 ("tx"): the slave_ifindex found via config_map[0].slave_ip
//     and slave_map; if either lookup misses, 0xffffffff is returned at once.
//   - otherwise ("rx"): 0 ("default to stack", per the comment below).
// After `ret` is chosen, control moves to the per-protocol states. Only the
// arp and udp states do any work; the udp state may also return a forwarding
// index directly instead of going through EOP.
u32 main(struct proto::skbuff *skb) {
u32 ret:32;
if skb.pkt_type == 0 {
// tx
// make sure configured
u32 slave_ip:32;
struct ConfigKey cfg_key = {.index = 0};
struct ConfigLeaf *cfg_leaf;
config_map.lookup(cfg_key, cfg_leaf) {};
on_valid(cfg_leaf) {
slave_ip = cfg_leaf->slave_ip;
} else {
// no configuration entry: bail out
// NOTE(review): 0xffffffff is presumably a drop/error sentinel -- confirm.
return 0xffffffff;
}
// make sure slave configured
// tx, default to the single slave
struct SlaveKey slave_key = {.slave_ip = slave_ip};
struct SlaveLeaf *slave_leaf;
slave_map.lookup(slave_key, slave_leaf);
on_valid(slave_leaf) {
ret = slave_leaf->slave_ifindex;
} else {
// configured slave_ip has no slave_map entry: bail out
return 0xffffffff;
}
} else {
// rx, default to stack
ret = 0;
}
// hand over to the protocol states below
goto proto::ethernet;
// no action for the ethernet header
// NOTE(review): how control advances from one state to the next is not
// defined in this file (presumably in proto.b) -- confirm.
state proto::ethernet {
}
// no action for the dot1q header
state proto::dot1q {
}
// For packets with a non-zero pkt_type, record the sender's IP -> MAC mapping
// from ARP packets whose oper field is 1, then finish at EOP.
state proto::arp {
if (skb.pkt_type) {
if $arp.oper == 1 {
struct MacaddrKey mac_key = {.ip = $arp.spa};
struct MacaddrLeaf mac_leaf = {.mac = $arp.sha};
macaddr_map.update(mac_key, mac_leaf);
}
goto EOP;
}
}
// no action for the ip header
state proto::ip {
}
// Only UDP packets with destination port 5000 get special treatment; every
// other UDP packet goes straight to EOP.
state proto::udp {
if $udp.dport != 5000 {
goto EOP;
}
if (skb.pkt_type) {
// lookup and then forward
struct FwdKey fwd_key = {.dip = $ip.dst};
struct FwdLeaf *fwd_val;
fwd_map.lookup(fwd_key, fwd_val) {};
on_valid(fwd_val) {
// NOTE(review): fwd_val is declared as a pointer and every other leaf access
// in this function uses '->'; confirm that '.' is intended here.
return fwd_val.fwd_idx;
} else {
goto EOP;
}
} else {
// rewrite the packet and send to a pre-configured index if needed
// NOTE(review): new_ip and old_ip are declared but never used below.
u32 new_ip:32;
u32 old_ip:32;
u64 src_mac:48;
u64 dst_mac:48;
struct ConfigKey cfg_key = {.index = 0};
struct ConfigLeaf *cfg_leaf;
config_map.lookup(cfg_key, cfg_leaf) {};
on_valid(cfg_leaf) {
// source MAC: the entry stored for the bpfdev's own IP
struct MacaddrKey mac_key = {.ip = cfg_leaf->bpfdev_ip};
struct MacaddrLeaf *mac_leaf;
// NOTE(review): repeats the value already set by the initializer two lines up.
mac_key.ip = cfg_leaf->bpfdev_ip;
macaddr_map.lookup(mac_key, mac_leaf) {};
on_valid (mac_leaf) {
src_mac = mac_leaf->mac;
} else {
goto EOP;
}
// destination MAC: the entry stored for the slave's IP (same key and leaf
// variables are reused for the second lookup)
mac_key.ip = cfg_leaf->slave_ip;
macaddr_map.lookup(mac_key, mac_leaf) {};
on_valid (mac_leaf) {
dst_mac = mac_leaf->mac;
} else {
goto EOP;
}
// rewrite ethernet header
pkt.rewrite_field($ethernet.dst, dst_mac);
pkt.rewrite_field($ethernet.src, src_mac);
// ip & udp checksum
// the checksums are adjusted while $ip.src / $ip.dst still hold the old
// addresses; the address fields themselves are rewritten afterwards
incr_cksum(@ip.hchecksum, $ip.src, cfg_leaf->bpfdev_ip);
incr_cksum(@ip.hchecksum, $ip.dst, cfg_leaf->slave_ip);
incr_cksum(@udp.crc, $ip.src, cfg_leaf->bpfdev_ip, 1);
incr_cksum(@udp.crc, $ip.dst, cfg_leaf->slave_ip, 1);
// rewrite ip src/dst fields
pkt.rewrite_field($ip.src, cfg_leaf->bpfdev_ip);
pkt.rewrite_field($ip.dst, cfg_leaf->slave_ip);
goto EOP;
} else {
goto EOP;
}
}
}
// common exit: return the value chosen at the top of main()
state EOP {
return ret;
}
}
#!/usr/bin/env python
# test program for a simple bpfdev
import commands
import sys
from ctypes import c_uint, c_ulong, c_ulonglong, Structure
from subprocess import check_call
from unittest import main, TestCase

from bpf import BPF
from netaddr import IPAddress, EUI
# map structures
class FwdKey(Structure):
    """Key type passed to ``table("fwd_map", ...)``: a single ``dip`` field."""

    _fields_ = [
        ("dip", c_uint),
    ]
class FwdLeaf(Structure):
    """Leaf type passed to ``table("fwd_map", ...)``.

    ``ifc_idx`` is filled with the interface index parsed from ``ip -o link``.
    """

    _fields_ = [
        ("ifc_idx", c_uint),
    ]
class ConfigKey(Structure):
    """Key type passed to ``table("config_map", ...)``: a single ``idx`` field."""

    _fields_ = [
        ("idx", c_uint),
    ]
class ConfigLeaf(Structure):
    """Leaf type passed to ``table("config_map", ...)``.

    Holds the bpfdev's IP followed by the slave's IP, both as integers.
    """

    _fields_ = [
        ("bpfdev_ip", c_uint),
        ("slave_ip", c_uint),
    ]
class MacaddrKey(Structure):
    """Key type passed to ``table("macaddr_map", ...)``: an IP as an integer."""

    _fields_ = [
        ("ip", c_uint),
    ]
class MacaddrLeaf(Structure):
    """Leaf type passed to ``table("macaddr_map", ...)``: a MAC as an integer.

    The matching leaf in bpfdev1.b is declared as a ``u64``, so the field uses
    ``c_ulonglong`` (always 8 bytes) rather than ``c_ulong``, whose size
    depends on the platform ABI.
    """

    _fields_ = [
        ("mac", c_ulonglong),
    ]
class SlaveKey(Structure):
    """Key type passed to ``table("slave_map", ...)``: the slave's IP."""

    _fields_ = [
        ("slave_ip", c_uint),
    ]
class SlaveLeaf(Structure):
    """Leaf type passed to ``table("slave_map", ...)``.

    ``ifc_idx`` is filled with the interface index parsed from ``ip -o link``.
    """

    _fields_ = [
        ("ifc_idx", c_uint),
    ]
class TestBPFDev(TestCase):
    """Bring up two bpfdev devices (br22, br33) and ping each from a namespace.

    setUp loads bpfdev1.b twice, creates one bpfdev per program, attaches a
    veth-backed network namespace to each device as its slave, and fills the
    four tables of each program.

    NOTE(review): nothing in this class removes the namespaces, veth pairs or
    bpfdev devices again, so a second run will presumably fail at
    "ip netns add" -- consider adding a tearDown.
    """

    def config(self, bpfdev, ns, bpfdev_ip, bpfdev_mac, slave_ip):
        """Bring up ``bpfdev`` and enslave a veth leading into namespace ``ns``.

        bpfdev     -- name of the bpfdev device to configure
        ns         -- name of the network namespace to create
        bpfdev_ip  -- IP address assigned to the bpfdev
        bpfdev_mac -- MAC address assigned to the bpfdev
        slave_ip   -- IP address assigned to eth0 inside the namespace

        Any failing command raises from check_call.
        """
        # ifup bpfdev
        check_call(["ip", "link", "set", "dev", bpfdev, "up"])
        check_call(["ifconfig", bpfdev, bpfdev_ip])
        check_call(["ifconfig", bpfdev, "hw", "ether", bpfdev_mac])
        # setup a namespace for the VM
        if_se = ns + ".eth0.se"
        if_vm = ns + ".eth0.vm"
        in_ns = ["ip", "netns", "exec", ns]
        check_call(["ip", "netns", "add", ns])
        check_call(["ip", "link", "add", "name", if_se, "type", "veth", "peer", "name", if_vm])
        check_call(["ip", "link", "set", if_vm, "netns", ns])
        check_call(in_ns + ["ip", "link", "set", if_vm, "name", "eth0"])
        check_call(["ip", "link", "set", if_se, "up"])
        check_call(in_ns + ["ip", "link", "set", "eth0", "up"])
        check_call(["ip", "link", "set", "dev", if_se, "promisc", "on"])
        check_call(in_ns + ["ifconfig", "eth0", slave_ip])
        # establish the master-slave relationships
        check_call(["ip", "link", "set", "dev", if_se, "master", bpfdev])

    def _ifindex(self, dev):
        """Return the interface index of ``dev`` as printed by ``ip -o link``."""
        cmd = "ip -o link show dev %s | awk -F': ' '{print $1}'" % dev
        return int(commands.getoutput(cmd))

    def _configure_tables(self, prog, dev, peer_dev, slave_if,
                          bpfdev_ip, bpfdev_mac, slave_ip):
        """Fill fwd_map, config_map, macaddr_map and slave_map of ``prog``.

        prog       -- loaded BPF program backing ``dev``
        dev        -- bpfdev name, used only in the progress messages
        peer_dev   -- device whose ifindex is stored in fwd_map
        slave_if   -- device whose ifindex is stored in slave_map
        bpfdev_ip  -- IP of the bpfdev (key of fwd_map and macaddr_map)
        bpfdev_mac -- MAC of the bpfdev in colon notation
        slave_ip   -- IP of the slave (key of slave_map)
        """
        sys.stderr.write("configuring bpfdev %s table\n" % dev)
        bpfdev_ip_val = IPAddress(bpfdev_ip).value
        slave_ip_val = IPAddress(slave_ip).value

        # fwd_map: bpfdev IP -> ifindex of the peer bpfdev
        fwd_if = self._ifindex(peer_dev)
        sys.stderr.write("%s special rx packet forward to %d\n" % (dev, fwd_if))
        fwd_map = prog.table("fwd_map", FwdKey, FwdLeaf)
        fwd_map.put(FwdKey(bpfdev_ip_val), FwdLeaf(fwd_if))

        # config_map: index 0 -> (bpfdev IP, slave IP)
        config_map = prog.table("config_map", ConfigKey, ConfigLeaf)
        config_map.put(ConfigKey(0), ConfigLeaf(bpfdev_ip_val, slave_ip_val))

        # macaddr_map: bpfdev IP -> bpfdev MAC; EUI is given the dash-separated
        # spelling, as before
        macaddr_map = prog.table("macaddr_map", MacaddrKey, MacaddrLeaf)
        mac_val = EUI(bpfdev_mac.replace(":", "-")).value
        macaddr_map.put(MacaddrKey(bpfdev_ip_val), MacaddrLeaf(mac_val))

        # slave_map: slave IP -> ifindex of the host side of the veth pair
        slave_map = prog.table("slave_map", SlaveKey, SlaveLeaf)
        fwd_if = self._ifindex(slave_if)
        sys.stderr.write("%s special tx packet forward to %d\n" % (dev, fwd_if))
        slave_map.put(SlaveKey(slave_ip_val), SlaveLeaf(fwd_if))

    def _dump_config(self, prog, prog_name, label):
        """Read config_map entry 0 of ``prog`` back and print its two IPs."""
        sys.stderr.write("%s config_map\n" % prog_name)
        config_map = prog.table("config_map", ConfigKey, ConfigLeaf)
        leaf = config_map.get(ConfigKey(0))
        # same output as the former print statement, valid on Python 2 and 3
        print("%s %s %s" % (label, leaf.bpfdev_ip, leaf.slave_ip))

    def setUp(self):
        """Load both programs, create br22/br33 and configure their tables."""
        sys.stderr.write("build bpfdev programs for br22 and br33\n")
        self.prog1 = BPF("main", "bpfdev1.b", "proto.b", prog_type=BPF.BPF_PROG_TYPE_BPFDEV, debug=0)
        self.prog2 = BPF("main", "bpfdev1.b", "proto.b", prog_type=BPF.BPF_PROG_TYPE_BPFDEV, debug=0)

        # create two bpf devices
        sys.stderr.write("creating bpfdev br22 and br33\n")
        self.prog1.create_bpfdev("br22")
        self.prog2.create_bpfdev("br33")

        # configure bpfdev
        sys.stderr.write("configuring bpfdev br22 and br33\n")
        self.config("br22", "ns0", "10.0.0.4", "02:02:02:02:02:02", "10.1.1.3")
        self.config("br33", "ns1", "20.0.0.4", "04:04:04:04:04:04", "20.1.1.3")

        # prog1 table configuration
        self._configure_tables(self.prog1, "br22", "br33", "ns0.eth0.se",
                               "10.0.0.4", "02:02:02:02:02:02", "10.1.1.3")
        # prog2 table configuration
        self._configure_tables(self.prog2, "br33", "br22", "ns1.eth0.se",
                               "20.0.0.4", "04:04:04:04:04:04", "20.1.1.3")

        # read the configuration back as a sanity check
        self._dump_config(self.prog1, "prog1", "config1")
        self._dump_config(self.prog2, "prog2", "config2")

    def test_ping(self):
        """Ping each bpfdev's IP from inside the namespace enslaved to it."""
        sys.stderr.write("testing ping between master and slave\n")
        check_call(["ip", "netns", "exec", "ns0", "ping", "-c4", "10.0.0.4"])
        check_call(["ip", "netns", "exec", "ns1", "ping", "-c4", "20.0.0.4"])
        # sys.stderr.write("testing forwarding from br22 to br33\n")
        # check_call(["ip", "netns", "exec", "ns1", "/usr/bin/python", "/home/plumgrid/bpf/recv_udp.py", "&"])
        # check_call(["ip", "netns", "exec", "ns0", "/usr/bin/python", "/home/plumgrid/bpf/send_udp.py"])
# Run the unittest runner only when this file is executed as a script.
if __name__ == "__main__":
    main()
To run the complete test, other changes in bcc are needed to support the new bpfdev device; those changes are not included here.
5: from 39 to 89: R0=imm5000 R1=imm0 R6=ctx R7=imm0 R8=imm14 R9=inv R10=fp
5: 89: (b7) r9 = 0
5: 90: (63) *(u32 *)(r10 -56) = r9
5: 91: (18) r1 = 0x587280
5: 93: (bf) r2 = r10
5: 94: (07) r2 += -56
5: 95: (85) call 1
5: 96: (bf) r1 = r0
5: 97: (15) if r0 == 0x0 goto pc-11
5: R0=map_value(ks=4,vs=8) R1=map_value_or_null(ks=4,vs=8) R6=ctx R7=imm0 R8=imm14 R9=imm0 R10=fp
5: 98: (05) goto pc+0
5: 99: (63) *(u32 *)(r10 -64) = r9
5: 100: (bf) r2 = r1
5: 101: (7b) *(u64 *)(r10 -72) = r2
5: 102: (61) r1 = *(u32 *)(r2 +0)
5: R2 invalid mem access 'map_value_or_null'
5:
5: ERROR: test_ping (__main__.TestBPFDev)
5: ----------------------------------------------------------------------
5: Traceback (most recent call last):
5: File "/home/plumgrid/iovisor/bcc/tests/jit/bpfdev1.py", line 59, in setUp
5: self.prog1 = BPF("main", "bpfdev1.b", "proto.b", prog_type=BPF.BPF_PROG_TYPE_BPFDEV, debug=0)
5: File "/home/plumgrid/iovisor/bcc/src/bpf.py", line 64, in __init__
5: self.load(self.name)
5: File "/home/plumgrid/iovisor/bcc/src/bpf.py", line 78, in load
5: raise Exception("Failed to load BPF program %s" % self.name)
5: Exception: Failed to load BPF program main
5:
5: ----------------------------------------------------------------------
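Instruction "96" copies r0 into r1, so the verifier tracks r1 as map_value_or_null.
Instruction "97" checks "r0" for null, but that check only refines the type of r0 (to map_value); r1 keeps the type map_value_or_null. The pointer is then copied from r1 into r2 (instruction "100"), so the load through r2 at instruction "102" is rejected.
The compiler ought to generate better code: the instruction "r1 = r0" is not necessary.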
I dumped the IR (by changing debug=0 to debug=1 in the Python program) and fed it to llc:
`llc -march=bpf -filetype=asm -O3 b.ll`
LBB8_16: # %if.else20
mov r9, 0
stw -56(r10), r9
ld_pseudo r1, 1, 6
mov r2, r10
addi r2, -56
call 1
mov r1, r0
jeqi r0, 0 goto LBB8_12
jmp LBB8_18
LBB8_18: # %onvalid.then25
stw -64(r10), r9
mov r2, r1
std -72(r10), r2
ldw r1, 0(r2)
stw -64(r10), r1
-O2 generates similar code.
Studying the LLVM optimization passes, there is a pass in LLVM called "virtual register rewrite" which does remove SOME of the redundant copies above, but not all of them, hence the issue.
FYI, I changed LLVM to print out the passes applied during bcc compiler optimization, and below is the result:
Checking the llc compiler passes, they are very similar (I did not compare them one-to-one) to the above
for function passes.
In summary, this is an LLVM issue and we may have to fix it there.