rte_bus_list
struct rte_bus {
TAILQ_ENTRY(rte_bus) next; /< Next bus object in linked list */ const char *name; /< Name of the bus */ rte_bus_scan_t scan; /< Scan for devices attached to bus */ rte_bus_probe_t probe; /< Probe devices on bus */ rte_bus_find_device_t find_device; /< Find a device on the bus */ rte_bus_plug_t plug; /< Probe single device for drivers */ rte_bus_unplug_t unplug; /< Remove single device from driver */ rte_bus_parse_t parse; /< Parse a device name */ struct rte_bus_conf conf; /< Bus configuration */ }; TAILQ_HEAD(rte_bus_list, rte_bus); #define TAILQ_HEAD(name, type) \ struct name { \ struct type *tqh_first; /* first element */ \ struct type **tqh_last; /* addr of last next element */ \ } /* 定义rte_bus_list */ struct rte_bus_list rte_bus_list = TAILQ_HEAD_INITIALIZER(rte_bus_list);
注册pci bus
将rte_pci_bus插入rte_bus_list链表
讯享网struct rte_pci_bus {
struct rte_bus bus; /< Inherit the generic class */ struct rte_pci_device_list device_list; /< List of PCI devices */ struct rte_pci_driver_list driver_list; /< List of PCI drivers */ }; /* 定义rte_pci_bus */ struct rte_pci_bus rte_pci_bus = {
.bus = {
.scan = rte_pci_scan, .probe = rte_pci_probe, .find_device = pci_find_device, .plug = pci_plug, .unplug = pci_unplug, .parse = pci_parse, }, .device_list = TAILQ_HEAD_INITIALIZER(rte_pci_bus.device_list), .driver_list = TAILQ_HEAD_INITIALIZER(rte_pci_bus.driver_list), }; RTE_REGISTER_BUS(pci, rte_pci_bus.bus); #define RTE_REGISTER_BUS(nm, bus) \ RTE_INIT_PRIO(businitfn_ nm, 101); \ /* 声明为gcc构造函数,先于main()执行 */ static void businitfn_ nm(void) \ {
\ (bus).name = RTE_STR(nm);\ rte_bus_register(&bus); \ } void rte_bus_register(struct rte_bus *bus) {
RTE_VERIFY(bus); RTE_VERIFY(bus->name && strlen(bus->name)); /* A bus should mandatorily have the scan implemented */ RTE_VERIFY(bus->scan); RTE_VERIFY(bus->probe); RTE_VERIFY(bus->find_device); /* Buses supporting driver plug also require unplug. */ RTE_VERIFY(!bus->plug || bus->unplug); /* 将rte_pci_bus.bus插入rte_bus_list链表 */ TAILQ_INSERT_TAIL(&rte_bus_list, bus, next); RTE_LOG(DEBUG, EAL, "Registered [%s] bus.\n", bus->name); }
注册pci driver
将rte_ixgbe_pmd插入rte_pci_bus.driver_list链表
struct rte_pci_driver {
TAILQ_ENTRY(rte_pci_driver) next; /< Next in list. */ struct rte_driver driver; /< Inherit core driver. */ struct rte_pci_bus *bus; /< PCI bus reference. */ pci_probe_t *probe; /< Device Probe function. */ pci_remove_t *remove; /< Device Remove function. */ const struct rte_pci_id *id_table; /< ID table, NULL terminated. */ uint32_t drv_flags; /< Flags contolling handling of device. */ }; /* 定义rte_ixgbe_pmd */ static struct rte_pci_driver rte_ixgbe_pmd = {
.id_table = pci_id_ixgbe_map, .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC, .probe = eth_ixgbe_pci_probe, .remove = eth_ixgbe_pci_remove, }; RTE_PMD_REGISTER_PCI(net_ixgbe, rte_ixgbe_pmd); #define RTE_PMD_REGISTER_PCI(nm, pci_drv) \ RTE_INIT(pciinitfn_ nm); \ /* 声明为gcc构造函数,先于main()执行 */ static void pciinitfn_ nm(void) \ {
\ (pci_drv).driver.name = RTE_STR(nm);\ rte_pci_register(&pci_drv); \ } \ RTE_PMD_EXPORT_NAME(nm, __COUNTER__) void rte_pci_register(struct rte_pci_driver *driver) {
/* 将rte_ixgbe_pmd插入rte_pci_bus.driver_list链表 */ TAILQ_INSERT_TAIL(&rte_pci_bus.driver_list, driver, next); driver->bus = &rte_pci_bus; }
rte_config和lcore_config
讯享网struct rte_config {
uint32_t master_lcore; /< Id of the master lcore */ uint32_t lcore_count; /< Number of available logical cores. */ uint32_t service_lcore_count;/< Number of available service cores. */ enum rte_lcore_role_t lcore_role[RTE_MAX_LCORE]; /< State of cores. */ / Primary or secondary configuration */ enum rte_proc_type_t process_type; / * Pointer to memory configuration, which may be shared across multiple * DPDK instances */ struct rte_mem_config *mem_config; } __attribute__((__packed__)); /* 定义rte_config */ static struct rte_config rte_config = {
.mem_config = &early_mem_config, }; struct lcore_config {
unsigned detected; /< true if lcore was detected */ pthread_t thread_id; /< pthread identifier */ int pipe_master2slave[2]; /< communication pipe with master */ int pipe_slave2master[2]; /< communication pipe with master */ lcore_function_t * volatile f; /< function to call */ void * volatile arg; /< argument of function */ volatile int ret; /< return value of function */ volatile enum rte_lcore_state_t state; /< lcore state */ unsigned socket_id; /< physical socket id for this lcore */ unsigned core_id; /< core number on socket for this lcore */ int core_index; /< relative index, starting from 0 */ rte_cpuset_t cpuset; /< cpu set which the lcore affinity to */ uint8_t core_role; /< role of core eg: OFF, RTE, SERVICE */ }; /* 定义lcore_config数组 */ struct lcore_config lcore_config[RTE_MAX_LCORE];
rte_eal_init()
MASTER lcore的初始化入口函数
/*
 * Launch threads, called at application init().
 *
 * Abridged excerpt: each "..." marks code omitted by the article. The steps
 * shown are lcore detection, argument parsing, memory init, master-thread
 * init, bus scan, slave-thread creation and bus probe. Every failure path
 * shown sets rte_errno and returns -1.
 */
int rte_eal_init(int argc, char **argv) {
    ...
    /* rte_eal_cpu_init() ->
     * eal_cpu_core_id()
     * eal_cpu_socket_id()
     * Reads /sys/devices/system/[cpu|node]
     * Sets lcore_config->[core_role|core_id|socket_id]
     */
    if (rte_eal_cpu_init() < 0) {
        rte_eal_init_alert("Cannot detect lcores.");
        rte_errno = ENOTSUP;
        return -1;
    }

    /* eal_parse_args() ->
     * eal_parse_common_option() ->
     * eal_parse_coremask()
     * eal_parse_master_lcore()
     * eal_parse_lcores()
     * eal_adjust_config()
     * Parses the -c, --master_lcore and --lcores arguments
     * eal_parse_lcores() determines the usable logical CPUs
     * eal_adjust_config() sets rte_config.master_lcore to 0
     * (the first lcore becomes the MASTER lcore)
     */
    fctret = eal_parse_args(argc, argv);
    if (fctret < 0) {
        rte_eal_init_alert("Invalid 'command line' arguments.");
        rte_errno = EINVAL;
        rte_atomic32_clear(&run_once);
        return -1;
    }
    ...
    /* Initialise hugepage information */
    if (rte_eal_memory_init() < 0) {
        rte_eal_init_alert("Cannot init memory\n");
        rte_errno = ENOMEM;
        return -1;
    }
    ...
    /* eal_thread_init_master() ->
     * eal_thread_set_affinity()
     * Marks the current thread as the MASTER lcore
     * eal_thread_set_affinity() binds the MASTER lcore to a logical CPU
     */
    eal_thread_init_master(rte_config.master_lcore);
    ...
    /* rte_bus_scan() ->
     * rte_pci_scan() ->
     * pci_scan_one() ->
     * pci_parse_sysfs_resource()
     * rte_pci_add_device()
     * Walks rte_bus_list and calls each bus's scan function
     * (rte_pci_scan() for pci)
     * Walks /sys/bus/pci/devices and allocates a struct rte_pci_device
     * per DBSF
     * Reads and parses each DBSF's resource file line by line into
     * dev->mem_resource[i]
     * Inserts dev into rte_pci_bus.device_list
     */
    if (rte_bus_scan()) {
        rte_eal_init_alert("Cannot scan the buses for devices\n");
        rte_errno = ENODEV;
        return -1;
    }

    /* pthread_create() ->
     * eal_thread_loop() ->
     * eal_thread_set_affinity()
     * Creates one thread per SLAVE lcore, running eal_thread_loop()
     * eal_thread_set_affinity() binds the SLAVE lcore to a logical CPU
     */
    RTE_LCORE_FOREACH_SLAVE(i) {
        /*
         * create communication pipes between master thread
         * and children
         */
        /* The MASTER lcore creates the pipes used between MASTER and SLAVE
         * lcores (parent/child thread communication). */
        if (pipe(lcore_config[i].pipe_master2slave) < 0)
            rte_panic("Cannot create pipe\n");
        if (pipe(lcore_config[i].pipe_slave2master) < 0)
            rte_panic("Cannot create pipe\n");

        lcore_config[i].state = WAIT; /* mark the SLAVE lcore as WAIT */

        /* create a thread for each lcore */
        ret = pthread_create(&lcore_config[i].thread_id, NULL,
                             eal_thread_loop, NULL);
        ...
    }

    /*
     * Launch a dummy function on all slave lcores, so that master lcore
     * knows they are all ready when this function returns.
     */
    rte_eal_mp_remote_launch(sync_func, NULL, SKIP_MASTER);
    rte_eal_mp_wait_lcore();
    ...
    /* Probe all the buses and devices/drivers on them */
    /* rte_bus_probe() ->
     * rte_pci_probe() ->
     * pci_probe_all_drivers() ->
     * rte_pci_probe_one_driver() ->
     * rte_pci_match()
     * rte_pci_map_device() ->
     * pci_uio_map_resource()
     * eth_ixgbe_pci_probe()
     * Walks rte_bus_list and calls each bus's probe function
     * (rte_pci_probe() for pci)
     * rte_pci_probe()/pci_probe_all_drivers() walk
     * rte_pci_bus.device_list/driver_list to match devices and drivers
     * Maps the BARs, then calls the driver's probe function
     * (eth_ixgbe_pci_probe() for ixgbe)
     */
    if (rte_bus_probe()) {
        rte_eal_init_alert("Cannot probe devices\n");
        rte_errno = ENOTSUP;
        return -1;
    }
    ...
}
rte_pci_device和mapped_pci_resource
/**
 * A PCI device found by the bus scan. Instances are linked into
 * rte_pci_bus.device_list through the `next` entry.
 */
struct rte_pci_device {
    TAILQ_ENTRY(rte_pci_device) next;  /**< Next probed PCI device. */
    struct rte_device device;          /**< Inherit core device */
    /* DBSF (domain, bus, device, function) */
    struct rte_pci_addr addr;          /**< PCI location. */
    struct rte_pci_id id;              /**< PCI ID. */
    struct rte_mem_resource mem_resource[PCI_MAX_RESOURCE];
                                       /**< PCI Memory Resource */
    struct rte_intr_handle intr_handle; /**< Interrupt handle */
    struct rte_pci_driver *driver;     /**< Associated driver */
    uint16_t max_vfs;                  /**< sriov enable if not zero */
    enum rte_kernel_driver kdrv;       /**< Kernel driver passthrough */
    char name[PCI_PRI_STR_SIZE+1];     /**< PCI location (ASCII) */
};

/**
 * One memory resource (BAR) of a device.
 */
struct rte_mem_resource {
    /* Bus address */
    uint64_t phys_addr; /**< Physical address, 0 if not resource. */
    uint64_t len;       /**< Length of the resource. */
    /* Virtual address */
    void *addr;         /**< Virtual address, NULL when not mapped. */
};

/**
 * Record of the BAR mappings created for one PCI device. Instances are
 * linked into a mapped_pci_res_list through the `next` entry.
 */
struct mapped_pci_resource {
    TAILQ_ENTRY(mapped_pci_resource) next;

    /* DBSF (domain, bus, device, function) */
    struct rte_pci_addr pci_addr;
    char path[PATH_MAX];
    int nb_maps;                              /* number of valid entries in maps[] */
    struct pci_map maps[PCI_MAX_RESOURCE];
};

TAILQ_HEAD(mapped_pci_res_list, mapped_pci_resource);
pci_parse_sysfs_resource()
/*
 * Sample contents of a device's sysfs "resource" file:
 *
 * [root@localhost ~]# cat /sys/bus/pci/devices/0000:07:00.0/resource
 * 0x00000000df 0x00000000df9fffff 0x000000000014220c
 * 0x0000000000000000 0x0000000000000000 0x0000000000000000
 * 0x0000000000000000 0x0000000000000000 0x0000000000000000
 * 0x0000000000000000 0x0000000000000000 0x0000000000000000
 * 0x00000000dfa04000 0x00000000dfa07fff 0x000000000014220c
 * 0x0000000000000000 0x0000000000000000 0x0000000000000000
 * 0x00000000dfc80000 0x00000000dfcfffff 0x000000000004e200
 * 0x0000000000000000 0x0000000000000000 0x0000000000000000
 * 0x0000000000000000 0x0000000000000000 0x0000000000000000
 * 0x0000000000000000 0x0000000000000000 0x0000000000000000
 * 0x0000000000000000 0x0000000000000000 0x0000000000000000
 * 0x0000000000000000 0x0000000000000000 0x0000000000000000
 * 0x0000000000000000 0x0000000000000000 0x0000000000000000
 *
 * The three columns are start, end and flags.
 * NOTE(review): the first start value above looks truncated in the quoted
 * output (it is smaller than its end address by far more than any BAR size);
 * confirm against a real device.
 */

/*
 * Read the first PCI_MAX_RESOURCE lines of the sysfs resource file `filename`
 * and, for every line whose flags have IORESOURCE_MEM set, record the region
 * in dev->mem_resource[] (bus address, length, and a NULL virtual address
 * because nothing is mapped yet).
 *
 * Returns 0 on success, -1 if the file cannot be opened, a line cannot be
 * read, or a line cannot be parsed.
 */
static int
pci_parse_sysfs_resource(const char *filename, struct rte_pci_device *dev)
{
    char line[BUFSIZ];
    uint64_t start, end, attr;
    int status = 0;
    int bar;
    FILE *fp;

    /* Open /sys/bus/pci/devices/DBSF/resource. */
    fp = fopen(filename, "r");
    if (fp == NULL) {
        RTE_LOG(ERR, EAL, "Cannot open sysfs resource\n");
        return -1;
    }

    /* One line per resource, at most PCI_MAX_RESOURCE of them (6 BARs). */
    for (bar = 0; bar < PCI_MAX_RESOURCE; bar++) {
        if (fgets(line, sizeof(line), fp) == NULL) {
            RTE_LOG(ERR, EAL, "%s(): cannot read resource\n", __func__);
            status = -1;
            break;
        }

        if (pci_parse_one_sysfs_resource(line, sizeof(line),
                                         &start, &end, &attr) < 0) {
            status = -1;
            break;
        }

        /* Only memory-space regions are recorded. */
        if (!(attr & IORESOURCE_MEM))
            continue;

        dev->mem_resource[bar].phys_addr = start;       /* bus address */
        dev->mem_resource[bar].len = end - start + 1;   /* end is inclusive */
        dev->mem_resource[bar].addr = NULL;             /* not mapped for now */
    }

    fclose(fp);
    return status;
}

/*
 * Split one line of a sysfs resource file into its three space-separated
 * fields and convert each from a hexadecimal string.
 *
 * `line` is modified in place by rte_strsplit(); `len` is the size of its
 * buffer. The converted values are stored through phys_addr, end_addr and
 * flags.
 *
 * Returns 0 on success, -1 if the line does not split into exactly three
 * fields or if strtoull() set errno.
 */
int
pci_parse_one_sysfs_resource(char *line, size_t len, uint64_t *phys_addr,
                             uint64_t *end_addr, uint64_t *flags)
{
    /* Field order on the line: start address, end address, flags. */
    char *field[PCI_RESOURCE_FMT_NVAL];

    if (rte_strsplit(line, len, field, 3, ' ') != 3) {
        RTE_LOG(ERR, EAL, "%s(): bad resource format\n", __func__);
        return -1;
    }

    /* errno is cleared first so a conversion failure can be detected. */
    errno = 0;
    *phys_addr = strtoull(field[0], NULL, 16);
    *end_addr = strtoull(field[1], NULL, 16);
    *flags = strtoull(field[2], NULL, 16);

    if (errno == 0)
        return 0;

    RTE_LOG(ERR, EAL, "%s(): bad resource format\n", __func__);
    return -1;
}
pci_uio_map_resource()
讯享网int pci_uio_map_resource(struct rte_pci_device *dev) {
int i, map_idx = 0, ret; uint64_t phaddr; struct mapped_pci_resource *uio_res = NULL; struct mapped_pci_res_list *uio_res_list = RTE_TAILQ_CAST(rte_uio_tailq.head, mapped_pci_res_list); dev->intr_handle.fd = -1; dev->intr_handle.uio_cfg_fd = -1; dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; /* secondary processes - use already recorded details */ if (rte_eal_process_type() != RTE_PROC_PRIMARY) return pci_uio_map_secondary(dev); /* allocate uio resource */ ret = pci_uio_alloc_resource(dev, &uio_res); /* 为/dev/uioX分配struct mapped_pci_resource */ if (ret) return ret; /* Map all BARs */ for (i = 0; i != PCI_MAX_RESOURCE; i++) {
/* 最多6个BAR */ /* skip empty BAR */ phaddr = dev->mem_resource[i].phys_addr; if (phaddr == 0) /* 对于0000:07:00.0,当i为0或4时,phaddr非空 */ continue; ret = pci_uio_map_resource_by_index(dev, i, uio_res, map_idx); /* 映射BAR */ if (ret) goto error; map_idx++; } uio_res->nb_maps = map_idx; /* 映射BAR的个数 */ TAILQ_INSERT_TAIL(uio_res_list, uio_res, next); /* 将uio_res插入uio_res_list链表 */ return 0; error: for (i = 0; i < map_idx; i++) {
pci_unmap_resource(uio_res->maps[i].addr, (size_t)uio_res->maps[i].size); rte_free(uio_res->maps[i].path); } pci_uio_free_resource(dev, uio_res); return -1; } int pci_uio_map_resource_by_index(struct rte_pci_device *dev, int res_idx, struct mapped_pci_resource *uio_res, int map_idx) {
int fd; char devname[PATH_MAX]; void *mapaddr; struct rte_pci_addr *loc; struct pci_map *maps; loc = &dev->addr; maps = uio_res->maps; /* update devname for mmap */ snprintf(devname, sizeof(devname), "%s/" PCI_PRI_FMT "/resource%d", pci_get_sysfs_path(), loc->domain, loc->bus, loc->devid, loc->function, res_idx); /* allocate memory to keep path */ maps[map_idx].path = rte_malloc(NULL, strlen(devname) + 1, 0); if (maps[map_idx].path == NULL) {
RTE_LOG(ERR, EAL, "Cannot allocate memory for path: %s\n", strerror(errno)); return -1; } /* * open resource file, to mmap it */ fd = open(devname, O_RDWR); /* 打开/sys/bus/pci/devices/0000:07:00.0/resource0文件 */ if (fd < 0) {
RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", devname, strerror(errno)); goto error; } /* try mapping somewhere close to the end of hugepages */ if (pci_map_addr == NULL) pci_map_addr = pci_find_max_end_va(); mapaddr = pci_map_resource(pci_map_addr, fd, 0, (size_t)dev->mem_resource[res_idx].len, 0); /* 映射BAR */ close(fd); if (mapaddr == MAP_FAILED) goto error; pci_map_addr = RTE_PTR_ADD(mapaddr, (size_t)dev->mem_resource[res_idx].len); maps[map_idx].phaddr = dev->mem_resource[res_idx].phys_addr; /* 总线地址 */ maps[map_idx].size = dev->mem_resource[res_idx].len; maps[map_idx].addr = mapaddr; /* mmap()得到的虚拟地址 */ maps[map_idx].offset = 0; strcpy(maps[map_idx].path, devname); dev->mem_resource[res_idx].addr = mapaddr; /* mmap()得到的虚拟地址 */ return 0; error: rte_free(maps[map_idx].path); return -1; } void * pci_map_resource(void *requested_addr, int fd, off_t offset, size_t size, int additional_flags) {
void *mapaddr; /* Map the PCI memory resource of device */ /* 将从fd + offset开始、大小为size的总线地址映射到从requested_addr开始的虚拟地址 */ mapaddr = mmap(requested_addr, size, PROT_READ | PROT_WRITE, MAP_SHARED | additional_flags, fd, offset); if (mapaddr == MAP_FAILED) {
RTE_LOG(ERR, EAL, "%s(): cannot mmap(%d, %p, 0x%lx, 0x%lx): %s (%p)\n", __func__, fd, requested_addr, (unsigned long)size, (unsigned long)offset, strerror(errno), mapaddr); } else RTE_LOG(DEBUG, EAL, " PCI memory mapped at %p\n", mapaddr); return mapaddr; }
eth_ixgbe_pci_probe()
/*
 * PCI probe callback of the ixgbe driver: delegates to the generic ethdev PCI
 * probe helper, passing the size of struct ixgbe_adapter as the private data
 * size and eth_ixgbe_dev_init as the device init callback.
 */
static int eth_ixgbe_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
    struct rte_pci_device *pci_dev) {
    return rte_eth_dev_pci_generic_probe(pci_dev,
        sizeof(struct ixgbe_adapter), eth_ixgbe_dev_init);
}

/*
 * Abridged excerpt ("..." marks omitted code): allocates an ethdev for the
 * PCI device, then calls the driver-supplied init callback on it.
 */
static inline int rte_eth_dev_pci_generic_probe(struct rte_pci_device *pci_dev,
    size_t private_data_size, eth_dev_pci_callback_t dev_init) {
    ...
    eth_dev = rte_eth_dev_pci_allocate(pci_dev, private_data_size);
    ...
    ret = dev_init(eth_dev); /* eth_ixgbe_dev_init() for ixgbe */
    ...
}

/*
 * Abridged excerpt: allocates the ethdev and its driver-private data area.
 */
static inline struct rte_eth_dev *
rte_eth_dev_pci_allocate(struct rte_pci_device *dev, size_t private_data_size) {
    ...
    /* rte_eth_dev_allocate() ->
     * rte_eth_dev_find_free_port()
     * rte_eth_dev_data_alloc()
     * eth_dev_get()
     */
    eth_dev = rte_eth_dev_allocate(name);
    ...
    /* Allocate the private data (struct ixgbe_adapter for ixgbe) */
    eth_dev->data->dev_private = rte_zmalloc_socket(name, private_data_size,
        RTE_CACHE_LINE_SIZE, dev->device.numa_node);
    ...
}

/*
 * Abridged excerpt: picks a free port id and returns the ethdev for it.
 */
struct rte_eth_dev *
rte_eth_dev_allocate(const char *name) {
    ...
    /* Walk the rte_eth_devices array to find a free device */
    port_id = rte_eth_dev_find_free_port();
    ...
    /* Allocate the rte_eth_dev_data array */
    rte_eth_dev_data_alloc();
    ...
    /* Set the state of the device at port_id to RTE_ETH_DEV_ATTACHED */
    eth_dev = eth_dev_get(port_id);
    ...
}
eth_ixgbe_dev_init()
讯享网static int eth_ixgbe_dev_init(struct rte_eth_dev *eth_dev) {
... eth_dev->dev_ops = &ixgbe_eth_dev_ops; /* 注册ixgbe_eth_dev_ops函数表 */ eth_dev->rx_pkt_burst = &ixgbe_recv_pkts; /* burst收包函数 */ eth_dev->tx_pkt_burst = &ixgbe_xmit_pkts; /* burst发包函数 */ eth_dev->tx_pkt_prepare = &ixgbe_prep_pkts; ... hw->device_id = pci_dev->id.device_id; /* device_id */ hw->vendor_id = pci_dev->id.vendor_id; /* vendor_id */ hw->hw_addr = (void *)pci_dev->mem_resource[0].addr; /* mmap()得到的BAR的虚拟地址 */ ... /* ixgbe_init_shared_code() -> * ixgbe_set_mac_type() * ixgbe_init_ops_82599() * 在ixgbe_set_mac_type()中根据vendor_id和device_id设置hw->mac.type,82599为ixgbe_mac_82599EB * 根据hw->mac.type调用对应的函数设置hw->mac.ops,82599为ixgbe_init_ops_82599() */ diag = ixgbe_init_shared_code(hw); ... /* ixgbe_init_hw() -> * ixgbe_call_func() -> * ixgbe_init_hw_generic() -> * ixgbe_reset_hw_82599() -> * ixgbe_get_mac_addr_generic() * 得到网卡的mac地址 */ diag = ixgbe_init_hw(hw); ... ether_addr_copy((struct ether_addr *) hw->mac.perm_addr, ð_dev->data->mac_addrs[0]); /* 复制网卡的mac地址到eth_dev->data->mac_addrs */ ... } static const struct eth_dev_ops ixgbe_eth_dev_ops = {
.dev_configure = ixgbe_dev_configure, .dev_start = ixgbe_dev_start, ... .rx_queue_setup = ixgbe_dev_rx_queue_setup, ... .tx_queue_setup = ixgbe_dev_tx_queue_setup, ... };
DPDK/Kernel映射BAR的区别
图片来源于
http://blog.chinaunix.net/uid-20528014-id-314322.html
http://blog.chinaunix.net/uid-20528014-id-315798.html
http://blog.chinaunix.net/uid-20528014-id-315801.html


DPDK使用mmap()将总线地址映射到用户空间虚拟地址
Kernel使用ioremap()将总线地址映射到内核空间虚拟地址

版权声明:本文内容由互联网用户自发贡献,该文观点仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容,请联系我们,一经查实,本站将立刻删除。
如需转载请保留出处:https://51itzy.com/kjqy/26614.html