DPDK总结(rte_eal_init)

DPDK总结(rte_eal_init):本文梳理 rte_eal_init() 的初始化流程,内容包括 rte_bus_list 链表、PCI bus 与 PCI driver 的注册、rte_config/lcore_config、PCI 设备扫描(解析 sysfs resource)、通过 UIO 映射 BAR,以及 ixgbe 驱动的 probe 与设备初始化。

大家好,我是讯享网,很高兴认识大家。

rte_bus_list

struct rte_bus { 
    TAILQ_ENTRY(rte_bus) next; /< Next bus object in linked list */ const char *name; /< Name of the bus */ rte_bus_scan_t scan; /< Scan for devices attached to bus */ rte_bus_probe_t probe; /< Probe devices on bus */ rte_bus_find_device_t find_device; /< Find a device on the bus */ rte_bus_plug_t plug; /< Probe single device for drivers */ rte_bus_unplug_t unplug; /< Remove single device from driver */ rte_bus_parse_t parse; /< Parse a device name */ struct rte_bus_conf conf; /< Bus configuration */ }; TAILQ_HEAD(rte_bus_list, rte_bus); #define TAILQ_HEAD(name, type) \ struct name { \ struct type *tqh_first; /* first element */ \ struct type **tqh_last; /* addr of last next element */ \ } /* 定义rte_bus_list */ struct rte_bus_list rte_bus_list = TAILQ_HEAD_INITIALIZER(rte_bus_list); 

讯享网

注册pci bus

将rte_pci_bus插入rte_bus_list链表

讯享网struct rte_pci_bus { 
    struct rte_bus bus; /< Inherit the generic class */ struct rte_pci_device_list device_list; /< List of PCI devices */ struct rte_pci_driver_list driver_list; /< List of PCI drivers */ }; /* 定义rte_pci_bus */ struct rte_pci_bus rte_pci_bus = { 
    .bus = { 
    .scan = rte_pci_scan, .probe = rte_pci_probe, .find_device = pci_find_device, .plug = pci_plug, .unplug = pci_unplug, .parse = pci_parse, }, .device_list = TAILQ_HEAD_INITIALIZER(rte_pci_bus.device_list), .driver_list = TAILQ_HEAD_INITIALIZER(rte_pci_bus.driver_list), }; RTE_REGISTER_BUS(pci, rte_pci_bus.bus); #define RTE_REGISTER_BUS(nm, bus) \ RTE_INIT_PRIO(businitfn_ nm, 101); \ /* 声明为gcc构造函数,先于main()执行 */ static void businitfn_ nm(void) \ { 
   \ (bus).name = RTE_STR(nm);\ rte_bus_register(&bus); \ } void rte_bus_register(struct rte_bus *bus) { 
    RTE_VERIFY(bus); RTE_VERIFY(bus->name && strlen(bus->name)); /* A bus should mandatorily have the scan implemented */ RTE_VERIFY(bus->scan); RTE_VERIFY(bus->probe); RTE_VERIFY(bus->find_device); /* Buses supporting driver plug also require unplug. */ RTE_VERIFY(!bus->plug || bus->unplug); /* 将rte_pci_bus.bus插入rte_bus_list链表 */ TAILQ_INSERT_TAIL(&rte_bus_list, bus, next); RTE_LOG(DEBUG, EAL, "Registered [%s] bus.\n", bus->name); } 

注册pci driver

将rte_ixgbe_pmd插入rte_pci_bus.driver_list链表


讯享网

struct rte_pci_driver { 
    TAILQ_ENTRY(rte_pci_driver) next; /< Next in list. */ struct rte_driver driver; /< Inherit core driver. */ struct rte_pci_bus *bus; /< PCI bus reference. */ pci_probe_t *probe; /< Device Probe function. */ pci_remove_t *remove; /< Device Remove function. */ const struct rte_pci_id *id_table; /< ID table, NULL terminated. */ uint32_t drv_flags; /< Flags contolling handling of device. */ }; /* 定义rte_ixgbe_pmd */ static struct rte_pci_driver rte_ixgbe_pmd = { 
    .id_table = pci_id_ixgbe_map, .drv_flags = RTE_PCI_DRV_NEED_MAPPING | RTE_PCI_DRV_INTR_LSC, .probe = eth_ixgbe_pci_probe, .remove = eth_ixgbe_pci_remove, }; RTE_PMD_REGISTER_PCI(net_ixgbe, rte_ixgbe_pmd); #define RTE_PMD_REGISTER_PCI(nm, pci_drv) \ RTE_INIT(pciinitfn_ nm); \ /* 声明为gcc构造函数,先于main()执行 */ static void pciinitfn_ nm(void) \ { 
   \ (pci_drv).driver.name = RTE_STR(nm);\ rte_pci_register(&pci_drv); \ } \ RTE_PMD_EXPORT_NAME(nm, __COUNTER__) void rte_pci_register(struct rte_pci_driver *driver) { 
    /* 将rte_ixgbe_pmd插入rte_pci_bus.driver_list链表 */ TAILQ_INSERT_TAIL(&rte_pci_bus.driver_list, driver, next); driver->bus = &rte_pci_bus; } 

rte_config和lcore_config

讯享网struct rte_config { 
    uint32_t master_lcore; /< Id of the master lcore */ uint32_t lcore_count; /< Number of available logical cores. */ uint32_t service_lcore_count;/< Number of available service cores. */ enum rte_lcore_role_t lcore_role[RTE_MAX_LCORE]; /< State of cores. */ / Primary or secondary configuration */ enum rte_proc_type_t process_type; / * Pointer to memory configuration, which may be shared across multiple * DPDK instances */ struct rte_mem_config *mem_config; } __attribute__((__packed__)); /* 定义rte_config */ static struct rte_config rte_config = { 
    .mem_config = &early_mem_config, }; struct lcore_config { 
    unsigned detected; /< true if lcore was detected */ pthread_t thread_id; /< pthread identifier */ int pipe_master2slave[2]; /< communication pipe with master */ int pipe_slave2master[2]; /< communication pipe with master */ lcore_function_t * volatile f; /< function to call */ void * volatile arg; /< argument of function */ volatile int ret; /< return value of function */ volatile enum rte_lcore_state_t state; /< lcore state */ unsigned socket_id; /< physical socket id for this lcore */ unsigned core_id; /< core number on socket for this lcore */ int core_index; /< relative index, starting from 0 */ rte_cpuset_t cpuset; /< cpu set which the lcore affinity to */ uint8_t core_role; /< role of core eg: OFF, RTE, SERVICE */ }; /* 定义lcore_config数组 */ struct lcore_config lcore_config[RTE_MAX_LCORE]; 

rte_eal_init()

MASTER lcore执行的EAL初始化入口函数(SLAVE lcore的线程主循环函数为eal_thread_loop())

/*
 * Launch threads, called at application init().
 *
 * Abridged listing: "..." marks code omitted by the article. The steps shown
 * are lcore detection, argument parsing, memory init, master thread setup,
 * bus scan, slave thread creation and bus probe. Each failing step shown
 * here raises an alert, sets rte_errno and returns -1.
 */
int
rte_eal_init(int argc, char **argv)
{
	...

	/*
	 * rte_eal_cpu_init() ->
	 *     eal_cpu_core_id()
	 *     eal_cpu_socket_id()
	 * Reads /sys/devices/system/[cpu|node] and fills in
	 * lcore_config->[core_role|core_id|socket_id].
	 */
	if (rte_eal_cpu_init() < 0) {
		rte_eal_init_alert("Cannot detect lcores.");
		rte_errno = ENOTSUP;
		return -1;
	}

	/*
	 * eal_parse_args() ->
	 *     eal_parse_common_option() ->
	 *         eal_parse_coremask()
	 *         eal_parse_master_lcore()
	 *         eal_parse_lcores()
	 *         eal_adjust_config()
	 * Parses the -c, --master_lcore and --lcores arguments.
	 * eal_parse_lcores() determines the usable logical CPUs.
	 * eal_adjust_config() sets rte_config.master_lcore to 0
	 * (i.e. the first lcore becomes the MASTER lcore).
	 */
	fctret = eal_parse_args(argc, argv);
	if (fctret < 0) {
		rte_eal_init_alert("Invalid 'command line' arguments.");
		rte_errno = EINVAL;
		rte_atomic32_clear(&run_once);
		return -1;
	}

	...

	/* Initialise the hugepage information. */
	if (rte_eal_memory_init() < 0) {
		rte_eal_init_alert("Cannot init memory\n");
		rte_errno = ENOMEM;
		return -1;
	}

	...

	/*
	 * eal_thread_init_master() ->
	 *     eal_thread_set_affinity()
	 * Marks the current thread as the MASTER lcore;
	 * eal_thread_set_affinity() binds the MASTER lcore to its logical CPU.
	 */
	eal_thread_init_master(rte_config.master_lcore);

	...

	/*
	 * rte_bus_scan() ->
	 *     rte_pci_scan() ->
	 *         pci_scan_one() ->
	 *             pci_parse_sysfs_resource()
	 *             rte_pci_add_device()
	 * Walks rte_bus_list and calls each bus's scan function
	 * (rte_pci_scan() for PCI).
	 * Walks /sys/bus/pci/devices and allocates a struct rte_pci_device
	 * for each PCI address (DBSF).
	 * Reads and parses each device's "resource" file line by line and
	 * stores the result in dev->mem_resource[i].
	 * Inserts dev into the rte_pci_bus.device_list list.
	 */
	if (rte_bus_scan()) {
		rte_eal_init_alert("Cannot scan the buses for devices\n");
		rte_errno = ENODEV;
		return -1;
	}

	/*
	 * pthread_create() ->
	 *     eal_thread_loop() ->
	 *         eal_thread_set_affinity()
	 * Creates one thread per SLAVE lcore; the thread function is
	 * eal_thread_loop(). eal_thread_set_affinity() binds the SLAVE lcore
	 * to its logical CPU.
	 */
	RTE_LCORE_FOREACH_SLAVE(i) {
		/*
		 * create communication pipes between master thread
		 * and children
		 */
		/*
		 * The MASTER lcore creates the pipes used for MASTER <-> SLAVE
		 * lcore communication (parent/child thread communication).
		 */
		if (pipe(lcore_config[i].pipe_master2slave) < 0)
			rte_panic("Cannot create pipe\n");
		if (pipe(lcore_config[i].pipe_slave2master) < 0)
			rte_panic("Cannot create pipe\n");

		lcore_config[i].state = WAIT; /* SLAVE lcore starts in state WAIT */

		/* create a thread for each lcore */
		ret = pthread_create(&lcore_config[i].thread_id, NULL,
				     eal_thread_loop, NULL);
		...
	}

	/*
	 * Launch a dummy function on all slave lcores, so that master lcore
	 * knows they are all ready when this function returns.
	 */
	rte_eal_mp_remote_launch(sync_func, NULL, SKIP_MASTER);
	rte_eal_mp_wait_lcore();

	...

	/* Probe all the buses and devices/drivers on them */
	/*
	 * rte_bus_probe() ->
	 *     rte_pci_probe() ->
	 *         pci_probe_all_drivers() ->
	 *             rte_pci_probe_one_driver() ->
	 *                 rte_pci_match()
	 *                 rte_pci_map_device() ->
	 *                     pci_uio_map_resource()
	 *                 eth_ixgbe_pci_probe()
	 * Walks rte_bus_list and calls each bus's probe function
	 * (rte_pci_probe() for PCI).
	 * rte_pci_probe() / pci_probe_all_drivers() walk
	 * rte_pci_bus.device_list / driver_list respectively to match
	 * devices with drivers.
	 * The BARs are mapped and the driver's probe function is called
	 * (eth_ixgbe_pci_probe() for ixgbe).
	 */
	if (rte_bus_probe()) {
		rte_eal_init_alert("Cannot probe devices\n");
		rte_errno = ENOTSUP;
		return -1;
	}

	...
}

rte_pci_device和mapped_pci_resource

讯享网struct rte_pci_device { 
    TAILQ_ENTRY(rte_pci_device) next; /< Next probed PCI device. */ struct rte_device device; /< Inherit core device */ /* DBSF */ struct rte_pci_add addr; /< PCI location. */ struct rte_pci_id id; /< PCI ID. */ struct rte_mem_resource mem_resource[PCI_MAX_RESOURCE]; /< PCI Memory Resource */ struct rte_intr_handle intr_handle; /< Interrupt handle */ struct rte_pci_driver *driver; /< Associated driver */ uint16_t max_vfs; /< sriov enable if not zero */ enum rte_kernel_driver kdrv; /< Kernel driver passthrough */ char name[PCI_PRI_STR_SIZE+1]; /< PCI location (ASCII) */ }; struct rte_mem_resource { 
    /* 总线地址 */ uint64_t phys_addr; /< Physical address, 0 if not resource. */ uint64_t len; /< Length of the resource. */ /* 虚拟地址 */ void *addr; /< Virtual address, NULL when not mapped. */ }; struct mapped_pci_resource { 
    TAILQ_ENTRY(mapped_pci_resource) next; /* DBSF */ struct rte_pci_addr pci_addr; char path[PATH_MAX]; int nb_maps; struct pci_map maps[PCI_MAX_RESOURCE]; }; TAILQ_HEAD(mapped_pci_res_list, mapped_pci_resource); 

pci_parse_sysfs_resource()

/*
 * Example of a sysfs "resource" file:
 *
 * [root@localhost ~]# cat /sys/bus/pci/devices/0000:07:00.0/resource
 * 0x00000000df 0x00000000df9fffff 0x000000000014220c
 * 0x0000000000000000 0x0000000000000000 0x0000000000000000
 * 0x0000000000000000 0x0000000000000000 0x0000000000000000
 * 0x0000000000000000 0x0000000000000000 0x0000000000000000
 * 0x00000000dfa04000 0x00000000dfa07fff 0x000000000014220c
 * 0x0000000000000000 0x0000000000000000 0x0000000000000000
 * 0x00000000dfc80000 0x00000000dfcfffff 0x000000000004e200
 * 0x0000000000000000 0x0000000000000000 0x0000000000000000
 * 0x0000000000000000 0x0000000000000000 0x0000000000000000
 * 0x0000000000000000 0x0000000000000000 0x0000000000000000
 * 0x0000000000000000 0x0000000000000000 0x0000000000000000
 * 0x0000000000000000 0x0000000000000000 0x0000000000000000
 * 0x0000000000000000 0x0000000000000000 0x0000000000000000
 *
 * The three columns are start, end and flags.
 * NOTE(review): the first start address looks truncated in this listing;
 * confirm against real sysfs output.
 */

/*
 * Read the first PCI_MAX_RESOURCE lines of a sysfs resource file and record
 * every memory resource in dev->mem_resource[].
 *
 * filename: path of the resource file
 *           (/sys/bus/pci/devices/<address>/resource).
 * dev:      device whose mem_resource[] entries are filled in; entries whose
 *           flags lack IORESOURCE_MEM are left untouched.
 *
 * Returns 0 on success, -1 if the file cannot be opened, a line cannot be
 * read, or a line fails to parse.
 */
static int
pci_parse_sysfs_resource(const char *filename, struct rte_pci_device *dev)
{
	char line[BUFSIZ];
	uint64_t start, end, flags;
	int bar;
	int rc = -1;
	FILE *fp;

	fp = fopen(filename, "r");
	if (fp == NULL) {
		RTE_LOG(ERR, EAL, "Cannot open sysfs resource\n");
		return -1;
	}

	/* One line per resource (at most 6 BARs per the original note). */
	for (bar = 0; bar < PCI_MAX_RESOURCE; bar++) {
		if (fgets(line, sizeof(line), fp) == NULL) {
			RTE_LOG(ERR, EAL,
				"%s(): cannot read resource\n", __func__);
			goto out;
		}

		if (pci_parse_one_sysfs_resource(line, sizeof(line), &start,
				&end, &flags) < 0)
			goto out;

		/* Only memory address space is recorded. */
		if (!(flags & IORESOURCE_MEM))
			continue;

		dev->mem_resource[bar].phys_addr = start;     /* bus address */
		dev->mem_resource[bar].len = end - start + 1;
		/* not mapped for now, so no virtual address yet */
		dev->mem_resource[bar].addr = NULL;
	}

	rc = 0;
out:
	fclose(fp);
	return rc;
}

/*
 * Split one line of a sysfs resource file into its three hexadecimal fields.
 *
 * line:      text of the line; handed to rte_strsplit().
 * len:       size passed to rte_strsplit() for the line buffer.
 * phys_addr: receives the first field (start address).
 * end_addr:  receives the second field (end address).
 * flags:     receives the third field (resource flags).
 *
 * Returns 0 on success, -1 if the line does not split into exactly three
 * space-separated fields or if a conversion sets errno.
 */
int
pci_parse_one_sysfs_resource(char *line, size_t len, uint64_t *phys_addr,
	uint64_t *end_addr, uint64_t *flags)
{
	/* The same three pointers, viewed either by name or as an array. */
	union pci_resource_info {
		struct {
			char *phys_addr;
			char *end_addr;
			char *flags;
		};
		char *ptrs[PCI_RESOURCE_FMT_NVAL];
	} fields;
	int nfields;

	nfields = rte_strsplit(line, len, fields.ptrs, 3, ' ');
	if (nfields != 3) {
		RTE_LOG(ERR, EAL,
			"%s(): bad resource format\n", __func__);
		return -1;
	}

	/* Clear errno so a failure in any of the conversions is visible. */
	errno = 0;
	*phys_addr = strtoull(fields.phys_addr, NULL, 16); /* hex string -> unsigned long long */
	*end_addr = strtoull(fields.end_addr, NULL, 16);
	*flags = strtoull(fields.flags, NULL, 16);
	if (errno == 0)
		return 0;

	RTE_LOG(ERR, EAL,
		"%s(): bad resource format\n", __func__);
	return -1;
}

pci_uio_map_resource()

讯享网int pci_uio_map_resource(struct rte_pci_device *dev) { 
    int i, map_idx = 0, ret; uint64_t phaddr; struct mapped_pci_resource *uio_res = NULL; struct mapped_pci_res_list *uio_res_list = RTE_TAILQ_CAST(rte_uio_tailq.head, mapped_pci_res_list); dev->intr_handle.fd = -1; dev->intr_handle.uio_cfg_fd = -1; dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; /* secondary processes - use already recorded details */ if (rte_eal_process_type() != RTE_PROC_PRIMARY) return pci_uio_map_secondary(dev); /* allocate uio resource */ ret = pci_uio_alloc_resource(dev, &uio_res); /* 为/dev/uioX分配struct mapped_pci_resource */ if (ret) return ret; /* Map all BARs */ for (i = 0; i != PCI_MAX_RESOURCE; i++) { 
    /* 最多6个BAR */ /* skip empty BAR */ phaddr = dev->mem_resource[i].phys_addr; if (phaddr == 0) /* 对于0000:07:00.0,当i为0或4时,phaddr非空 */ continue; ret = pci_uio_map_resource_by_index(dev, i, uio_res, map_idx); /* 映射BAR */ if (ret) goto error; map_idx++; } uio_res->nb_maps = map_idx; /* 映射BAR的个数 */ TAILQ_INSERT_TAIL(uio_res_list, uio_res, next); /* 将uio_res插入uio_res_list链表 */ return 0; error: for (i = 0; i < map_idx; i++) { 
    pci_unmap_resource(uio_res->maps[i].addr, (size_t)uio_res->maps[i].size); rte_free(uio_res->maps[i].path); } pci_uio_free_resource(dev, uio_res); return -1; } int pci_uio_map_resource_by_index(struct rte_pci_device *dev, int res_idx, struct mapped_pci_resource *uio_res, int map_idx) { 
    int fd; char devname[PATH_MAX]; void *mapaddr; struct rte_pci_addr *loc; struct pci_map *maps; loc = &dev->addr; maps = uio_res->maps; /* update devname for mmap */ snprintf(devname, sizeof(devname), "%s/" PCI_PRI_FMT "/resource%d", pci_get_sysfs_path(), loc->domain, loc->bus, loc->devid, loc->function, res_idx); /* allocate memory to keep path */ maps[map_idx].path = rte_malloc(NULL, strlen(devname) + 1, 0); if (maps[map_idx].path == NULL) { 
    RTE_LOG(ERR, EAL, "Cannot allocate memory for path: %s\n", strerror(errno)); return -1; } /* * open resource file, to mmap it */ fd = open(devname, O_RDWR); /* 打开/sys/bus/pci/devices/0000:07:00.0/resource0文件 */ if (fd < 0) { 
    RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", devname, strerror(errno)); goto error; } /* try mapping somewhere close to the end of hugepages */ if (pci_map_addr == NULL) pci_map_addr = pci_find_max_end_va(); mapaddr = pci_map_resource(pci_map_addr, fd, 0, (size_t)dev->mem_resource[res_idx].len, 0); /* 映射BAR */ close(fd); if (mapaddr == MAP_FAILED) goto error; pci_map_addr = RTE_PTR_ADD(mapaddr, (size_t)dev->mem_resource[res_idx].len); maps[map_idx].phaddr = dev->mem_resource[res_idx].phys_addr; /* 总线地址 */ maps[map_idx].size = dev->mem_resource[res_idx].len; maps[map_idx].addr = mapaddr; /* mmap()得到的虚拟地址 */ maps[map_idx].offset = 0; strcpy(maps[map_idx].path, devname); dev->mem_resource[res_idx].addr = mapaddr; /* mmap()得到的虚拟地址 */ return 0; error: rte_free(maps[map_idx].path); return -1; } void * pci_map_resource(void *requested_addr, int fd, off_t offset, size_t size, int additional_flags) { 
    void *mapaddr; /* Map the PCI memory resource of device */ /* 将从fd + offset开始、大小为size的总线地址映射到从requested_addr开始的虚拟地址 */ mapaddr = mmap(requested_addr, size, PROT_READ | PROT_WRITE, MAP_SHARED | additional_flags, fd, offset); if (mapaddr == MAP_FAILED) { 
    RTE_LOG(ERR, EAL, "%s(): cannot mmap(%d, %p, 0x%lx, 0x%lx): %s (%p)\n", __func__, fd, requested_addr, (unsigned long)size, (unsigned long)offset, strerror(errno), mapaddr); } else RTE_LOG(DEBUG, EAL, " PCI memory mapped at %p\n", mapaddr); return mapaddr; } 

eth_ixgbe_pci_probe()

/*
 * PCI probe callback of the ixgbe driver: forwards to the generic ethdev PCI
 * probe helper with the size of struct ixgbe_adapter as the private data
 * size and eth_ixgbe_dev_init() as the init callback.
 */
static int
eth_ixgbe_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
	struct rte_pci_device *pci_dev)
{
	return rte_eth_dev_pci_generic_probe(pci_dev,
		sizeof(struct ixgbe_adapter), eth_ixgbe_dev_init);
}

/*
 * Abridged listing ("..." marks omitted code): allocates the ethdev for the
 * PCI device, then runs the driver-supplied dev_init callback on it.
 */
static inline int
rte_eth_dev_pci_generic_probe(struct rte_pci_device *pci_dev,
	size_t private_data_size, eth_dev_pci_callback_t dev_init)
{
	...
	eth_dev = rte_eth_dev_pci_allocate(pci_dev, private_data_size);
	...
	ret = dev_init(eth_dev); /* for ixgbe this is eth_ixgbe_dev_init() */
	...
}

/*
 * Abridged listing ("..." marks omitted code): allocates an ethdev by name
 * and its driver-private data area.
 */
static inline struct rte_eth_dev *
rte_eth_dev_pci_allocate(struct rte_pci_device *dev, size_t private_data_size)
{
	...
	/*
	 * rte_eth_dev_allocate() ->
	 *     rte_eth_dev_find_free_port()
	 *     rte_eth_dev_data_alloc()
	 *     eth_dev_get()
	 */
	eth_dev = rte_eth_dev_allocate(name);
	...
	/* Allocate the private data; for ixgbe this is struct ixgbe_adapter. */
	eth_dev->data->dev_private = rte_zmalloc_socket(name,
		private_data_size, RTE_CACHE_LINE_SIZE,
		dev->device.numa_node);
	...
}

/*
 * Abridged listing ("..." marks omitted code): picks a free port id and
 * returns the ethdev for that port.
 */
struct rte_eth_dev *
rte_eth_dev_allocate(const char *name)
{
	...
	/* Walk the rte_eth_devices array to find a free device. */
	port_id = rte_eth_dev_find_free_port();
	...
	/* Allocate the rte_eth_dev_data array. */
	rte_eth_dev_data_alloc();
	...
	/* Sets the state of the device for port_id to RTE_ETH_DEV_ATTACHED. */
	eth_dev = eth_dev_get(port_id);
	...
}

eth_ixgbe_dev_init()

讯享网static int eth_ixgbe_dev_init(struct rte_eth_dev *eth_dev) { 
    ... eth_dev->dev_ops = &ixgbe_eth_dev_ops; /* 注册ixgbe_eth_dev_ops函数表 */ eth_dev->rx_pkt_burst = &ixgbe_recv_pkts; /* burst收包函数 */ eth_dev->tx_pkt_burst = &ixgbe_xmit_pkts; /* burst发包函数 */ eth_dev->tx_pkt_prepare = &ixgbe_prep_pkts; ... hw->device_id = pci_dev->id.device_id; /* device_id */ hw->vendor_id = pci_dev->id.vendor_id; /* vendor_id */ hw->hw_addr = (void *)pci_dev->mem_resource[0].addr; /* mmap()得到的BAR的虚拟地址 */ ... /* ixgbe_init_shared_code() -> * ixgbe_set_mac_type() * ixgbe_init_ops_82599() * 在ixgbe_set_mac_type()中根据vendor_id和device_id设置hw->mac.type,82599为ixgbe_mac_82599EB * 根据hw->mac.type调用对应的函数设置hw->mac.ops,82599为ixgbe_init_ops_82599() */ diag = ixgbe_init_shared_code(hw); ... /* ixgbe_init_hw() -> * ixgbe_call_func() -> * ixgbe_init_hw_generic() -> * ixgbe_reset_hw_82599() -> * ixgbe_get_mac_addr_generic() * 得到网卡的mac地址 */ diag = ixgbe_init_hw(hw); ... ether_addr_copy((struct ether_addr *) hw->mac.perm_addr, &eth_dev->data->mac_addrs[0]); /* 复制网卡的mac地址到eth_dev->data->mac_addrs */ ... } static const struct eth_dev_ops ixgbe_eth_dev_ops = { 
    .dev_configure = ixgbe_dev_configure, .dev_start = ixgbe_dev_start, ... .rx_queue_setup = ixgbe_dev_rx_queue_setup, ... .tx_queue_setup = ixgbe_dev_tx_queue_setup, ... }; 

DPDK/Kernel映射BAR的区别

图片来源于
http://blog.chinaunix.net/uid-20528014-id-314322.html
http://blog.chinaunix.net/uid-20528014-id-315798.html
http://blog.chinaunix.net/uid-20528014-id-315801.html
在这里插入图片描述
在这里插入图片描述
DPDK使用mmap()将总线地址映射到用户空间虚拟地址
Kernel使用ioremap()将总线地址映射到内核空间虚拟地址

小讯
上一篇 2025-02-20 23:45
下一篇 2025-03-20 07:56

相关推荐

版权声明:本文内容由互联网用户自发贡献,该文观点仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容,请联系我们,一经查实,本站将立刻删除。
如需转载请保留出处:https://51itzy.com/kjqy/26614.html