2025年Linux内核源码 ---- el2_setup源码解析

Linux内核源码 ---- el2_setup源码解析kernel 版本 5 10 129 进入内核的时候 arm64 处理器的异常级别可能是 1 或者 2 函数 el2 setup 的主要工作如下 1 如果异常级别是 1 那么在异常级别 1 执行内核 2 如果异常级别是 2 那么根据处理器是否支持虚拟化宿主扩展 Virtuallizat Host

大家好,我是讯享网,很高兴认识大家。

kernel版本:5.10.129
进入内核的时候,arm64处理器的异常级别可能是1或者2,函数el2_setup的主要工作如下:
1、如果异常级别是1,那么在异常级别1执行内核
2、如果异常级别是2, 那么根据处理器是否支持虚拟化宿主扩展(Virtuallization Host Extensions,VHE),决定是否需要降级到异常级别

具体实现代码如下:

/* * If we're fortunate enough to boot at EL2, ensure that the world is * sane before dropping to EL1. * * Returns either BOOT_CPU_MODE_EL1 or BOOT_CPU_MODE_EL2 in w0 if * booted in EL1 or EL2 respectively. */ SYM_FUNC_START(el2_setup) msr SPsel, #1 // We want to use SP_EL{1,2} mrs x0, CurrentEL cmp x0, #CurrentEL_EL2 b.eq 1f mov_q x0, (SCTLR_EL1_RES1 | ENDIAN_SET_EL1) msr sctlr_el1, x0 mov w0, #BOOT_CPU_MODE_EL1 // This cpu booted in EL1 isb ret 1: mov_q x0, (SCTLR_EL2_RES1 | ENDIAN_SET_EL2) msr sctlr_el2, x0 #ifdef CONFIG_ARM64_VHE /* * Check for VHE being present. For the rest of the EL2 setup, * x2 being non-zero indicates that we do have VHE, and that the * kernel is intended to run at EL2. */ mrs x2, id_aa64mmfr1_el1 ubfx x2, x2, #ID_AA64MMFR1_VHE_SHIFT, #4 #else mov x2, xzr #endif /* Hyp configuration. */ mov_q x0, HCR_HOST_NVHE_FLAGS cbz x2, set_hcr mov_q x0, HCR_HOST_VHE_FLAGS set_hcr: msr hcr_el2, x0 isb /* * Allow Non-secure EL1 and EL0 to access physical timer and counter. * This is not necessary for VHE, since the host kernel runs in EL2, * and EL0 accesses are configured in the later stage of boot process. * Note that when HCR_EL2.E2H == 1, CNTHCTL_EL2 has the same bit layout * as CNTKCTL_EL1, and CNTKCTL_EL1 accessing instructions are redefined * to access CNTHCTL_EL2. This allows the kernel designed to run at EL1 * to transparently mess with the EL0 bits via CNTKCTL_EL1 access in * EL2. */ cbnz x2, 1f mrs x0, cnthctl_el2 orr x0, x0, #3 // Enable EL1 physical timers msr cnthctl_el2, x0 1: msr cntvoff_el2, xzr // Clear virtual offset #ifdef CONFIG_ARM_GIC_V3 /* GICv3 system register access */ mrs x0, id_aa64pfr0_el1 ubfx x0, x0, #ID_AA64PFR0_GIC_SHIFT, #4 cbz x0, 3f mrs_s x0, SYS_ICC_SRE_EL2 orr x0, x0, #ICC_SRE_EL2_SRE // Set ICC_SRE_EL2.SRE==1 orr x0, x0, #ICC_SRE_EL2_ENABLE // Set ICC_SRE_EL2.Enable==1 msr_s SYS_ICC_SRE_EL2, x0 isb // Make sure SRE is now set mrs_s x0, SYS_ICC_SRE_EL2 // Read SRE back, tbz x0, #0, 3f // and check that it sticks msr_s SYS_ICH_HCR_EL2, xzr // Reset ICC_HCR_EL2 to defaults 3: #endif /* Populate ID registers. */ mrs x0, midr_el1 mrs x1, mpidr_el1 msr vpidr_el2, x0 msr vmpidr_el2, x1 #ifdef CONFIG_COMPAT msr hstr_el2, xzr // Disable CP15 traps to EL2 #endif /* EL2 debug */ mrs x1, id_aa64dfr0_el1 sbfx x0, x1, #ID_AA64DFR0_PMUVER_SHIFT, #4 cmp x0, #1 b.lt 4f // Skip if no PMU present mrs x0, pmcr_el0 // Disable debug access traps ubfx x0, x0, #11, #5 // to EL2 and allow access to 4: csel x3, xzr, x0, lt // all PMU counters from EL1 /* Statistical profiling */ ubfx x0, x1, #ID_AA64DFR0_PMSVER_SHIFT, #4 cbz x0, 7f // Skip if SPE not present cbnz x2, 6f // VHE? mrs_s x4, SYS_PMBIDR_EL1 // If SPE available at EL2, and x4, x4, #(1 << SYS_PMBIDR_EL1_P_SHIFT) cbnz x4, 5f // then permit sampling of physical mov x4, #(1 << SYS_PMSCR_EL2_PCT_SHIFT | \ 1 << SYS_PMSCR_EL2_PA_SHIFT) msr_s SYS_PMSCR_EL2, x4 // addresses and physical counter 5: mov x1, #(MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT) orr x3, x3, x1 // If we don't have VHE, then b 7f // use EL1&0 translation. 6: // For VHE, use EL2 translation orr x3, x3, #MDCR_EL2_TPMS // and disable access from EL1 7: msr mdcr_el2, x3 // Configure debug traps /* LORegions */ mrs x1, id_aa64mmfr1_el1 ubfx x0, x1, #ID_AA64MMFR1_LOR_SHIFT, 4 cbz x0, 1f msr_s SYS_LORC_EL1, xzr 1: /* Stage-2 translation */ msr vttbr_el2, xzr cbz x2, install_el2_stub mov w0, #BOOT_CPU_MODE_EL2 // This CPU booted in EL2 isb ret SYM_INNER_LABEL(install_el2_stub, SYM_L_LOCAL) /* * When VHE is not in use, early init of EL2 and EL1 needs to be * done here. * When VHE _is_ in use, EL1 will not be used in the host and * requires no configuration, and all non-hyp-specific EL2 setup * will be done via the _EL1 system register aliases in __cpu_setup. */ mov_q x0, (SCTLR_EL1_RES1 | ENDIAN_SET_EL1) msr sctlr_el1, x0 /* Coprocessor traps. */ mov x0, #0x33ff msr cptr_el2, x0 // Disable copro. traps to EL2 /* SVE register access */ mrs x1, id_aa64pfr0_el1 ubfx x1, x1, #ID_AA64PFR0_SVE_SHIFT, #4 cbz x1, 7f bic x0, x0, #CPTR_EL2_TZ // Also disable SVE traps msr cptr_el2, x0 // Disable copro. traps to EL2 isb mov x1, #ZCR_ELx_LEN_MASK // SVE: Enable full vector msr_s SYS_ZCR_EL2, x1 // length for EL1. /* Hypervisor stub */ 7: adr_l x0, __hyp_stub_vectors msr vbar_el2, x0 /* spsr */ mov x0, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\ PSR_MODE_EL1h) msr spsr_el2, x0 msr elr_el2, lr mov w0, #BOOT_CPU_MODE_EL2 // This CPU booted in EL2 eret SYM_FUNC_END(el2_setup) 

讯享网

该汇编函数,用w0寄存器保存返回值,返回值有两种:

BOOT_CPU_MODE_EL1:表示当前CPU跳入内核时处于权限级EL1

BOOT_CPU_MODE_EL2:表示当前CPU跳入内核时处于权限级EL2

该汇编函数中的状态寄存器访问指令:
MRS: 状态寄存器到通用寄存器的传送指令。
MRS:({R0-R12}⬅CPSR,SPSR)
MSR: 通用寄存器到状态寄存器的传送指令。
MSR:(CPSR,SPSR⬅{R0-R12})

选择栈

讯享网 msr SPsel, #1 

由于当前异常级别可能是EL1也可能是EL2,因此msr SPsel, #1切换到当前异常级别的SP,如果当前为EL1,则切换到SPEL1,如果当前为EL2,则切换到SPEL2

判断当前的异常级别

 mrs x0, CurrentEL cmp x0, #CurrentEL_EL2 b.ne 1f 

CurrentEL就是获取PSTATE中current exception level域的特殊寄存器
#define CurrentEL_EL2 (2 << 2)
判断当前是否处于EL2 ,当cpu不处于EL2时跳转到标号1

设置EL2端模式

讯享网 1: mov_q x0, (SCTLR_EL2_RES1 | ENDIAN_SET_EL2) msr sctlr_el2, x0 

设置EL2下为大端存储

设置EL0/EL1的端模式

 mov_q x0, (SCTLR_EL1_RES1 | ENDIAN_SET_EL1) msr sctlr_el1, x0 mov w0, #BOOT_CPU_MODE_EL1 // This cpu booted in EL1 isb ret 

如果处于EL1级别,设置EL0和EL1下为大端存储,将启动时的异常级别EL1保存到w0

VHE支持

讯享网#ifdef CONFIG_ARM64_VHE /* * Check for VHE being present. For the rest of the EL2 setup, * x2 being non-zero indicates that we do have VHE, and that the * kernel is intended to run at EL2. */ mrs x2, id_aa64mmfr1_el1 ubfx x2, x2, #ID_AA64MMFR1_VHE_SHIFT, #4 #else mov x2, xzr #endif 

通过读取id_aa64mmfr1_el1,存入X2, 判断CPU是支持虚拟主机扩展VHE模式,还是传统的分离Hyp模式。

vhe的全称是Virtualization Host Extension support。是armv8.1的新特性,其最终要就是支持type-2的hypervisors 这种扩展让kernel直接跑在el2上,这样可以减少host和guest之间share的寄存器,并减少overhead of virtualization 具体实现如下面的patch:https://lwn.net/Articles//

Hyp configuration

 /* Hyp configuration. */ mov_q x0, HCR_HOST_NVHE_FLAGS cbz x2, set_hcr mov_q x0, HCR_HOST_VHE_FLAGS set_hcr: msr hcr_el2, x0 isb 
讯享网#define HCR_HOST_NVHE_FLAGS (HCR_RW | HCR_API | HCR_APK | HCR_ATA) 

HCR_HOST_NVHE_FLAGS宏定义如下:

#define HCR_HOST_NVHE_FLAGS (HCR_RW | HCR_API | HCR_APK | HCR_ATA) 

access physical timer and counter


讯享网

讯享网 /* * Allow Non-secure EL1 and EL0 to access physical timer and counter. * This is not necessary for VHE, since the host kernel runs in EL2, * and EL0 accesses are configured in the later stage of boot process. * Note that when HCR_EL2.E2H == 1, CNTHCTL_EL2 has the same bit layout * as CNTKCTL_EL1, and CNTKCTL_EL1 accessing instructions are redefined * to access CNTHCTL_EL2. This allows the kernel designed to run at EL1 * to transparently mess with the EL0 bits via CNTKCTL_EL1 access in * EL2. */ cbnz x2, 1f mrs x0, cnthctl_el2 orr x0, x0, #3 // Enable EL1 physical timers msr cnthctl_el2, x0 1: msr cntvoff_el2, xzr // Clear virtual offset 

Hyp模式, 启动计数器 ,设定Non-secure EL1 and EL0可访问物理定时器和计数器,虚拟偏移量清零; VHE模式只需要清零偏移量

GIC设置

 /* GICv3 system register access */ mrs x0, id_aa64pfr0_el1 ubfx x0, x0, #ID_AA64PFR0_GIC_SHIFT, #4 cbz x0, 3f 

ID_AA64PFR0_EL1的bit24~27如果为0表示不支持GIC V3, 为1表示支持GICV3

讯享网 mrs_s x0, SYS_ICC_SRE_EL2 orr x0, x0, #ICC_SRE_EL2_SRE // Set ICC_SRE_EL2.SRE==1 orr x0, x0, #ICC_SRE_EL2_ENABLE // Set ICC_SRE_EL2.Enable==1 msr_s SYS_ICC_SRE_EL2, x0 isb // Make sure SRE is now set mrs_s x0, SYS_ICC_SRE_EL2 // Read SRE back, tbz x0, #0, 3f // and check that it sticks msr_s SYS_ICH_HCR_EL2, xzr // Reset ICC_HCR_EL2 to  

ICC_SRE_EL2_ENABLE表示Non-secure EL1 accesses to ICC_SRE_EL1 are permitted if EL3 is not present or ICC_SRE_EL3.Enable is 1, otherwise Non-secure EL1 accesses to ICC_SRE_EL1 trap to EL3

如果是GICv3,设置SYS_ICC_SRE_EL2、SYS_ICH_HCR_EL2寄存器,这两个寄存器是GICv3的CPU接口寄存器。设置完毕后需要重新读取来确认,如果设置不成功则跳转到3f

Populate ID registers

 mrs x0, midr_el1 mrs x1, mpidr_el1 msr vpidr_el2, x0 msr vmpidr_el2, x1 

根据物理CPU的ID寄存器和亲合属性寄存器,来设置虚拟CPU对应的寄存器

hstr_el2清零

讯享网 msr hstr_el2, xzr 

将Hypervisor系统陷入寄存器HSTR_EL2清零。一般情况下,当客户虚拟机是AArch32位,会有Thumb和协处理器方式,不希望在访问相关寄存器陷入到EL2中

EL2 debug

 mrs x1, id_aa64dfr0_el1 sbfx x0, x1, #ID_AA64DFR0_PMUVER_SHIFT, #4 cmp x0, #1 b.lt 4f // Skip if no PMU present mrs x0, pmcr_el0 // Disable debug access traps ubfx x0, x0, #11, #5 // to EL2 and allow access to 4: csel x3, xzr, x0, lt // all PMU counters from EL1 

通过id_aa64dfr0_el1寄存器的pmu version来判断是否支持pmu
如果pmu version小于1则不支持PMU,直接将x3清零;
否则表示支持pmu,则从pmcr_el0获取事件的数量赋值给x3,也就是说x3用于保存事件数量

Statistical profiling

讯享网 /* Statistical profiling */ ubfx x0, x1, #ID_AA64DFR0_PMSVER_SHIFT, #4 cbz x0, 7f // Skip if SPE not present cbnz x2, 6f // VHE? mrs_s x4, SYS_PMBIDR_EL1 // If SPE available at EL2, and x4, x4, #(1 << SYS_PMBIDR_EL1_P_SHIFT) cbnz x4, 5f // then permit sampling of physical mov x4, #(1 << SYS_PMSCR_EL2_PCT_SHIFT | \ 1 << SYS_PMSCR_EL2_PA_SHIFT) msr_s SYS_PMSCR_EL2, x4 // addresses and physical counter 5: mov x1, #(MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT) orr x3, x3, x1 // If we don't have VHE, then b 7f // use EL1&0 translation. 6: // For VHE, use EL2 translation orr x3, x3, #MDCR_EL2_TPMS // and disable access from EL1 7: msr mdcr_el2, x3 // Configure debug traps 

判断PMS是否支持

LORegions

 mrs x1, id_aa64mmfr1_el1 ubfx x0, x1, #ID_AA64MMFR1_LOR_SHIFT, 4 cbz x0, 1f msr_s SYS_LORC_EL1, xzr 

Stage-2 translation

讯享网 msr vttbr_el2, xzr cbz x2, install_el2_stub mov w0, #BOOT_CPU_MODE_EL2 // This CPU booted in EL2 isb ret 

如上VHE和HYP模式将分别走不同的分支

如果是VHE模式,直接返回w0为BOOT_CPU_MODE_EL2。由于进入内核时,CPU处于EL2?,所以直接调用ret指令返回,CPU仍然是EL2 install_el2_stub是Hyp模式相关设置和返回代码

install_el2_stub

 SYM_INNER_LABEL(install_el2_stub, SYM_L_LOCAL) /* * When VHE is not in use, early init of EL2 and EL1 needs to be * done here. * When VHE _is_ in use, EL1 will not be used in the host and * requires no configuration, and all non-hyp-specific EL2 setup * will be done via the _EL1 system register aliases in __cpu_setup. */ mov_q x0, (SCTLR_EL1_RES1 | ENDIAN_SET_EL1) msr sctlr_el1, x0 /* Coprocessor traps. */ mov x0, #0x33ff msr cptr_el2, x0 // Disable copro. traps to EL2  /* SVE register access */ mrs x1, id_aa64pfr0_el1 ubfx x1, x1, #ID_AA64PFR0_SVE_SHIFT, #4 cbz x1, 7f bic x0, x0, #CPTR_EL2_TZ // Also disable SVE traps msr cptr_el2, x0 // Disable copro. traps to EL2  isb mov x1, #ZCR_ELx_LEN_MASK // SVE: Enable full vector  msr_s SYS_ZCR_EL2, x1 // length for EL1.  /* Hypervisor stub */ 7: adr_l x0, __hyp_stub_vectors msr vbar_el2, x0 /* spsr */ mov x0, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\ PSR_MODE_EL1h) msr spsr_el2, x0 msr elr_el2, lr mov w0, #BOOT_CPU_MODE_EL2 // This CPU booted in EL2  eret 

当非VHE模式下,用于EL1和EL2的早期初始化

总结
el2_setup根据当前CPU处于EL1还是EL2主要做了如下的工作:

1、设置当前异常级别的栈
2、判断当前的异常级别,并据此设置端模式;
3、如果支持VHE,设置VHE,否则设置传统HYP模式
4、启动定时器
5、GIC设置
6、根据物理CPU的ID寄存器和亲合属性寄存器,来设置虚拟CPU对应的寄存器
7、hstr_el2清零
8、EL2 debug相关寄存器设置

小讯
上一篇 2025-01-17 07:35
下一篇 2025-02-13 15:20

相关推荐

版权声明:本文内容由互联网用户自发贡献,该文观点仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容,请联系我们,一经查实,本站将立刻删除。
如需转载请保留出处:https://51itzy.com/kjqy/124930.html