Index: sys/arch/amd64/amd64/identcpu.c =================================================================== RCS file: /mount/openbsd/cvs/src/sys/arch/amd64/amd64/identcpu.c,v diff -u -p -u -p -r1.148 identcpu.c --- sys/arch/amd64/amd64/identcpu.c 7 Oct 2024 20:30:17 -0000 1.148 +++ sys/arch/amd64/amd64/identcpu.c 24 Feb 2025 22:58:05 -0000 @@ -67,6 +67,7 @@ int cpuspeed; int amd64_has_xcrypt; int amd64_pos_cbit; /* C bit position for SEV */ +int amd64_min_noes_asid; int has_rdrand; int has_rdseed; @@ -710,6 +711,11 @@ identifycpu(struct cpu_info *ci) 'd', CPUID_MEMBER(ci_feature_amdsev_edx), CPUID_AMDSEV_EDX_BITS); amd64_pos_cbit = (ci->ci_feature_amdsev_ebx & 0x3f); + amd64_min_noes_asid = (ci->ci_feature_amdsev_edx); + if (cpu_sev_guestmode && CPU_IS_PRIMARY(ci)) + printf("\n%s: SEV%s guest mode", ci->ci_dev->dv_xname, + ISSET(cpu_sev_guestmode, SEV_STAT_ES_ENABLED) ? + "-ES" : ""); } printf("\n"); Index: sys/arch/amd64/amd64/locore0.S =================================================================== RCS file: /mount/openbsd/cvs/src/sys/arch/amd64/amd64/locore0.S,v diff -u -p -u -p -r1.26 locore0.S --- sys/arch/amd64/amd64/locore0.S 4 Oct 2024 21:15:52 -0000 1.26 +++ sys/arch/amd64/amd64/locore0.S 24 Feb 2025 22:58:05 -0000 @@ -111,6 +111,7 @@ #include #include #include +#include /* * override user-land alignment before including asm.h @@ -193,6 +194,58 @@ bi_size_ok: pushl $PSL_MBO popfl + /* + * Setup temporary #VC trap handler, in case we are running + * on an AMD CPU in SEV-ES guest mode. Will be reset by + * init_x86_64(). + * We are setting up two handlers: + * + * 1) locore_vc_trap32: Triggered when we are running in + * 32-bit legacy mode. + * + * 2) locore_vc_trap64: Triggered when we are running in + * 32-bit compatibility mode. + * + * The latter one is used by vmd(8). + */ + movl $RELOC(early_idt), %ecx + movl $T_VC, %edx + leal (%ecx, %edx, 8), %ecx /* 32bit #VC IDT slot */ + + pushl %cs /* get current %cs */ + popl %ebx + shll $16, %ebx + + movl $RELOC(locore_vc_trap32), %eax + andl $0x0000ffff, %eax + orl %ebx, %eax /* use current %cs */ + movl %eax, (%ecx) + + movl $RELOC(locore_vc_trap32), %eax + andl $0xffff0000, %eax + orl $((0x80 | SDT_SYS386IGT) << 8), %eax + movl %eax, 4(%ecx) + + movl $RELOC(early_idt), %ecx + movl $(2 * T_VC), %edx + leal (%ecx, %edx, 8), %ecx /* 64bit #VC IDT slot */ + + movl $RELOC(locore_vc_trap64), %eax + andl $0x0000ffff, %eax + orl $(GSEL(3, SEL_KPL) << 16), %eax + movl %eax, (%ecx) + + movl $RELOC(locore_vc_trap64), %eax + andl $0xffff0000, %eax + orl $((0x80 | SDT_SYS386IGT) << 8), %eax + movl %eax, 4(%ecx) + xorl %eax, %eax + movl %eax, 8(%ecx) + movl %eax, 12(%ecx) + + movl $RELOC(idtlc), %eax + lidt (%eax) + xorl %eax,%eax cpuid movl %eax,RELOC(cpuid_level) @@ -288,8 +341,9 @@ cont: /* Are we in guest mode with SEV enabled? 
*/ movl $MSR_SEV_STATUS, %ecx rdmsr - andl $SEV_STAT_ENABLED, %eax + testl $SEV_STAT_ENABLED, %eax jz .Lno_sev + movl %eax, RELOC(cpu_sev_guestmode) /* we are a SEV guest */ /* Determine C bit position */ movl %ebx, %ecx /* %ebx from previous cpuid */ @@ -332,8 +386,6 @@ cont: andl %eax, RELOC(pg_frame + 4) /* apply mask */ andl %eax, RELOC(pg_lgframe + 4) - movl $0x1, RELOC(cpu_sev_guestmode) /* we are a SEV guest */ - .Lno_sev: /* @@ -379,7 +431,9 @@ cont: #define PROC0_DMP2_OFF (PROC0_DMP3_OFF + NDML3_ENTRIES * NBPG) #define TABLESIZE \ ((NKL4_KIMG_ENTRIES + TABLE_L3_ENTRIES + TABLE_L2_ENTRIES + 1 + UPAGES + \ - NDML3_ENTRIES + NDML2_ENTRIES + 3) * NBPG) + NDML3_ENTRIES + NDML2_ENTRIES + 2 + 3) * NBPG) +#define PROC0_GHCB_OFF (TABLESIZE - 5 * NBPG) +#define GHCB_SIZE (2 * NBPG) #define fillkpt \ pushl %ebp ; /* save */ \ @@ -403,6 +457,17 @@ cont: loop 1b ; /* till finished */ \ popl %ebp + +#define fillkpt_nx_nc \ + pushl %ebp ; /* save */ \ + movl RELOC((pg_nx + 4)), %ebp ; /* NX bit? */ \ +1: movl %eax,(%ebx) ; /* store phys addr */ \ + movl %ebp,4(%ebx) ; /* upper 32 bits */ \ + addl $8,%ebx ; /* next pte/pde */ \ + addl $NBPG,%eax ; /* next phys page */ \ + loop 1b ; /* till finished */ \ + popl %ebp + /* Find end of kernel image. */ movl $RELOC(end),%edi #if (NKSYMS || defined(DDB)) @@ -509,6 +574,16 @@ map_tables: shrl $PGSHIFT,%ecx fillkpt_nx + /* Re-Map GHCB shared (ie. unencrypted) */ + /* XXX hshoexer: Only in SEV-ES guestmode. */ + pushl %ebx /* save current slot */ + subl $(5 << 3),%ebx /* move back to slot of GHCB */ + leal (PROC0_GHCB_OFF)(%esi),%eax + orl $(PG_V|PG_KW), %eax + movl $(GHCB_SIZE>>PGSHIFT), %ecx + fillkpt_nx_nc + popl %ebx /* continue with slot saved above */ + /* Map ISA I/O mem (later atdevbase) RW, NX */ movl $(IOM_BEGIN|PG_V|PG_KW/*|PG_N*/),%eax movl $(IOM_SIZE>>PGSHIFT),%ecx @@ -626,7 +701,6 @@ store_pte: */ movl $MSR_EFER,%ecx rdmsr - xorl %eax,%eax /* XXX */ orl $(EFER_LME|EFER_SCE),%eax movl RELOC((pg_nx + 4)), %ebx cmpl $0, %ebx @@ -712,6 +786,12 @@ longmode_hi: addq %rsi,%rdx movq %rdx,atdevbase(%rip) + /* Relocate GHCB. */ + /* XXX hshoexer: Only in SEV-ES guestmode. 
*/ + movq $(PROC0_GHCB_OFF+KERNBASE),%rdx + addq %rsi,%rdx + movq %rdx,ghcb_vaddr(%rip) + /* Record start of symbols */ movq $__kernel_bss_end, ssym(%rip) @@ -734,12 +814,131 @@ longmode_hi: movw %ax,%fs leaq TABLESIZE(%rsi),%rdi + subq $(NBPG*2), %rdi subq $(NBPG*3), %rdi /* XXX merge these */ call init_x86_64 call main + /* MSR Protocol Request Codes */ +#define MSRPROTO_CPUID_REQ 0x4 +#define MSRPROTO_TERM_REQ 0x100 + +vc_cpuid64: + shll $30, %eax /* requested register */ + orl $MSRPROTO_CPUID_REQ, %eax + movl %ebx, %edx /* CPUID function */ + movl $MSR_SEV_GHCB, %ecx + wrmsr + rep vmmcall + rdmsr + ret + + .globl locore_vc_trap64 +locore_vc_trap64: + pushq %rax + pushq %rbx + pushq %rcx + pushq %rdx + +#define SVM_VMEXIT_CPUID 0x72 + cmpl $SVM_VMEXIT_CPUID, 32(%rsp) + jne .Lterminate64 + + movl %eax, %ebx /* save CPUID function */ + + movl $0, %eax /* request cpuid, get %eax */ + call vc_cpuid64 + movq %rdx, 24(%rsp) + + movl $1, %eax /* get %ebx */ + call vc_cpuid64 + movq %rdx, 16(%rsp) + + movl $2, %eax /* get %ecx */ + call vc_cpuid64 + movq %rdx, 8(%rsp) + + movl $3, %eax /* get %edx */ + call vc_cpuid64 + movq %rdx, 0(%rsp) + + popq %rdx + popq %rcx + popq %rbx + popq %rax + addq $8, %rsp + addq $2, (%rsp) + iretq + +.Lterminate64: + movl $MSRPROTO_TERM_REQ, %eax + movl $MSR_SEV_GHCB, %ecx + wrmsr + rep vmmcall +.Lterm_loop64: + hlt + jmp .Lterm_loop64 + + .code32 +vc_cpuid32: + shll $30, %eax /* requested register */ + orl $MSRPROTO_CPUID_REQ, %eax + movl %ebx, %edx /* CPUID function */ + movl $MSR_SEV_GHCB, %ecx + wrmsr + rep vmmcall + rdmsr + ret + + .globl locore_vc_trap32 +locore_vc_trap32: + pushl %eax + pushl %ebx + pushl %ecx + pushl %edx + +#define SVM_VMEXIT_CPUID 0x72 + cmpl $SVM_VMEXIT_CPUID, 16(%esp) + jne .Lterminate32 + + movl %eax, %ebx /* save CPUID function */ + + movl $0, %eax /* request cpuid, get %eax */ + call vc_cpuid32 + movl %edx, 12(%esp) + + movl $1, %eax /* get %ebx */ + call vc_cpuid32 + movl %edx, 8(%esp) + + movl $2, %eax /* get %ecx */ + call vc_cpuid32 + movl %edx, 4(%esp) + + movl $3, %eax /* get %edx */ + call vc_cpuid32 + movl %edx, 0(%esp) + + popl %edx + popl %ecx + popl %ebx + popl %eax + addl $4, %esp + addl $2, (%esp) + iret + +.Lterminate32: + movl $MSRPROTO_TERM_REQ, %eax + movl $MSR_SEV_GHCB, %ecx + wrmsr + rep vmmcall +.Lterm_loop32: + hlt + jmp .Lterm_loop32 + + .section .codepatch,"a" .align 8, 0xcc .globl codepatch_begin @@ -752,6 +951,20 @@ codepatch_end: .previous .data + .globl idtlc /* temporary locore IDT */ +idtlc: + .word early_idt_end-early_idt-1 + .long _RELOC(early_idt) + .align 64, 0xcc + + .globl early_idt +early_idt: + .rept NIDT + .quad 0x0000000000000000 + .quad 0x0000000000000000 + .endr +early_idt_end: + .globl gdt64 gdt64: .word gdt64_end-gdt64_start-1 Index: sys/arch/amd64/amd64/machdep.c =================================================================== RCS file: /mount/openbsd/cvs/src/sys/arch/amd64/amd64/machdep.c,v diff -u -p -u -p -r1.297 machdep.c --- sys/arch/amd64/amd64/machdep.c 21 Sep 2024 19:06:07 -0000 1.297 +++ sys/arch/amd64/amd64/machdep.c 24 Feb 2025 22:58:05 -0000 @@ -194,6 +194,9 @@ paddr_t msgbuf_paddr; vaddr_t idt_vaddr; paddr_t idt_paddr; +vaddr_t ghcb_vaddr; +paddr_t ghcb_paddr; + vaddr_t lo32_vaddr; paddr_t lo32_paddr; paddr_t tramp_pdirpa; @@ -486,6 +489,8 @@ bios_sysctl(int *name, u_int namelen, vo extern int tsc_is_invariant; extern int amd64_has_xcrypt; extern int need_retpoline; +extern int cpu_sev_guestmode; + const struct sysctl_bounded_args cpuctl_vars[] = { { CPU_LIDACTION, 
&lid_action, -1, 2 }, @@ -495,6 +500,7 @@ const struct sysctl_bounded_args cpuctl_ { CPU_XCRYPT, &amd64_has_xcrypt, SYSCTL_INT_READONLY }, { CPU_INVARIANTTSC, &tsc_is_invariant, SYSCTL_INT_READONLY }, { CPU_RETPOLINE, &need_retpoline, SYSCTL_INT_READONLY }, + { CPU_SEVGUESTMODE, &cpu_sev_guestmode, SYSCTL_INT_READONLY }, }; /* @@ -1309,6 +1315,38 @@ cpu_init_idt(void) lidt(®ion); } +uint64_t early_gdt[GDT_SIZE / 8]; + +void +cpu_init_early_vctrap(paddr_t addr) +{ + struct region_descriptor region; + + extern struct region_descriptor gdt64; + extern struct gate_descriptor early_idt[NIDT]; + extern void Xvctrap_early(void); + + /* Setup temporary "early" longmode GDT, will be reset soon */ + memset(early_gdt, 0, sizeof(early_gdt)); + set_mem_segment(GDT_ADDR_MEM(early_gdt, GCODE_SEL), 0, 0xfffff, + SDT_MEMERA, SEL_KPL, 1, 0, 1); + set_mem_segment(GDT_ADDR_MEM(early_gdt, GDATA_SEL), 0, 0xfffff, + SDT_MEMRWA, SEL_KPL, 1, 0, 1); + setregion(®ion, early_gdt, GDT_SIZE - 1); + lgdt(®ion); + + /* Setup temporary "early" longmode #VC entry, will be reset soon */ + setgate(&early_idt[T_VC], Xvctrap_early, 0, SDT_SYS386IGT, + SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); + setregion(®ion, early_idt, NIDT * sizeof(idt[0]) - 1); + lidt(®ion); + + /* Tell vmm(4) about our GHCB. */ + ghcb_paddr = addr; + memset((void *)ghcb_vaddr, 0, 2 * PAGE_SIZE); + wrmsr(MSR_SEV_GHCB, ghcb_paddr); +} + void cpu_init_extents(void) { @@ -1428,6 +1466,13 @@ init_x86_64(paddr_t first_avail) bios_memmap_t *bmp; int x, ist; uint64_t max_dm_size = ((uint64_t)512 * NUM_L4_SLOT_DIRECT) << 30; + + /* + * locore0 mapped 2 pages for use as GHCB before pmap is initialized. + */ + if (ISSET(cpu_sev_guestmode, SEV_STAT_ES_ENABLED)) + cpu_init_early_vctrap(first_avail); + first_avail += 2 * NBPG; /* * locore0 mapped 3 pages for use before the pmap is initialized Index: sys/arch/amd64/amd64/trap.c =================================================================== RCS file: /mount/openbsd/cvs/src/sys/arch/amd64/amd64/trap.c,v diff -u -p -u -p -r1.106 trap.c --- sys/arch/amd64/amd64/trap.c 4 Sep 2024 07:54:51 -0000 1.106 +++ sys/arch/amd64/amd64/trap.c 24 Feb 2025 22:58:05 -0000 @@ -86,6 +86,8 @@ #include #include #include +#include +#include #ifdef DDB #include #include @@ -95,6 +97,7 @@ int upageflttrap(struct trapframe *, uint64_t); int kpageflttrap(struct trapframe *, uint64_t); +int vctrap(struct trapframe *); void kerntrap(struct trapframe *); void usertrap(struct trapframe *); void ast(struct trapframe *); @@ -123,6 +126,7 @@ const char * const trap_type[] = { "SSE FP exception", /* 19 T_XMM */ "virtualization exception", /* 20 T_VE */ "control protection exception", /* 21 T_CP */ + "VMM communication exception", /* 29 T_VC */ }; const int trap_types = nitems(trap_type); @@ -297,6 +301,183 @@ kpageflttrap(struct trapframe *frame, ui return 1; } +int +vctrap(struct trapframe *frame) +{ + uint64_t sw_exitcode, sw_exitinfo1, sw_exitinfo2; + uint64_t rax, rbx, rcx, rdx; + uint8_t *rip = (uint8_t *)(frame->tf_rip); + uint8_t valid_bm[GHCB_VB_SZ], expected_bm[GHCB_VB_SZ]; + uint16_t port; + struct ghcb_sa *ghcb; + + extern vaddr_t ghcb_vaddr; + + intr_disable(); + + ghcb = (struct ghcb_sa *)ghcb_vaddr; + ghcb_clear(ghcb); + memset(valid_bm, 0, sizeof(valid_bm)); + memset(expected_bm, 0, sizeof(expected_bm)); + + sw_exitcode = frame->tf_err; + sw_exitinfo1 = 0; + sw_exitinfo2 = 0; + + switch (sw_exitcode) { + case SVM_VMEXIT_CPUID: + ghcb_valbm_set(valid_bm, GHCB_RAX); + ghcb_valbm_set(valid_bm, GHCB_RCX); + ghcb_valbm_set(expected_bm, GHCB_RAX); 
+ ghcb_valbm_set(expected_bm, GHCB_RBX); + ghcb_valbm_set(expected_bm, GHCB_RCX); + ghcb_valbm_set(expected_bm, GHCB_RDX); + frame->tf_rip += 2; + break; + case SVM_VMEXIT_MSR: { + if (*rip == 0x0f && *(rip + 1) == 0x30) { + /* WRMSR */ + ghcb_valbm_set(valid_bm, GHCB_RAX); + ghcb_valbm_set(valid_bm, GHCB_RCX); + ghcb_valbm_set(valid_bm, GHCB_RDX); + sw_exitinfo1 = 1; + } else if (*rip == 0x0f && *(rip + 1) == 0x32) { + /* RDMSR */ + ghcb_valbm_set(valid_bm, GHCB_RCX); + ghcb_valbm_set(expected_bm, GHCB_RAX); + ghcb_valbm_set(expected_bm, GHCB_RDX); + } else { + sw_exitinfo1 = sw_exitcode; + sw_exitinfo2 = frame->tf_rip; + sw_exitcode = SVM_VMEXIT_INVALID; + } + frame->tf_rip += 2; + break; + } + case SVM_VMEXIT_IOIO: { + switch (*rip) { + case 0x66: { + switch (*(rip + 1)) { + case 0xef: /* out %ax,(%dx) */ + ghcb_valbm_set(valid_bm, GHCB_RAX); + port = (uint16_t)frame->tf_rdx; + sw_exitinfo1 = (port << 16) | + (1ULL << 5); + frame->tf_rip += 2; + break; + case 0xed: /* in (%dx),%ax */ + ghcb_valbm_set(expected_bm, GHCB_RAX); + port = (uint16_t)frame->tf_rdx; + sw_exitinfo1 = (port << 16) | + (1ULL << 5) | (1ULL << 0); + frame->tf_rip += 2; + break; + default: + panic("failed to decode prefixed IOIO"); + } + break; + } + case 0xe4: /* in $0x71,%al */ + ghcb_valbm_set(expected_bm, GHCB_RAX); + port = *(rip + 1); + sw_exitinfo1 = (port << 16) | (1ULL << 4) | + (1ULL << 0); + frame->tf_rip += 2; + break; + case 0xe6: /* outb %al,$0x43 */ + ghcb_valbm_set(valid_bm, GHCB_RAX); + port = *(rip + 1); + sw_exitinfo1 = (port << 16) | (1ULL << 4); + frame->tf_rip += 2; + break; + case 0xec: /* in (%dx),%al */ + ghcb_valbm_set(expected_bm, GHCB_RAX); + port = (uint16_t)frame->tf_rdx; + sw_exitinfo1 = (port << 16) | (1ULL << 4) | + (1ULL << 0); + frame->tf_rip += 1; + break; + case 0xed: /* in (%dx),%eax */ + ghcb_valbm_set(expected_bm, GHCB_RAX); + port = (uint16_t)frame->tf_rdx; + sw_exitinfo1 = (port << 16) | (1ULL << 6) | + (1ULL << 0); + frame->tf_rip += 1; + break; + case 0xee: /* out %al,(%dx) */ + ghcb_valbm_set(valid_bm, GHCB_RAX); + port = (uint16_t)frame->tf_rdx; + sw_exitinfo1 = (port << 16) | (1ULL << 4); + frame->tf_rip += 1; + break; + case 0xef: /* out %eax,(%dx) */ + ghcb_valbm_set(valid_bm, GHCB_RAX); + port = (uint16_t)frame->tf_rdx; + sw_exitinfo1 = (port << 16) | (1ULL << 6); + frame->tf_rip += 1; + break; + default: + panic("failed to decode IOIO"); + } + break; + } + default: + panic("invalid exit code 0x%llx", sw_exitcode); + } + + /* Always required */ + ghcb->v_sw_exitcode = sw_exitcode; + ghcb->v_sw_exitinfo1 = sw_exitinfo1; + ghcb->v_sw_exitinfo2 = sw_exitinfo2; + ghcb_valbm_set(valid_bm, GHCB_SW_EXITCODE); + ghcb_valbm_set(valid_bm, GHCB_SW_EXITINFO1); + ghcb_valbm_set(valid_bm, GHCB_SW_EXITINFO2); + ghcb_valbm_set(expected_bm, GHCB_SW_EXITINFO1); + ghcb_valbm_set(expected_bm, GHCB_SW_EXITINFO2); + + /* These are exit specific. */ + if (ghcb_valbm_isset(valid_bm, GHCB_RAX)) + ghcb->v_rax = frame->tf_rax; + if (ghcb_valbm_isset(valid_bm, GHCB_RBX)) + ghcb->v_rbx = frame->tf_rbx; + if (ghcb_valbm_isset(valid_bm, GHCB_RCX)) + ghcb->v_rcx = frame->tf_rcx; + if (ghcb_valbm_isset(valid_bm, GHCB_RDX)) + ghcb->v_rdx = frame->tf_rdx; + memcpy(ghcb->valid_bitmap, valid_bm, sizeof(ghcb->valid_bitmap)); + + /* Call hypervisor. */ + vmgexit(); + + /* + * Save the relevant GHCB members on the stack. This allows + * us to be re-entrant in case of a panic below. And clear + * GHCB right away. 
+ */ + memcpy(valid_bm, ghcb->valid_bitmap, sizeof(valid_bm)); + rax = ghcb->v_rax; + rbx = ghcb->v_rbx; + rcx = ghcb->v_rcx; + rdx = ghcb->v_rdx; + ghcb_clear(ghcb); + + if (ghcb_verify_bm_guest(valid_bm, expected_bm)) { + panic("invalid hypervisor response"); + } + + /* Only sync back expected registers, discard everything else */ + if (ghcb_valbm_isset(expected_bm, GHCB_RAX)) + frame->tf_rax = rax; + if (ghcb_valbm_isset(expected_bm, GHCB_RBX)) + frame->tf_rbx = rbx; + if (ghcb_valbm_isset(expected_bm, GHCB_RCX)) + frame->tf_rcx = rcx; + if (ghcb_valbm_isset(expected_bm, GHCB_RDX)) + frame->tf_rdx = rdx; + + return 1; +} + /* * kerntrap(frame): @@ -348,6 +529,11 @@ kerntrap(struct trapframe *frame) else return; #endif /* NISA > 0 */ + + case T_VC: + if (vctrap(frame)) + return; + goto we_re_toast; } } @@ -427,6 +613,9 @@ usertrap(struct trapframe *frame) code = (frame->tf_err & 0x7fff) < 4 ? ILL_BTCFI : ILL_BADSTK; break; + case T_VC: + vctrap(frame); + goto out; case T_PAGEFLT: /* page fault */ if (!uvm_map_inentry(p, &p->p_spinentry, PROC_STACK(p), Index: sys/arch/amd64/amd64/vector.S =================================================================== RCS file: /mount/openbsd/cvs/src/sys/arch/amd64/amd64/vector.S,v diff -u -p -u -p -r1.99 vector.S --- sys/arch/amd64/amd64/vector.S 10 Nov 2024 03:02:43 -0000 1.99 +++ sys/arch/amd64/amd64/vector.S 24 Feb 2025 22:58:05 -0000 @@ -373,6 +373,8 @@ IDTVEC(trap14) ZTRAP(T_VE) IDTVEC(trap15) TRAP(T_CP) +IDTVEC(trap1d) + TRAP(T_VC) IDTVEC(trap1f) IDTVEC_ALIAS(trap16, trap1f) IDTVEC_ALIAS(trap17, trap1f) @@ -381,7 +383,6 @@ IDTVEC_ALIAS(trap19, trap1f) IDTVEC_ALIAS(trap1a, trap1f) IDTVEC_ALIAS(trap1b, trap1f) IDTVEC_ALIAS(trap1c, trap1f) -IDTVEC_ALIAS(trap1d, trap1f) IDTVEC_ALIAS(trap1e, trap1f) /* 22 - 31 reserved for future exp */ ZTRAP(T_RESERVED) @@ -513,6 +514,17 @@ END(alltraps_kern) END(alltraps_kern_meltdown) KTEXT_PAGE_END +/* #VC trap entry for early bootstrap */ +IDTVEC(vctrap_early) + pushq $T_VC + TRAP_ENTRY_KERN /* early #VC has to be in kernel mode */ + sti + cld + movq %rsp, %rdi + call vctrap + movq $0,-8(%rsp) + cli + INTRFASTEXIT /* * Macros for interrupt entry, call to handler, and exit. 
Index: sys/arch/amd64/amd64/vmm_machdep.c =================================================================== RCS file: /mount/openbsd/cvs/src/sys/arch/amd64/amd64/vmm_machdep.c,v diff -u -p -u -p -r1.41 vmm_machdep.c --- sys/arch/amd64/amd64/vmm_machdep.c 27 Nov 2024 10:09:51 -0000 1.41 +++ sys/arch/amd64/amd64/vmm_machdep.c 24 Feb 2025 22:58:05 -0000 @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -80,6 +81,7 @@ int vcpu_writeregs_svm(struct vcpu *, ui int vcpu_reset_regs(struct vcpu *, struct vcpu_reg_state *); int vcpu_reset_regs_vmx(struct vcpu *, struct vcpu_reg_state *); int vcpu_reset_regs_svm(struct vcpu *, struct vcpu_reg_state *); +int vcpu_svm_init_vmsa(struct vcpu *, struct vcpu_reg_state *); int vcpu_reload_vmcs_vmx(struct vcpu *); int vcpu_init(struct vcpu *, struct vm_create_params *); int vcpu_init_vmx(struct vcpu *); @@ -95,6 +97,11 @@ int vmx_get_exit_info(uint64_t *, uint64 int vmx_load_pdptes(struct vcpu *); int vmx_handle_exit(struct vcpu *); int svm_handle_exit(struct vcpu *); +int svm_gexit_sync_host(struct vcpu *); +int svm_gexit_sync_guest(struct vcpu *); +int svm_handle_gexit(struct vcpu *); +int svm_handle_efercr(struct vcpu *, uint64_t); +int svm_get_iflag(struct vcpu *, uint64_t); int svm_handle_msr(struct vcpu *); int vmm_handle_xsetbv(struct vcpu *, uint64_t *); int vmx_handle_xsetbv(struct vcpu *); @@ -125,6 +132,7 @@ int vmx_fault_page(struct vcpu *, paddr_ int vmx_handle_np_fault(struct vcpu *); int svm_handle_np_fault(struct vcpu *); int vmm_alloc_vpid(uint16_t *); +int vmm_alloc_asid(uint16_t *, struct vcpu *); void vmm_free_vpid(uint16_t); const char *vcpu_state_decode(u_int); const char *vmx_exit_reason_decode(uint32_t); @@ -138,6 +146,7 @@ void vmx_setmsrbw(struct vcpu *, uint32_ void vmx_setmsrbrw(struct vcpu *, uint32_t); void svm_set_clean(struct vcpu *, uint32_t); void svm_set_dirty(struct vcpu *, uint32_t); +int svm_get_vmsa(uint32_t, uint32_t, uint64_t *); int vmm_gpa_is_valid(struct vcpu *vcpu, paddr_t gpa, size_t obj_size); void vmm_init_pvclock(struct vcpu *, paddr_t); @@ -231,12 +240,16 @@ extern struct vmm_softc *vmm_softc; extern vaddr_t idt_vaddr; extern struct gate_descriptor *idt; +/* Minimum ASID value for an SEV enabled, SEV-ES disabled guest. 
*/ +extern int amd64_min_noes_asid; + /* Constants used in "CR access exit" */ #define CR_WRITE 0 #define CR_READ 1 #define CR_CLTS 2 #define CR_LMSW 3 + /* * vmm_enabled * @@ -1588,6 +1601,7 @@ vcpu_reset_regs_svm(struct vcpu *vcpu, s * External NMI exiting (SVM_INTERCEPT_NMI) * CPUID instruction (SVM_INTERCEPT_CPUID) * HLT instruction (SVM_INTERCEPT_HLT) + * INVLPGA instruction (SVM_INTERCEPT_INVLPGA) * I/O instructions (SVM_INTERCEPT_INOUT) * MSR access (SVM_INTERCEPT_MSR) * shutdown events (SVM_INTERCEPT_SHUTDOWN) @@ -1617,9 +1631,17 @@ vcpu_reset_regs_svm(struct vcpu *vcpu, s SVM_INTERCEPT_MWAIT_UNCOND | SVM_INTERCEPT_MONITOR | SVM_INTERCEPT_MWAIT_COND | SVM_INTERCEPT_RDTSCP; - if (xsave_mask) + if (xsave_mask && !vcpu->vc_seves) /* XXX hshoexer */ vmcb->v_intercept2 |= SVM_INTERCEPT_XSETBV; + if (vcpu->vc_seves) { + /* With SEV-ES also intercept post EFER and CR[048] writes */ + vmcb->v_intercept2 |= SVM_INTERCEPT_EFER_WRITE; + vmcb->v_intercept2 |= SVM_INTERCEPT_CR0_WRITE_POST; + vmcb->v_intercept2 |= SVM_INTERCEPT_CR4_WRITE_POST; + vmcb->v_intercept2 |= SVM_INTERCEPT_CR8_WRITE_POST; + } + /* Setup I/O bitmap */ memset((uint8_t *)vcpu->vc_svm_ioio_va, 0xFF, 3 * PAGE_SIZE); vmcb->v_iopm_pa = (uint64_t)(vcpu->vc_svm_ioio_pa); @@ -1638,9 +1660,18 @@ vcpu_reset_regs_svm(struct vcpu *vcpu, s svm_setmsrbrw(vcpu, MSR_FSBASE); svm_setmsrbrw(vcpu, MSR_GSBASE); svm_setmsrbrw(vcpu, MSR_KERNELGSBASE); + svm_setmsrbrw(vcpu, MSR_SEV_GHCB); + + /* allow reading SEV status */ + svm_setmsrbrw(vcpu, MSR_SEV_STATUS); - /* EFER is R/O so we can ensure the guest always has SVME */ - svm_setmsrbr(vcpu, MSR_EFER); + if (vcpu->vc_seves) { + /* With SEV-ES SVME can not be modified by the guest */ + svm_setmsrbrw(vcpu, MSR_EFER); + } else { + /* EFER is R/O so we can ensure the guest always has SVME */ + svm_setmsrbr(vcpu, MSR_EFER); + } /* allow reading TSC */ svm_setmsrbr(vcpu, MSR_TSC); @@ -1653,7 +1684,10 @@ vcpu_reset_regs_svm(struct vcpu *vcpu, s vmcb->v_asid = vcpu->vc_vpid; /* TLB Control - First time in, flush all*/ - vmcb->v_tlb_control = SVM_TLB_CONTROL_FLUSH_ALL; + if (vcpu->vc_seves) + vmcb->v_tlb_control = SVM_TLB_CONTROL_FLUSH_ASID; /* XXX hshoexer */ + else + vmcb->v_tlb_control = SVM_TLB_CONTROL_FLUSH_ALL; /* INTR masking */ vmcb->v_intr_masking = 1; @@ -1672,20 +1706,94 @@ vcpu_reset_regs_svm(struct vcpu *vcpu, s if (vcpu->vc_sev) vmcb->v_np_enable |= SVM_ENABLE_SEV; + /* SEV-ES */ + if (vcpu->vc_seves) { + vmcb->v_np_enable |= SVM_SEVES_ENABLE; + vmcb->v_lbr_virt_enable |= SVM_LBRVIRT_ENABLE; + + /* Set VMSA. */ + vmcb->v_vmsa_pa = vcpu->vc_svm_vmsa_pa; + + /* XXX hshoexer: LBR: guest_state_protected flag? */ + svm_setmsrbrw(vcpu, MSR_DEBUGCTLMSR); + svm_setmsrbrw(vcpu, MSR_LASTBRANCHFROMIP); + svm_setmsrbrw(vcpu, MSR_LASTBRANCHTOIP); + svm_setmsrbrw(vcpu, MSR_LASTINTFROMIP); + svm_setmsrbrw(vcpu, MSR_LASTINTTOIP); + + /* XXX hshoexer: virt vmload/vmsave */ + vmcb->v_lbr_virt_enable |= 0x2; + } + /* Enable SVME in EFER (must always be set) */ vmcb->v_efer |= EFER_SVME; - ret = vcpu_writeregs_svm(vcpu, VM_RWREGS_ALL, vrs); + if ((ret = vcpu_writeregs_svm(vcpu, VM_RWREGS_ALL, vrs)) != 0) + goto exit; /* xcr0 power on default sets bit 0 (x87 state) */ vcpu->vc_gueststate.vg_xcr0 = XFEATURE_X87 & xsave_mask; vcpu->vc_parent->vm_map->pmap->eptp = 0; + if ((ret = vcpu_svm_init_vmsa(vcpu, vrs)) != 0) + goto exit; + +exit: return ret; } /* + * vcpu_svm_init_vmsa + * + * Initialize VMSA with initial VCPU state. 
+ */ +int +vcpu_svm_init_vmsa(struct vcpu *vcpu, struct vcpu_reg_state *vrs) +{ + uint64_t *gprs = vrs->vrs_gprs; + struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va; + struct vmsa *vmsa; + + if (!vcpu->vc_seves) + return 0; + + if (vmcb->v_dr7 & ~0x00000400) /* XXX hshoexer? */ + return 1; + + vmsa = (struct vmsa *)vcpu->vc_svm_vmsa_va; + memcpy(vmsa, &vmcb->vmcb_layout, sizeof(vmcb->vmcb_layout)); + + vmsa->v_rax = gprs[VCPU_REGS_RAX]; + vmsa->v_rbx = gprs[VCPU_REGS_RBX]; + vmsa->v_rcx = gprs[VCPU_REGS_RCX]; + vmsa->v_rdx = gprs[VCPU_REGS_RDX]; + vmsa->v_rsp = gprs[VCPU_REGS_RSP]; + vmsa->v_rbp = gprs[VCPU_REGS_RBP]; + vmsa->v_rsi = gprs[VCPU_REGS_RSI]; + vmsa->v_rdi = gprs[VCPU_REGS_RDI]; + + vmsa->v_r8 = gprs[VCPU_REGS_R8]; + vmsa->v_r9 = gprs[VCPU_REGS_R9]; + vmsa->v_r10 = gprs[VCPU_REGS_R10]; + vmsa->v_r11 = gprs[VCPU_REGS_R11]; + vmsa->v_r12 = gprs[VCPU_REGS_R12]; + vmsa->v_r13 = gprs[VCPU_REGS_R13]; + vmsa->v_r14 = gprs[VCPU_REGS_R14]; + vmsa->v_r15 = gprs[VCPU_REGS_R15]; + + vmsa->v_rip = gprs[VCPU_REGS_RIP]; + + vmsa->v_xcr0 = vcpu->vc_gueststate.vg_xcr0; + + /* initialize FPU */ + vmsa->v_x87_fcw = __INITIAL_NPXCW__; + vmsa->v_mxcsr = __INITIAL_MXCSR__; + + return 0; +} + +/* * svm_setmsrbr * * Allow read access to the specified msr on the supplied vcpu. @@ -2765,7 +2873,7 @@ vcpu_init_svm(struct vcpu *vcpu, struct int ret = 0; /* Allocate an ASID early to avoid km_alloc if we're out of ASIDs. */ - if (vmm_alloc_vpid(&vcpu->vc_vpid)) + if (vmm_alloc_asid(&vcpu->vc_vpid, vcpu)) return (ENOMEM); /* Allocate VMCB VA */ @@ -2829,6 +2937,28 @@ vcpu_init_svm(struct vcpu *vcpu, struct (uint64_t)vcpu->vc_svm_hsa_va, (uint64_t)vcpu->vc_svm_hsa_pa); + + /* Allocate VM save area VA */ + vcpu->vc_svm_vmsa_va = (vaddr_t)km_alloc(PAGE_SIZE, &kv_page, + &kp_zero, &kd_waitok); + + if (!vcpu->vc_svm_vmsa_va) { + ret = ENOMEM; + goto exit; + } + + /* Compute VM save area PA */ + if (!pmap_extract(pmap_kernel(), vcpu->vc_svm_vmsa_va, + &vcpu->vc_svm_vmsa_pa)) { + ret = ENOMEM; + goto exit; + } + + DPRINTF("%s: VMSA va @ 0x%llx, pa @ 0x%llx\n", __func__, + (uint64_t)vcpu->vc_svm_vmsa_va, + (uint64_t)vcpu->vc_svm_vmsa_pa); + + /* Allocate IOIO area VA (3 pages) */ vcpu->vc_svm_ioio_va = (vaddr_t)km_alloc(3 * PAGE_SIZE, &kv_any, &vmm_kp_contig, &kd_waitok); @@ -2851,6 +2981,7 @@ vcpu_init_svm(struct vcpu *vcpu, struct /* Shall we enable SEV? */ vcpu->vc_sev = vcp->vcp_sev; + vcpu->vc_seves = vcp->vcp_seves; /* Inform vmd(8) about ASID and C bit position. 
*/ vcp->vcp_poscbit = amd64_pos_cbit; @@ -2957,6 +3088,11 @@ vcpu_deinit_svm(struct vcpu *vcpu) &kp_zero); vcpu->vc_svm_hsa_va = 0; } + if (vcpu->vc_svm_vmsa_va) { + km_free((void *)vcpu->vc_svm_vmsa_va, PAGE_SIZE, &kv_page, + &kp_zero); + vcpu->vc_svm_vmsa_va = 0; + } if (vcpu->vc_svm_ioio_va) { km_free((void *)vcpu->vc_svm_ioio_va, 3 * PAGE_SIZE, &kv_any, &vmm_kp_contig); @@ -4038,7 +4174,7 @@ svm_handle_hlt(struct vcpu *vcpu) /* All HLT insns are 1 byte */ vcpu->vc_gueststate.vg_rip += 1; - if (!(rflags & PSL_I)) { + if (!svm_get_iflag(vcpu, rflags)) { DPRINTF("%s: guest halted with interrupts disabled\n", __func__); return (EIO); @@ -4134,7 +4270,7 @@ svm_handle_exit(struct vcpu *vcpu) switch (exit_reason) { case SVM_VMEXIT_VINTR: - if (!(rflags & PSL_I)) { + if (!svm_get_iflag(vcpu, rflags)) { DPRINTF("%s: impossible interrupt window exit " "config\n", __func__); ret = EINVAL; @@ -4198,6 +4334,16 @@ svm_handle_exit(struct vcpu *vcpu) ret = vmm_inject_ud(vcpu); update_rip = 0; break; + case SVM_VMEXIT_EFER_WRITE_TRAP: + case SVM_VMEXIT_CR0_WRITE_TRAP: + case SVM_VMEXIT_CR4_WRITE_TRAP: + case SVM_VMEXIT_CR8_WRITE_TRAP: + ret = svm_handle_efercr(vcpu, exit_reason); + update_rip = 0; + break; + case SVM_VMEXIT_VMGEXIT: + ret = svm_handle_gexit(vcpu); + break; default: DPRINTF("%s: unhandled exit 0x%llx (pa=0x%llx)\n", __func__, exit_reason, (uint64_t)vcpu->vc_control_pa); @@ -4224,6 +4370,328 @@ svm_handle_exit(struct vcpu *vcpu) } /* + * sync guest ghcb -> host vmcb/vcpu + */ +int +svm_gexit_sync_host(struct vcpu *vcpu) +{ + struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va; + struct ghcb_sa *ghcb; + uint64_t svm_sw_exitcode; + uint8_t *valid_bm, expected_bm[0x10]; + + if (!vcpu->vc_seves) + return (0); + + if (vcpu->vc_svm_ghcb_va == 0) { + printf("%s: GHCB not set\n", __func__); + return (0); + } + ghcb = (struct ghcb_sa *)vcpu->vc_svm_ghcb_va; + + if (!ghcb_valid(ghcb)) + return (EINVAL); + + valid_bm = ghcb->valid_bitmap; + + /* Always required. 
*/ + memset(expected_bm, 0, sizeof(expected_bm)); + ghcb_valbm_set(expected_bm, GHCB_SW_EXITCODE); + ghcb_valbm_set(expected_bm, GHCB_SW_EXITINFO1); + ghcb_valbm_set(expected_bm, GHCB_SW_EXITINFO2); + + svm_sw_exitcode = ghcb->v_sw_exitcode; + switch (svm_sw_exitcode) { + case SVM_VMEXIT_CPUID: + ghcb_valbm_set(expected_bm, GHCB_RAX); + ghcb_valbm_set(expected_bm, GHCB_RCX); + break; + case SVM_VMEXIT_MSR: + if (ghcb->v_sw_exitinfo1 == 1) { + /* WRMSR */ + ghcb_valbm_set(expected_bm, GHCB_RAX); + ghcb_valbm_set(expected_bm, GHCB_RCX); + ghcb_valbm_set(expected_bm, GHCB_RDX); + } else { + /* RDMSR */ + ghcb_valbm_set(expected_bm, GHCB_RCX); + } + break; + case SVM_VMEXIT_IOIO: + if (ghcb->v_sw_exitinfo1 & 0x1) { + /* in instruction, no registers used */ + } else { + /* out instruction */ + ghcb_valbm_set(expected_bm, GHCB_RAX); + } + break; + default: + return (EINVAL); + } + + if (ghcb_verify_bm(valid_bm, expected_bm) != 0) + return (EINVAL); + + /* Always required */ + vmcb->v_exitcode = vcpu->vc_gueststate.vg_exit_reason = + ghcb->v_sw_exitcode; + vmcb->v_exitinfo1 = ghcb->v_sw_exitinfo1; + vmcb->v_exitinfo2 = ghcb->v_sw_exitinfo2; + + if (ghcb_valbm_isset(expected_bm, GHCB_RAX)) + vmcb->v_rax = vcpu->vc_gueststate.vg_rax = ghcb->v_rax; + if (ghcb_valbm_isset(expected_bm, GHCB_RBX)) + vcpu->vc_gueststate.vg_rbx = ghcb->v_rbx; + if (ghcb_valbm_isset(expected_bm, GHCB_RCX)) + vcpu->vc_gueststate.vg_rcx = ghcb->v_rcx; + if (ghcb_valbm_isset(expected_bm, GHCB_RDX)) + vcpu->vc_gueststate.vg_rdx = ghcb->v_rdx; + + return (0); +} + +/* + * sync host vmcb/vcpu -> guest ghcb + */ +int +svm_gexit_sync_guest(struct vcpu *vcpu) +{ + uint64_t svm_sw_exitcode; + uint64_t svm_sw_exitinfo1, svm_sw_exitinfo2; + uint8_t *valid_bm; + struct ghcb_sa *ghcb; + + if (!vcpu->vc_seves) + return (0); + + if (vcpu->vc_svm_ghcb_va == 0) + return (0); + + ghcb = (struct ghcb_sa *)vcpu->vc_svm_ghcb_va; + svm_sw_exitcode = ghcb->v_sw_exitcode; + svm_sw_exitinfo1 = ghcb->v_sw_exitinfo1; + svm_sw_exitinfo2 = ghcb->v_sw_exitinfo2; + ghcb_clear(ghcb); + valid_bm = ghcb->valid_bitmap; + + switch (svm_sw_exitcode) { + case SVM_VMEXIT_CPUID: + ghcb_valbm_set(valid_bm, GHCB_RAX); + ghcb_valbm_set(valid_bm, GHCB_RBX); + ghcb_valbm_set(valid_bm, GHCB_RCX); + ghcb_valbm_set(valid_bm, GHCB_RDX); + break; + case SVM_VMEXIT_MSR: + if (svm_sw_exitinfo1 == 1) { + /* WRMSR -- nothing to return */ + } else { + /* RDMSR */ + ghcb_valbm_set(valid_bm, GHCB_RAX); + ghcb_valbm_set(valid_bm, GHCB_RDX); + } + break; + case SVM_VMEXIT_IOIO: + if (svm_sw_exitinfo1 & 0x1) { + /* IN */ + ghcb_valbm_set(valid_bm, GHCB_RAX); + } else { + /* OUT -- nothing to return */ + } + break; + default: + return (EINVAL); + } + + /* Always required */ + svm_sw_exitinfo1 = 0; + svm_sw_exitinfo2 = 0; + ghcb_valbm_set(valid_bm, GHCB_SW_EXITINFO1); + ghcb_valbm_set(valid_bm, GHCB_SW_EXITINFO2); + + if (ghcb_valbm_isset(valid_bm, GHCB_RAX)) + ghcb->v_rax = vcpu->vc_gueststate.vg_rax; + if (ghcb_valbm_isset(valid_bm, GHCB_RBX)) + ghcb->v_rbx = vcpu->vc_gueststate.vg_rbx; + if (ghcb_valbm_isset(valid_bm, GHCB_RCX)) + ghcb->v_rcx = vcpu->vc_gueststate.vg_rcx; + if (ghcb_valbm_isset(valid_bm, GHCB_RDX)) + ghcb->v_rdx = vcpu->vc_gueststate.vg_rdx; + + if (ghcb_valbm_isset(valid_bm, GHCB_SW_EXITINFO1)) + ghcb->v_sw_exitinfo1 = svm_sw_exitinfo1; + if (ghcb_valbm_isset(valid_bm, GHCB_SW_EXITINFO2)) + ghcb->v_sw_exitinfo2 = svm_sw_exitinfo2; + + return (0); +} + +/* + * svm_handle_gexit + * + * Handle exits initiated by the guest due to #VC exceptions generated + * when 
+ * SEV-ES is enabled.
+ */
+int
+svm_handle_gexit(struct vcpu *vcpu)
+{
+	struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va;
+	struct vm *vm = vcpu->vc_parent;
+	struct ghcb_sa *ghcb;
+	paddr_t ghcb_gpa, ghcb_hpa;
+	uint32_t req, resp;
+	uint64_t result;
+	int syncout, error = 0;
+
+	if (vcpu->vc_svm_ghcb_va == 0 && (vmcb->v_ghcb_gpa & ~PG_FRAME) == 0 &&
+	    (vmcb->v_ghcb_gpa & PG_FRAME) != 0) {
+		/*
+		 * Guest provides a valid guest physical address
+		 * for GHCB and it is not set yet -> assign it.
+		 *
+		 * We only accept a GHCB once; we decline re-definition.
+		 */
+		ghcb_gpa = vmcb->v_ghcb_gpa & PG_FRAME;
+		if (!pmap_extract(vm->vm_map->pmap, ghcb_gpa, &ghcb_hpa))
+			return (EINVAL);
+		vcpu->vc_svm_ghcb_va = (vaddr_t)PMAP_DIRECT_MAP(ghcb_hpa);
+	} else if ((vmcb->v_ghcb_gpa & ~PG_FRAME) != 0) {
+		/*
+		 * Low bits in use, thus this must be an MSR protocol
+		 * request.
+		 */
+		req = (vmcb->v_ghcb_gpa & 0xffffffff);
+
+		/* we only support cpuid and terminate */
+		if ((req & ~PG_FRAME) == 0x100) {
+			DPRINTF("%s: guest requests termination\n", __func__);
+			return (1);
+		} else if ((req & ~PG_FRAME) != 0x4)
+			return (EINVAL);
+
+		vmcb->v_exitcode = SVM_VMEXIT_CPUID;
+
+		switch (req >> 30) {
+		case 0:		/* eax: emulate cpuid and return eax */
+			vmcb->v_rax = vmcb->v_ghcb_gpa >> 32;
+			vcpu->vc_gueststate.vg_rax = 0;
+			vcpu->vc_gueststate.vg_rbx = 0;
+			vcpu->vc_gueststate.vg_rcx = 0;
+			vcpu->vc_gueststate.vg_rdx = 0;
+			error = vmm_handle_cpuid(vcpu);
+			if (error)
+				goto out;
+			result = vmcb->v_rax;
+			break;
+		case 1:		/* return ebx */
+			result = vcpu->vc_gueststate.vg_rbx;
+			break;
+		case 2:		/* return ecx */
+			result = vcpu->vc_gueststate.vg_rcx;
+			break;
+		case 3:		/* return edx */
+			result = vcpu->vc_gueststate.vg_rdx;
+			break;
+		default:
+			DPRINTF("%s: unknown request 0x%x\n", __func__, req);
+			return (EINVAL);
+		}
+
+		/* build response */
+		resp = 0x5 | (req & 0xc0000000);
+		vmcb->v_ghcb_gpa = (result << 32) | resp;
+
+		return (0);
+	}
+
+	/* Verify GHCB and synchronize guest state information. */
+	ghcb = (struct ghcb_sa *)vcpu->vc_svm_ghcb_va;
+	if (svm_gexit_sync_host(vcpu)) {
+		error = EINVAL;
+		goto out;
+	}
+
+	/* Handle GHCB protocol */
+	syncout = 0;
+	switch (vmcb->v_exitcode) {
+	case SVM_VMEXIT_CPUID:
+		error = vmm_handle_cpuid(vcpu);
+		vmcb->v_rip = vcpu->vc_gueststate.vg_rip;
+		vcpu->vc_gueststate.vg_rax = vmcb->v_rax;
+		syncout = 1;
+		break;
+	case SVM_VMEXIT_IOIO:
+		if (svm_handle_inout(vcpu) == 0)
+			error = EAGAIN;
+		break;
+	case SVM_VMEXIT_MSR:
+		error = svm_handle_msr(vcpu);
+		vmcb->v_rip = vcpu->vc_gueststate.vg_rip;
+		syncout = 1;
+		break;
+	default:
+		DPRINTF("%s: unknown exit 0x%llx\n", __func__,
+		    vmcb->v_exitcode);
+		error = EINVAL;
+	}
+
+	if (syncout)
+		error = svm_gexit_sync_guest(vcpu);
+
+out:
+	return (error);
+}
+
+/*
+ * svm_handle_efercr
+ *
+ * With SEV-ES the hypervisor cannot intercept and modify writes
+ * to CR and EFER. However, a post write intercept notifies us about
+ * the new state of these registers.
+ */
+int
+svm_handle_efercr(struct vcpu *vcpu, uint64_t exit_reason)
+{
+	struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va;
+
+	switch (exit_reason) {
+	case SVM_VMEXIT_EFER_WRITE_TRAP:
+		vmcb->v_efer = vmcb->v_exitinfo1;
+		break;
+	case SVM_VMEXIT_CR0_WRITE_TRAP:
+		vmcb->v_cr0 = vmcb->v_exitinfo1;
+		break;
+	case SVM_VMEXIT_CR4_WRITE_TRAP:
+		vmcb->v_cr4 = vmcb->v_exitinfo1;
+		break;
+	case SVM_VMEXIT_CR8_WRITE_TRAP:
+		/* XXX hshoexer: no state for CR8? */
+		break;
+	default:
+		return (EINVAL);
+	}
+
+	return (0);
+}
+
+/*
+ * svm_get_iflag
+ *
+ * With SEV-ES the hypervisor has no access to the guest's flags
+ * register. Only the state of PSL_I is provided by v_intr_shadow in
+ * the VMCB.
+ */
+int
+svm_get_iflag(struct vcpu *vcpu, uint64_t rflags)
+{
+	struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va;
+
+	if (vcpu->vc_seves)
+		return (vmcb->v_intr_shadow & SMV_GUEST_INTR_MASK);
+	return (rflags & PSL_I);
+}
+
+/*
  * vmx_handle_exit
  *
  * Handle exits from the VM by decoding the exit reason and calling various
@@ -6104,6 +6572,8 @@ vcpu_run_svm(struct vcpu *vcpu, struct v
 		vcpu->vc_gueststate.vg_rip =
 		    vcpu->vc_exit.vrs.vrs_gprs[VCPU_REGS_RIP];
 		vmcb->v_rip = vcpu->vc_gueststate.vg_rip;
+		if (svm_gexit_sync_guest(vcpu))
+			return (EINVAL);
 		break;
 	case SVM_VMEXIT_NPF:
 		ret = vcpu_writeregs_svm(vcpu, VM_RWREGS_GPRS,
@@ -6235,8 +6705,13 @@ vcpu_run_svm(struct vcpu *vcpu, struct v
 		KASSERT(vmcb->v_intercept1 & SVM_INTERCEPT_INTR);
 		wrmsr(MSR_AMD_VM_HSAVE_PA, vcpu->vc_svm_hsa_pa);
 
-		ret = svm_enter_guest(vcpu->vc_control_pa,
-		    &vcpu->vc_gueststate, &gdt);
+		if (vcpu->vc_seves) {
+			ret = svm_seves_enter_guest(vcpu->vc_control_pa,
+			    vcpu->vc_svm_hsa_va + SVM_HSA_OFFSET, &gdt);
+		} else {
+			ret = svm_enter_guest(vcpu->vc_control_pa,
+			    &vcpu->vc_gueststate, &gdt);
+		}
 
 		/* Restore host PKRU state. */
 		if (vmm_softc->sc_md.pkru_enabled) {
@@ -6275,7 +6750,7 @@ vcpu_run_svm(struct vcpu *vcpu, struct v
 		 */
 		ret = svm_handle_exit(vcpu);
 
-		if (vcpu->vc_gueststate.vg_rflags & PSL_I)
+		if (svm_get_iflag(vcpu, vcpu->vc_gueststate.vg_rflags))
 			vcpu->vc_irqready = 1;
 		else
 			vcpu->vc_irqready = 0;
@@ -6325,27 +6800,32 @@ vcpu_run_svm(struct vcpu *vcpu, struct v
 }
 
 /*
- * vmm_alloc_vpid
+ * _vmm_alloc_vpid
  *
  * Sets the memory location pointed to by "vpid" to the next available VPID
- * or ASID.
+ * or ASID. For SEV-ES, consider the minimum ASID value for non-ES guests.
  *
  * Parameters:
  *  vpid: Pointer to location to receive the next VPID/ASID
+ *  vcpu: Pointer to VCPU data structure
 *
 * Return Values:
 *  0: The operation completed successfully
 *  ENOMEM: No VPIDs/ASIDs were available. Content of 'vpid' is unchanged.
 */
 int
-vmm_alloc_vpid(uint16_t *vpid)
+_vmm_alloc_vpid(uint16_t *vpid, struct vcpu *vcpu)
 {
-	uint16_t i;
+	uint16_t i, minasid;
 	uint8_t idx, bit;
 	struct vmm_softc *sc = vmm_softc;
 
 	rw_enter_write(&vmm_softc->vpid_lock);
-	for (i = 1; i <= sc->max_vpid; i++) {
+	if (vcpu == NULL || vcpu->vc_seves)
+		minasid = 1;
+	else
+		minasid = amd64_min_noes_asid;
+	for (i = minasid; i <= sc->max_vpid; i++) {
 		idx = i / 8;
 		bit = i - (idx * 8);
 
@@ -6367,6 +6847,18 @@ vmm_alloc_vpid(uint16_t *vpid)
 	return ENOMEM;
 }
 
+int
+vmm_alloc_vpid(uint16_t *vpid)
+{
+	return _vmm_alloc_vpid(vpid, NULL);
+}
+
+int
+vmm_alloc_asid(uint16_t *asid, struct vcpu *vcpu)
+{
+	return _vmm_alloc_vpid(asid, vcpu);
+}
+
 /*
 * vmm_free_vpid
 *
@@ -6777,6 +7269,36 @@ vcpu_state_decode(u_int state)
 	case VCPU_STATE_UNKNOWN: return "unknown";
 	default: return "invalid";
 	}
+}
+
+/*
+ * svm_get_vmsa
+ *
+ * Return physical address of VMSA for specified VCPU.
+ */ +int +svm_get_vmsa(uint32_t vmid, uint32_t vcpuid, uint64_t *vmsapa) +{ + struct vm *vm; + struct vcpu *vcpu; + int error, ret = 0; + + error = vm_find(vmid, &vm); + if (error) + return (error); + + vcpu = vm_find_vcpu(vm, vcpuid); + if (vcpu == NULL) { + ret = ENOENT; + goto out; + } + + if (vmsapa) + *vmsapa = vcpu->vc_svm_vmsa_pa; + +out: + refcnt_rele_wake(&vm->vm_refcnt); + return (ret); } #ifdef VMM_DEBUG Index: sys/arch/amd64/amd64/vmm_support.S =================================================================== RCS file: /mount/openbsd/cvs/src/sys/arch/amd64/amd64/vmm_support.S,v diff -u -p -u -p -r1.28 vmm_support.S --- sys/arch/amd64/amd64/vmm_support.S 9 Jul 2024 11:15:58 -0000 1.28 +++ sys/arch/amd64/amd64/vmm_support.S 24 Feb 2025 22:58:05 -0000 @@ -42,6 +42,7 @@ .global vmx_enter_guest .global vmm_dispatch_intr .global svm_enter_guest + .global svm_seves_enter_guest .text .code64 @@ -662,3 +663,163 @@ restore_host_svm: ret lfence END(svm_enter_guest) + +/* + * When using SEV-ES we have to save some of the host registers to + * the host state save area (HSA). According to the AMD Programmer's + * Manual Volume 2 Appendix B the HSA has the same layout as the guest + * save area (VMSA) except that it starts at offset 0x400 in the HSA + * page. + */ +ENTRY(svm_seves_enter_guest) + RETGUARD_SETUP(svm_seves_enter_guest, r11) + clgi + movq %rdi, %r8 + pushfq + + pushq %rdx /* gdt pointer */ + + /* + * Save (possibly) lazy-switched selectors + */ + strw %ax + pushw %ax + movw %es, %ax + pushw %ax + movw %ds, %ax + pushw %ax + movw %ss, %ax + pushw %ax + + movq $MSR_FSBASE, %rcx + rdmsr + pushq %rax + pushq %rdx + pushw %fs + movq $MSR_GSBASE, %rcx + rdmsr + pushq %rax + pushq %rdx + pushw %gs + movq $MSR_KERNELGSBASE, %rcx + rdmsr + pushq %rax + pushq %rdx + + /* + * Save various MSRs + */ + movq $MSR_STAR, %rcx + rdmsr + pushq %rax + pushq %rdx + + movq $MSR_LSTAR, %rcx + rdmsr + pushq %rax + pushq %rdx + + movq $MSR_SFMASK, %rcx + rdmsr + pushq %rax + pushq %rdx + + RETGUARD_PUSH(r11) + + /* + * Preserve callee-preserved registers as per AMD64 ABI in + * HSA. Although all registers will be restored from HSA + * on vmexit, these will not be saved on vmrun. + */ + movq %r15, 0x378(%rsi) + movq %r14, 0x370(%rsi) + movq %r13, 0x368(%rsi) + movq %r12, 0x360(%rsi) + movq %rbp, 0x328(%rsi) + movq %rbx, 0x318(%rsi) + + movq %r8, %rax /* rax = vmcb pa */ + + vmrun %rax + + /* %rdi = 0 means we took an exit */ + xorq %rdi, %rdi + + RETGUARD_POP(r11) + + /* + * Restore saved MSRs + */ + popq %rdx + popq %rax + movq $MSR_SFMASK, %rcx + wrmsr + + /* make sure guest doesn't bleed into host */ + xorl %edx, %edx + xorl %eax, %eax + movq $MSR_CSTAR, %rcx + wrmsr + + popq %rdx + popq %rax + movq $MSR_LSTAR, %rcx + wrmsr + + popq %rdx + popq %rax + movq $MSR_STAR, %rcx + wrmsr + + /* + * popw %gs will reset gsbase to 0, so preserve it + * first. 
This is to accommodate possibly lazy-switched + * selectors from above + */ + cli + popq %rdx + popq %rax + movq $MSR_KERNELGSBASE, %rcx + wrmsr + + popw %gs + popq %rdx + popq %rax + movq $MSR_GSBASE, %rcx + wrmsr + + popw %fs + popq %rdx + popq %rax + movq $MSR_FSBASE, %rcx + wrmsr + + popw %ax + movw %ax, %ss + popw %ax + movw %ax, %ds + popw %ax + movw %ax, %es + + xorq %rax, %rax + lldtw %ax /* Host LDT is always 0 */ + + popw %ax /* ax = saved TR */ + + popq %rdx + addq $0x2, %rdx + movq (%rdx), %rdx + + /* rdx = GDTR base addr */ + andb $0xF9, 5(%rdx, %rax) + + ltrw %ax + + popfq + + movq %rdi, %rax + + RETGUARD_CHECK(svm_seves_enter_guest, r11) + ret + lfence +END(svm_seves_enter_guest) Index: sys/arch/amd64/conf/files.amd64 =================================================================== RCS file: /mount/openbsd/cvs/src/sys/arch/amd64/conf/files.amd64,v diff -u -p -u -p -r1.112 files.amd64 --- sys/arch/amd64/conf/files.amd64 4 Nov 2024 22:08:03 -0000 1.112 +++ sys/arch/amd64/conf/files.amd64 24 Feb 2025 22:58:05 -0000 @@ -29,6 +29,7 @@ file arch/amd64/amd64/fpu.c file arch/amd64/amd64/softintr.c file arch/amd64/amd64/i8259.c file arch/amd64/amd64/cacheinfo.c +file arch/amd64/amd64/ghcb.c file arch/amd64/amd64/vector.S file arch/amd64/amd64/copy.S file arch/amd64/amd64/spl.S Index: sys/arch/amd64/include/cpu.h =================================================================== RCS file: /mount/openbsd/cvs/src/sys/arch/amd64/include/cpu.h,v diff -u -p -u -p -r1.179 cpu.h --- sys/arch/amd64/include/cpu.h 22 Oct 2024 10:14:49 -0000 1.179 +++ sys/arch/amd64/include/cpu.h 24 Feb 2025 22:58:05 -0000 @@ -501,7 +501,8 @@ void mp_setperf_init(void); #define CPU_INVARIANTTSC 17 /* has invariant TSC */ #define CPU_PWRACTION 18 /* action caused by power button */ #define CPU_RETPOLINE 19 /* cpu requires retpoline pattern */ -#define CPU_MAXID 20 /* number of valid machdep ids */ +#define CPU_SEVGUESTMODE 20 /* running as SEV guest */ +#define CPU_MAXID 21 /* number of valid machdep ids */ #define CTL_MACHDEP_NAMES { \ { 0, 0 }, \ @@ -524,6 +525,7 @@ void mp_setperf_init(void); { "invarianttsc", CTLTYPE_INT }, \ { "pwraction", CTLTYPE_INT }, \ { "retpoline", CTLTYPE_INT }, \ + { "sevguestmode", CTLTYPE_INT}, \ } #endif /* !_MACHINE_CPU_H_ */ Index: sys/arch/amd64/include/cpufunc.h =================================================================== RCS file: /mount/openbsd/cvs/src/sys/arch/amd64/include/cpufunc.h,v diff -u -p -u -p -r1.43 cpufunc.h --- sys/arch/amd64/include/cpufunc.h 8 Nov 2024 12:08:22 -0000 1.43 +++ sys/arch/amd64/include/cpufunc.h 24 Feb 2025 22:58:05 -0000 @@ -420,6 +420,27 @@ breakpoint(void) __asm volatile("int $3"); } +/* VMGEXIT */ +static __inline void +vmgexit(void) +{ + __asm volatile("rep; vmmcall"); +} + +/* Request VM termination from hypervisor. 
+ */
+static __inline void
+vmterminate(void)
+{
+	/*
+	 * MSR protocol termination request: request code 0x100
+	 * (MSRPROTO_TERM_REQ, as in locore0.S) in %eax, %edx zeroed,
+	 * written to MSR_SEV_GHCB, followed by VMGEXIT.
+	 */
+	__asm volatile(
+	    "	wrmsr		;"
+	    "	rep vmmcall	;"
+	    "1:	hlt		;"
+	    "	jmp 1b		;"
+	    : /* no outputs */
+	    : "c" (MSR_SEV_GHCB), "a" (0x100), "d" (0)
+	    : "memory");
+}
+
 void	amd64_errata(struct cpu_info *);
 void	cpu_ucode_setup(void);
 void	cpu_ucode_apply(struct cpu_info *);
Index: sys/arch/amd64/include/specialreg.h
===================================================================
RCS file: /mount/openbsd/cvs/src/sys/arch/amd64/include/specialreg.h,v
diff -u -p -u -p -r1.116 specialreg.h
--- sys/arch/amd64/include/specialreg.h	4 Aug 2024 11:05:18 -0000	1.116
+++ sys/arch/amd64/include/specialreg.h	24 Feb 2025 22:58:05 -0000
@@ -723,8 +723,13 @@
 #define NB_CFG_DISIOREQLOCK	0x0000000000000004ULL
 #define NB_CFG_DISDATMSK	0x0000001000000000ULL
 
+#define MSR_SEV_GHCB		0xc0010130
+#define SEV_CPUID_REQ		0x00000004
+#define SEV_CPUID_RESP		0x00000005
+
 #define MSR_SEV_STATUS		0xc0010131
 #define SEV_STAT_ENABLED	0x00000001
+#define SEV_STAT_ES_ENABLED	0x00000002
 
 #define MSR_LS_CFG		0xc0011020
 #define LS_CFG_DIS_LS2_SQUISH	0x02000000
Index: sys/arch/amd64/include/trap.h
===================================================================
RCS file: /mount/openbsd/cvs/src/sys/arch/amd64/include/trap.h,v
diff -u -p -u -p -r1.5 trap.h
--- sys/arch/amd64/include/trap.h	15 Apr 2023 01:22:50 -0000	1.5
+++ sys/arch/amd64/include/trap.h	24 Feb 2025 22:58:05 -0000
@@ -62,3 +62,4 @@
 #define	T_XMM		19	/* SSE FP exception */
 #define	T_VE		20	/* virtualization exception */
 #define	T_CP		21	/* control protection exception */
+#define	T_VC		29	/* VMM communication exception */
Index: sys/arch/amd64/include/vmmvar.h
===================================================================
RCS file: /mount/openbsd/cvs/src/sys/arch/amd64/include/vmmvar.h,v
diff -u -p -u -p -r1.109 vmmvar.h
--- sys/arch/amd64/include/vmmvar.h	22 Oct 2024 21:50:02 -0000	1.109
+++ sys/arch/amd64/include/vmmvar.h	24 Feb 2025 22:58:05 -0000
@@ -625,6 +625,11 @@ struct vmcb_segment {
 #define SVM_ENABLE_NP		(1ULL << 0)
 #define SVM_ENABLE_SEV		(1ULL << 1)
+#define SVM_SEVES_ENABLE	(1ULL << 2)
+
+#define SMV_GUEST_INTR_MASK	(1ULL << 1)
+
+#define SVM_LBRVIRT_ENABLE	(1ULL << 0)
 
 struct vmcb {
 	union {
@@ -656,7 +661,7 @@ struct vmcb {
 	uint64_t	v_exitintinfo;		/* 088h */
 	uint64_t	v_np_enable;		/* 090h */
 	uint64_t	v_avic_apic_bar;	/* 098h */
-	uint64_t	v_pad4;			/* 0A0h */
+	uint64_t	v_ghcb_gpa;		/* 0A0h */
 	uint64_t	v_eventinj;		/* 0A8h */
 	uint64_t	v_n_cr3;		/* 0B0h */
 	uint64_t	v_lbr_virt_enable;	/* 0B8h */
@@ -668,6 +673,8 @@ struct vmcb {
 	uint64_t	v_pad5;			/* 0E8h-0EFh */
 	uint64_t	v_avic_logical_table;	/* 0F0h */
 	uint64_t	v_avic_phys;		/* 0F8h */
+	uint64_t	v_pad12;		/* 100h */
+	uint64_t	v_vmsa_pa;		/* 108h */
 	};
 
 	uint8_t vmcb_control[0x400];
@@ -723,6 +730,135 @@ struct vmcb {
 	};
 };
 
+struct vmsa {
+	struct vmcb_segment	v_es;		/* 000h */
+	struct vmcb_segment	v_cs;		/* 010h */
+	struct vmcb_segment	v_ss;		/* 020h */
+	struct vmcb_segment	v_ds;		/* 030h */
+	struct vmcb_segment	v_fs;		/* 040h */
+	struct vmcb_segment	v_gs;		/* 050h */
+	struct vmcb_segment	v_gdtr;		/* 060h */
+	struct vmcb_segment	v_ldtr;		/* 070h */
+	struct vmcb_segment	v_idtr;		/* 080h */
+	struct vmcb_segment	v_tr;		/* 090h */
+	uint64_t	v_pl0_ssp;	/* 0A0h */
+	uint64_t	v_pl1_ssp;	/* 0A8h */
+	uint64_t	v_pl2_ssp;	/* 0B0h */
+	uint64_t	v_pl3_ssp;	/* 0B8h */
+	uint64_t	v_u_cet;	/* 0C0h */
+	uint8_t		v_pad1[0x2];	/* 0C8h-0C9h */
+	uint8_t		v_vmpl;		/* 0CAh */
+	uint8_t		v_cpl;		/* 0CBh */
+	uint8_t		v_pad2[0x4];	/* 0CCh-0CFh */
+	uint64_t	v_efer;		/* 0D0h */
+	uint8_t		v_pad3[0x68];	/* 0D8h-13Fh */
+	uint64_t	v_xss;		/* 140h */
+	uint64_t	v_cr4;		/* 148h */
+	uint64_t	v_cr3;		/* 150h */
+	uint64_t	v_cr0;		/* 158h */
+	uint64_t	v_dr7;		/* 160h */
+	uint64_t	v_dr6;		/* 168h */
+	uint64_t	v_rflags;	/* 170h */
+	uint64_t	v_rip;		/* 178h */
+	uint64_t	v_dr0;		/* 180h */
+	uint64_t	v_dr1;		/* 188h */
+	uint64_t	v_dr2;		/* 190h */
+	uint64_t	v_dr3;		/* 198h */
+	uint64_t	v_dr0_addr_msk;	/* 1A0h */
+	uint64_t	v_dr1_addr_msk;	/* 1A8h */
+	uint64_t	v_dr2_addr_msk;	/* 1B0h */
+	uint64_t	v_dr3_addr_msk;	/* 1B8h */
+	uint8_t		v_pad4[0x18];	/* 1C0h-1D7h */
+	uint64_t	v_rsp;		/* 1D8h */
+	uint64_t	v_s_cet;	/* 1E0h */
+	uint64_t	v_ssp;		/* 1E8h */
+	uint64_t	v_isst_addr;	/* 1F0h */
+	uint64_t	v_rax;		/* 1F8h */
+	uint64_t	v_star;		/* 200h */
+	uint64_t	v_lstar;	/* 208h */
+	uint64_t	v_cstar;	/* 210h */
+	uint64_t	v_sfmask;	/* 218h */
+	uint64_t	v_kgsbase;	/* 220h */
+	uint64_t	v_sysenter_cs;	/* 228h */
+	uint64_t	v_sysenter_esp;	/* 230h */
+	uint64_t	v_sysenter_eip;	/* 238h */
+	uint64_t	v_cr2;		/* 240h */
+	uint8_t		v_pad5[0x20];	/* 248h-267h */
+	uint64_t	v_g_pat;	/* 268h */
+	uint64_t	v_dbgctl;	/* 270h */
+	uint64_t	v_br_from;	/* 278h */
+	uint64_t	v_br_to;	/* 280h */
+	uint64_t	v_lastexcpfrom;	/* 288h */
+	uint64_t	v_lastexcpto;	/* 290h */
+	uint8_t		v_pad6[0x48];	/* 298h-2DFh */
+	uint8_t		v_pad7[0x8];	/* 2E0h-2E7h */
+	uint32_t	v_pkru;		/* 2E8h */
+	uint32_t	v_tsc_aux;	/* 2ECh */
+	uint64_t	v_gst_tsc_scale;/* 2F0h */
+	uint64_t	v_gst_tsc_off;	/* 2F8h */
+	uint64_t	v_reg_prot_nce;	/* 300h */
+	uint64_t	v_rcx;		/* 308h */
+	uint64_t	v_rdx;		/* 310h */
+	uint64_t	v_rbx;		/* 318h */
+	uint64_t	v_pad8;		/* 320h */
+	uint64_t	v_rbp;		/* 328h */
+	uint64_t	v_rsi;		/* 330h */
+	uint64_t	v_rdi;		/* 338h */
+	uint64_t	v_r8;		/* 340h */
+	uint64_t	v_r9;		/* 348h */
+	uint64_t	v_r10;		/* 350h */
+	uint64_t	v_r11;		/* 358h */
+	uint64_t	v_r12;		/* 360h */
+	uint64_t	v_r13;		/* 368h */
+	uint64_t	v_r14;		/* 370h */
+	uint64_t	v_r15;		/* 378h */
+	uint8_t		v_pad9[0x10];	/* 380h-38Fh */
+	uint64_t	v_gst_exitinfo1;/* 390h */
+	uint64_t	v_gst_exitinfo2;/* 398h */
+	uint64_t	v_gst_exitiinfo;/* 3A0h */
+	uint64_t	v_gst_nrip;	/* 3A8h */
+	uint64_t	v_sev_features;	/* 3B0h */
+	uint64_t	v_intr_ctrl;	/* 3B8h */
+	uint64_t	v_gst_exitcode;	/* 3C0h */
+	uint64_t	v_virtual_tom;	/* 3C8h */
+	uint64_t	v_tlb_id;	/* 3D0h */
+	uint64_t	v_pcup_id;	/* 3D8h */
+	uint64_t	v_eventinj;	/* 3E0h */
+	uint64_t	v_xcr0;		/* 3E8h */
+	uint8_t		v_pad10[0x10];	/* 3F0h-3FFh */
+	uint64_t	v_x87_dp;	/* 400h */
+	uint32_t	v_mxcsr;	/* 408h */
+	uint16_t	v_x87_ftw;	/* 40Ch */
+	uint16_t	v_x87_fsw;	/* 40Eh */
+	uint16_t	v_x87_fcw;	/* 410h */
+	uint16_t	v_x87_fop;	/* 412h */
+	uint16_t	v_x87_ds;	/* 414h */
+	uint16_t	v_x87_cs;	/* 416h */
+	uint64_t	v_x87_rip;	/* 418h */
+	uint8_t		v_fp_x87[0x50];	/* 420h-46Fh */
+	uint8_t		v_fp_xmm[0x100];/* 470h-56Fh */
+	uint8_t		v_fp_ymm[0x100];/* 570h-66fh */
+	uint8_t		v_lbr_st[0x100];/* 670h-76Fh */
+	uint64_t	v_lbr_select;	/* 770h */
+	uint64_t	v_ibs_fetch_ctl;/* 778h */
+	uint64_t	v_ibs_fetch_la;	/* 780h */
+	uint64_t	v_ibs_op_ctl;	/* 788h */
+	uint64_t	v_ibs_op_rip;	/* 790h */
+	uint64_t	v_ibs_op_data;	/* 798h */
+	uint64_t	v_ibs_op_data2;	/* 7A0h */
+	uint64_t	v_ibs_op_data3;	/* 7A8h */
+	uint64_t	v_ibs_dc_la;	/* 7B0h */
+	uint64_t	v_ibstgt_rip;	/* 7B8h */
+	uint64_t	v_ic_ibs_xtd_ct;/* 7C0h */
+};
+
+/*
+ * With SEV-ES the host save area (HSA) has the same layout as the
+ * VMSA. However, it starts at offset 0x400 within the HSA page.
+ * See AMD APM Vol 2, Appendix B.
+ */ +#define SVM_HSA_OFFSET 0x400 + struct vmcs { uint32_t vmcs_revision; }; @@ -876,9 +1012,14 @@ struct vcpu { /* SVM only (all requiring [v]) */ vaddr_t vc_svm_hsa_va; paddr_t vc_svm_hsa_pa; + vaddr_t vc_svm_vmsa_va; + paddr_t vc_svm_vmsa_pa; + vaddr_t vc_svm_ghcb_va; + paddr_t vc_svm_ghcb_pa; vaddr_t vc_svm_ioio_va; paddr_t vc_svm_ioio_pa; int vc_sev; /* [I] */ + int vc_seves; /* [I] */ }; SLIST_HEAD(vcpu_head, vcpu); @@ -896,6 +1037,7 @@ int invept(uint64_t, struct vmx_invept_d int vmx_enter_guest(paddr_t *, struct vcpu_gueststate *, int, uint8_t); int svm_enter_guest(uint64_t, struct vcpu_gueststate *, struct region_descriptor *); +int svm_seves_enter_guest(uint64_t, vaddr_t, struct region_descriptor *); void start_vmm_on_cpu(struct cpu_info *); void stop_vmm_on_cpu(struct cpu_info *); void vmclear_on_cpu(struct cpu_info *); @@ -911,6 +1053,7 @@ int vcpu_init(struct vcpu *, struct vm_c void vcpu_deinit(struct vcpu *); int vm_rwregs(struct vm_rwregs_params *, int); int vcpu_reset_regs(struct vcpu *, struct vcpu_reg_state *); +int svm_get_vmsa(uint32_t, uint32_t, uint64_t *); #endif /* _KERNEL */ Index: sys/dev/ic/psp.c =================================================================== RCS file: /mount/openbsd/cvs/src/sys/dev/ic/psp.c,v diff -u -p -u -p -r1.15 psp.c --- sys/dev/ic/psp.c 20 Nov 2024 13:36:55 -0000 1.15 +++ sys/dev/ic/psp.c 24 Feb 2025 22:58:05 -0000 @@ -29,6 +29,7 @@ #include #include +#include #include #include @@ -538,6 +539,32 @@ out: } int +psp_launch_update_vmsa(struct psp_softc *sc, + struct psp_launch_update_vmsa *uluv) +{ + struct psp_launch_update_vmsa *luvmsa; + int error; + + luvmsa = (struct psp_launch_update_vmsa *)sc->sc_cmd_kva; + bzero(luvmsa, sizeof(*luvmsa)); + + luvmsa->handle = uluv->handle; + luvmsa->paddr = uluv->paddr; + luvmsa->length = PAGE_SIZE; + + /* Drain caches before we encrypt the VMSA. 
*/ + wbinvd_on_all_cpus_acked(); + + error = ccp_docmd(sc, PSP_CMD_LAUNCH_UPDATE_VMSA, + sc->sc_cmd_map->dm_segs[0].ds_addr); + + if (error != 0) + return (EIO); + + return (0); +} + +int psp_launch_measure(struct psp_softc *sc, struct psp_launch_measure *ulm) { struct psp_launch_measure *lm; @@ -634,6 +661,26 @@ psp_activate(struct psp_softc *sc, struc } int +psp_encrypt_state(struct psp_softc *sc, struct psp_encrypt_state *ues) +{ + struct psp_launch_update_vmsa luvmsa; + uint64_t vmsa_paddr; + int error; + + error = svm_get_vmsa(ues->vmid, ues->vcpuid, &vmsa_paddr); + if (error != 0) + return (error); + + bzero(&luvmsa, sizeof(luvmsa)); + luvmsa.handle = ues->handle; + luvmsa.paddr = vmsa_paddr; + + error = psp_launch_update_vmsa(sc, &luvmsa); + + return (error); +} + +int psp_deactivate(struct psp_softc *sc, struct psp_deactivate *udeact) { struct psp_deactivate *deact; @@ -814,6 +861,10 @@ pspioctl(dev_t dev, u_long cmd, caddr_t error = psp_launch_update_data(sc, (struct psp_launch_update_data *)data, p); break; + case PSP_IOC_LAUNCH_UPDATE_VMSA: + error = psp_launch_update_vmsa(sc, + (struct psp_launch_update_vmsa *)data); + break; case PSP_IOC_LAUNCH_MEASURE: error = psp_launch_measure(sc, (struct psp_launch_measure *)data); @@ -838,6 +889,9 @@ pspioctl(dev_t dev, u_long cmd, caddr_t error = psp_snp_get_pstatus(sc, (struct psp_snp_platform_status *)data); break; + case PSP_IOC_ENCRYPT_STATE: + error = psp_encrypt_state(sc, (struct psp_encrypt_state *)data); + break; default: error = ENOTTY; break; @@ -862,6 +916,7 @@ pledge_ioctl_psp(struct proc *p, long co case PSP_IOC_LAUNCH_MEASURE: case PSP_IOC_LAUNCH_FINISH: case PSP_IOC_ACTIVATE: + case PSP_IOC_ENCRYPT_STATE: case PSP_IOC_GUEST_SHUTDOWN: return (0); default: Index: sys/dev/ic/pspvar.h =================================================================== RCS file: /mount/openbsd/cvs/src/sys/dev/ic/pspvar.h,v diff -u -p -u -p -r1.6 pspvar.h --- sys/dev/ic/pspvar.h 5 Nov 2024 13:28:35 -0000 1.6 +++ sys/dev/ic/pspvar.h 24 Feb 2025 22:58:05 -0000 @@ -86,6 +86,7 @@ #define PSP_CMD_GUESTSTATUS 0x23 #define PSP_CMD_LAUNCH_START 0x30 #define PSP_CMD_LAUNCH_UPDATE_DATA 0x31 +#define PSP_CMD_LAUNCH_UPDATE_VMSA 0x32 #define PSP_CMD_LAUNCH_MEASURE 0x33 #define PSP_CMD_LAUNCH_FINISH 0x35 #define PSP_CMD_ATTESTATION 0x36 @@ -133,6 +134,24 @@ struct psp_launch_update_data { uint32_t length; } __packed; +struct psp_launch_update_vmsa { + /* Input parameters for PSP_CMD_LAUNCH_UPDATE_VMSA */ + uint32_t handle; + uint32_t reserved; + uint64_t paddr; + uint32_t length; +} __packed; + +struct psp_encrypt_state { + /* + * Input parameters state encryption + */ + uint32_t handle; + uint32_t asid; + uint32_t vmid; + uint32_t vcpuid; +} __packed; + struct psp_measure { /* Output buffer for PSP_CMD_LAUNCH_MEASURE */ uint8_t measure[32]; @@ -258,6 +277,9 @@ struct psp_snp_platform_status { #define PSP_IOC_SNP_GET_PSTATUS _IOR('P', 11, struct psp_snp_platform_status) #define PSP_IOC_INIT _IO('P', 12) #define PSP_IOC_SHUTDOWN _IO('P', 13) +#define PSP_IOC_LAUNCH_UPDATE_VMSA \ + _IOW('P', 14, struct psp_launch_update_vmsa) +#define PSP_IOC_ENCRYPT_STATE _IOW('P', 254, struct psp_encrypt_state) #define PSP_IOC_GUEST_SHUTDOWN _IOW('P', 255, struct psp_guest_shutdown) #ifdef _KERNEL Index: sys/dev/vmm/vmm.c =================================================================== RCS file: /mount/openbsd/cvs/src/sys/dev/vmm/vmm.c,v diff -u -p -u -p -r1.4 vmm.c --- sys/dev/vmm/vmm.c 10 Feb 2025 16:45:46 -0000 1.4 +++ sys/dev/vmm/vmm.c 24 Feb 2025 22:58:05 -0000 @@ 
-401,6 +401,8 @@ vm_create(struct vm_create_params *vcp, vcpu->vc_parent = vm; vcpu->vc_id = vm->vm_vcpu_ct; vm->vm_vcpu_ct++; + vcpu->vc_sev = vcp->vcp_sev; + vcpu->vc_seves = vcp->vcp_seves; if ((ret = vcpu_init(vcpu, vcp)) != 0) { printf("failed to init vcpu %d for vm %p\n", i, vm); vm_teardown(&vm); Index: sys/dev/vmm/vmm.h =================================================================== RCS file: /mount/openbsd/cvs/src/sys/dev/vmm/vmm.h,v diff -u -p -u -p -r1.7 vmm.h --- sys/dev/vmm/vmm.h 27 Aug 2024 09:16:03 -0000 1.7 +++ sys/dev/vmm/vmm.h 24 Feb 2025 22:58:05 -0000 @@ -50,6 +50,7 @@ struct vm_create_params { struct vm_mem_range vcp_memranges[VMM_MAX_MEM_RANGES]; char vcp_name[VMM_MAX_NAME_LEN]; int vcp_sev; + int vcp_seves; /* Output parameter from VMM_IOC_CREATE */ uint32_t vcp_id; Index: usr.sbin/vmd/loadfile_elf.c =================================================================== RCS file: /mount/openbsd/cvs/src/usr.sbin/vmd/loadfile_elf.c,v diff -u -p -u -p -r1.50 loadfile_elf.c --- usr.sbin/vmd/loadfile_elf.c 26 Sep 2024 01:45:13 -0000 1.50 +++ usr.sbin/vmd/loadfile_elf.c 24 Feb 2025 22:58:05 -0000 @@ -110,7 +110,7 @@ union { } hdr; static void setsegment(struct mem_segment_descriptor *, uint32_t, - size_t, int, int, int, int); + size_t, int, int, int, int, int); static int elf32_exec(gzFile, Elf32_Ehdr *, u_long *, int); static int elf64_exec(gzFile, Elf64_Ehdr *, u_long *, int); static size_t create_bios_memmap(struct vm_create_params *, bios_memmap_t *); @@ -148,7 +148,7 @@ uint64_t pg_crypt = 0; */ static void setsegment(struct mem_segment_descriptor *sd, uint32_t base, size_t limit, - int type, int dpl, int def32, int gran) + int type, int dpl, int def32, int gran, int lm) { sd->sd_lolimit = (int)limit; sd->sd_lobase = (int)base; @@ -157,7 +157,7 @@ setsegment(struct mem_segment_descriptor sd->sd_p = 1; sd->sd_hilimit = (int)limit >> 16; sd->sd_avl = 0; - sd->sd_long = 0; + sd->sd_long = lm; sd->sd_def32 = def32; sd->sd_gran = gran; sd->sd_hibase = (int)base >> 24; @@ -185,11 +185,13 @@ push_gdt(void) * Create three segment descriptors: * * GDT[0] : null descriptor. "Created" via memset above. 
Index: usr.sbin/vmd/parse.y
===================================================================
RCS file: /mount/openbsd/cvs/src/usr.sbin/vmd/parse.y,v
diff -u -p -u -p -r1.71 parse.y
--- usr.sbin/vmd/parse.y	26 Sep 2024 01:45:13 -0000	1.71
+++ usr.sbin/vmd/parse.y	24 Feb 2025 22:58:05 -0000
@@ -123,7 +123,7 @@ typedef struct {
 %token	FORMAT GROUP
 %token	INET6 INSTANCE INTERFACE LLADDR LOCAL LOCKED MEMORY NET NIFS OWNER
 %token	PATH PREFIX RDOMAIN SIZE SOCKET SWITCH UP VM VMID STAGGERED START
-%token	PARALLEL DELAY SEV
+%token	PARALLEL DELAY SEV SEVES
 %token	<v.number>	NUMBER
 %token	<v.string>	STRING
 %type	<v.lladdr>	lladdr
@@ -138,6 +138,7 @@ typedef struct {
 %type	<v.string>	string
 %type	<v.string>	vm_instance
 %type	<v.number>	sev;
+%type	<v.number>	seves;
 
 %%
 
@@ -415,6 +416,9 @@ vm_opts		: disable {
 		| sev {
 			vcp->vcp_sev = 1;
 		}
+		| seves {
+			vcp->vcp_sev = vcp->vcp_seves = 1;
+		}
 		| DISK string image_format {
 			if (parse_disk($2, $3) != 0) {
 				yyerror("failed to parse disks: %s", $2);
@@ -761,6 +765,9 @@ disable		: ENABLE	{ $$ = 0; }
 sev		: SEV	{ $$ = 1; }
 		;
 
+seves		: SEVES	{ $$ = 1; }
+		;
+
 bootdevice	: CDROM		{ $$ = VMBOOTDEV_CDROM; }
 		| DISK		{ $$ = VMBOOTDEV_DISK; }
 		| NET		{ $$ = VMBOOTDEV_NET; }
@@ -846,6 +853,7 @@ lookup(char *s)
 		{ "prefix",		PREFIX },
 		{ "rdomain",		RDOMAIN },
 		{ "sev",		SEV },
+		{ "seves",		SEVES },
 		{ "size",		SIZE },
 		{ "socket",		SOCKET },
 		{ "staggered",		STAGGERED },
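With the grammar above, SEV-ES becomes a single keyword in vm.conf(5), and
seves implies sev, so a configuration does not need to list both. An
illustrative stanza (the VM name and disk path are made up):

	vm "sevguest" {
		enable
		memory 1G
		disk "/var/vmd/sevguest.qcow2"
		seves
	}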
Index: usr.sbin/vmd/psp.c
===================================================================
RCS file: /mount/openbsd/cvs/src/usr.sbin/vmd/psp.c,v
diff -u -p -u -p -r1.5 psp.c
--- usr.sbin/vmd/psp.c	6 Nov 2024 23:04:45 -0000	1.5
+++ usr.sbin/vmd/psp.c	24 Feb 2025 22:58:05 -0000
@@ -118,7 +118,7 @@ psp_get_gstate(uint32_t handle, uint32_t
  * Start the launch sequence of a guest.
  */
 int
-psp_launch_start(uint32_t *handle)
+psp_launch_start(uint32_t *handle, int seves)
 {
 	struct psp_launch_start ls;
 
@@ -128,6 +128,9 @@ psp_launch_start(uint32_t *handle)
 	ls.policy = (GPOL_NODBG | GPOL_NOKS | GPOL_NOSEND | GPOL_DOMAIN |
 	    GPOL_SEV);
+	if (seves)	/* Add ES */
+		ls.policy |= GPOL_ES;
+
 	if (ioctl(env->vmd_psp_fd, PSP_IOC_LAUNCH_START, &ls) < 0) {
 		log_warn("%s: ioctl", __func__);
 		return (-1);
 	}
@@ -170,6 +173,27 @@ psp_launch_update(uint32_t handle, vaddr
  * the PSP, the measurement is not really meaningful.  Thus we just
  * log it for now.
  */
+int
+psp_encrypt_state(uint32_t handle, uint32_t asid, uint32_t vmid,
+    uint32_t vcpuid)
+{
+	struct psp_encrypt_state es;
+
+	memset(&es, 0, sizeof(es));
+	es.handle = handle;
+	es.asid = asid;
+	es.vmid = vmid;
+	es.vcpuid = vcpuid;
+
+	if (ioctl(env->vmd_psp_fd, PSP_IOC_ENCRYPT_STATE, &es) < 0) {
+		log_warn("%s: ioctl", __func__);
+		return (-1);
+	}
+
+	return (0);
+}
+
+
 int
 psp_launch_measure(uint32_t handle)
 {
Index: usr.sbin/vmd/sev.c
===================================================================
RCS file: /mount/openbsd/cvs/src/usr.sbin/vmd/sev.c,v
diff -u -p -u -p -r1.5 sev.c
--- usr.sbin/vmd/sev.c	6 Nov 2024 22:06:16 -0000	1.5
+++ usr.sbin/vmd/sev.c	24 Feb 2025 22:58:05 -0000
@@ -58,7 +58,7 @@ sev_init(struct vmd_vm *vm)
 		return (-1);
 	}
 
-	if (psp_launch_start(&handle) < 0) {
+	if (psp_launch_start(&handle, vcp->vcp_seves) < 0) {
 		log_warnx("%s: launch failed", __func__);
 		return (-1);
 	}
@@ -148,7 +148,6 @@ sev_encrypt_memory(struct vmd_vm *vm)
 	struct vm_create_params *vcp = &vmc->vmc_params;
 	struct vm_mem_range *vmr;
 	size_t i;
-	uint8_t gstate;
 
 	if (!vcp->vcp_sev)
 		return (0);
@@ -167,23 +166,6 @@ sev_encrypt_memory(struct vmd_vm *vm)
 		log_debug("%s: encrypted %zu:0x%lx size 0x%lx", __func__, i,
 		    vmr->vmr_va, vmr->vmr_size);
 	}
 
-	if (psp_launch_measure(vm->vm_sev_handle)) {
-		log_warnx("%s: failed to launch measure", __func__);
-		return (-1);
-	}
-	if (psp_launch_finish(vm->vm_sev_handle)) {
-		log_warnx("%s: failed to launch finish", __func__);
-		return (-1);
-	}
-
-	if (psp_get_gstate(vm->vm_sev_handle, NULL, NULL, &gstate)) {
-		log_warnx("%s: failed to get guest state", __func__);
-		return (-1);
-	}
-	if (gstate != PSP_GSTATE_RUNNING) {
-		log_warnx("%s: invalid guest state: 0x%hx", __func__, gstate);
-		return (-1);
-	}
 	return (0);
 }
@@ -221,6 +203,57 @@ sev_activate(struct vmd_vm *vm, int vcpu
 	return (0);
 }
 
+
+int
+sev_encrypt_state(struct vmd_vm *vm, int vcpu_id)
+{
+	struct vmop_create_params *vmc = &vm->vm_params;
+	struct vm_create_params *vcp = &vmc->vmc_params;
+
+	if (!vcp->vcp_seves)
+		return (0);
+
+	if (psp_encrypt_state(vm->vm_sev_handle, vm->vm_sev_asid[vcpu_id],
+	    vcp->vcp_id, vcpu_id)) {
+		log_warnx("%s: failed to encrypt state: 0x%x 0x%x 0x%x 0x%x",
+		    __func__, vm->vm_sev_handle, vm->vm_sev_asid[vcpu_id],
+		    vm->vm_vmid, vcpu_id);
+		return (-1);
+	}
+
+	return (0);
+}
+
+int
+sev_launch_finalize(struct vmd_vm *vm)
+{
+	struct vmop_create_params *vmc = &vm->vm_params;
+	struct vm_create_params *vcp = &vmc->vmc_params;
+	uint8_t gstate;
+
+	if (!vcp->vcp_sev)
+		return (0);
+
+	if (psp_launch_measure(vm->vm_sev_handle)) {
+		log_warnx("%s: failed to launch measure", __func__);
+		return (-1);
+	}
+	if (psp_launch_finish(vm->vm_sev_handle)) {
+		log_warnx("%s: failed to launch finish", __func__);
+		return (-1);
+	}
+
+	if (psp_get_gstate(vm->vm_sev_handle, NULL, NULL, &gstate)) {
+		log_warnx("%s: failed to get guest state", __func__);
+		return (-1);
+	}
+	if (gstate != PSP_GSTATE_RUNNING) {
+		log_warnx("%s: invalid guest state: 0x%hx", __func__, gstate);
+		return (-1);
+	}
+
+	return (0);
+}
 
 /*
  * Deactivate and decommission a guest's SEV crypto state.
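With the measure/finish/gstate steps split out of sev_encrypt_memory() into
sev_launch_finalize(), vmd now drives the PSP through the launch sequence
the AMD SEV API prescribes, with the ES-only VMSA step inserted before the
measurement. An illustrative outline using the real helper names (error
handling omitted; the single-VCPU case is an assumption):

	sev_init(vm);			/* LAUNCH_START; policy gains GPOL_ES */
	sev_encrypt_memory(vm);		/* LAUNCH_UPDATE_DATA per mem range */
	sev_activate(vm, 0);		/* ACTIVATE: bind the ASID */
	sev_encrypt_state(vm, 0);	/* LAUNCH_UPDATE_VMSA, SEV-ES only */
	sev_launch_finalize(vm);	/* LAUNCH_MEASURE + LAUNCH_FINISH,
					 * then verify PSP_GSTATE_RUNNING */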
Index: usr.sbin/vmd/vm.c
===================================================================
RCS file: /mount/openbsd/cvs/src/usr.sbin/vmd/vm.c,v
diff -u -p -u -p -r1.110 vm.c
--- usr.sbin/vmd/vm.c	21 Nov 2024 13:25:30 -0000	1.110
+++ usr.sbin/vmd/vm.c	24 Feb 2025 22:58:05 -0000
@@ -958,6 +958,18 @@ run_vm(struct vmop_create_params *vmc, s
 			}
 		}
 
+		if (sev_encrypt_state(current_vm, i)) {
+			log_warnx("%s: state encryption for VCPU "
+			    "%zu failed - exiting.", __progname, i);
+			return (EIO);
+		}
+
+		if (sev_launch_finalize(current_vm)) {
+			log_warnx("%s: launch finalization for VCPU "
+			    "%zu failed - exiting.", __progname, i);
+			return (EIO);
+		}
+
 		ret = pthread_cond_init(&vcpu_run_cond[i], NULL);
 		if (ret) {
 			log_warnx("%s: cannot initialize cond var (%d)",
Index: usr.sbin/vmd/vmd.h
===================================================================
RCS file: /mount/openbsd/cvs/src/usr.sbin/vmd/vmd.h,v
diff -u -p -u -p -r1.132 vmd.h
--- usr.sbin/vmd/vmd.h	8 Jan 2025 15:46:10 -0000	1.132
+++ usr.sbin/vmd/vmd.h	24 Feb 2025 22:58:05 -0000
@@ -587,8 +587,9 @@ __dead void vioblk_main(int, int);
 int	psp_get_pstate(uint16_t *, uint8_t *, uint8_t *, uint8_t *, uint8_t *);
 int	psp_df_flush(void);
 int	psp_get_gstate(uint32_t, uint32_t *, uint32_t *, uint8_t *);
-int	psp_launch_start(uint32_t *);
+int	psp_launch_start(uint32_t *, int);
 int	psp_launch_update(uint32_t, vaddr_t, size_t);
+int	psp_encrypt_state(uint32_t, uint32_t, uint32_t, uint32_t);
 int	psp_launch_measure(uint32_t);
 int	psp_launch_finish(uint32_t);
 int	psp_activate(uint32_t, uint32_t);
@@ -600,6 +601,8 @@ int	sev_init(struct vmd_vm *);
 int	sev_register_encryption(vaddr_t, size_t);
 int	sev_encrypt_memory(struct vmd_vm *);
 int	sev_activate(struct vmd_vm *, int);
+int	sev_encrypt_state(struct vmd_vm *, int);
+int	sev_launch_finalize(struct vmd_vm *);
 int	sev_shutdown(struct vmd_vm *);
 
 #endif /* VMD_H */
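For testing outside vmd, the new ioctl can be exercised directly against the
PSP device node. A hypothetical, minimal sketch; the /dev/psp path is an
assumption, and the handle, ASID, vmid and vcpuid values would have to come
from a real launch sequence rather than the placeholders used here:

	/* Hypothetical test sketch, not part of the diff. */
	#include <sys/ioctl.h>
	#include <dev/ic/pspvar.h>
	#include <err.h>
	#include <fcntl.h>
	#include <string.h>

	int
	main(void)
	{
		struct psp_encrypt_state es;
		int fd;

		/* device path is an assumption */
		if ((fd = open("/dev/psp", O_RDWR)) == -1)
			err(1, "open");

		memset(&es, 0, sizeof(es));
		es.handle = 1;	/* placeholder: from PSP_IOC_LAUNCH_START */
		es.asid = 1;	/* placeholder: ASID the guest was activated with */
		es.vmid = 1;	/* placeholder: kernel vmid owning the VCPU */
		es.vcpuid = 0;

		if (ioctl(fd, PSP_IOC_ENCRYPT_STATE, &es) == -1)
			err(1, "PSP_IOC_ENCRYPT_STATE");
		return (0);
	}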