nes-proj/cpu/x86/mm/syscalls-int.c
Michael LeMay e0aefd11d9 x86: Add support for SW-switched segment-based protection domains
This patch extends the protection domain framework with a third plugin
that is a hybrid of the previous two.  The hardware task switching
mechanism has a strictly-defined format for TSS data structures that
causes more space to be consumed than would otherwise be required.
This patch defines a smaller data structure that is allocated for each
protection domain, only requiring 32 bytes instead of 128 bytes.  It
uses the same multi-segment memory layout as the TSS-based plugin and
leaves paging disabled.  However, it uses a mechanism similar to that of the
paging plugin to perform system call dispatches and returns.

For additional information, please refer to cpu/x86/mm/README.md.
2016-04-22 08:16:43 -07:00

319 lines
11 KiB
C

/*
* Copyright (C) 2015, Intel Corporation. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
* COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "prot-domains.h"
#include "tss.h"
#include "helpers.h"
#include "stacks.h"
#include "idt.h"
#include "syscalls.h"
#include "gdt.h"
#include "gdt-layout.h"
#include "interrupt.h"
/**
 * Identifier of the protection domain that is currently executing.  It is
 * updated by dispatcher_tail() on every domain switch.  Not protected, since
 * it is just a convenience variable to avoid unneeded protection domain
 * switches.
 */
dom_id_t cur_dom = DOM_ID_app;
/* System call return dispatcher entrypoint, defined in syscalls-int-asm.S.
 * Its address is installed in the IDT by syscalls_int_init().
 */
void prot_domains_sysret_dispatcher(void);
/* Maximum depth of the inter-domain call stack, counted in entries */
#define MAX_INTER_DOM_CALL_STK_SZ 4
/* Protected call stack for inter-domain system calls.  Each entry is the ID
 * of a protection domain on the current call chain.  The stack grows up,
 * i.e. toward higher indices.
 */
static volatile dom_id_t ATTR_BSS_KERN
inter_dom_call_stk[MAX_INTER_DOM_CALL_STK_SZ];
/* Index (not an address) of the next free slot in the inter-domain call
 * stack.  The most recently entered domain is stored at index
 * inter_dom_call_stk_ptr - 1.
 */
static int ATTR_BSS_KERN inter_dom_call_stk_ptr;
/*---------------------------------------------------------------------------*/
/**
 * \brief Set or clear the interrupt-enable flag in the saved EFLAGS image of
 *        an interrupt stack frame.
 * \param from_id  Domain being switched away from (not used by this
 *                 function).
 * \param to_id    Domain being switched to.
 * \param intr_stk Interrupt stack frame whose saved EFLAGS value is updated
 *                 in place.
 *
 * EFLAGS_IF is set only when the destination is the application protection
 * domain and the saved code segment selector carries the user privilege
 * level, i.e. the application's cooperative scheduling context.  In every
 * other case EFLAGS_IF is cleared.
 */
static inline void __attribute__((always_inline))
update_eflags(dom_id_t from_id, dom_id_t to_id, interrupt_stack_t *intr_stk)
{
  /* Anything other than the user-level application context runs with
   * interrupts masked.
   */
  if((to_id != DOM_ID_app) ||
     (DT_SEL_GET_RPL(intr_stk->cs) != PRIV_LVL_USER)) {
    intr_stk->eflags &= ~EFLAGS_IF;
    return;
  }

  intr_stk->eflags |= EFLAGS_IF;
}
/*---------------------------------------------------------------------------*/
/**
 * \brief Final steps shared by the system call dispatch and return paths.
 * \param from_id  Domain being switched away from.
 * \param to_id    Domain being switched to.
 * \param intr_stk Interrupt stack frame that the pending interrupt return
 *                 will consume.
 *
 * Records the destination as the current domain, asks the protection domain
 * plugin to perform the switch, and then fixes up the saved EFLAGS value.
 */
static inline void __attribute__((always_inline))
dispatcher_tail(dom_id_t from_id, dom_id_t to_id, interrupt_stack_t *intr_stk)
{
cur_dom = to_id;
prot_domains_switch(from_id, to_id, intr_stk);
/* Both callers in this file invoke prot_domains_set_wp(false) before reaching
 * this point, so this call undoes that.
 * NOTE(review): presumably this re-enables write protection of kernel-owned
 * data -- confirm against the definition of prot_domains_set_wp().
 */
prot_domains_set_wp(true);
update_eflags(from_id, to_id, intr_stk);
}
/*---------------------------------------------------------------------------*/
/* Forward declaration.  The address of main() is used as the entrypoint for
 * the kernel launch in prot_domains_launch_kernel_impl().
 */
int main(void);
/**
 * \brief Complete a system call dispatch into the specified domain.
 * \param intr_stk    Interrupt stack frame for the dispatch.  Its saved EIP
 *                    is overwritten, and the word that its saved ESP points
 *                    to is replaced.
 * \param to_id       ID of the protection domain being called.  It is used
 *                    to index prot_domains_kern_data without a range check
 *                    in this function.
 * \param syscall_eip Address at which execution should resume in the callee.
 *
 * The caller is taken to be the domain on top of the inter-domain call
 * stack.  Execution halts if the callee is already marked busy or if the
 * call stack is full.
 */
static inline void __attribute__((always_inline))
syscall_dispatcher_tail(interrupt_stack_t *intr_stk,
dom_id_t to_id,
uint32_t syscall_eip)
{
dom_id_t from_id;
uint32_t tmp;
volatile dom_kern_data_t ATTR_KERN_ADDR_SPACE *from_dkd, *to_dkd;
uint32_t loc_call_stk_ptr;
to_dkd = prot_domains_kern_data + to_id;
/* This implementation of protection domains is non-reentrant.  For example,
 * while dispatching a system call it takes the return address from the stack
 * of the caller domain and stores it in a single field in the kernel data
 * associated with that protection domain.  That model does not permit
 * reentrancy, so a domain that is already busy must not be entered again.
 */
KERN_READL(tmp, to_dkd->flags);
if((tmp & PROT_DOMAINS_FLAG_BUSY) == PROT_DOMAINS_FLAG_BUSY) {
halt();
}
tmp |= PROT_DOMAINS_FLAG_BUSY;
KERN_WRITEL(to_dkd->flags, tmp);
/* Update the interrupt stack so that the IRET instruction will return to the
 * system call entrypoint.
 */
intr_stk->eip = syscall_eip;
KERN_READL(loc_call_stk_ptr, inter_dom_call_stk_ptr);
/* Lookup the information for the caller, which is the top entry of the call
 * stack.  This relies on the stack pointer being at least 1:
 * prot_domains_launch_kernel_impl() initializes it to 1 and
 * prot_domains_sysret_dispatcher_impl() halts rather than popping the last
 * entry.
 */
KERN_READL(from_id, inter_dom_call_stk[loc_call_stk_ptr - 1]);
from_dkd = prot_domains_kern_data + from_id;
/* Save the current return address from the unprivileged stack to a protected
 * location in the kernel-owned data structure.  This enforces return
 * entrypoint control.
 */
KERN_WRITEL(from_dkd->orig_ret_addr, *(uintptr_t *)intr_stk->esp);
/* Update the unprivileged stack so that when the system call body is
 * complete, it will invoke the system call return stub.
 */
*((uintptr_t *)intr_stk->esp) = (uintptr_t)prot_domains_sysret_stub;
/* Refuse to push onto a full call stack.
 * NOTE(review): this check runs after the busy flag and the unprivileged
 * stack have already been modified; that is only harmless if halt() never
 * returns -- confirm.
 */
if(MAX_INTER_DOM_CALL_STK_SZ <= loc_call_stk_ptr) {
halt();
}
/* Push the callee and publish the new stack depth. */
KERN_WRITEL(inter_dom_call_stk[loc_call_stk_ptr], to_id);
loc_call_stk_ptr++;
KERN_WRITEL(inter_dom_call_stk_ptr, loc_call_stk_ptr);
dispatcher_tail(from_id, to_id, intr_stk);
}
/*---------------------------------------------------------------------------*/
/**
 * \brief Validate a system call request and dispatch it to the target
 *        protection domain.
 * \param intr_stk Interrupt stack frame for the dispatch.
 * \param to_id    ID of the protection domain that should service the call.
 * \param syscall  Pointer to the entrypoint descriptor of the requested
 *                 system call.
 *
 * Execution halts instead of dispatching if the domain ID is out of range,
 * if the descriptor pointer does not refer to an element of the
 * syscalls_entrypoints table, or if the descriptor does not list the target
 * domain as permitted.
 *
 * NOTE(review): the fastcall attribute suggests that this is invoked from an
 * assembly stub with register arguments -- confirm in syscalls-int-asm.S.
 */
void __attribute__((fastcall))
prot_domains_syscall_dispatcher_impl(interrupt_stack_t *intr_stk,
dom_id_t to_id,
syscalls_entrypoint_t *syscall)
{
uint32_t tmp;
uint32_t syscall_eip;
/* Reject domain IDs beyond the set of domains that actually exist. */
if(PROT_DOMAINS_ACTUAL_CNT <= to_id) {
halt();
}
/* Get the approved entrypoint for the system call being invoked.  The
 * descriptor pointer is accepted only if it lies within
 * [syscalls_entrypoints, syscalls_entrypoints_end) and is offset from the
 * start of that range by a whole number of descriptors.
 */
if(!((((uintptr_t)syscalls_entrypoints) <= (uintptr_t)syscall) &&
(((uintptr_t)syscall) < (uintptr_t)syscalls_entrypoints_end) &&
(((((uintptr_t)syscall) - (uintptr_t)syscalls_entrypoints)
% sizeof(syscalls_entrypoint_t)) == 0))) {
/* Assert is not usable when switching protection domains */
halt();
}
/* The doms field is treated as a bitmask indexed by domain ID; the bit for
 * the target domain must be set.
 */
KERN_READL(tmp, syscall->doms);
if((BIT(to_id) & tmp) == 0) {
halt();
}
KERN_READL(syscall_eip, syscall->entrypoint);
/* dispatcher_tail(), reached through syscall_dispatcher_tail(), calls
 * prot_domains_set_wp(true) again after the kernel data has been updated.
 */
prot_domains_set_wp(false);
syscall_dispatcher_tail(intr_stk, to_id, syscall_eip);
}
/*---------------------------------------------------------------------------*/
/* Forward declaration; the address of main() is the kernel entrypoint used
 * below.
 */
int main(void);
/**
 * \brief Start the kernel by dispatching to main() in the kernel protection
 *        domain.
 * \param intr_stk Interrupt stack frame for the dispatch.
 *
 * The inter-domain call stack is seeded with the application domain as its
 * only entry, so the dispatch is handled like a system call made by the
 * application domain into the kernel domain.
 */
void __attribute__((fastcall))
prot_domains_launch_kernel_impl(interrupt_stack_t *intr_stk)
{
/* Slot 0 holds the application domain and the next free slot is index 1. */
KERN_WRITEL(inter_dom_call_stk[0], DOM_ID_app);
KERN_WRITEL(inter_dom_call_stk_ptr, 1);
syscall_dispatcher_tail(intr_stk, DOM_ID_kern, (uint32_t)main);
}
/*---------------------------------------------------------------------------*/
/**
 * \brief Return from a system call to the domain that invoked it.
 * \param intr_stk Interrupt stack frame for the return.  Its saved EIP is
 *                 overwritten with the caller's original return address.
 *
 * The returning domain is the top entry of the inter-domain call stack and
 * the domain being returned to is the entry directly below it.  Execution
 * halts if the stack does not hold at least two entries.
 */
void __attribute__((fastcall))
prot_domains_sysret_dispatcher_impl(interrupt_stack_t *intr_stk)
{
dom_id_t from_id, to_id;
uint32_t loc_call_stk_ptr;
uint32_t flags;
KERN_READL(loc_call_stk_ptr, inter_dom_call_stk_ptr);
/* A return needs both a callee and a caller on the stack. */
if(loc_call_stk_ptr <= 1) {
halt();
}
KERN_READL(from_id, inter_dom_call_stk[loc_call_stk_ptr - 1]);
KERN_READL(to_id, inter_dom_call_stk[loc_call_stk_ptr - 2]);
/* Resume the caller at the return address that syscall_dispatcher_tail()
 * saved in the caller's kernel data when the call was dispatched.
 */
KERN_READL(intr_stk->eip,
prot_domains_kern_data[to_id].orig_ret_addr);
/* dispatcher_tail() calls prot_domains_set_wp(true) again below. */
prot_domains_set_wp(false);
/* The returning domain is no longer busy and may be entered again. */
KERN_READL(flags, prot_domains_kern_data[from_id].flags);
flags &= ~PROT_DOMAINS_FLAG_BUSY;
KERN_WRITEL(prot_domains_kern_data[from_id].flags, flags);
/* Pop the returning domain off the call stack. */
KERN_WRITEL(inter_dom_call_stk_ptr, loc_call_stk_ptr - 1);
dispatcher_tail(from_id, to_id, intr_stk);
}
/*---------------------------------------------------------------------------*/
/*---------------------------------------------------------------------------*/
/**
 * \brief  Find the kernel data for the domain on top of the inter-domain
 *         call stack.
 * \return Pointer to the prot_domains_kern_data entry of that domain.
 *
 * The stack pointer is not checked here; the entry at index
 * inter_dom_call_stk_ptr - 1 is read unconditionally.
 */
static volatile dom_kern_data_t ATTR_KERN_ADDR_SPACE *
get_current_domain(void)
{
  uint32_t next_free_slot;
  dom_id_t top_id;

  KERN_READL(next_free_slot, inter_dom_call_stk_ptr);
  /* The top of the stack sits just below the next free slot. */
  KERN_READL(top_id, inter_dom_call_stk[next_free_slot - 1]);

  return &prot_domains_kern_data[top_id];
}
/*---------------------------------------------------------------------------*/
/**
 * \brief     Test whether a protection domain has the port I/O flag set.
 *
 *            The GP fault handler uses this to decide whether a faulting
 *            port I/O instruction may be emulated on behalf of the domain.
 * \param dkd Kernel data of the protection domain to check.
 * \return    true if PROT_DOMAINS_FLAG_PIO is set in the flags of the
 *            domain, false otherwise.
 */
static bool
needs_port_io(volatile dom_kern_data_t ATTR_KERN_ADDR_SPACE *dkd)
{
  uint32_t flag_bits;

  KERN_READL(flag_bits, dkd->flags);
  if((flag_bits & PROT_DOMAINS_FLAG_PIO) != PROT_DOMAINS_FLAG_PIO) {
    return false;
  }

  return true;
}
/*---------------------------------------------------------------------------*/
/**
 * \brief General protection fault handler that emulates port I/O
 *        instructions for protection domains that have the port I/O flag set.
 * \param context Saved register state of the faulting code.  Writes to it
 *                change the state that is restored on return from the
 *                exception.
 *
 * Only the four single-byte opcodes 0xEC-0xEF are emulated.  Execution halts
 * if the current domain lacks the port I/O flag, if the faulting EIP lies
 * beyond the code segment limit, or if the first byte of the faulting
 * instruction is any other opcode.
 *
 * Mark the context parameter as volatile so that writes to it will not get
 * optimized out.  This parameter is not handled like ordinary function
 * parameters.  It actually partially includes the contents of the exception
 * stack, so updates to those locations can affect the operation of the
 * subsequent interrupt return.
 */
static void
gp_fault_handler(volatile struct interrupt_context context)
{
uint32_t cs_lim;
uint8_t opcode;
volatile dom_kern_data_t ATTR_KERN_ADDR_SPACE *dkd = get_current_domain();
if (needs_port_io(dkd)) {
/* Fetch the limit of the code segment currently loaded in CS. */
__asm__ __volatile__ (
"mov %%cs, %0\n\t"
"lsl %0, %0\n\t"
: "=r"(cs_lim));
/* Do not read the opcode from an address beyond the code segment limit. */
if (cs_lim < context.eip) {
halt();
}
/* Load first byte of faulting instruction, using a CS segment override */
__asm__ __volatile__ (
"movb %%cs:%1, %0"
: "=q"(opcode)
: "m"(*(uint8_t *)context.eip));
/* In every case the port number is taken from the low 16 bits of EDX. */
switch (opcode) {
case 0xEC: /* inb */
/* Replace only the low byte of EAX and keep the upper 24 bits. */
context.eax = (context.eax & ~0xFF) | inb((uint16_t)context.edx);
break;
case 0xED: /* inl */
context.eax = inl((uint16_t)context.edx);
break;
case 0xEE: /* outb */
outb((uint16_t)context.edx, (uint8_t)context.eax);
break;
case 0xEF: /* outl */
outl((uint16_t)context.edx, context.eax);
break;
default:
/* Anything else, including prefixed forms of the above, is not emulated. */
halt();
}
/* Skip the faulting port I/O instruction that was emulated.  Each of the
 * opcodes handled above is a single byte long.
 */
context.eip++;
} else {
halt();
}
}
/*---------------------------------------------------------------------------*/
/**
 * \brief Initialize interrupt-based system call dispatching.
 *
 * Initializes the TSS, installs the handler for exception 13 (general
 * protection fault), and registers the interrupt gates for the system call
 * dispatch and system call return dispatchers.
 */
void
syscalls_int_init(void)
{
tss_init();
/* NOTE(review): the second argument (1) presumably indicates that this
 * exception pushes an error code -- confirm against SET_EXCEPTION_HANDLER.
 */
SET_EXCEPTION_HANDLER(13, 1, gp_fault_handler);
/* Register system call dispatchers.  Both gates use the GDT_SEL_CODE_EXC
 * code segment selector and are registered with PRIV_LVL_USER.
 * NOTE(review): presumably PRIV_LVL_USER is the gate privilege level that
 * lets user-level code raise these interrupts -- confirm against
 * idt_set_intr_gate_desc().
 */
idt_set_intr_gate_desc(PROT_DOMAINS_SYSCALL_DISPATCH_INT,
(uint32_t)prot_domains_syscall_dispatcher,
GDT_SEL_CODE_EXC,
PRIV_LVL_USER);
idt_set_intr_gate_desc(PROT_DOMAINS_SYSRET_DISPATCH_INT,
(uint32_t)prot_domains_sysret_dispatcher,
GDT_SEL_CODE_EXC,
PRIV_LVL_USER);
}
/*---------------------------------------------------------------------------*/