diff --git a/cpu/x86/Makefile.x86_quarkX1000 b/cpu/x86/Makefile.x86_quarkX1000
index 4a7668cc6..13a9c686f 100644
--- a/cpu/x86/Makefile.x86_quarkX1000
+++ b/cpu/x86/Makefile.x86_quarkX1000
@@ -20,6 +20,11 @@ CFLAGS += -DX86_CONF_USE_INVLPG
 endif
 # This matches the definition of X86_CONF_PROT_DOMAINS__PAGING in prot-domains.h:
 CFLAGS += -DX86_CONF_PROT_DOMAINS=1
+else ifeq ($(X86_CONF_PROT_DOMAINS),tss)
+# This matches the definition of X86_CONF_PROT_DOMAINS__TSS in prot-domains.h:
+CFLAGS += -DX86_CONF_PROT_DOMAINS=2
+X86_CONF_MULTI_SEG = 1
+CONTIKI_SOURCEFILES += tss-prot-domains-asm.S
 else
 $(error Unrecognized setting for X86_CONF_PROT_DOMAINS: \
 $(X86_CONF_PROT_DOMAINS). See cpu/x86/mm/README.md for \
@@ -30,6 +35,20 @@ ifeq ($(X86_CONF_SYSCALLS_INT),1)
 CONTIKI_SOURCEFILES += syscalls-int-asm.S tss.c
 endif
 
+ifeq ($(X86_CONF_MULTI_SEG),1)
+LINKERSCRIPT_SFX = _multi_seg
+CONTIKI_SOURCEFILES += multi-segment.c
+# Because the multi-segment implementation of protection domains defines
+# tightly-bounded stack segments, the base pointer register cannot be used as
+# a general-purpose register in all circumstances. The stack segment is used
+# by default for a data access that uses the base pointer as the base register
+# to compute the address. If the data referenced by the base pointer is not
+# on the stack, then the access will fail. Thus, it is necessary to disable
+# the omit-frame-pointer optimization. See mm/README.md for more details on
+# how multi-segment protection domains are implemented.
+CFLAGS += -fno-omit-frame-pointer
+endif
+
 endif
 
 CFLAGS += -m32 -march=i586 -mtune=i586
diff --git a/cpu/x86/bootstrap_quarkX1000.S b/cpu/x86/bootstrap_quarkX1000.S
index 4211e51a3..622c9dab8 100644
--- a/cpu/x86/bootstrap_quarkX1000.S
+++ b/cpu/x86/bootstrap_quarkX1000.S
@@ -45,5 +45,17 @@
 .global start
 start:
   cli
+#if X86_CONF_PROT_DOMAINS == X86_CONF_PROT_DOMAINS__TSS
+  /* TSS-based protection domains use a multi-segment model that defines
+   * tight bounds around stacks. That means that the bottom of the stack
+   * has an offset of 0, which is the address of the stacks_main symbol.
+   * The following code computes the physical load address of the top of
+   * the stack, which is what should be initially used as the stack
+   * pointer while the flat memory model is in use.
+   */
+  lea _sdata_addr, %eax
+  lea (stacks_main + STACKS_SIZE_MAIN)(%eax), %esp
+#else
   mov $(stacks_main + STACKS_SIZE_MAIN), %esp
+#endif
   call cpu_boot_stage0
diff --git a/cpu/x86/dma.h b/cpu/x86/dma.h
index b0122fcdb..7a8d991b1 100644
--- a/cpu/x86/dma.h
+++ b/cpu/x86/dma.h
@@ -43,6 +43,6 @@
 #endif
 #endif
 
-extern int _sbss_dma_addr, _ebss_dma_addr;
+extern int _ebss_pre_dma_addr, _sbss_dma_addr, _ebss_dma_addr;
 
 #endif /* CPU_X86_DMA_H_ */
diff --git a/cpu/x86/drivers/legacy_pc/pci.c b/cpu/x86/drivers/legacy_pc/pci.c
index e94c9ecbe..4584d454c 100644
--- a/cpu/x86/drivers/legacy_pc/pci.c
+++ b/cpu/x86/drivers/legacy_pc/pci.c
@@ -138,7 +138,9 @@ SYSCALLS_DEFINE_SINGLETON(pci_irq_agent_set_pirq,
     offset = 0x3146;
   }
 
-  value = *(uint16_t*)(rcba_addr + offset);
+  prot_domains_enable_mmio();
+
+  MMIO_READW(value, *(uint16_t ATTR_MMIO_ADDR_SPACE *)(rcba_addr + offset));
 
   /* Clear interrupt pin route and set corresponding pirq.
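   * Each pin's route is a bit field within the 16-bit register that was
   * just read; the switch below replaces the field for the requested pin
   * with the new PIRQ number.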
*/ switch(pin) { @@ -159,7 +161,9 @@ SYSCALLS_DEFINE_SINGLETON(pci_irq_agent_set_pirq, value |= (pirq << 12); } - *(uint16_t*)(rcba_addr + offset) = value; + MMIO_WRITEW(*(uint16_t ATTR_MMIO_ADDR_SPACE *)(rcba_addr + offset), value); + + prot_domains_disable_mmio(); } /*---------------------------------------------------------------------------*/ /** @@ -231,7 +235,7 @@ pci_pirq_set_irq(PIRQ pirq, uint8_t irq, uint8_t route_to_legacy) * \param meta_sz Size of optional driver-defined metadata. */ void -pci_init(pci_driver_t *c_this, +pci_init(pci_driver_t ATTR_KERN_ADDR_SPACE *c_this, pci_config_addr_t pci_addr, size_t mmio_sz, uintptr_t meta, diff --git a/cpu/x86/drivers/legacy_pc/pci.h b/cpu/x86/drivers/legacy_pc/pci.h index fff53a048..666b3c29e 100644 --- a/cpu/x86/drivers/legacy_pc/pci.h +++ b/cpu/x86/drivers/legacy_pc/pci.h @@ -102,7 +102,7 @@ void pci_command_enable(pci_config_addr_t addr, uint32_t flags); typedef dom_client_data_t pci_driver_t; -void pci_init(pci_driver_t *c_this, +void pci_init(pci_driver_t ATTR_KERN_ADDR_SPACE *c_this, pci_config_addr_t pci_addr, size_t mmio_sz, uintptr_t meta, @@ -113,10 +113,12 @@ void pci_root_complex_init(void); void pci_root_complex_lock(void); #define PCI_MMIO_READL(c_this, dest, reg_addr) \ - dest = *((volatile uint32_t *) \ - (((uintptr_t)PROT_DOMAINS_MMIO(c_this)) + (reg_addr))) + MMIO_READL(dest, \ + *((volatile uint32_t ATTR_MMIO_ADDR_SPACE *) \ + (((uintptr_t)PROT_DOMAINS_MMIO(c_this)) + (reg_addr)))) #define PCI_MMIO_WRITEL(c_this, reg_addr, src) \ - *((volatile uint32_t *) \ - (((uintptr_t)PROT_DOMAINS_MMIO(c_this)) + (reg_addr))) = (src) + MMIO_WRITEL(*((volatile uint32_t ATTR_MMIO_ADDR_SPACE *) \ + (((uintptr_t)PROT_DOMAINS_MMIO(c_this)) + (reg_addr))), \ + src) #endif /* CPU_X86_DRIVERS_LEGACY_PC_PCI_H_ */ diff --git a/cpu/x86/drivers/legacy_pc/uart-16x50.c b/cpu/x86/drivers/legacy_pc/uart-16x50.c index d1f2c498d..d17e61498 100644 --- a/cpu/x86/drivers/legacy_pc/uart-16x50.c +++ b/cpu/x86/drivers/legacy_pc/uart-16x50.c @@ -74,6 +74,11 @@ typedef struct uart_16x50_regs { */ #define UART_MMIO_SZ MIN_PAGE_SIZE #else +/* Multi-segment protection domain implementations can control memory with + * byte granularity. Thus, only the registers defined in the uart_16x50_regs + * structure are included in the MMIO region allocated for this protection + * domain: + */ #define UART_MMIO_SZ sizeof(uart_16x50_regs_t) #endif @@ -82,24 +87,30 @@ void uart_16x50_setup(uart_16x50_driver_t c_this, uint16_t dl); /*---------------------------------------------------------------------------*/ SYSCALLS_DEFINE(uart_16x50_setup, uart_16x50_driver_t c_this, uint16_t dl) { - uart_16x50_regs_t *regs = (uart_16x50_regs_t *)PROT_DOMAINS_MMIO(c_this); + uart_16x50_regs_t ATTR_MMIO_ADDR_SPACE *regs = + (uart_16x50_regs_t ATTR_MMIO_ADDR_SPACE *)PROT_DOMAINS_MMIO(c_this); + + prot_domains_enable_mmio(); /* Set the DLAB bit to enable access to divisor settings. */ - regs->lcr = UART_LCR_7_DLAB; + MMIO_WRITEL(regs->lcr, UART_LCR_7_DLAB); /* The divisor settings configure the baud rate, and may need to be defined * on a per-device basis. */ - regs->rbr_thr_dll = dl & UINT8_MAX; - regs->ier_dlh = dl >> 8; + MMIO_WRITEL(regs->rbr_thr_dll, dl & UINT8_MAX); + MMIO_WRITEL(regs->ier_dlh, dl >> 8); /* Clear the DLAB bit to enable access to other settings and configure other * UART parameters. */ - regs->lcr = UART_LCR_8BITS; + MMIO_WRITEL(regs->lcr, UART_LCR_8BITS); /* Enable the FIFOs. 
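   * UART_FCR_1_RFIFOR and UART_FCR_2_XFIFOR additionally clear any stale
   * contents out of the receive and transmit FIFOs.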
*/ - regs->iir_fcr = UART_FCR_0_FIFOE | UART_FCR_1_RFIFOR | UART_FCR_2_XFIFOR; + MMIO_WRITEL(regs->iir_fcr, + UART_FCR_0_FIFOE | UART_FCR_1_RFIFOR | UART_FCR_2_XFIFOR); + + prot_domains_disable_mmio(); } /*---------------------------------------------------------------------------*/ /** @@ -112,13 +123,21 @@ SYSCALLS_DEFINE(uart_16x50_setup, uart_16x50_driver_t c_this, uint16_t dl) */ SYSCALLS_DEFINE(uart_16x50_tx, uart_16x50_driver_t c_this, uint8_t c) { - uart_16x50_regs_t *regs = (uart_16x50_regs_t *)PROT_DOMAINS_MMIO(c_this); + uint32_t ready; + uart_16x50_regs_t ATTR_MMIO_ADDR_SPACE *regs = + (uart_16x50_regs_t ATTR_MMIO_ADDR_SPACE *)PROT_DOMAINS_MMIO(c_this); + + prot_domains_enable_mmio(); /* Wait for space in TX FIFO. */ - while((regs->lsr & UART_LSR_5_THRE) == 0); + do { + MMIO_READL(ready, regs->lsr); + } while((ready & UART_LSR_5_THRE) == 0); /* Add character to TX FIFO. */ - regs->rbr_thr_dll = c; + MMIO_WRITEL(regs->rbr_thr_dll, c); + + prot_domains_disable_mmio(); } /*---------------------------------------------------------------------------*/ /** @@ -128,10 +147,12 @@ SYSCALLS_DEFINE(uart_16x50_tx, uart_16x50_driver_t c_this, uint8_t c) * \param dl Divisor setting to configure the baud rate. */ void -uart_16x50_init(uart_16x50_driver_t *c_this, +uart_16x50_init(uart_16x50_driver_t ATTR_KERN_ADDR_SPACE *c_this, pci_config_addr_t pci_addr, uint16_t dl) { + uart_16x50_driver_t loc_c_this; + /* This assumes that the UART had an MMIO range assigned to it by the * firmware during boot. */ @@ -141,6 +162,8 @@ uart_16x50_init(uart_16x50_driver_t *c_this, SYSCALLS_INIT(uart_16x50_tx); SYSCALLS_AUTHZ(uart_16x50_tx, *c_this); - uart_16x50_setup(*c_this, dl); + prot_domains_copy_dcd(&loc_c_this, c_this); + + uart_16x50_setup(loc_c_this, dl); } /*---------------------------------------------------------------------------*/ diff --git a/cpu/x86/drivers/legacy_pc/uart-16x50.h b/cpu/x86/drivers/legacy_pc/uart-16x50.h index 615806518..4a038b948 100644 --- a/cpu/x86/drivers/legacy_pc/uart-16x50.h +++ b/cpu/x86/drivers/legacy_pc/uart-16x50.h @@ -35,7 +35,7 @@ typedef pci_driver_t uart_16x50_driver_t; -void uart_16x50_init(uart_16x50_driver_t *c_this, +void uart_16x50_init(uart_16x50_driver_t ATTR_KERN_ADDR_SPACE *c_this, pci_config_addr_t pci_addr, uint16_t dl); diff --git a/cpu/x86/drivers/quarkX1000/eth.c b/cpu/x86/drivers/quarkX1000/eth.c index 5c16b10a5..88782ebc2 100644 --- a/cpu/x86/drivers/quarkX1000/eth.c +++ b/cpu/x86/drivers/quarkX1000/eth.c @@ -216,13 +216,19 @@ SYSCALLS_DEFINE_SINGLETON(quarkX1000_eth_setup, drv, uintptr_t meta_phys_base) { uip_eth_addr mac_addr; uint32_t mac_tmp1, mac_tmp2; - quarkX1000_eth_meta_t *loc_meta = - (quarkX1000_eth_meta_t *)PROT_DOMAINS_META(drv); + quarkX1000_eth_rx_desc_t rx_desc; + quarkX1000_eth_tx_desc_t tx_desc; + quarkX1000_eth_meta_t ATTR_META_ADDR_SPACE *loc_meta = + (quarkX1000_eth_meta_t ATTR_META_ADDR_SPACE *)PROT_DOMAINS_META(drv); + + prot_domains_enable_mmio(); /* Read the MAC address from the device. */ PCI_MMIO_READL(drv, mac_tmp1, REG_ADDR_MACADDR_HI); PCI_MMIO_READL(drv, mac_tmp2, REG_ADDR_MACADDR_LO); + prot_domains_disable_mmio(); + /* Convert the data read from the device into the format expected by * Contiki. */ @@ -245,29 +251,39 @@ SYSCALLS_DEFINE_SINGLETON(quarkX1000_eth_setup, drv, uintptr_t meta_phys_base) uip_setethaddr(mac_addr); /* Initialize transmit descriptor. 
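   * The descriptor fields are staged in a local tx_desc variable and then
   * committed to the driver metadata region with META_WRITEL, since
   * multi-segment protection domains may only expose that region through
   * a dedicated segment register.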
 */
-  loc_meta->tx_desc.tdes0 = 0;
-  loc_meta->tx_desc.tdes1 = 0;
+  tx_desc.tdes0 = 0;
+  tx_desc.tdes1 = 0;
 
-  loc_meta->tx_desc.buf1_ptr =
-    (uint8_t *)PROT_DOMAINS_META_OFF_TO_PHYS(
-      (uintptr_t)&loc_meta->tx_buf, meta_phys_base);
-  loc_meta->tx_desc.tx_end_of_ring = 1;
-  loc_meta->tx_desc.first_seg_in_frm = 1;
-  loc_meta->tx_desc.last_seg_in_frm = 1;
-  loc_meta->tx_desc.tx_end_of_ring = 1;
+  tx_desc.first_seg_in_frm = 1;
+  tx_desc.last_seg_in_frm = 1;
+  tx_desc.tx_end_of_ring = 1;
+
+  META_WRITEL(loc_meta->tx_desc.tdes0, tx_desc.tdes0);
+  META_WRITEL(loc_meta->tx_desc.tdes1, tx_desc.tdes1);
+  META_WRITEL(loc_meta->tx_desc.buf1_ptr,
+              (uint8_t *)PROT_DOMAINS_META_OFF_TO_PHYS(
+                (uintptr_t)&loc_meta->tx_buf, meta_phys_base));
+  META_WRITEL(loc_meta->tx_desc.buf2_ptr, 0);
 
   /* Initialize receive descriptor.
    */
-  loc_meta->rx_desc.rdes0 = 0;
-  loc_meta->rx_desc.rdes1 = 0;
+  rx_desc.rdes0 = 0;
+  rx_desc.rdes1 = 0;
 
-  loc_meta->rx_desc.buf1_ptr =
-    (uint8_t *)PROT_DOMAINS_META_OFF_TO_PHYS(
-      (uintptr_t)&loc_meta->rx_buf, meta_phys_base);
-  loc_meta->rx_desc.own = 1;
-  loc_meta->rx_desc.first_desc = 1;
-  loc_meta->rx_desc.last_desc = 1;
-  loc_meta->rx_desc.rx_buf1_sz = UIP_BUFSIZE;
-  loc_meta->rx_desc.rx_end_of_ring = 1;
+  rx_desc.own = 1;
+  rx_desc.first_desc = 1;
+  rx_desc.last_desc = 1;
+  rx_desc.rx_buf1_sz = UIP_BUFSIZE;
+  rx_desc.rx_end_of_ring = 1;
+
+  META_WRITEL(loc_meta->rx_desc.rdes0, rx_desc.rdes0);
+  META_WRITEL(loc_meta->rx_desc.rdes1, rx_desc.rdes1);
+  META_WRITEL(loc_meta->rx_desc.buf1_ptr,
+              (uint8_t *)PROT_DOMAINS_META_OFF_TO_PHYS(
+                (uintptr_t)&loc_meta->rx_buf, meta_phys_base));
+  META_WRITEL(loc_meta->rx_desc.buf2_ptr, 0);
+
+  prot_domains_enable_mmio();
 
   /* Install transmit and receive descriptors.
    */
   PCI_MMIO_WRITEL(drv, REG_ADDR_RX_DESC_LIST,
@@ -298,8 +314,11 @@ SYSCALLS_DEFINE_SINGLETON(quarkX1000_eth_setup, drv, uintptr_t meta_phys_base)
                   /* Place the receiver state machine in the Running state. */
                   OP_MODE_1_START_RX);
 
+  prot_domains_disable_mmio();
+
   printf(LOG_PFX "Enabled 100M full-duplex mode.\n");
 }
+
 /*---------------------------------------------------------------------------*/
 /**
  * \brief Poll for a received Ethernet frame.
@@ -313,33 +332,43 @@ SYSCALLS_DEFINE_SINGLETON(quarkX1000_eth_poll, drv, uint16_t * frame_len)
 {
   uint16_t *loc_frame_len;
   uint16_t frm_len = 0;
-  quarkX1000_eth_meta_t *loc_meta =
-    (quarkX1000_eth_meta_t *)PROT_DOMAINS_META(drv);
+  quarkX1000_eth_rx_desc_t tmp_desc;
+  quarkX1000_eth_meta_t ATTR_META_ADDR_SPACE *loc_meta =
+    (quarkX1000_eth_meta_t ATTR_META_ADDR_SPACE *)PROT_DOMAINS_META(drv);
 
   PROT_DOMAINS_VALIDATE_PTR(loc_frame_len, frame_len, sizeof(*frame_len));
 
+  META_READL(tmp_desc.rdes0, loc_meta->rx_desc.rdes0);
+
   /* Check whether the RX descriptor is still owned by the device. If not,
    * process the received frame or an error that may have occurred.
    */
-  if(loc_meta->rx_desc.own == 0) {
-    if(loc_meta->rx_desc.err_summary) {
+  if(tmp_desc.own == 0) {
+    META_READL(tmp_desc.rdes1, loc_meta->rx_desc.rdes1);
+    if(tmp_desc.err_summary) {
       fprintf(stderr,
               LOG_PFX "Error receiving frame: RDES0 = %08x, RDES1 = %08x.\n",
-              loc_meta->rx_desc.rdes0, loc_meta->rx_desc.rdes1);
+              tmp_desc.rdes0, tmp_desc.rdes1);
       assert(0);
     }
 
-    frm_len = loc_meta->rx_desc.frm_len; assert(frm_len <= UIP_BUFSIZE);
-    memcpy(uip_buf, (void *)loc_meta->rx_buf, frm_len);
+    frm_len = tmp_desc.frm_len;
+    assert(frm_len <= UIP_BUFSIZE);
+    MEMCPY_FROM_META(uip_buf, loc_meta->rx_buf, frm_len);
 
     /* Return ownership of the RX descriptor to the device.
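     * Setting the OWN bit hands the descriptor back to the DMA engine so
     * that it can store the next received frame in rx_buf.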
*/ - loc_meta->rx_desc.own = 1; + tmp_desc.own = 1; + + META_WRITEL(loc_meta->rx_desc.rdes0, tmp_desc.rdes0); + + prot_domains_enable_mmio(); /* Request that the device check for an available RX descriptor, since * ownership of the descriptor was just transferred to the device. */ PCI_MMIO_WRITEL(drv, REG_ADDR_RX_POLL_DEMAND, 1); + + prot_domains_disable_mmio(); } *loc_frame_len = frm_len; @@ -356,32 +385,45 @@ SYSCALLS_DEFINE_SINGLETON(quarkX1000_eth_poll, drv, uint16_t * frame_len) */ SYSCALLS_DEFINE_SINGLETON(quarkX1000_eth_send, drv) { - quarkX1000_eth_meta_t *loc_meta = - (quarkX1000_eth_meta_t *)PROT_DOMAINS_META(drv); + quarkX1000_eth_tx_desc_t tmp_desc; + quarkX1000_eth_meta_t ATTR_META_ADDR_SPACE *loc_meta = + (quarkX1000_eth_meta_t ATTR_META_ADDR_SPACE *)PROT_DOMAINS_META(drv); /* Wait until the TX descriptor is no longer owned by the device. */ - while(loc_meta->tx_desc.own == 1); + do { + META_READL(tmp_desc.tdes0, loc_meta->tx_desc.tdes0); + } while(tmp_desc.own == 1); + + META_READL(tmp_desc.tdes1, loc_meta->tx_desc.tdes1); /* Check whether an error occurred transmitting the previous frame. */ - if(loc_meta->tx_desc.err_summary) { + if(tmp_desc.err_summary) { fprintf(stderr, LOG_PFX "Error transmitting frame: TDES0 = %08x, TDES1 = %08x.\n", - loc_meta->tx_desc.tdes0, loc_meta->tx_desc.tdes1); + tmp_desc.tdes0, tmp_desc.tdes1); assert(0); } /* Transmit the next frame. */ assert(uip_len <= UIP_BUFSIZE); - memcpy((void *)loc_meta->tx_buf, uip_buf, uip_len); + MEMCPY_TO_META(loc_meta->tx_buf, uip_buf, uip_len); - loc_meta->tx_desc.tx_buf1_sz = uip_len; + tmp_desc.tx_buf1_sz = uip_len; - loc_meta->tx_desc.own = 1; + META_WRITEL(loc_meta->tx_desc.tdes1, tmp_desc.tdes1); + + tmp_desc.own = 1; + + META_WRITEL(loc_meta->tx_desc.tdes0, tmp_desc.tdes0); + + prot_domains_enable_mmio(); /* Request that the device check for an available TX descriptor, since * ownership of the descriptor was just transferred to the device. 
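   * The value written to the poll demand register is ignored; the write
   * itself is what prompts the DMA engine to re-check the descriptor.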
*/ PCI_MMIO_WRITEL(drv, REG_ADDR_TX_POLL_DEMAND, 1); + + prot_domains_disable_mmio(); } /*---------------------------------------------------------------------------*/ /** diff --git a/cpu/x86/drivers/quarkX1000/gpio.c b/cpu/x86/drivers/quarkX1000/gpio.c index 642cad310..ba825c090 100644 --- a/cpu/x86/drivers/quarkX1000/gpio.c +++ b/cpu/x86/drivers/quarkX1000/gpio.c @@ -56,7 +56,11 @@ #define HIGHEST_REG LS_SYNC +#if X86_CONF_PROT_DOMAINS == X86_CONF_PROT_DOMAINS__PAGING #define MMIO_SZ MIN_PAGE_SIZE +#else +#define MMIO_SZ (HIGHEST_REG + 4) +#endif PROT_DOMAINS_ALLOC(pci_driver_t, drv); @@ -77,7 +81,9 @@ SYSCALLS_DEFINE_SINGLETON(quarkX1000_gpio_mmin, drv, halt(); } + prot_domains_enable_mmio(); PCI_MMIO_READL(drv, *loc_res, offset); + prot_domains_disable_mmio(); } static inline uint32_t @@ -96,7 +102,9 @@ SYSCALLS_DEFINE_SINGLETON(quarkX1000_gpio_mmout, drv, halt(); } + prot_domains_enable_mmio(); PCI_MMIO_WRITEL(drv, offset, val); + prot_domains_disable_mmio(); } static inline void diff --git a/cpu/x86/drivers/quarkX1000/i2c.c b/cpu/x86/drivers/quarkX1000/i2c.c index 746e52b96..9e233e89c 100644 --- a/cpu/x86/drivers/quarkX1000/i2c.c +++ b/cpu/x86/drivers/quarkX1000/i2c.c @@ -51,7 +51,11 @@ #define I2C_IRQ 9 +#if X86_CONF_PROT_DOMAINS == X86_CONF_PROT_DOMAINS__PAGING #define MMIO_SZ MIN_PAGE_SIZE +#else +#define MMIO_SZ (QUARKX1000_IC_HIGHEST + 4) +#endif typedef enum { I2C_DIRECTION_READ, @@ -99,7 +103,9 @@ SYSCALLS_DEFINE_SINGLETON(quarkX1000_i2c_mmin, drv, halt(); } + prot_domains_enable_mmio(); PCI_MMIO_READL(drv, *loc_res, offset); + prot_domains_disable_mmio(); } static inline uint32_t @@ -119,7 +125,9 @@ SYSCALLS_DEFINE_SINGLETON(quarkX1000_i2c_mmout, drv, halt(); } + prot_domains_enable_mmio(); PCI_MMIO_WRITEL(drv, offset, val); + prot_domains_disable_mmio(); } static inline void diff --git a/cpu/x86/drivers/quarkX1000/uart.c b/cpu/x86/drivers/quarkX1000/uart.c index dcd0af8f2..341e31cf7 100644 --- a/cpu/x86/drivers/quarkX1000/uart.c +++ b/cpu/x86/drivers/quarkX1000/uart.c @@ -49,7 +49,7 @@ void quarkX1000_uart_init(quarkX1000_uart_dev_t dev) { pci_config_addr_t pci_addr; - uart_16x50_driver_t *drv; + uart_16x50_driver_t ATTR_KERN_ADDR_SPACE *drv; assert((dev == QUARK_X1000_UART_0) || (dev == QUARK_X1000_UART_1)); @@ -78,7 +78,11 @@ quarkX1000_uart_init(quarkX1000_uart_dev_t dev) void quarkX1000_uart_tx(quarkX1000_uart_dev_t dev, uint8_t c) { + uart_16x50_driver_t drv; assert((dev == QUARK_X1000_UART_0) || (dev == QUARK_X1000_UART_1)); - uart_16x50_tx((dev == QUARK_X1000_UART_0) ? quarkX1000_uart0 : quarkX1000_uart1, c); + prot_domains_copy_dcd(&drv, + (dev == QUARK_X1000_UART_0) ? + &quarkX1000_uart0 : &quarkX1000_uart1); + uart_16x50_tx(drv, c); } /*---------------------------------------------------------------------------*/ diff --git a/cpu/x86/init/common/cpu.c b/cpu/x86/init/common/cpu.c index 94ec2ddab..dd58b96d5 100644 --- a/cpu/x86/init/common/cpu.c +++ b/cpu/x86/init/common/cpu.c @@ -42,8 +42,11 @@ double_fault_handler(struct interrupt_context context) halt(); } /*---------------------------------------------------------------------------*/ -/* The OS has switched to its own segment descriptors. However, the protection - * domain support, if enabled, has not yet been fully activated. +/* The OS has switched to its own segment descriptors. When multi-segment + * protection domain support is enabled, this routine runs with the + * necessary address translations configured to invoke other routines that + * require those translations to be in place. 
However, the protection domain
+ * support, if enabled, has not yet been fully activated.
  */
 static void
 boot_stage1(void)
@@ -75,7 +78,8 @@ cpu_boot_stage0(void)
   uintptr_t top_of_stack = STACKS_INIT_TOP;
 
 #if X86_CONF_PROT_DOMAINS != X86_CONF_PROT_DOMAINS__NONE
-  uintptr_t *top_of_stack_ptr = (uintptr_t *)top_of_stack;
+  uintptr_t *top_of_stack_ptr =
+    (uintptr_t *)DATA_OFF_TO_PHYS_ADDR(top_of_stack);
 
   top_of_stack_ptr[0] = (uintptr_t)prot_domains_launch_kernel;
   top_of_stack_ptr[1] = (uintptr_t)prot_domains_launch_app;
diff --git a/cpu/x86/init/common/gdt.c b/cpu/x86/init/common/gdt.c
index f7fa10342..f63767850 100644
--- a/cpu/x86/init/common/gdt.c
+++ b/cpu/x86/init/common/gdt.c
@@ -72,7 +72,7 @@ set_descriptor(unsigned int index,
   segment_desc_init(&descriptor, base, len, flag);
 
   /* Save descriptor into gdt */
-  gdt[index] = descriptor;
+  gdt_insert_boot(index, descriptor);
 }
 /*---------------------------------------------------------------------------*/
 void
@@ -86,15 +86,17 @@ gdt_copy_desc_change_dpl(unsigned int dest_idx,
     halt();
   }
 
-  desc = gdt[src_idx];
+  gdt_lookup(src_idx, &desc);
 
   SEG_SET_FLAG(desc, DPL, dpl);
 
-  gdt[dest_idx] = desc;
+  gdt_insert(dest_idx, desc);
 }
 /*---------------------------------------------------------------------------*/
 /* This function initializes the Global Descriptor Table. For simplicity, the
- * memory is organized following the flat model. Thus, memory appears to
- * Contiki as a single continuous address space. Code, data, and stack
+ * memory is initially organized following the flat model. Thus, memory appears
+ * to Contiki as a single contiguous address space. Code, data, and stack
 * are all contained in this address space (so called linear address space).
+ * Certain protection domain implementations switch to a multi-segment memory
+ * model later during boot.
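+ * (For the TSS plugin, prot_domains_gdt_init() in mm/multi-segment.c
+ * installs the descriptors that implement the multi-segment model.)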
*/ void gdt_init(void) @@ -103,7 +105,7 @@ gdt_init(void) /* Initialize gdtr structure */ gdtr.limit = sizeof(segment_desc_t) * GDT_LEN - 1; - gdtr.base = (uint32_t) &gdt; + gdtr.base = KERN_DATA_OFF_TO_PHYS_ADDR(gdt); /* Initialize descriptors */ set_descriptor(GDT_IDX_NULL, 0, 0, 0); @@ -115,13 +117,20 @@ gdt_init(void) } /*---------------------------------------------------------------------------*/ void +gdt_insert_boot(unsigned int idx, segment_desc_t desc) +{ + ((segment_desc_t *)KERN_DATA_OFF_TO_PHYS_ADDR(gdt))[idx] = desc; +} +/*---------------------------------------------------------------------------*/ +void gdt_insert(unsigned int idx, segment_desc_t desc) { if(GDT_LEN <= idx) { halt(); } - gdt[idx] = desc; + KERN_WRITEL(gdt[idx].raw_lo, desc.raw_lo); + KERN_WRITEL(gdt[idx].raw_hi, desc.raw_hi); } /*---------------------------------------------------------------------------*/ void @@ -131,6 +140,7 @@ gdt_lookup(unsigned int idx, segment_desc_t *desc) halt(); } - *desc = gdt[idx]; + KERN_READL(desc->raw_lo, gdt[idx].raw_lo); + KERN_READL(desc->raw_hi, gdt[idx].raw_hi); } /*---------------------------------------------------------------------------*/ diff --git a/cpu/x86/init/common/gdt.h b/cpu/x86/init/common/gdt.h index 37f1f4dbe..305e32716 100644 --- a/cpu/x86/init/common/gdt.h +++ b/cpu/x86/init/common/gdt.h @@ -35,13 +35,21 @@ #include "prot-domains.h" #include "segmentation.h" -extern segment_desc_t gdt[]; -extern int _ebss_gdt_addr; +extern segment_desc_t ATTR_KERN_ADDR_SPACE gdt[]; +extern int ATTR_KERN_ADDR_SPACE _ebss_gdt_addr; #define GDT_IDX_OF_DESC(ptr) \ ((((uintptr_t)(ptr)) - ((uintptr_t)&gdt))/ \ sizeof(segment_desc_t)) +typedef struct far_pointer { + /** Far pointer offset. */ + uint32_t offset; + /** Far pointer segment/gate selector. */ + uint16_t sel; + uint16_t pad; +} __attribute__((packed)) far_pointer_t; + /** * \brief Compute the selector for a GDT entry allocated somewhere besides gdt.c. * \param ptr Pointer to GDT descriptor. 
@@ -49,14 +57,22 @@ extern int _ebss_gdt_addr; */ #define GDT_SEL_OF_DESC(ptr, rpl) GDT_SEL(GDT_IDX_OF_DESC(ptr), rpl) -#define ATTR_BSS_GDT __attribute__((section(".gdt_bss"))) -#define ATTR_BSS_GDT_START __attribute__((section(".gdt_bss_start"))) +/* Section for fixed GDT entries */ +#define ATTR_BSS_GDT \ + __attribute__((section(".gdt_bss"))) ATTR_KERN_ADDR_SPACE +/* Section for TSS and LDT descriptors for protection domains */ +#define ATTR_BSS_GDT_MID \ + __attribute__((used, section(".gdt_bss_mid"))) ATTR_KERN_ADDR_SPACE +/* Section for other GDT entries */ +#define ATTR_BSS_GDT_START \ + __attribute__((section(".gdt_bss_start"))) ATTR_KERN_ADDR_SPACE void gdt_copy_desc_change_dpl(unsigned int dest_idx, unsigned int src_idx, unsigned dpl); void gdt_init(void) ATTR_CODE_BOOT; void gdt_insert(unsigned int idx, segment_desc_t desc); +void gdt_insert_boot(unsigned int idx, segment_desc_t desc) ATTR_CODE_BOOT; void gdt_lookup(unsigned int idx, segment_desc_t *desc); #endif /* GDT_H */ diff --git a/cpu/x86/init/common/idt.c b/cpu/x86/init/common/idt.c index 441668a75..c5de5ed25 100644 --- a/cpu/x86/init/common/idt.c +++ b/cpu/x86/init/common/idt.c @@ -43,17 +43,23 @@ typedef struct idtr { uint32_t base; } __attribute__((packed)) idtr_t; -typedef struct intr_gate_desc { - uint16_t offset_low; - uint16_t selector; /* Segment Selector for destination code segment */ - uint16_t fixed:11; - uint16_t d:1; /* Size of gate: 1 = 32 bits; 0 = 16 bits */ - uint16_t pad:1; - uint16_t dpl:2; /* Descriptor Privilege Level */ - uint16_t p:1; /* Segment Present flag */ - uint16_t offset_high; - -} __attribute__((packed)) intr_gate_desc_t; +typedef union intr_gate_desc { + struct __attribute__((packed)) { + uint16_t offset_low; + uint16_t selector; /* Segment Selector for destination code segment */ + uint16_t fixed:11; + uint16_t d:1; /* Size of gate: 1 = 32 bits; 0 = 16 bits */ + uint16_t pad:1; + uint16_t dpl:2; /* Descriptor Privilege Level */ + uint16_t p:1; /* Segment Present flag */ + uint16_t offset_high; + }; + uint64_t raw; + struct { + uint32_t raw_lo; + uint32_t raw_hi; + }; +} intr_gate_desc_t; /* According to Intel Combined Manual, Vol. 3, Section 6.10, the base addresses * of the IDT should be aligned on an 8-byte boundary to maximize performance @@ -73,15 +79,19 @@ idt_set_intr_gate_desc(int intr_num, uint16_t cs, uint16_t dpl) { - intr_gate_desc_t *desc = &idt[intr_num]; + intr_gate_desc_t desc; - desc->offset_low = offset & 0xFFFF; - desc->selector = cs; - desc->fixed = BIT(9) | BIT(10); - desc->d = 1; - desc->dpl = dpl; - desc->p = 1; - desc->offset_high = (offset >> 16) & 0xFFFF; + desc.offset_low = offset & 0xFFFF; + desc.selector = cs; + desc.fixed = BIT(9) | BIT(10); + desc.pad = 0; + desc.d = 1; + desc.dpl = dpl; + desc.p = 1; + desc.offset_high = (offset >> 16) & 0xFFFF; + + KERN_WRITEL(idt[intr_num].raw_hi, desc.raw_hi); + KERN_WRITEL(idt[intr_num].raw_lo, desc.raw_lo); } /*---------------------------------------------------------------------------*/ /* Initialize Interrupt Descriptor Table. 
The IDT is initialized with
@@ -95,7 +105,7 @@ idt_init(void)
 
   /* Initialize idtr structure */
   idtr.limit = (sizeof(intr_gate_desc_t) * NUM_DESC) - 1;
-  idtr.base = (uint32_t)&idt;
+  idtr.base = KERN_DATA_OFF_TO_PHYS_ADDR((uint32_t)idt);
 
   /* Load IDTR register */
   __asm__("lidt %0\n\t" :: "m" (idtr));
diff --git a/cpu/x86/init/common/idt.h b/cpu/x86/init/common/idt.h
index 18f168ad8..059e81705 100644
--- a/cpu/x86/init/common/idt.h
+++ b/cpu/x86/init/common/idt.h
@@ -34,7 +34,7 @@
 #include <stdint.h>
 #include "prot-domains.h"
 
-void idt_init(void) ATTR_CODE_BOOT;
+void idt_init(void);
 void idt_set_intr_gate_desc(int intr_num,
                             uint32_t offset,
                             uint16_t cs,
diff --git a/cpu/x86/mm/README.md b/cpu/x86/mm/README.md
index 8990beec9..dcd6370b4 100644
--- a/cpu/x86/mm/README.md
+++ b/cpu/x86/mm/README.md
@@ -5,13 +5,15 @@ Introduction
 ------------
 
 The X86 port of Contiki implements a simple, lightweight form of
-protection domains using a pluggable framework. Currently, the
-following plugin is available:
+protection domains using a pluggable framework. Currently, there are
+two plugins available:
 
  - Flat memory model with paging.
+ - Multi-segment memory model with hardware-switched segments based on
+   Task-State Segment (TSS) structures.
 
-For an introduction to paging and possible ways in which it can be
-used, refer to the following resources:
+For an introduction to paging and TSS and possible ways in which they
+can be used, refer to the following resources:
 
  - Intel Combined Manual (Intel 64 and IA-32 Architectures Software
   Developer's Manual), Vol. 3, Chapter 4
@@ -28,7 +30,7 @@ idealized principle is balanced against the practical objectives of
 limiting the number of relatively time-consuming context switches and
 minimizing changes to existing code. In fact, no changes were made to
 code outside of the CPU- and platform-specific code directories for
-the initial plugin.
+the initial plugins.
 
 Each protection domain can optionally be associated with a metadata
 and/or MMIO region. The hardware can support additional regions per
@@ -139,7 +141,11 @@ the one that was interrupted. However, interrupts are only actually
 enabled in the application protection domain.
 
 Similarly, register contents may be accessed and modified across
-protection domain boundaries.
+protection domain boundaries in some protection domain
+implementations. The TSS task switching mechanism automatically saves
+and restores many registers to and from TSS data structures when
+switching tasks, but the paging-based protection domain implementation
+does not perform analogous operations.
 
 For the reasons described above, each protection domain should only
 invoke other protection domains that it trusts to properly handle data
@@ -186,7 +192,9 @@ disabled. Flat segments each map the whole 4GiB physical memory
 space. This is the state of the system when the OS enters boot stage
 0. This stage is responsible for setting up a new GDT and loading the
 segment registers with the appropriate descriptors from the new GDT to
-enable boot stage 1 to run.
+enable boot stage 1 to run. Code in stage 1 of multi-segment
+protection domain implementations requires that the appropriate
+segment-based address translations be configured.
 
 #### Boot Stage 1
 
@@ -258,17 +266,18 @@ Ring level 1 is unused.
 
 ### IO and Interrupt Privileges
 
 The kernel protection domain cooperative scheduling context needs
-access to IO ports, for device initialization. Other protection
-domains may also require such access. The IO Privilege Level (IOPL)
-that is assigned to a protection domain using the relevant bits in the
+access to IO ports for device initialization. Some other protection
+domains also require such access. The IO Privilege Level (IOPL) that
+is assigned to a protection domain using the relevant bits in the
 EFLAGS field could be set according to whether IO port access is
-required in that protection domain. However, this would introduce
-additional complexity and overhead in the critical system call and
-return dispatchers. Instead, the IOPL is always set to block IO
-access from the cooperative scheduling context. Port IO instructions
-in that context will then generate general protection faults, and the
-exception handler decodes and emulates authorized port IO
-instructions.
+required in that protection domain. This is straightforward for the
+TSS plugin, since each TSS holds a separate copy of the EFLAGS field.
+However, this would introduce additional complexity and overhead in
+the critical system call and return dispatchers of the paging plugin.
+Instead, the IOPL is always set to block IO access from the
+cooperative scheduling context. Port IO instructions in that context
+will then generate general protection faults, and the exception
+handler decodes and emulates authorized port IO instructions.
 
 Interrupts are handled at ring level 2, since they do not use any
 privileged instructions. They do cause the interrupt flag to be
@@ -307,11 +316,15 @@ pivoting to the main stack and executing the handler.
 
 ### Protection Domain Control Structures (PDCSes)
 
 Each protection domain is managed by the kernel and privileged
-functions using a PDCS. The PDCS structure is entirely
-software-defined. The initial protection domain plugin does not
-support re-entrant protection domains to simplify the implementation
-of the plugin by enabling domain-specific information (e.g. system
-call return address) to be trivially stored in each PDCS.
+functions using a PDCS. The structure of the PDCS is partially
+hardware-imposed in the case of the TSS plugin, since the PDCS
+contains the Local Descriptor Table (LDT) and the TSS. In the paging
+plugin, the PDCS structure is entirely software-defined. Neither of
+the initial protection domain plugins supports re-entrant protection
+domains, due to hardware-imposed limitations of TSS and to simplify
+the implementation of the paging plugin by enabling domain-specific
+information (e.g. system call return address) to be trivially stored
+in each PDCS.
 
 ### Paging-Based Protection Domains
 
@@ -547,6 +560,293 @@ be possible to improve the robustness of the system by marking that
 data as read-only. Doing so would introduce additional complexity
 into the system.
 
+### Hardware-Switched Segment-Based Protection Domains
+
+Primary implementation sources:
+
+ - cpu/x86/mm/tss-prot-domains.c
+ - cpu/x86/mm/tss-prot-domains-asm.S
+
+#### Introduction
+
+One TSS is allocated for each protection domain. Each one is
+associated with its own dedicated LDT. The memory resources assigned
+to each protection domain are represented as segment descriptors in
+the LDT for the protection domain. Additional shared memory resources
+are represented as segment descriptors in the GDT.
+
+#### System Call and Return Dispatching
+
+The system call dispatcher runs in the context of the server
+protection domain. It is a common piece of code that is shared among
+all protection domains. Thus, each TSS, except the application TSS,
+has its EIP field initialized to the entrypoint for the system call
+dispatcher so that it will be the first code to run when the first
+switch to that task is performed.
+
+The overall process of handling a system call can be illustrated at a
+high level as follows. Some minor steps are omitted from this
+illustration in the interest of clarity and brevity.
+
+```
+ == BEGIN Client protection domain ==========================================
+ -- BEGIN Caller ------------------------------------------------------------
+ 1.  Call system call stub.
+ --
+ 13. Continue execution...
+ -- END Caller --------------------------------------------------------------
+ -- BEGIN System call stub --------------------------------------------------
+ 2.  Already in desired (server) protection domain?
+     - No: Request task switch to server protection domain.
+     - Yes: Jump to system call body.
+ --
+ 12. Return to caller.
+ -- END System call stub ----------------------------------------------------
+ == END Client protection domain ============================================
+ == BEGIN Server protection domain ==========================================
+ -- BEGIN System call dispatcher ---------------------------------------------
+ 3.  Check that the requested system call is allowed. Get entrypoint.
+ 4.  Switch to the main stack.
+ 5.  Pop the client return address off the stack to a callee-saved register.
+ 6.  Push the address of the system call return dispatcher onto the stack.
+ 7.  Jump to system call body.
+ --
+ 10. Restore the client return address to the stack.
+ 11. Request task switch to client protection domain.
+ -- END System call dispatcher ----------------------------------------------
+ -- BEGIN System call body --------------------------------------------------
+ 8.  Execute the work for the requested system call.
+ 9.  Return (to system call return stub, unless invoked from server
+     protection domain, in which case return is to caller).
+ -- END System call body ----------------------------------------------------
+ == END Server protection domain ============================================
+```
+
+An additional exception handler is needed for the "Device Not
+Available" exception. The handler comprises just a CLTS and an IRET
+instruction. The CLTS instruction is privileged, which is why it must
+be run at ring level 0. This exception handler is invoked when a
+floating point instruction is used following a task switch, and its
+sole purpose is to enable the floating point instruction to execute
+after the exception handler returns. See the TSS resources listed
+above for more details regarding interactions between task switching
+and floating point instructions.
+
+Each segment register may represent a different data region within
+each protection domain, although the FS register is used for two
+separate purposes at different times. The segments are defined as
+follows:
+
+ - CS (code segment) maps all non-startup code with execute-only
+   permissions in all protection domains. Limiting the code that is
+   executable within each protection domain to just the code that is
+   actually needed within that protection domain could improve the
+   robustness of the system, but it is challenging to determine all
+   code that may be needed in a given protection domain (e.g. all
+   needed library routines). Furthermore, that code may not all be
+   contiguous, and each segment descriptor can only map a contiguous
+   memory region.
Finally, segment-based memory addressing is + relative to an offset of zero from the beginning of each segment, + introducing additional complexity if such fine-grained memory + management were to be used. + - DS (default data segment) typically maps the main stack and all + non-stack data memory that is accessible from all protection + domains. Limiting the data that is accessible via DS within each + protection domain to just the subset of the data that is actually + needed within that protection domain could improve the robustness + of the system, but it is challenging for similar reasons to those + that apply to CS. Access to the main stack via DS is supported so + that code that copies the stack pointer to a register and attempts + to access stack entries via DS works correctly. Disallowing access + to the main stack via DS could improve the robustness of the + system, but that may require modifying code that expects to be able + to access the stack via DS. + - ES is loaded with the same segment descriptor as DS so that string + operations (e.g. the MOVS instruction) work correctly. + - FS usually maps the kernel-owned data region. That region can only + be written via FS in the kernel protection domain. FS contains a + descriptor specifying a read-only mapping in all other protection + domains except the application protection domain, in which FS is + nullified. Requiring that code specifically request access to the + kernel-owned data region by using the FS segment may improve the + robustness of the system by blocking undesired accesses to the + kernel-owned data region via memory access instructions within the + kernel protection domain that implicitly access DS. The reason for + granting read-only access to the kernel-owned data region from most + protection domains is that the system call dispatcher runs in the + context of the server protection domain to minimize overhead, and + it requires access to the kernel-owned data region. It may improve + the robustness of the system to avoid this by running the system + call dispatcher in a more-privileged ring level (e.g. ring 1) + within the protection domain and just granting access to the + kernel-owned data region from that ring. However, that would + necessitate a ring level transition to ring 3 when dispatching the + system call, which would increase overhead. The application + protection domain does not export any system calls, so it does not + require access to the kernel-owned data region. + - FS is temporarily loaded with a segment descriptor that maps just + an MMIO region used by a driver protection domain when such a + driver needs to perform MMIO accesses. + - GS maps an optional region of readable and writable metadata that + can be associated with a protection domain. In protection domains + that are not associated with metadata, GS is nullified. + - SS usually maps just the main stack. This may improve the + robustness of the system by enabling immediate detection of stack + underflows and overflows rather than allowing such a condition to + result in silent data corruption. Interrupt handlers use a stack + segment that covers the main stack and also includes a region above + the main stack that is specifically for use by interrupt handlers. + In like manner, exception handlers use a stack segment that covers + both of the other stacks and includes an additional region. 
This + is to support the interrupt dispatchers that copy parameters from + the interrupt-specific stack region to the main stack prior to + pivoting to the main stack to execute an interrupt handler body. + +The approximate memory layout of the system is depicted below, +starting with the highest physical addresses and proceeding to lower +physical addresses. The memory ranges that are mapped at various +times by each of the segment registers are also depicted. Read the +descriptions of each segment above for more information about what +memory range may be mapped by each segment register at various times +with various protection domain configurations. Parenthetical notes +indicate the protection domains that can use each mapping. The suffix +[L] indicates that the descriptor is loaded from LDT. Optional +mappings are denoted by a '?' after the protection domain label. The +'other' protection domain label refers to protection domains other +than the application and kernel domains. + +``` + ... + +------------------------------------------+ \ + | Domain X MMIO | +- FS[L] + +------------------------------------------+ / (other?) + ... + +------------------------------------------+ \ + | Domain X DMA-accessible metadata | +- GS[L] (other?) + | (section .dma_bss) | | + +------------------------------------------+ / + +------------------------------------------+ \ + | Domain X metadata (section .meta_bss) | +- GS[L] (other?) + +------------------------------------------+ / + ... + +------------------------------------------+ \ + | Kernel-private data | | + | (sections .prot_dom_bss, .gdt_bss, etc.) | +- FS[L] (kern) + +------------------------------------------+ | + +------------------------------------------+ \ + | System call data (section .syscall_bss) | | + +------------------------------------------+ +- FS[L] (all) + +------------------------------------------+ | + | Kernel-owned data (section .kern_bss) | | + +------------------------------------------+ / + +------------------------------------------+ \ + | Common data | | + | (sections .data, .rodata*, .bss, etc.) | | + +------------------------------------------+ +- DS, ES + +------------------------------------------+ \ | (all) + | Exception stack (section .exc_stack) | | | + |+----------------------------------------+| \ | + || Interrupt stack (section .int_stack) || | | + ||+--------------------------------------+|| \ | + ||| Main stack (section .main_stack) ||| +- SS (all) | + +++--------------------------------------+++ / / + +------------------------------------------+ \ + | Main code (.text) | +- CS (all) + +------------------------------------------+ / + +------------------------------------------+ + | Bootstrap code (section .boot_text) | + +------------------------------------------+ + +------------------------------------------+ + | Multiboot header | + +------------------------------------------+ + ... +``` + +This memory layout is more efficient than the layout that is possible +with paging-based protection domains, since segments have byte +granularity, whereas the minimum unit of control supported by paging +is a 4KiB page. For example, this means that metadata may need to be +padded to be a multiple of the page size. This may also permit +potentially-undesirable accesses to padded areas of code and data +regions that do not entirely fill the pages that they occupy. 
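+
+This granularity difference is visible in the drivers in this change.
+The following excerpt condenses the pattern that the GPIO and I2C
+drivers use to size their MMIO regions, where HIGHEST_REG stands for
+the highest register offset that the driver accesses (e.g. LS_SYNC
+for the GPIO driver):
+
+```
+#if X86_CONF_PROT_DOMAINS == X86_CONF_PROT_DOMAINS__PAGING
+/* Paging can only control access with 4KiB granularity, so the MMIO
+ * region must be padded out to a whole page:
+ */
+#define MMIO_SZ MIN_PAGE_SIZE
+#else
+/* Segments have byte granularity, so the region can end immediately
+ * after the last 32-bit register:
+ */
+#define MMIO_SZ (HIGHEST_REG + 4)
+#endif
+```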
+ +Kernel data structure access, including to the descriptor tables +themselves, is normally restricted to the code running at ring level +0, specifically the exception handlers and the system call and return +dispatchers. It is also accessible from the cooperative scheduling +context in the kernel protection domain. Interrupt delivery is +disabled in the kernel protection domain, so the preemptive scheduling +context is not used. + +SS, DS, and ES all have the same base address, since the compiler may +assume that a flat memory model is in use. Memory accesses that use a +base register of SP/ESP or BP/EBP or that are generated by certain +other instructions (e.g. PUSH, RET, etc.) are directed to SS by +default, whereas other accesses are directed to DS or ES by default. +The compiler may use an instruction that directs an access to DS or ES +even if the data being accessed is on the stack, which is why these +three segments must use the same base address. However, it is +possible to use a lower limit for SS than for DS and ES for the +following reasons. Compilers commonly provide an option for +preventing the frame pointer, EBP, from being omitted and possibly +used to point to non-stack data. In our tests, compilers never used +ESP to point to non-stack data. + +Each task switch ends up saving and restoring more state than is +actually useful to us, but the implementation attempts to minimize +overhead by configuring the register values in each TSS to reduce the +number of register loads that are needed in the system call +dispatcher. Specifically, two callee-saved registers are populated +with base addresses used when computing addresses in the entrypoint +information table as well as a mask corresponding to the ID of the +server protection domain that is used to check whether the requested +system call is exported by the server protection domain. Callee-saved +registers are used, since the task return will update the saved +register values. + +Note that this implies that the intervening code run between the task +call and return can modify critical data used by the system call +dispatcher. However, this is analogous to the considerations +associated with sharing a single stack amongst all protection domains +and should be addressed similarly, by only invoking protection domains +that are trusted by the caller to not modify the saved critical +values. This consideration is specific to the TSS-based dispatcher +and is not shared by the ring 0 dispatcher used in the other +plugins. + +Data in the .rodata sections is marked read/write, even though it may +be possible to improve the robustness of the system by marking that +data as read-only. Doing so would introduce even more complexity into +the system than would be the case with paging-based protection +domains, since it would require allocating different segment +descriptors for the read-only vs. the read/write data. + +#### Supporting Null-Pointer Checks + +A lot of code considers a pointer value of 0 to be invalid. However, +segment offsets always start at 0. To accommodate the common software +behavior, at least the first byte of each segment is marked as +unusable. An exception to this is that the first byte of the stack +segments is usable. + +#### Interrupt and Exception Dispatching + +A distinctive challenge that occurs during interrupt and exception +dispatching is that the state of the segment registers when an +interrupt or exception occurs is somewhat unpredictable. 
For example, +an exception may occur while MMIO is being performed, meaning that FS +is loaded with the MMIO descriptor instead of the kernel descriptor. +Leaving the segment registers configured in that way could cause +incorrect interrupt or exception handler behavior. Thus, the +interrupt or exception dispatcher must save the current segment +configuration, switch to a configuration that is suitable for the +handler body, and then restore the saved segment configuration after +the handler body returns. Another motivation for this is that the +interrupted code may have corrupted the segment register configuration +in an unexpected manner, since segment register load instructions are +unprivileged. Similar segment register updates must be performed for +similar reasons when dispatching system calls. + ### Pointer Validation Primary implementation sources: @@ -563,10 +863,14 @@ an unintended manner. For example, if an incoming pointer referenced the return address, it could potentially redirect execution with the privileges of the callee protection domain. -It is also necessary to check that the pointer is either within the -stack region or the shared data region (or a guard band region, since -that will generate a fault) to prevent redirection of data accesses to -MMIO or metadata regions. +When the paging-based plugin is in use, it is also necessary to check +that the pointer is either within the stack region or the shared data +region (or a guard band region, since that will generate a fault) to +prevent redirection of data accesses to MMIO or metadata regions. The +other plugins already configure segments to restrict accesses to DS to +just those regions. Pointers provided as inputs to system calls as +defined above should never be dereferenced in any segment other than +DS. The pointer is both validated and copied to a new storage location, which must be within the callee's local stack region (excluding the @@ -648,8 +952,11 @@ The following steps are required: Usage ----- -To enable protection domain support, add -"X86_CONF_PROT_DOMAINS=paging" to the command line. +To enable protection domain support, add "X86_CONF_PROT_DOMAINS=" to +the command line and specify one of the following options: + + - paging + - tss The paging option accepts a sub-option to determine whether the TLB is fully- or selectively-invalidated during protection domain switches. diff --git a/cpu/x86/mm/gdt-layout.h b/cpu/x86/mm/gdt-layout.h index 8a5af6cbf..5dddd3a4d 100644 --- a/cpu/x86/mm/gdt-layout.h +++ b/cpu/x86/mm/gdt-layout.h @@ -39,6 +39,8 @@ * outside of gdt.c. 
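 * The per-domain TSS and LDT descriptors are appended after the fixed
 * descriptors; see GDT_IDX_TSS() and GDT_IDX_LDT() below.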
*/ #define GDT_NUM_FIXED_DESC 7 +#elif X86_CONF_PROT_DOMAINS_MULTI_SEG +#define GDT_NUM_FIXED_DESC 11 #else #define GDT_NUM_FIXED_DESC 3 #endif @@ -66,12 +68,34 @@ /** Stack segment for interrupt handlers */ #define GDT_IDX_STK_INT 5 +#if X86_CONF_PROT_DOMAINS == X86_CONF_PROT_DOMAINS__PAGING #define GDT_IDX_CODE_EXC GDT_IDX_CODE_FLAT /** Default data segment used by code at all privilege levels */ #define GDT_IDX_DATA 6 #define GDT_IDX_STK GDT_IDX_DATA #define GDT_IDX_STK_EXC GDT_IDX_DATA_FLAT #else +/** + * Same bounds and permissions as default code segment, but at the exception + * handler privilege level + */ +#define GDT_IDX_CODE_EXC 6 +/** R/W kernel data descriptor used during boot stage 1 */ +#define GDT_IDX_DATA_KERN_EXC 7 +/** Default data segment used by code at all privilege levels */ +#define GDT_IDX_DATA 8 +/** + * Default stack segment, which overlaps with the beginning of the default data + * segment + */ +#define GDT_IDX_STK 9 +/** Stack segment for exception handlers */ +#define GDT_IDX_STK_EXC 10 + +#define GDT_IDX_TSS(dom_id) (GDT_NUM_FIXED_DESC + (2 * (dom_id))) +#define GDT_IDX_LDT(dom_id) (GDT_NUM_FIXED_DESC + (2 * (dom_id)) + 1) +#endif +#else #define GDT_IDX_CODE GDT_IDX_CODE_FLAT #define GDT_IDX_CODE_INT GDT_IDX_CODE_FLAT #define GDT_IDX_CODE_EXC GDT_IDX_CODE_FLAT @@ -96,10 +120,14 @@ #define GDT_SEL_CODE_EXC GDT_SEL(GDT_IDX_CODE_EXC, PRIV_LVL_EXC) #define GDT_SEL_DATA GDT_SEL(GDT_IDX_DATA, PRIV_LVL_EXC) +#define GDT_SEL_DATA_KERN_EXC GDT_SEL(GDT_IDX_DATA_KERN_EXC, PRIV_LVL_EXC) #define GDT_SEL_STK GDT_SEL(GDT_IDX_STK, PRIV_LVL_USER) #define GDT_SEL_STK_INT GDT_SEL(GDT_IDX_STK_INT, PRIV_LVL_INT) #define GDT_SEL_STK_EXC GDT_SEL(GDT_IDX_STK_EXC, PRIV_LVL_EXC) +#define GDT_SEL_TSS(dom_id) GDT_SEL(GDT_IDX_TSS(dom_id), PRIV_LVL_USER) +#define GDT_SEL_LDT(dom_id) GDT_SEL(GDT_IDX_LDT(dom_id), PRIV_LVL_USER) + #endif /* CPU_X86_MM_GDT_LAYOUT_H_ */ diff --git a/cpu/x86/mm/ldt-layout.h b/cpu/x86/mm/ldt-layout.h new file mode 100644 index 000000000..7c61054a5 --- /dev/null +++ b/cpu/x86/mm/ldt-layout.h @@ -0,0 +1,59 @@ +/* + * Copyright (C) 2015, Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef CPU_X86_MM_LDT_LAYOUT_H_ +#define CPU_X86_MM_LDT_LAYOUT_H_ + +#include "gdt-layout.h" + +/* Each LDT can contain up to this many descriptors, but some protection + * domains may not use all of the slots. + */ +#define LDT_NUM_DESC 3 + +/** + * Provides access to kernel data. Most protection domains are granted at most + * read-only access, but the kernel protection domain is granted read/write + * access. + */ +#define LDT_IDX_KERN 0 +/** Maps a device MMIO range */ +#define LDT_IDX_MMIO 1 +/** Maps domain-defined metadata */ +#define LDT_IDX_META 2 + +#define LDT_SEL(idx, rpl) (GDT_SEL(idx, rpl) | (1 << 2)) + +#define LDT_SEL_KERN LDT_SEL(LDT_IDX_KERN, PRIV_LVL_USER) +#define LDT_SEL_MMIO LDT_SEL(LDT_IDX_MMIO, PRIV_LVL_USER) +#define LDT_SEL_META LDT_SEL(LDT_IDX_META, PRIV_LVL_USER) +#define LDT_SEL_STK LDT_SEL(LDT_IDX_STK, PRIV_LVL_USER) + +#endif /* CPU_X86_MM_LDT_LAYOUT_H_ */ diff --git a/cpu/x86/mm/multi-segment.c b/cpu/x86/mm/multi-segment.c new file mode 100644 index 000000000..f60a2c8bb --- /dev/null +++ b/cpu/x86/mm/multi-segment.c @@ -0,0 +1,239 @@ +/* + * Copyright (C) 2015, Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "gdt.h" +#include "helpers.h" +#include "prot-domains.h" +#include "segmentation.h" +#include "stacks.h" + +/*---------------------------------------------------------------------------*/ +static uint32_t +segment_desc_compute_base(segment_desc_t desc) +{ + return (desc.base_hi << 24) | (desc.base_mid << 16) | desc.base_lo; +} +/*---------------------------------------------------------------------------*/ +void +prot_domains_reg_multi_seg(volatile struct dom_kern_data ATTR_KERN_ADDR_SPACE *dkd, + uintptr_t mmio, size_t mmio_sz, + uintptr_t meta, size_t meta_sz) +{ + segment_desc_t desc; + dom_id_t dom_id = PROT_DOMAINS_GET_DOM_ID(dkd); + uint32_t kern_data_len; + uint32_t tmp; + + if((dkd < prot_domains_kern_data) || + (prot_domains_kern_data_end <= dkd) || + (((((uintptr_t)dkd) - (uintptr_t)prot_domains_kern_data) % + sizeof(dom_kern_data_t)) != 0)) { + halt(); + } + + KERN_READL(tmp, dkd->ldt[DT_SEL_GET_IDX(LDT_SEL_KERN)].raw_hi); + if(tmp != 0) { + /* This PDCS was previously initialized, which is disallowed. */ + halt(); + } + + /* Initialize descriptors */ + + if(dom_id == DOM_ID_kern) { + kern_data_len = (uint32_t)&_ebss_kern_addr; + } else { + /* Non-kernel protection domains do not need to access the protection + * domain control structures, and they may contain saved register values + * that are private to each domain. + */ + kern_data_len = (uint32_t)&_ebss_syscall_addr; + } + kern_data_len -= (uint32_t)&_sbss_kern_addr; + + segment_desc_init(&desc, (uint32_t)&_sbss_kern_addr, kern_data_len, + /* Every protection domain requires at least read-only access to kernel + data to read dom_client_data structures and to support the system call + dispatcher, if applicable. Only the kernel protection domain is granted + read/write access to the kernel data. */ + ((dom_id == DOM_ID_kern) ? 
+ SEG_TYPE_DATA_RDWR : + SEG_TYPE_DATA_RDONLY) | + SEG_FLAG(DPL, PRIV_LVL_USER) | + SEG_GRAN_BYTE | SEG_DESCTYPE_NSYS); + + KERN_WRITEL(dkd->ldt[LDT_IDX_KERN].raw_lo, desc.raw_lo); + KERN_WRITEL(dkd->ldt[LDT_IDX_KERN].raw_hi, desc.raw_hi); + + if(mmio_sz != 0) { + if(SEG_MAX_BYTE_GRAN_LEN < mmio_sz) { + halt(); + } + + segment_desc_init(&desc, mmio, mmio_sz, + SEG_FLAG(DPL, PRIV_LVL_USER) | SEG_GRAN_BYTE | + SEG_DESCTYPE_NSYS | SEG_TYPE_DATA_RDWR); + } else { + desc.raw = SEG_DESC_NOT_PRESENT; + } + + KERN_WRITEL(dkd->ldt[LDT_IDX_MMIO].raw_lo, desc.raw_lo); + KERN_WRITEL(dkd->ldt[LDT_IDX_MMIO].raw_hi, desc.raw_hi); + + if(meta_sz != 0) { + if(SEG_MAX_BYTE_GRAN_LEN < meta_sz) { + halt(); + } + + segment_desc_init(&desc, meta, meta_sz, + SEG_FLAG(DPL, PRIV_LVL_USER) | SEG_GRAN_BYTE | + SEG_DESCTYPE_NSYS | SEG_TYPE_DATA_RDWR); + } else { + desc.raw = SEG_DESC_NOT_PRESENT; + } + + KERN_WRITEL(dkd->ldt[LDT_IDX_META].raw_lo, desc.raw_lo); + KERN_WRITEL(dkd->ldt[LDT_IDX_META].raw_hi, desc.raw_hi); + + segment_desc_init(&desc, + KERN_DATA_OFF_TO_PHYS_ADDR(dkd->ldt), + sizeof(dkd->ldt), + SEG_FLAG(DPL, PRIV_LVL_USER) | SEG_GRAN_BYTE | + SEG_DESCTYPE_SYS | SEG_TYPE_LDT); + gdt_insert(GDT_IDX_LDT(dom_id), desc); +} +/*---------------------------------------------------------------------------*/ +void +prot_domains_gdt_init() +{ + int i; + segment_desc_t desc; + + segment_desc_init(&desc, + (uint32_t)&_stext_addr, + ((uint32_t)&_etext_addr) - (uint32_t)&_stext_addr, + SEG_FLAG(DPL, PRIV_LVL_EXC) | SEG_GRAN_BYTE | + SEG_DESCTYPE_NSYS | SEG_TYPE_CODE_EX); + gdt_insert_boot(GDT_IDX_CODE_EXC, desc); + + segment_desc_init(&desc, + (uint32_t)&_sdata_addr, + ((uint32_t)&_edata_addr) - (uint32_t)&_sdata_addr, + SEG_FLAG(DPL, PRIV_LVL_USER) | SEG_GRAN_BYTE | + SEG_DESCTYPE_NSYS | SEG_TYPE_DATA_RDWR); + gdt_insert_boot(GDT_IDX_DATA, desc); + + segment_desc_init(&desc, + (uint32_t)&_sbss_kern_addr, + ((uint32_t)&_ebss_kern_addr) - + (uint32_t)&_sbss_kern_addr, + SEG_FLAG(DPL, PRIV_LVL_EXC) | SEG_GRAN_BYTE | + SEG_DESCTYPE_NSYS | SEG_TYPE_DATA_RDWR); + gdt_insert_boot(GDT_IDX_DATA_KERN_EXC, desc); + + segment_desc_init(&desc, + (uint32_t)DATA_OFF_TO_PHYS_ADDR(stacks_main), + STACKS_SIZE_MAIN, + SEG_FLAG(DPL, PRIV_LVL_USER) | SEG_GRAN_BYTE | + SEG_DESCTYPE_NSYS | SEG_TYPE_DATA_RDWR); + gdt_insert_boot(GDT_IDX_STK, desc); + + segment_desc_set_limit(&desc, STACKS_SIZE_MAIN + STACKS_SIZE_INT); + SEG_SET_FLAG(desc, DPL, PRIV_LVL_INT); + gdt_insert_boot(GDT_IDX_STK_INT, desc); + + segment_desc_set_limit(&desc, + STACKS_SIZE_MAIN + + STACKS_SIZE_INT + + STACKS_SIZE_EXC); + SEG_SET_FLAG(desc, DPL, PRIV_LVL_EXC); + gdt_insert_boot(GDT_IDX_STK_EXC, desc); + + /* Not all domains will necessarily be initialized, so this initially marks + * all per-domain descriptors not-present. + */ + desc.raw = SEG_DESC_NOT_PRESENT; + for(i = 0; i < PROT_DOMAINS_ACTUAL_CNT; i++) { + gdt_insert_boot(GDT_IDX_TSS(i), desc); + gdt_insert_boot(GDT_IDX_LDT(i), desc); + } + + __asm__ __volatile__ ( + "mov %[_default_data_], %%ds\n\t" + "mov %[_default_data_], %%es\n\t" + "mov %[_kern_data_], %%" SEG_KERN "s\n\t" + : + : [_default_data_] "r"(GDT_SEL_DATA), + [_kern_data_] "r"(GDT_SEL_DATA_KERN_EXC)); +} +/*---------------------------------------------------------------------------*/ +void +multi_segment_launch_kernel(void) +{ + /* Update segment registers. 
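+ * DS and ES are reloaded with the default data selector, the kernel data
+ * segment register is switched from its boot-time GDT selector to the kernel
+ * protection domain's LDT-based selector (LDT_SEL_KERN), and the metadata
+ * segment register is loaded with the default data selector.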
*/ + __asm__ __volatile__ ( + "mov %[_data_seg_], %%ds\n\t" + "mov %[_data_seg_], %%es\n\t" + "mov %[_kern_seg_], %%" SEG_KERN "s\n\t" + "mov %[_data_seg_], %%" SEG_META "s\n\t" + : + : [_data_seg_] "r" (GDT_SEL_DATA), + [_kern_seg_] "r" (LDT_SEL_KERN) + ); +} +/*---------------------------------------------------------------------------*/ +void +prot_domains_enable_mmio(void) +{ + __asm__ __volatile__ ("mov %0, %%" SEG_MMIO "s" :: "r" (LDT_SEL_MMIO)); +} +/*---------------------------------------------------------------------------*/ +void +prot_domains_disable_mmio(void) +{ + __asm__ __volatile__ ("mov %0, %%" SEG_KERN "s" :: "r" (LDT_SEL_KERN)); +} +/*---------------------------------------------------------------------------*/ +uintptr_t +prot_domains_lookup_meta_phys_base(dom_client_data_t ATTR_KERN_ADDR_SPACE *drv) +{ + dom_id_t dom_id; + segment_desc_t desc; + volatile dom_kern_data_t ATTR_KERN_ADDR_SPACE *dkd; + + KERN_READL(dom_id, drv->dom_id); + + dkd = prot_domains_kern_data + dom_id; + + KERN_READL(desc.raw_lo, dkd->ldt[DT_SEL_GET_IDX(LDT_SEL_META)].raw_lo); + KERN_READL(desc.raw_hi, dkd->ldt[DT_SEL_GET_IDX(LDT_SEL_META)].raw_hi); + + return segment_desc_compute_base(desc); +} +/*---------------------------------------------------------------------------*/ diff --git a/cpu/x86/mm/multi-segment.h b/cpu/x86/mm/multi-segment.h new file mode 100644 index 000000000..baa28002b --- /dev/null +++ b/cpu/x86/mm/multi-segment.h @@ -0,0 +1,195 @@ +/* + * Copyright (C) 2015, Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef CPU_X86_MM_MULTI_SEGMENT_H_ +#define CPU_X86_MM_MULTI_SEGMENT_H_ + +#include <stdint.h> +#include <stddef.h> +#include "helpers.h" +#include "ldt-layout.h" + +#ifdef __clang__ +#define __SEG_FS +#define __seg_fs __attribute__((address_space(257))) +#define __SEG_GS +#define __seg_gs __attribute__((address_space(256))) +#endif + +#ifdef __SEG_FS +#define ATTR_MMIO_ADDR_SPACE __seg_fs +#define ATTR_KERN_ADDR_SPACE __seg_fs +#else +#define ATTR_KERN_ADDR_SPACE +#endif +#ifdef __SEG_GS +#define ATTR_META_ADDR_SPACE __seg_gs +#endif + +void prot_domains_reg_multi_seg(volatile struct dom_kern_data ATTR_KERN_ADDR_SPACE *dkd, + uintptr_t mmio, size_t mmio_sz, + uintptr_t meta, size_t meta_sz); +void multi_segment_launch_kernel(void); + +#define MULTI_SEGMENT_ENTER_ISR(exc) \ + "mov $" EXP_STRINGIFY(GDT_SEL_DATA) ", %%eax\n\t" \ + /* Refresh DS and ES in case the userspace code corrupted them. */ \ + "mov %%eax, %%ds\n\t" \ + "mov %%eax, %%es\n\t" \ + /* Refresh SEG_KERN. */ \ + "mov $" EXP_STRINGIFY(LDT_SEL_KERN) ", %%eax\n\t" \ + "mov %%eax, %%" SEG_KERN "s\n\t" \ + ".if " #exc "\n\t" \ + /* It is possible that a routine performing MMIO is being interrupted. */ \ + /* Thus, it is necessary to save and restore the MMIO segment register */ \ + /* (in a callee-saved register). */ \ + "mov %%" SEG_MMIO "s, %%ebp\n\t" \ + "mov $" EXP_STRINGIFY(GDT_SEL_DATA_KERN_EXC) ", %%eax\n\t" \ + "mov %%eax, %%" SEG_KERN "s\n\t" \ + ".endif\n\t" +#define MULTI_SEGMENT_LEAVE_ISR(exc) \ + ".if " #exc "\n\t" \ + "mov %%ebp, %%" SEG_MMIO "s\n\t" \ + ".endif\n\t" + +/** + * The MMIO region is tightly bounded within a segment, so its base offset is + * always 0. + */ +#define PROT_DOMAINS_MMIO(dcd) 0 +/** + * The metadata region is tightly bounded within a segment, so its base offset + * is always 0. + */ +#define PROT_DOMAINS_META(dcd) 0 + +#define SEG_MMIO "f" /**< For MMIO accesses, when enabled.
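It is loaded with LDT_SEL_MMIO by prot_domains_enable_mmio and restored to LDT_SEL_KERN by prot_domains_disable_mmio.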
*/ +#define SEG_KERN "f" /**< For kernel data accesses */ +#define SEG_META "g" /**< For metadata accesses */ + +#define _SEG_READL(seg, dst, src) \ + __asm__ __volatile__ ( \ + "movl %%" seg "s:%[src_], %[dst_]" : [dst_]"=r"(dst) : [src_]"m"(src)) + +#define _SEG_READW(seg, dst, src) \ + __asm__ __volatile__ ( \ + "movw %%" seg "s:%[src_], %[dst_]" : [dst_]"=r"(dst) : [src_]"m"(src)) + +#define _SEG_READB(seg, dst, src) \ + __asm__ __volatile__ ( \ + "movb %%" seg "s:%[src_], %[dst_]" : [dst_]"=q"(dst) : [src_]"m"(src)) + +#define _SEG_WRITEL(seg, dst, src) \ + __asm__ __volatile__ ( \ + "movl %[src_], %%" seg "s:%[dst_]" \ + : [dst_]"=m"(dst) : [src_]"r"((uint32_t)(src))) + +#define _SEG_WRITEW(seg, dst, src) \ + __asm__ __volatile__ ( \ + "movw %[src_], %%" seg "s:%[dst_]" \ + : [dst_]"=m"(dst) : [src_]"r"((uint16_t)(src))) + +#define _SEG_WRITEB(seg, dst, src) \ + __asm__ __volatile__ ( \ + "movb %[src_], %%" seg "s:%[dst_]" \ + : [dst_]"=m"(dst) : [src_]"q"((uint8_t)(src))) + +#ifndef __SEG_FS +#define MMIO_READL(dst, src) _SEG_READL(SEG_MMIO, dst, src) +#define MMIO_READW(dst, src) _SEG_READW(SEG_MMIO, dst, src) +#define MMIO_READB(dst, src) _SEG_READB(SEG_MMIO, dst, src) +#define MMIO_WRITEL(dst, src) _SEG_WRITEL(SEG_MMIO, dst, src) +#define MMIO_WRITEW(dst, src) _SEG_WRITEW(SEG_MMIO, dst, src) +#define MMIO_WRITEB(dst, src) _SEG_WRITEB(SEG_MMIO, dst, src) + +#define KERN_READL(dst, src) _SEG_READL(SEG_KERN, dst, src) +#define KERN_READW(dst, src) _SEG_READW(SEG_KERN, dst, src) +#define KERN_READB(dst, src) _SEG_READB(SEG_KERN, dst, src) +#define KERN_WRITEL(dst, src) _SEG_WRITEL(SEG_KERN, dst, src) +#define KERN_WRITEW(dst, src) _SEG_WRITEW(SEG_KERN, dst, src) +#define KERN_WRITEB(dst, src) _SEG_WRITEB(SEG_KERN, dst, src) +#endif + +#ifndef __SEG_GS +#define META_READL(dst, src) _SEG_READL(SEG_META, dst, src) +#define META_READW(dst, src) _SEG_READW(SEG_META, dst, src) +#define META_READB(dst, src) _SEG_READB(SEG_META, dst, src) +#define META_WRITEL(dst, src) _SEG_WRITEL(SEG_META, dst, src) +#define META_WRITEW(dst, src) _SEG_WRITEW(SEG_META, dst, src) +#define META_WRITEB(dst, src) _SEG_WRITEB(SEG_META, dst, src) +#endif + +#define MEMCPY_FROM_META(dst, src, sz) \ + { \ + uintptr_t __dst = (uintptr_t)(dst); \ + uintptr_t __src = (uintptr_t)(src); \ + size_t __sz = (size_t)(sz); \ + __asm__ __volatile__ ( \ + "rep movsb %%" SEG_META "s:(%%esi), %%es:(%%edi)\n\t" \ + : "+D"(__dst), "+S"(__src), "+c"(__sz)); \ + } + +#define MEMCPY_TO_META(dst, src, sz) \ + { \ + uintptr_t __dst = (uintptr_t)(dst); \ + uintptr_t __src = (uintptr_t)(src); \ + size_t __sz = (size_t)(sz); \ + __asm__ __volatile__ ( \ + "push %%es\n\t" \ + "push %%" SEG_META "s\n\t" \ + "pop %%es\n\t" \ + "rep movsb\n\t" \ + "pop %%es\n\t" \ + : "+D"(__dst), "+S"(__src), "+c"(__sz)); \ + } + +/** Compute physical address from offset into kernel data space */ +#define KERN_DATA_OFF_TO_PHYS_ADDR(x) \ + (((uintptr_t)&_sbss_kern_addr) + (uintptr_t)(x)) +/** Compute physical address from offset into default data space */ +#define DATA_OFF_TO_PHYS_ADDR(x) \ + (((uintptr_t)&_sdata_addr) + (uintptr_t)(x)) +/** Compute kernel data offset from physical address in kernel data space */ +#define PHYS_ADDR_TO_KERN_DATA_OFF(x) \ + (((uintptr_t)(x)) - (uintptr_t)&_sbss_kern_addr) + +/** + * In multi-segment protection domain implementations, it is sufficient to just + * compare incoming pointers against the frame pointer. 
All incoming pointers + * are dereferenced in the main data segment, which only maps the stacks and + * the shared data section. Since the shared data section is at a higher + * address range than the stacks, the frame pointer check is sufficient. + */ +#define PROT_DOMAINS_CHECK_INCOMING_PTR PROT_DOMAINS_CHECK_INCOMING_PTR_EBP + +void prot_domains_enable_mmio(void); +void prot_domains_disable_mmio(void); + +#endif /* CPU_X86_MM_MULTI_SEGMENT_H_ */ diff --git a/cpu/x86/mm/prot-domains.c b/cpu/x86/mm/prot-domains.c index 593da98e2..8bbeb4d83 100644 --- a/cpu/x86/mm/prot-domains.c +++ b/cpu/x86/mm/prot-domains.c @@ -39,10 +39,12 @@ #include "stacks.h" static dom_kern_data_t __attribute__((section(".kern_prot_dom_bss"))) - PROT_DOMAINS_PDCS_NM(kern_dcd); + ATTR_KERN_ADDR_SPACE PROT_DOMAINS_PDCS_NM(kern_dcd); +PROT_DOMAINS_ALLOC_IMPL(kern_dcd); static dom_client_data_t ATTR_BSS_KERN kern_dcd; static dom_kern_data_t __attribute__((section(".app_prot_dom_bss"))) - PROT_DOMAINS_PDCS_NM(app_dcd); + ATTR_KERN_ADDR_SPACE PROT_DOMAINS_PDCS_NM(app_dcd); +PROT_DOMAINS_ALLOC_IMPL(app_dcd); static dom_client_data_t ATTR_BSS_KERN app_dcd; /*---------------------------------------------------------------------------*/ diff --git a/cpu/x86/mm/prot-domains.h b/cpu/x86/mm/prot-domains.h index f7dc84e3c..a1fbca130 100644 --- a/cpu/x86/mm/prot-domains.h +++ b/cpu/x86/mm/prot-domains.h @@ -40,6 +40,10 @@ #define X86_CONF_PROT_DOMAINS__NONE 0 #define X86_CONF_PROT_DOMAINS__PAGING 1 +#define X86_CONF_PROT_DOMAINS__TSS 2 + +#define X86_CONF_PROT_DOMAINS_MULTI_SEG \ + (X86_CONF_PROT_DOMAINS == X86_CONF_PROT_DOMAINS__TSS) /** Privilege level (ring) for exception handlers and other supervisory code */ #define PRIV_LVL_EXC 0 @@ -68,6 +72,49 @@ typedef uint32_t dom_id_t; #if X86_CONF_PROT_DOMAINS == X86_CONF_PROT_DOMAINS__PAGING #include "paging-prot-domains.h" +#elif X86_CONF_PROT_DOMAINS == X86_CONF_PROT_DOMAINS__TSS +#include "tss-prot-domains.h" +#endif + +#ifndef ATTR_META_ADDR_SPACE +#define ATTR_META_ADDR_SPACE +#endif +#ifndef ATTR_MMIO_ADDR_SPACE +#define ATTR_MMIO_ADDR_SPACE +#endif +#ifndef ATTR_KERN_ADDR_SPACE +#define ATTR_KERN_ADDR_SPACE +#endif + +#ifndef MMIO_READL +#define MMIO_READL(dst, src) dst = (src) +#define MMIO_READW(dst, src) dst = (src) +#define MMIO_READB(dst, src) dst = (src) +#define MMIO_WRITEL(dst, src) MMIO_READL(dst, src) +#define MMIO_WRITEW(dst, src) MMIO_READW(dst, src) +#define MMIO_WRITEB(dst, src) MMIO_READB(dst, src) +#endif +#ifndef KERN_READL +#define KERN_READL(dst, src) dst = (src) +#define KERN_READW(dst, src) dst = (src) +#define KERN_READB(dst, src) dst = (src) +#define KERN_WRITEL(dst, src) KERN_READL(dst, src) +#define KERN_WRITEW(dst, src) KERN_READW(dst, src) +#define KERN_WRITEB(dst, src) KERN_READB(dst, src) +#endif +#ifndef META_READL +#define META_READL(dst, src) dst = (src) +#define META_READW(dst, src) dst = (src) +#define META_READB(dst, src) dst = (src) +#define META_WRITEL(dst, src) META_READL(dst, src) +#define META_WRITEW(dst, src) META_READW(dst, src) +#define META_WRITEB(dst, src) META_READB(dst, src) +#endif + +#ifndef MEMCPY_FROM_META +#define MEMCPY_FROM_META(dst, src, sz) \ + memcpy((void *)(dst), (const void *)(src), (sz)) +#define MEMCPY_TO_META(dst, src, sz) MEMCPY_FROM_META(dst, src, sz) #endif /* The following symbols are defined in the linker script */ @@ -77,9 +124,9 @@ extern uint32_t _stext_addr, _etext_addr; #if X86_CONF_PROT_DOMAINS != X86_CONF_PROT_DOMAINS__NONE /** Metadata that should not be DMA-accessible */ -#define ATTR_BSS_META
__attribute__((section(".meta_bss"))) +#define ATTR_BSS_META __attribute__((section(".meta_bss"))) ATTR_META_ADDR_SPACE /** Kernel-owned data */ -#define ATTR_BSS_KERN __attribute__((section(".kern_bss"))) +#define ATTR_BSS_KERN __attribute__((section(".kern_bss"))) ATTR_KERN_ADDR_SPACE /** Code that should only be executable during bootup */ #define ATTR_CODE_BOOT __attribute__((section(".boot_text"))) @@ -97,6 +144,10 @@ extern uint32_t _ebss_syscall_addr; /** Bounds for other data sections */ extern uint32_t _sdata_addr, _edata_addr; +#ifndef SEG_KERN +#define SEG_KERN "d" +#endif + /** * If set, this protection domain is already in the call stack and is not * available for nested invocations. @@ -114,8 +165,8 @@ extern uint32_t _sdata_addr, _edata_addr; */ typedef struct dom_kern_data dom_kern_data_t; -extern volatile dom_kern_data_t prot_domains_kern_data[]; -extern volatile dom_kern_data_t prot_domains_kern_data_end[]; +extern volatile dom_kern_data_t ATTR_KERN_ADDR_SPACE prot_domains_kern_data[]; +extern volatile dom_kern_data_t ATTR_KERN_ADDR_SPACE prot_domains_kern_data_end[]; #define PROT_DOMAINS_ACTUAL_CNT \ (prot_domains_kern_data_end - prot_domains_kern_data) @@ -125,6 +176,7 @@ extern volatile dom_kern_data_t prot_domains_kern_data_end[]; void prot_domains_syscall_dispatcher(void); +#if X86_CONF_PROT_DOMAINS != X86_CONF_PROT_DOMAINS__TSS /** * Data associated with each protection domain that is owned by clients of that * domain and used to identify the domain. @@ -132,15 +184,21 @@ void prot_domains_syscall_dispatcher(void); struct dom_client_data { dom_id_t dom_id; } __attribute__((packed)); +#endif + +#ifndef PROT_DOMAINS_ALLOC_IMPL +#define PROT_DOMAINS_ALLOC_IMPL(nm) +#endif /** Allocate the client-owned protection domain data structure. */ #define PROT_DOMAINS_PDCS_NM(nm) _pdcs_##nm #define PROT_DOMAINS_ALLOC(typ, nm) \ static dom_kern_data_t __attribute__((section(".prot_dom_bss"))) \ - PROT_DOMAINS_PDCS_NM(nm); \ + ATTR_KERN_ADDR_SPACE PROT_DOMAINS_PDCS_NM(nm); \ + PROT_DOMAINS_ALLOC_IMPL(nm); \ static typ ATTR_BSS_KERN nm #define PROT_DOMAINS_INIT_ID(nm) \ - (nm).dom_id = PROT_DOMAINS_GET_DOM_ID(&PROT_DOMAINS_PDCS_NM(nm)) + KERN_WRITEL((nm).dom_id, PROT_DOMAINS_GET_DOM_ID(&PROT_DOMAINS_PDCS_NM(nm))) /** * Perform early initialization during boot stage 0 to prepare for boot stage 1 @@ -169,8 +227,12 @@ void prot_domains_launch_kernel(void); */ #define PROT_DOMAINS_INIT_RET_ADDR_CNT 2 +#if X86_CONF_PROT_DOMAINS == X86_CONF_PROT_DOMAINS__TSS +void prot_domains_launch_app(void); +#else void app_main(void); #define prot_domains_launch_app app_main +#endif #else @@ -229,7 +291,7 @@ typedef struct dom_client_data dom_client_data_t; * \param meta_sz Size of metadata * \param pio Set to true if protection domain requires port IO access */ -void prot_domains_reg(dom_client_data_t *dcd, +void prot_domains_reg(dom_client_data_t ATTR_KERN_ADDR_SPACE *dcd, uintptr_t mmio, size_t mmio_sz, uintptr_t meta, @@ -237,11 +299,41 @@ bool pio); #endif +#if X86_CONF_PROT_DOMAINS == X86_CONF_PROT_DOMAINS__NONE +#define prot_domains_copy_dcd(dst, src) *(dst) = *(src) +#else +/** + * It is necessary to make a local copy of a dom_client_data structure when a + * multi-segment protection domain implementation is in use, segment attributes + * are not supported by the compiler, and a dom_client_data structure needs to + * be passed by value into some function.
Otherwise, the compiler will not know + * to access the non-default segment in which *src is stored and will attempt + * to copy it out of the default data segment. + */ +static inline void +prot_domains_copy_dcd(struct dom_client_data *dst, + struct dom_client_data ATTR_KERN_ADDR_SPACE *src) +{ + KERN_READL(dst->dom_id, src->dom_id); +#if X86_CONF_PROT_DOMAINS == X86_CONF_PROT_DOMAINS__TSS + KERN_READL(dst->tss_sel, src->tss_sel); +#endif +} +#endif + +#if !X86_CONF_PROT_DOMAINS_MULTI_SEG +#define prot_domains_enable_mmio() +#define prot_domains_disable_mmio() + +#define KERN_DATA_OFF_TO_PHYS_ADDR(x) ((uintptr_t)(x)) +#define DATA_OFF_TO_PHYS_ADDR(x) ((uintptr_t)(x)) +#endif + #if X86_CONF_PROT_DOMAINS == X86_CONF_PROT_DOMAINS__NONE #define prot_domains_lookup_meta_phys_base(drv) 0 #else /** Lookup base physical address of metadata region for specified domain */ -uintptr_t prot_domains_lookup_meta_phys_base(dom_client_data_t *drv); +uintptr_t prot_domains_lookup_meta_phys_base(dom_client_data_t ATTR_KERN_ADDR_SPACE *drv); #endif #if X86_CONF_PROT_DOMAINS != X86_CONF_PROT_DOMAINS__PAGING @@ -270,6 +362,11 @@ uintptr_t prot_domains_lookup_meta_phys_base(dom_client_data_t *drv); ".endif\n\t" #endif +#if X86_CONF_PROT_DOMAINS_MULTI_SEG +/* include GDT section definitions used when allocating protection domains: */ +#include "gdt.h" +#endif + #endif /* !__ASSEMBLER__ */ #endif /* CPU_X86_MM_PROT_DOMAINS_H_ */ diff --git a/cpu/x86/mm/segmentation.h b/cpu/x86/mm/segmentation.h index 57b1b8aea..71cd6beb6 100644 --- a/cpu/x86/mm/segmentation.h +++ b/cpu/x86/mm/segmentation.h @@ -59,8 +59,11 @@ #define SEG_WIDTH_GRAN 1 #define SEG_SHAMT_GRAN 15 +#define SEG_TYPE_DATA_RDONLY SEG_FLAG(TYPE, 0x00) /* Read only */ #define SEG_TYPE_DATA_RDWR SEG_FLAG(TYPE, 0x02) /* Read/Write */ #define SEG_TYPE_CODE_EXRD SEG_FLAG(TYPE, 0x0A) /* Execute/Read */ +#define SEG_TYPE_CODE_EX SEG_FLAG(TYPE, 0x08) /* Execute only */ +#define SEG_TYPE_LDT SEG_FLAG(TYPE, 0x02) #define SEG_TYPE_TSS32_AVAIL SEG_FLAG(TYPE, 0x09) #define SEG_DESCTYPE_SYS SEG_FLAG(DESCTYPE, 0) @@ -73,6 +76,12 @@ #define SEG_GRAN_BYTE SEG_FLAG(GRAN, 0) #define SEG_GRAN_PAGE SEG_FLAG(GRAN, 1) +/** + * Maximum length of segment that can be regulated with a byte-granularity + * segment limit. + */ +#define SEG_MAX_BYTE_GRAN_LEN (1 << 20) + /** * Segment descriptor. See Intel Combined Manual, * Vol. 3, Section 3.4.5 for more details. @@ -91,7 +100,13 @@ typedef union segment_desc { uint64_t raw; } segment_desc_t; -static inline void +#define SEG_DESC_NOT_PRESENT 0 + +/* The next two functions are invoked by boot code, so they must always be + * inlined to avoid being placed in a different address space than the initial, + * flat address space. + */ +static inline void __attribute__((always_inline)) segment_desc_set_limit(segment_desc_t *c_this, uint32_t len) { uint32_t limit = len - 1; @@ -108,7 +123,7 @@ segment_desc_set_limit(segment_desc_t *c_this, uint32_t len) * \param flags Flags to be added to the default flags: present, default * operand size of 32 bits, and high limit bits.
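* * For example, multi-segment.c prepares a byte-granular, user-level, * read/write data segment descriptor with: * segment_desc_init(&desc, base, len, * SEG_FLAG(DPL, PRIV_LVL_USER) | SEG_GRAN_BYTE | * SEG_DESCTYPE_NSYS | SEG_TYPE_DATA_RDWR);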
*/ -static inline void +static inline void __attribute__((always_inline)) segment_desc_init(segment_desc_t *c_this, uint32_t base, uint32_t len, uint16_t flags) { diff --git a/cpu/x86/mm/stacks.h b/cpu/x86/mm/stacks.h index a1005d8e0..96be72cf9 100644 --- a/cpu/x86/mm/stacks.h +++ b/cpu/x86/mm/stacks.h @@ -61,6 +61,17 @@ #else #define STACKS_SIZE_EXC 256 #endif +#elif X86_CONF_PROT_DOMAINS == X86_CONF_PROT_DOMAINS__TSS +/** + * This should be large enough to execute the exception handler with the + * largest stack requirement: double_fault_handler: + * - 1 word for the return address from calling double_fault_handler + * - 1 word for the saved frame pointer in double_fault_handler + * - 2 words that GCC has been observed to skip on the stack to align it + * to a preferred boundary + * - 1 word for the return address for calling halt + */ +#define STACKS_SIZE_EXC (STACKS_SIZE_INT + (6 * 4)) #else #define STACKS_SIZE_EXC STACKS_SIZE_INT #endif diff --git a/cpu/x86/mm/syscalls.h b/cpu/x86/mm/syscalls.h index 83be7a47e..cae8ff2f5 100644 --- a/cpu/x86/mm/syscalls.h +++ b/cpu/x86/mm/syscalls.h @@ -33,6 +33,7 @@ #include "helpers.h" #include "prot-domains.h" +#include <stdbool.h> typedef uint32_t dom_id_bitmap_t; @@ -40,8 +41,8 @@ typedef struct syscalls_entrypoint { uintptr_t entrypoint; dom_id_bitmap_t doms; } syscalls_entrypoint_t; -extern syscalls_entrypoint_t syscalls_entrypoints[]; -extern syscalls_entrypoint_t syscalls_entrypoints_end[]; +extern syscalls_entrypoint_t ATTR_KERN_ADDR_SPACE syscalls_entrypoints[]; +extern syscalls_entrypoint_t ATTR_KERN_ADDR_SPACE syscalls_entrypoints_end[]; #define SYSCALLS_ACTUAL_CNT (syscalls_entrypoints_end - syscalls_entrypoints) @@ -49,11 +50,11 @@ extern syscalls_entrypoint_t syscalls_entrypoints_end[]; #define SYSCALLS_ALLOC_ENTRYPOINT(nm) \ syscalls_entrypoint_t __attribute__((section(".syscall_bss"))) \ - _syscall_ent_##nm + ATTR_KERN_ADDR_SPACE _syscall_ent_##nm #define SYSCALLS_INIT(nm) \ - _syscall_ent_##nm.entrypoint = (uintptr_t)_syscall_##nm; \ - _syscall_ent_##nm.doms = 0 + KERN_WRITEL(_syscall_ent_##nm.entrypoint, (uintptr_t)_syscall_##nm); \ + KERN_WRITEL(_syscall_ent_##nm.doms, 0) #define SYSCALLS_DEFINE(nm, ...) \ void _syscall_##nm(__VA_ARGS__); \ @@ -65,8 +66,19 @@ SYSCALLS_STUB_SINGLETON(nm, dcd); \ void _syscall_##nm(__VA_ARGS__) -#define SYSCALLS_AUTHZ(nm, drv) _syscall_ent_##nm.doms |= BIT((drv).dom_id) -#define SYSCALLS_DEAUTHZ(nm, drv) _syscall_ent_##nm.doms &= ~BIT((drv).dom_id) +#define SYSCALLS_AUTHZ_UPD(nm, drv, set) \ + { \ + dom_id_t _sc_tmp_id; \ + dom_id_bitmap_t _sc_tmp_bm; \ + KERN_READL(_sc_tmp_id, (drv).dom_id); \ + KERN_READL(_sc_tmp_bm, _syscall_ent_##nm.doms); \ + if(set) { \ + _sc_tmp_bm |= BIT(_sc_tmp_id); \ + } else { \ + _sc_tmp_bm &= ~BIT(_sc_tmp_id); \ + } \ + KERN_WRITEL(_syscall_ent_##nm.doms, _sc_tmp_bm); \ + } /** * Check that any untrusted pointer that could have been influenced by a caller @@ -78,7 +90,11 @@ extern syscalls_entrypoint_t syscalls_entrypoints_end[]; * * This also checks that the pointer is either within the stack region or the * shared data region, which is important for preventing redirection of data - * accesses to MMIO or metadata regions. + * accesses to MMIO or metadata regions. This check is omitted for multi- + * segment protection domain implementations, since the segment settings + * already enforce this property for pointers dereferenced in DS.
Pointers + * that can be influenced by a caller should not be dereferenced in any other + * segment. * * The pointer is both validated and copied to a new storage location, which * must be within the callee's local stack region (excluding the parameter @@ -92,6 +108,14 @@ extern syscalls_entrypoint_t syscalls_entrypoints_end[]; * references the return address, it could potentially redirect execution with * the privileges of the callee protection domain. */ +#if X86_CONF_PROT_DOMAINS_MULTI_SEG +#define PROT_DOMAINS_VALIDATE_PTR(validated, untrusted, sz) \ + validated = untrusted; \ + if(((uintptr_t)(validated)) < \ + ((2 * sizeof(uintptr_t)) + (uintptr_t)__builtin_frame_address(0))) { \ + halt(); \ + } +#else #define PROT_DOMAINS_VALIDATE_PTR(validated, untrusted, sz) \ validated = untrusted; \ if((((uintptr_t)(validated)) < \ @@ -99,6 +123,7 @@ extern syscalls_entrypoint_t syscalls_entrypoints_end[]; (((uintptr_t)&_edata_addr) <= (((uintptr_t)(validated)) + (sz)))) { \ halt(); \ } +#endif #else @@ -106,10 +131,12 @@ extern syscalls_entrypoint_t syscalls_entrypoints_end[]; #define SYSCALLS_INIT(nm) #define SYSCALLS_DEFINE(nm, ...) void nm(__VA_ARGS__) #define SYSCALLS_DEFINE_SINGLETON(nm, dcd, ...) void nm(__VA_ARGS__) -#define SYSCALLS_AUTHZ(nm, drv) -#define SYSCALLS_DEAUTHZ(nm, drv) +#define SYSCALLS_AUTHZ_UPD(nm, drv, set) #define PROT_DOMAINS_VALIDATE_PTR(validated, untrusted, sz) validated = untrusted #endif +#define SYSCALLS_AUTHZ(nm, drv) SYSCALLS_AUTHZ_UPD(nm, drv, true) +#define SYSCALLS_DEAUTHZ(nm, drv) SYSCALLS_AUTHZ_UPD(nm, drv, false) + #endif /* CPU_X86_MM_SYSCALLS_H_ */ diff --git a/cpu/x86/mm/tss-prot-domains-asm.S b/cpu/x86/mm/tss-prot-domains-asm.S new file mode 100644 index 000000000..45832a62c --- /dev/null +++ b/cpu/x86/mm/tss-prot-domains-asm.S @@ -0,0 +1,88 @@ +/* + * Copyright (C) 2015, Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +.text + +/* These register assignments must be kept in sync with the TSS field + * initialization in prot_domains_reg (tss-prot-domains.c). + * Note: Each of these must be a callee-saved register, so that they are + * restored to their original values prior to the task returning. This will + * result in the same values being loaded when the task is next invoked. + */ +#define CUR_DOM_ID_BITMAP esi + +/* Must match SEG_KERN (plus the trailing 's') in multi-segment.h */ +#define SEG_KERN fs + +.global prot_domains_syscall_dispatcher +prot_domains_syscall_dispatcher: +#define PROT_DOMAINS_SYSCALL eax + mov prot_domains_syscall, %PROT_DOMAINS_SYSCALL + cmp $syscalls_entrypoints, %PROT_DOMAINS_SYSCALL + jl halt + cmp $syscalls_entrypoints_end, %PROT_DOMAINS_SYSCALL + jnl halt +#define SYSCALLS_ENTRYPOINTS_ALIGN_MASK ebp + mov $3, %SYSCALLS_ENTRYPOINTS_ALIGN_MASK + and %PROT_DOMAINS_SYSCALL, %SYSCALLS_ENTRYPOINTS_ALIGN_MASK + jnz halt + + /* Compare allowed domains bitmask against current domain ID bitmap. If + * the check fails, then the current domain ID bitmap value will be zeroed + * out, which could cause incorrect behavior in the future. However, the + * response to a failed check is to halt the system, so destroying the + * current domain ID bitmap value will have no effect. + */ + and %SEG_KERN:4(%PROT_DOMAINS_SYSCALL), %CUR_DOM_ID_BITMAP + jz halt + + mov prot_domains_main_esp, %esp + + /* Must be a callee-saved register: */ +#define ORIG_RET_ADDR edi + /* Update the caller's stack to return back to here */ + pop %ORIG_RET_ADDR + push $sysret_dispatcher + /* Jump to the system call body */ + jmp *%SEG_KERN:(%PROT_DOMAINS_SYSCALL) + +sysret_dispatcher: + push %ORIG_RET_ADDR + + iret + + /* The task will resume here for the next system call, so it is necessary + * to jump back to the top. + */ + jmp prot_domains_syscall_dispatcher + +.global dev_not_avail_isr +dev_not_avail_isr: + clts + iret diff --git a/cpu/x86/mm/tss-prot-domains.c b/cpu/x86/mm/tss-prot-domains.c new file mode 100644 index 000000000..40041a6d1 --- /dev/null +++ b/cpu/x86/mm/tss-prot-domains.c @@ -0,0 +1,161 @@ +/* + * Copyright (C) 2015-2016, Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <stdbool.h> +#include <stdint.h> +#include "gdt.h" +#include "helpers.h" +#include "idt.h" +#include "prot-domains.h" +#include "stacks.h" +#include "syscalls.h" +#include "tss.h" + +uint32_t prot_domains_main_esp; +syscalls_entrypoint_t ATTR_KERN_ADDR_SPACE *prot_domains_syscall; + +/*---------------------------------------------------------------------------*/ +void app_main(void); +void +prot_domains_reg(dom_client_data_t ATTR_KERN_ADDR_SPACE *dcd, + uintptr_t mmio, size_t mmio_sz, + uintptr_t meta, size_t meta_sz, + bool pio) +{ + segment_desc_t desc; + uint32_t eflags; + dom_id_t dom_id; + volatile struct dom_kern_data ATTR_KERN_ADDR_SPACE *dkd; + + KERN_READL(dom_id, dcd->dom_id); + + dkd = prot_domains_kern_data + dom_id; + + prot_domains_reg_multi_seg(dkd, mmio, mmio_sz, meta, meta_sz); + + /* Only the kernel protection domain requires port I/O access outside of the + * interrupt handlers. + */ + eflags = EFLAGS_IOPL(pio ? PRIV_LVL_USER : PRIV_LVL_INT); + if(dom_id == DOM_ID_app) { + eflags |= EFLAGS_IF; + } + + /* Keep this initialization in sync with the register definitions in + * tss-prot-domains-asm.S. + */ + KERN_WRITEL(dkd->tss.ebp, 0); + KERN_WRITEL(dkd->tss.ebx, 0); + KERN_WRITEL(dkd->tss.esi, BIT(dom_id)); + KERN_WRITEL(dkd->tss.eip, + (dom_id == DOM_ID_app) ? + (uint32_t)app_main : + (uint32_t)prot_domains_syscall_dispatcher); + KERN_WRITEL(dkd->tss.cs, GDT_SEL_CODE); + KERN_WRITEL(dkd->tss.ds, GDT_SEL_DATA); + KERN_WRITEL(dkd->tss.es, GDT_SEL_DATA); + KERN_WRITEL(dkd->tss.fs, LDT_SEL_KERN); + KERN_WRITEL(dkd->tss.gs, + (meta_sz == 0) ? GDT_SEL_NULL : LDT_SEL_META); + KERN_WRITEL(dkd->tss.ss, GDT_SEL_STK); + /* This stack pointer is only actually used in the application protection + * domain. Other protection domains are entered via the system call + * dispatcher, which switches to the main stack. + */ + KERN_WRITEL(dkd->tss.esp, + /* Two return addresses have been consumed: */ + STACKS_INIT_TOP + (2 * sizeof(uintptr_t))); + KERN_WRITEL(dkd->tss.eflags, eflags); + KERN_WRITEL(dkd->tss.ldt, GDT_SEL_LDT(dom_id)); + KERN_WRITEL(dkd->tss.esp2, STACKS_SIZE_MAIN + STACKS_SIZE_INT); + KERN_WRITEL(dkd->tss.ss2, GDT_SEL_STK_INT); + KERN_WRITEL(dkd->tss.esp0, + STACKS_SIZE_MAIN + STACKS_SIZE_INT + STACKS_SIZE_EXC); + KERN_WRITEL(dkd->tss.ss0, GDT_SEL_STK_EXC); + KERN_WRITEW(dkd->tss.t, 0); + KERN_WRITEW(dkd->tss.iomap_base, sizeof(tss_t)); + KERN_WRITEL(dkd->tss.cr3, 0); + + segment_desc_init(&desc, + KERN_DATA_OFF_TO_PHYS_ADDR((uint32_t)&(dkd->tss)), + sizeof(dkd->tss), + /* It should be possible for code at any privilege level to invoke the task's + * system call dispatcher.
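Thus, the TSS descriptor's DPL is set to the user privilege level.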
+ */ + SEG_FLAG(DPL, PRIV_LVL_USER) | SEG_TYPE_TSS32_AVAIL); + + gdt_insert(GDT_IDX_TSS(dom_id), desc); + + KERN_WRITEW(dcd->tss_sel, GDT_SEL(GDT_IDX_TSS(dom_id), PRIV_LVL_USER)); +} +/*---------------------------------------------------------------------------*/ +void dev_not_avail_isr(void); +void +prot_domains_impl_init(void) +{ + __asm__ __volatile__ ("ltr %0" :: "r" ((uint16_t)GDT_SEL_TSS(DOM_ID_kern))); + __asm__ __volatile__ ("lldt %0" :: "r" ((uint16_t)GDT_SEL_LDT(DOM_ID_kern))); + + idt_set_intr_gate_desc(7, + (uint32_t)dev_not_avail_isr, + GDT_SEL_CODE_EXC, PRIV_LVL_EXC); +} +/*---------------------------------------------------------------------------*/ +int main(); +void +prot_domains_launch_kernel(void) +{ + multi_segment_launch_kernel(); + + /* Activate kernel protection domain, entering the kernel at main. */ + __asm__ __volatile__ ( + "pushl %[_ss_]\n\t" + "pushl %[_top_of_stk_]\n\t" + "pushl %[_eflags_]\n\t" + "pushl %[_cs_]\n\t" + "pushl %[_kern_start_]\n\t" + "iretl\n\t" + : + : [_ss_] "g" (GDT_SEL_STK), + [_eflags_] "g" (EFLAGS_IOPL(PRIV_LVL_USER)), + [_cs_] "g" (GDT_SEL_CODE), + [_kern_start_] "g" (main), + /* one address has already been consumed */ + [_top_of_stk_] "g" (STACKS_INIT_TOP + sizeof(uint32_t)) + ); +} +/*---------------------------------------------------------------------------*/ +void +prot_domains_launch_app() +{ + far_pointer_t app_ptr = { 0, GDT_SEL_TSS(DOM_ID_app) }; + __asm__ __volatile__ ("ljmp *%0" :: "m" (app_ptr)); +} +/*---------------------------------------------------------------------------*/ diff --git a/cpu/x86/mm/tss-prot-domains.h b/cpu/x86/mm/tss-prot-domains.h new file mode 100644 index 000000000..d61d97504 --- /dev/null +++ b/cpu/x86/mm/tss-prot-domains.h @@ -0,0 +1,130 @@ +/* + * Copyright (C) 2015-2016, Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef CPU_X86_MM_TSS_PROT_DOMAINS_H_ +#define CPU_X86_MM_TSS_PROT_DOMAINS_H_ + +#include <stdint.h> +#include <stddef.h> +#include <stdbool.h> +#include "ldt-layout.h" +#include "segmentation.h" +#include "tss.h" + +struct dom_kern_data { + /** Task State Segment */ + tss_t tss; + /** Local Descriptor Table with per-domain descriptors */ + segment_desc_t ldt[LDT_NUM_DESC]; +} __attribute__((packed)); + +/* relies on dom_kern_data: */ +#include "multi-segment.h" + +/* relies on ATTR_KERN_ADDR_SPACE: */ +#include "syscalls.h" + +/** + * Data associated with each protection domain that is owned by clients of that + * domain and used to identify the domain. + */ +struct dom_client_data { + dom_id_t dom_id; + /** The selector is only 16 bits, but it is padded to 32 bits. */ + uint32_t tss_sel; +}; + +extern uint32_t prot_domains_main_esp; + +#define SYSCALLS_STUB_MIDDLE(nm) \ + /* If already in the callee protection domain, skip the protection */ \ + /* domain switch and directly invoke the system call body */ \ + " je _syscall_" #nm "\n\t" \ + " movl $" EXP_STRINGIFY(_syscall_ent_##nm) ", prot_domains_syscall\n\t" \ + " mov %esp, prot_domains_main_esp\n\t" + +#define SYSCALLS_STUB(nm) \ + SYSCALLS_ALLOC_ENTRYPOINT(nm); \ + asm ( \ + ".text\n\t" \ + ".global " #nm "\n\t" \ + #nm ":\n\t" \ + " str %ax\n\t" \ + /* Compare current Task Register selector to selector for callee */ \ + /* protection domain, in tss_sel field of dom_client_data */ \ + " cmpw %ax, 8(%esp)\n\t" \ + SYSCALLS_STUB_MIDDLE(nm) \ + /* This will treat the dom_id field as the offset for the call, but */ \ + /* that is ignored when performing a far call to a task */ \ + " lcall *4(%esp)\n\t" \ + " ret\n\t") + +#define SYSCALLS_STUB_SINGLETON(nm, dcd) \ + SYSCALLS_ALLOC_ENTRYPOINT(nm); \ + asm ( \ + ".text\n\t" \ + ".global " #nm "\n\t" \ + #nm ":\n\t" \ + " str %ax\n\t" \ + /* Compare current Task Register selector to selector for callee */ \ + /* protection domain, in tss_sel field of dom_client_data */ \ + " cmpw %ax, %" SEG_KERN "s:(4 + " #dcd ")\n\t" \ + SYSCALLS_STUB_MIDDLE(nm) \ + /* This will treat the dom_id field as the offset for the call, but */ \ + /* that is ignored when performing a far call to a task */ \ + " lcall *%" SEG_KERN "s:" #dcd "\n\t" \ + " ret\n\t") + +#define PROT_DOMAINS_ENTER_ISR(exc) \ + MULTI_SEGMENT_ENTER_ISR(exc) \ + /* It is possible that the system call dispatcher is being interrupted, */ \ + /* and some interrupt handlers perform system calls. Thus, it is */ \ + /* necessary to save and restore the system call dispatcher parameters */ \ + /* (in callee-saved registers). */ \ + "mov prot_domains_main_esp, %%esi\n\t" \ + "mov prot_domains_syscall, %%edi\n\t" \ + PROT_DOMAINS_ENTER_ISR_COMMON(exc) +#define PROT_DOMAINS_LEAVE_ISR(exc) \ + PROT_DOMAINS_LEAVE_ISR_COMMON(exc) \ + "mov %%edi, prot_domains_syscall\n\t" \ + "mov %%esi, prot_domains_main_esp\n\t" \ + MULTI_SEGMENT_LEAVE_ISR(exc) + +/* Allocate two additional GDT entries for each protection domain. Note that + * the particular storage allocated by this statement may actually be used for + * some other protection domain, depending on how the linker happens to arrange + * all of the GDT storage. The GDT_IDX_TSS and GDT_IDX_LDT macros in + * gdt-layout.h determine which storage is used for each protection domain. + * Thus, this storage should not be referenced directly by its variable name.
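+ * For example, PROT_DOMAINS_ALLOC(dom_client_data_t, my_drv) (with my_drv a
+ * hypothetical domain name) expands to include PROT_DOMAINS_ALLOC_IMPL(my_drv),
+ * which reserves storage for the two descriptors as _gdt_storage_my_drv.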
+ */ +#define PROT_DOMAINS_ALLOC_IMPL(nm) \ + static segment_desc_t ATTR_BSS_GDT_MID _gdt_storage_##nm[2] + +#endif /* CPU_X86_MM_TSS_PROT_DOMAINS_H_ */ diff --git a/cpu/x86/quarkX1000.ld b/cpu/x86/quarkX1000.ld index 2f90b7c70..be91a74c7 100644 --- a/cpu/x86/quarkX1000.ld +++ b/cpu/x86/quarkX1000.ld @@ -87,4 +87,6 @@ SECTIONS { */ _ebss_gdt_addr = .; } + + _ebss_pre_dma_addr = ALIGN(32); } diff --git a/cpu/x86/quarkX1000_dma.ld b/cpu/x86/quarkX1000_dma.ld index fe3b79861..4cecac839 100644 --- a/cpu/x86/quarkX1000_dma.ld +++ b/cpu/x86/quarkX1000_dma.ld @@ -30,26 +30,18 @@ SECTIONS { - /* - It would be more natural to use a 1K alignment for this entire section. - However, the UEFI GenFw program ratchets up its alignment - granularity to the maximum granularity discovered in its input file. - Using 1K-alignment perturbs the symbols, hindering debugging. Thus, - this section is simply padded out to the desired alignment and - declared to have a section alignment of only 32 bytes. - - The alignment directives used here suffice even when paging is in use, - because this is the last section and directly follows one (.bss.meta) - that is 4K-aligned. - */ - .bss.dma (NOLOAD) : ALIGN (32) + .bss.dma (NOLOAD) : AT(_ebss_pre_dma_addr) ALIGN (32) { - /* The IMR feature operates at 1K granularity. */ - . = ALIGN(1K); - _sbss_dma_addr = .; + /* IMRs are used to restrict DMA, and they require 1K physical address alignment. */ + . += ALIGN(_ebss_pre_dma_addr, 1K) - ALIGN(_ebss_pre_dma_addr, 32); *(.dma_bss) - . = ALIGN(1K); - _ebss_dma_addr = .; } + _sbss_dma_addr = LOADADDR(.bss.dma) + ALIGN(_ebss_pre_dma_addr, 1K) - ALIGN(_ebss_pre_dma_addr, 32); + /* + Effectively pointing beyond the end of .bss.dma is acceptable, since + .bss.dma is the last section in memory. + */ + _ebss_dma_addr = ALIGN(LOADADDR(.bss.dma) + SIZEOF(.bss.dma), 1K); + } diff --git a/cpu/x86/quarkX1000_multi_seg.ld b/cpu/x86/quarkX1000_multi_seg.ld new file mode 100644 index 000000000..945650399 --- /dev/null +++ b/cpu/x86/quarkX1000_multi_seg.ld @@ -0,0 +1,190 @@ +/* + * Copyright (C) 2015, Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE + * COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +OUTPUT_FORMAT("elf32-i386") + +ENTRY(start) + +/* + The TSS-based protection domain implementation does not explicitly reference + these symbols, so we list them here to prevent them from being garbage- + collected. +*/ +EXTERN(stacks_int) +EXTERN(stacks_exc) + +PHDRS { + boot_text PT_LOAD; + text PT_LOAD; + data PT_LOAD; +} + +SECTIONS { + /* + OS-Dev Wiki says it is common for kernels to start at 1M. Addresses before that + are used by BIOS/EFI, the bootloader and memory-mapped I/O. + + The UEFI GenFw program inserts a 0x220 byte offset between the image base and + the .text section. We add that same offset here to align the symbols in the + UEFI DLL with those in the final UEFI binary to make debugging easier. + */ + . = 1M + 0x220; + + /* + The GenFw program in the EDK2 UEFI toolchain outputs UEFI images with a + section alignment of at least 32 bytes. Thus, it is desirable to use at + least that alignment granularity to avoid symbols being shifted from the + intermediate DLL to the final UEFI image. Such shifting may make + debugging more difficult by preventing the DLL from being a useful + source of symbol information. The debugging symbols are not included in + the final UEFI image. + */ + .text.boot : ALIGN (32) + { + *(.multiboot) + /* + The initial bootstrap code expects to operate in a flat address + space with an identity mapping between linear and physical + addresses. + */ + *(.boot_text) + } :boot_text + + /* The post-boot code segments define tight bounds around the code + section, so this directive resets the virtual address to 0. */ + . = 0; + + /* The virtual address differs from the load address. */ + .text : AT(LOADADDR(.text.boot) + ALIGN(SIZEOF(.text.boot), 32)) ALIGN (32) + { + /* + These BYTE directives emit a UD2 instruction to cause execution to + halt if the control flow ever deviates to address 0. This also + prevents other code from being placed at address 0. Some code + considers a function pointer to address 0 to be a null function + pointer. + */ + BYTE(0x0F); + BYTE(0x0B); + *(.text*) + + /* + An alternative design to eliminate the need for ALIGN directives + within the AT directives in later sections could have padded + each section out to a 32-byte boundary. However, that would have + enabled unneeded software accesses to the padding past the end of actual + code/data in each section, since segments are also configured based on + the values of the SIZEOF expressions. As a general principle, accesses + should be as restricted as is feasible. + */ + } :text + + _stext_addr = LOADADDR(.text); + _etext_addr = LOADADDR(.text) + SIZEOF(.text); + + . = 0; + + .data : AT(ALIGN(_etext_addr, 32)) ALIGN (32) + { + *(.main_stack) + *(.int_stack) + *(.exc_stack) + *(.rodata*) + *(.data*) + + /* + These could alternatively be treated as read-only data to prevent tampering + from the user privilege level. 
+ */ + _sdata_shared_isr = .; + KEEP(*(.shared_isr_data*)) + _edata_shared_isr = .; + } :data + + .bss : ALIGN (32) + { + *(COMMON) + *(.bss*) + } + + _sdata_addr = LOADADDR(.data); + _edata_addr = LOADADDR(.bss) + SIZEOF(.bss); + + . = 0; + + .bss.kern (NOLOAD) : AT(ALIGN(_edata_addr, 32)) ALIGN (32) + { + /* + This directive prevents any data from being allocated at address + zero, since the address 0 is commonly used to represent null + pointers. + */ + LONG(0); + *(.kern_bss) + + syscalls_entrypoints = .; + *(.syscall_bss) + syscalls_entrypoints_end = .; + } + + _ebss_syscall_addr = LOADADDR(.bss.kern) + SIZEOF(.bss.kern); + + .bss.kern_priv (NOLOAD) : ALIGN (32) + { + prot_domains_kern_data = .; + /* + The kernel and app protection domain control structures must always + be placed in the first two slots in this order, so that they have + well-known protection domain IDs: + */ + *(.kern_prot_dom_bss) + *(.app_prot_dom_bss) + *(.prot_dom_bss) + prot_domains_kern_data_end = .; + + *(.gdt_bss_start) + KEEP(*(.gdt_bss_mid)) + *(.gdt_bss) + _ebss_gdt_addr = .; + } + + _sbss_kern_addr = LOADADDR(.bss.kern); + _ebss_kern_addr = LOADADDR(.bss.kern_priv) + SIZEOF(.bss.kern_priv); + + . = _ebss_kern_addr; + + .bss.meta (NOLOAD) : AT(ALIGN(_ebss_kern_addr, 32)) ALIGN (32) + { + *(.meta_bss) + } + + /* .bss.meta may be empty, so this uses .bss.kern_priv as a base instead: */ + _ebss_pre_dma_addr = ALIGN(ALIGN(_ebss_kern_addr, 32) + SIZEOF(.bss.meta), 32); +} diff --git a/cpu/x86/quarkX1000_paging.ld b/cpu/x86/quarkX1000_paging.ld index 0352cbf64..19e50e1e0 100644 --- a/cpu/x86/quarkX1000_paging.ld +++ b/cpu/x86/quarkX1000_paging.ld @@ -129,7 +129,7 @@ SECTIONS { *(.data*) /* - These could also be treated as read-only data to prevent tampering + These could alternatively be treated as read-only data to prevent tampering from the user privilege level. */ _sdata_shared_isr = .; @@ -201,4 +201,6 @@ SECTIONS { . = ALIGN(4K); } + + _ebss_pre_dma_addr = ALIGN(32); } diff --git a/platform/galileo/Makefile.customrules-galileo b/platform/galileo/Makefile.customrules-galileo index cb5bc26a9..b141ee211 100644 --- a/platform/galileo/Makefile.customrules-galileo +++ b/platform/galileo/Makefile.customrules-galileo @@ -9,7 +9,17 @@ MULTIBOOT = $(CONTIKI_PROJECT).$(MULTIBOOT_SFX) # UEFI binary UEFI_DLL_SFX = $(TARGET).dll UEFI_DLL = $(CONTIKI_PROJECT).$(UEFI_DLL_SFX) -UEFI_LDFLAGS += -Xlinker --emit-relocs -Xlinker --entry=uefi_start +# The GenFw program is unable to process absolute symbols like _stext_addr, +# etc., that are defined in quarkX1000_dma.ld and quarkX1000_multi_seg.ld +# and used to configure segments in multi-segment.c, etc. Furthermore, +# relocating the UEFI image during load would result in those symbols not +# pointing to the expected image locations. So, relocation data is omitted +# from the intermediate UEFI DLL. This will only result in a +# correctly-functioning build if the UEFI firmware does not attempt to +# relocate the UEFI image, so it may be desirable in the future to revisit +# this design. To emit relocation data, '-Xlinker --emit-relocs' should be +# appended to the following line. +UEFI_LDFLAGS = -Xlinker --entry=uefi_start UEFI_SFX = $(TARGET).efi UEFI = $(CONTIKI_PROJECT).$(UEFI_SFX)
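
Usage note (illustrative, not part of the patch): the following minimal sketch shows how a driver built on this patch would combine the accessors introduced above to touch a device register under multi-segment protection domains. The my_dev_regs_t layout and the my_dev_toggle function are hypothetical; the macros and functions (ATTR_MMIO_ADDR_SPACE, PROT_DOMAINS_MMIO, MMIO_READL/MMIO_WRITEL, prot_domains_enable_mmio/prot_domains_disable_mmio) come from multi-segment.h and prot-domains.h as patched above, and degrade to plain accesses when protection domains are disabled.

#include <stdint.h>
#include "prot-domains.h"

/* Hypothetical register block for an MMIO-mapped device. */
typedef struct my_dev_regs {
  volatile uint32_t ctrl;
  volatile uint32_t status;
} my_dev_regs_t;

void
my_dev_toggle(dom_client_data_t c_this)
{
  uint32_t status;
  /* Under multi-segment protection domains, PROT_DOMAINS_MMIO(c_this) is 0,
   * because accesses are relative to the domain's tightly-bounded MMIO
   * segment rather than to a linear base address.
   */
  my_dev_regs_t ATTR_MMIO_ADDR_SPACE *regs =
    (my_dev_regs_t ATTR_MMIO_ADDR_SPACE *)PROT_DOMAINS_MMIO(c_this);

  /* Load the MMIO selector into the segment register used by the MMIO_*
   * accessors: */
  prot_domains_enable_mmio();

  MMIO_READL(status, regs->status);
  MMIO_WRITEL(regs->ctrl, status | 1);

  /* Restore the kernel data selector: */
  prot_domains_disable_mmio();
}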