-#endif
-
-/* Page Table/Directory Entry flags
- * these are defined by the hardware
- */
-#define PG_P 0x1 /* Present */
-#define PG_W 0x2 /* Writeable */
-#define PG_U 0x4 /* User */
-#define PG_A 0x20 /* Accessed */
-#define PG_D 0x40 /* Dirty */
-/*
- * The PG_USER bits are not used by the kernel and they are
- * not interpreted by the hardware. The kernel allows
- * user processes to set them arbitrarily.
- */
-
-/* EFLAGS Register. */
-#define FLAG_MBS 0x00000002 /* Must be set. */
-#define FLAG_IF 0x00000200 /* Interrupt Flag. */
-
-/* Page fault error codes */
-#define FEC_PR 0x1 /* Page fault caused by protection violation */
-#define FEC_WR 0x2 /* Page fault caused by a write */
-#define FEC_U 0x4 /* Page fault occured while in user mode */
-
-
-/* Application segment type bits */
-#define STA_X 0x8 /* Executable segment */
-#define STA_A 0x1 /* Accessed */
-
-#define STA_C 0x4 /* Conforming code segment (executable only) */
-#define STA_R 0x2 /* Readable (executable segments) */
-
-#define STA_E 0x4 /* Expand down (non-executable segments) */
-#define STA_W 0x2 /* Writeable (non-executable segments) */
-
-
-/* Segment selectors. */
-#define SEL_NULL 0x00 /* Null selector. */
-#define SEL_KCSEG 0x08 /* Kernel code selector. */
-#define SEL_KDSEG 0x10 /* Kernel data selector. */
-#define SEL_UCSEG 0x1B /* User code selector. */
-#define SEL_UDSEG 0x23 /* User data selector. */
-#define SEL_TSS 0x28 /* Task-state segment. */
-#define SEL_CNT 6 /* Number of segments. */
-
-#ifndef __ASSEMBLER__
-struct tss
- {
- uint16_t back_link, :16;
- uint32_t esp0;
- uint16_t ss0, :16;
- uint32_t esp1;
- uint16_t ss1, :16;
- uint32_t esp2;
- uint16_t ss2, :16;
- uint32_t cr3;
- uint32_t eip;
- uint32_t eflags;
- uint32_t eax, ecx, edx, ebx;
- uint32_t esp, ebp, esi, edi;
- uint16_t es, :16;
- uint16_t cs, :16;
- uint16_t ss, :16;
- uint16_t ds, :16;
- uint16_t fs, :16;
- uint16_t gs, :16;
- uint16_t ldt, :16;
- uint16_t trace, bitmap;
- };
-
-enum seg_system
- {
- SYS_SYSTEM = 0, /* System segment. */
- SYS_CODE_DATA = 1 /* Code or data segment. */
- };
-
-enum seg_granularity
- {
- GRAN_BYTE = 0, /* Limit has 1-byte granularity. */
- GRAN_PAGE = 1 /* Limit has 4 kB granularity. */
- };
-
-enum seg_type
- {
- /* System segment types. */
- TYPE_TSS_16_A = 1, /* 16-bit TSS (available). */
- TYPE_LDT = 2, /* LDT. */
- TYPE_TSS_16_B = 3, /* 16-bit TSS (busy). */
- TYPE_CALL_16 = 4, /* 16-bit call gate. */
- TYPE_TASK = 5, /* Task gate. */
- TYPE_INT_16 = 6, /* 16-bit interrupt gate. */
- TYPE_TRAP_16 = 7, /* 16-bit trap gate. */
- TYPE_TSS_32_A = 9, /* 32-bit TSS (available). */
- TYPE_TSS_32_B = 11, /* 32-bit TSS (busy). */
- TYPE_CALL_32 = 12, /* 32-bit call gate. */
- TYPE_INT_32 = 14, /* 32-bit interrupt gate. */
- TYPE_TRAP_32 = 15, /* 32-bit trap gate. */
-
- /* Code/data segment types. */
- TYPE_CODE = 8, /* 1=Code segment, 0=data segment. */
- TYPE_ACCESSED = 1, /* Set if accessed. */
-
- /* Data segment types. */
- TYPE_EXPAND_DOWN = 4, /* 1=Expands up, 0=expands down. */
- TYPE_WRITABLE = 2, /* 1=Read/write, 0=read-only. */
-
- /* Code segment types. */
- TYPE_CONFORMING = 4, /* 1=Conforming, 0=nonconforming. */
- TYPE_READABLE = 2 /* 1=Exec/read, 0=exec-only. */
- };
-
-static inline uint64_t
-make_dtr_operand (uint16_t limit, void *base)
-{
- return limit | ((uint64_t) (uint32_t) base << 16);
+\f
+/* Page directories and page tables.
+
+ For more information see [IA32-v3] pages 3-23 to 3-28.
+
+ PDEs and PTEs share a common format:
+
+ 32                                   12                       0
+ +------------------------------------+------------------------+
+ |          Physical Address          |         Flags          |
+ +------------------------------------+------------------------+
+
+ In a PDE, the physical address points to a page table.
+ In a PTE, the physical address points to a data or code page.
+ The important flags are listed below.
+ When a PDE or PTE is not "present", the other flags are
+ ignored.
+ A PDE or PTE that is initialized to 0 will be interpreted as
+ "not present", which is just fine. */
+#define PG_P 0x1 /* 1=present, 0=not present. */
+#define PG_W 0x2 /* 1=read/write, 0=read-only. */
+#define PG_U 0x4 /* 1=user/kernel, 0=kernel only. */
+#define PG_A 0x20 /* 1=accessed, 0=not accessed. */
+#define PG_D 0x40 /* 1=dirty, 0=not dirty (PTEs only). */
+
+/* Returns the page directory index of virtual address VA (VA >> PDSHIFT). */
+static inline uintptr_t pd_no (const void *va) {
+ return (uintptr_t) va >> PDSHIFT;
+}
+
+/* Returns a present, read/write, user PDE pointing to page table PT. */
+static inline uint32_t pde_create (uint32_t *pt) {
+ ASSERT (pg_ofs (pt) == 0);
+ return vtop (pt) | PG_U | PG_P | PG_W;
+}
+
+/* Returns a pointer to the page table that page directory entry
+ PDE, which must be "present", points to. */
+static inline uint32_t *pde_get_pt (uint32_t pde) {
+ ASSERT (pde & PG_P);
+ return ptov (pde & ~PGMASK);
+}
+
+/* Returns the page table index encoded in virtual address VA. */
+static inline unsigned pt_no (const void *va) {
+ return ((uintptr_t) va & PTMASK) >> PTSHIFT;
+}
+
+/* Returns a PTE that points to PAGE, whose page offset must be 0.
+ The PTE's page is readable.
+ If WRITABLE is true then it will be writable as well.
+ The page will be usable only by ring 0 code (the kernel). */
+static inline uint32_t pte_create_kernel (uint32_t *page, bool writable) {
+ ASSERT (pg_ofs (page) == 0);
+ return vtop (page) | PG_P | (writable ? PG_W : 0);
+}
+
+/* Returns a PTE that points to PAGE: pte_create_kernel's PTE plus PG_U.
+ The PTE's page is readable.
+ If WRITABLE is true then it will be writable as well.
+ The page will be usable by both user and kernel code. */
+static inline uint32_t pte_create_user (uint32_t *page, bool writable) {
+ return pte_create_kernel (page, writable) | PG_U;
+}
+
+/* Returns a pointer to the page that page table entry PTE, which
+ must be "present", points to. */
+static inline void *pte_get_page (uint32_t pte) {
+ ASSERT (pte & PG_P);
+ return ptov (pte & ~PGMASK);