-/*
- * Copyright (C) 1997 Massachusetts Institute of Technology
- *
- * This software is being provided by the copyright holders under the
- * following license. By obtaining, using and/or copying this software,
- * you agree that you have read, understood, and will comply with the
- * following terms and conditions:
- *
- * Permission to use, copy, modify, distribute, and sell this software
- * and its documentation for any purpose and without fee or royalty is
- * hereby granted, provided that the full text of this NOTICE appears on
- * ALL copies of the software and documentation or portions thereof,
- * including modifications, that you make.
- *
- * THIS SOFTWARE IS PROVIDED "AS IS," AND COPYRIGHT HOLDERS MAKE NO
- * REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED. BY WAY OF EXAMPLE,
- * BUT NOT LIMITATION, COPYRIGHT HOLDERS MAKE NO REPRESENTATIONS OR
- * WARRANTIES OF MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE OR
- * THAT THE USE OF THE SOFTWARE OR DOCUMENTATION WILL NOT INFRINGE ANY
- * THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS. COPYRIGHT
- * HOLDERS WILL BEAR NO LIABILITY FOR ANY USE OF THIS SOFTWARE OR
- * DOCUMENTATION.
- *
- * The name and trademarks of copyright holders may NOT be used in
- * advertising or publicity pertaining to the software without specific,
- * written prior permission. Title to copyright in this software and any
- * associated documentation will at all times remain with copyright
- * holders. See the file AUTHORS which should have accompanied this software
- * for a list of all copyright holders.
- *
- * This file may be derived from previously copyrighted software. This
- * copyright applies only to those changes made by the copyright
- * holders listed in the AUTHORS file. The rest of this file is covered by
- * the copyright notices, if any, listed below.
- */
-
-
-#ifndef _MMU_H_
-#define _MMU_H_
-
-#ifndef __ASSEMBLER__
-#include <stdint.h>
-#include "debug.h"
-#endif
+#ifndef THREADS_MMU_H
+#define THREADS_MMU_H
-#include "loader.h"
+#include <debug.h>
+#include <stdint.h>
+#include <stdbool.h>
+
+#include "threads/loader.h"
+
+/* Virtual to physical translation works like this on an x86:
+
+ - The top 10 bits of the virtual address (bits 22:32) are used
+ to index into the page directory. If the PDE is marked
+ "present," the physical address of a page table is read from
+ the PDE thus obtained. If the PDE is marked "not present"
+ then a page fault occurs.
+
+ - The next 10 bits of the virtual address (bits 12:22) are
+ used to index into the page table. If the PTE is marked
+ "present," the physical address of a data page is read from
+ the PTE thus obtained. If the PTE is marked "not present"
+ then a page fault occurs.
+
+ - The bottom 12 bits of the virtual address (bits 0:12) are
+ added to the data page's physical base address, producing
+ the final physical address.
+
+
+ 32 22 12 0
+ +--------------------------------------------------------------------+
+ | Page Directory Index | Page Table Index | Page Offset |
+ +--------------------------------------------------------------------+
+ | | |
+ _______/ _______/ _____/
+ / / /
+ / Page Directory / Page Table / Data Page
+ / .____________. / .____________. / .____________.
+ |1,023|____________| |1,023|____________| | |____________|
+ |1,022|____________| |1,022|____________| | |____________|
+ |1,021|____________| |1,021|____________| \__\|____________|
+ |1,020|____________| |1,020|____________| /|____________|
+ | | | | | | | |
+ | | | \____\| |_ | |
+ | | . | /| . | \ | . |
+ \____\| . |_ | . | | | . |
+ /| . | \ | . | | | . |
+ | . | | | . | | | . |
+ | | | | | | | |
+ |____________| | |____________| | |____________|
+ 4|____________| | 4|____________| | |____________|
+ 3|____________| | 3|____________| | |____________|
+ 2|____________| | 2|____________| | |____________|
+ 1|____________| | 1|____________| | |____________|
+ 0|____________| \__\0|____________| \____\|____________|
+ / /
+*/
#define MASK(SHIFT, CNT) (((1ul << (CNT)) - 1) << (SHIFT))
-/* Page offset (bits 0:11). */
-#define PGSHIFT 0 /* First offset bit. */
-#define PGBITS 12 /* Number of offset bits. */
-#define PGMASK MASK(PGSHIFT, PGBITS)
-#define PGSIZE (1 << PGBITS)
+/* Page offset (bits 0:12). */
+#define PGSHIFT 0 /* Index of first offset bit. */
+#define PGBITS 12 /* Number of offset bits. */
+#define PGMASK MASK(PGSHIFT, PGBITS) /* Page offset bits (0:12). */
+#define PGSIZE (1 << PGBITS) /* Bytes in a page. */
-/* Page table (bits 12:21). */
-#define PTSHIFT PGBITS /* First page table bit. */
-#define PTBITS 10 /* Number of page table bits. */
-#define PTMASK MASK(PTSHIFT, PTBITS)
+/* Page table (bits 12:22). */
+#define PTSHIFT PGBITS /* Index of first page table bit. */
+#define PTBITS 10 /* Number of page table bits. */
+#define PTMASK MASK(PTSHIFT, PTBITS) /* Page table bits (12:22). */
+#define PTSPAN (1 << PTBITS << PGBITS) /* Bytes covered by a page table. */
-/* Page directory (bits 22:31). */
-#define PDSHIFT (PTSHIFT + PTBITS) /* First page dir bit. */
-#define PDBITS 10 /* Number of page dir bits. */
-#define PDMASK MASK(PDSHIFT, PDBITS)
+/* Page directory (bits 22:32). */
+#define PDSHIFT (PTSHIFT + PTBITS) /* First page dir bit. */
+#define PDBITS 10 /* Number of page dir bits. */
+#define PDMASK MASK(PDSHIFT, PDBITS) /* Page directory bits (22:32). */
-#ifndef __ASSEMBLER__
/* Offset within a page. */
-static inline unsigned pg_ofs (void *va) { return (uintptr_t) va & PGMASK; }
-
-/* Page number. */
-static inline uintptr_t pg_no (void *va) { return (uintptr_t) va >> PTSHIFT; }
-
-/* Page table number. */
-static inline unsigned pt_no (void *va) {
- return ((uintptr_t) va & PTMASK) >> PTSHIFT;
+static inline unsigned pg_ofs (const void *va) {
+ return (uintptr_t) va & PGMASK;
}
-/* Page directory number. */
-static inline uintptr_t pd_no (void *va) { return (uintptr_t) va >> PDSHIFT; }
+/* Virtual page number. */
+static inline uintptr_t pg_no (const void *va) {
+ return (uintptr_t) va >> PTSHIFT;
+}
/* Round up to nearest page boundary. */
-static inline void *pg_round_up (void *va) {
+static inline void *pg_round_up (const void *va) {
return (void *) (((uintptr_t) va + PGSIZE - 1) & ~PGMASK);
}
/* Round down to nearest page boundary. */
-static inline void *pg_round_down (void *va) {
+static inline void *pg_round_down (const void *va) {
return (void *) ((uintptr_t) va & ~PGMASK);
}
+/* Base address of the 1:1 physical-to-virtual mapping. Physical
+ memory is mapped starting at this virtual address. Thus,
+ physical address 0 is accessible at PHYS_BASE, physical
+ address 0x1234 at (uint8_t *) PHYS_BASE + 0x1234, and
+ so on.
+
+ This address also marks the end of user programs' address
+ space. Up to this point in memory, user programs are allowed
+ to map whatever they like. At this point and above, the
+ virtual address space belongs to the kernel. */
#define PHYS_BASE ((void *) LOADER_PHYS_BASE)
/* Returns kernel virtual address at which physical address PADDR
is mapped. */
static inline void *
-ptov (uintptr_t paddr)
+ptov (uintptr_t paddr)
{
ASSERT ((void *) paddr < PHYS_BASE);
/* Returns physical address at which kernel virtual address VADDR
is mapped. */
static inline uintptr_t
-vtop (void *vaddr)
+vtop (const void *vaddr)
{
ASSERT (vaddr >= PHYS_BASE);
return (uintptr_t) vaddr - (uintptr_t) PHYS_BASE;
}
-#endif
-
-/* Page Table/Directory Entry flags
- * these are defined by the hardware
- */
-#define PG_P 0x1 /* Present */
-#define PG_W 0x2 /* Writeable */
-#define PG_U 0x4 /* User */
-#define PG_A 0x20 /* Accessed */
-#define PG_D 0x40 /* Dirty */
-/*
- * The PG_USER bits are not used by the kernel and they are
- * not interpreted by the hardware. The kernel allows
- * user processes to set them arbitrarily.
- */
-
-/* EFLAGS Register. */
-#define FLAG_MBS 0x00000002 /* Must be set. */
-#define FLAG_IF 0x00000200 /* Interrupt Flag. */
-
-/* Page fault error codes */
-#define FEC_PR 0x1 /* Page fault caused by protection violation */
-#define FEC_WR 0x2 /* Page fault caused by a write */
-#define FEC_U 0x4 /* Page fault occured while in user mode */
-
-
-/* Application segment type bits */
-#define STA_X 0x8 /* Executable segment */
-#define STA_A 0x1 /* Accessed */
-
-#define STA_C 0x4 /* Conforming code segment (executable only) */
-#define STA_R 0x2 /* Readable (executable segments) */
-
-#define STA_E 0x4 /* Expand down (non-executable segments) */
-#define STA_W 0x2 /* Writeable (non-executable segments) */
-
-
-/* Segment selectors. */
-#define SEL_NULL 0x00 /* Null selector. */
-#define SEL_KCSEG 0x08 /* Kernel code selector. */
-#define SEL_KDSEG 0x10 /* Kernel data selector. */
-#define SEL_UCSEG 0x1B /* User code selector. */
-#define SEL_UDSEG 0x23 /* User data selector. */
-#define SEL_TSS 0x28 /* Task-state segment. */
-#define SEL_CNT 6 /* Number of segments. */
-
-#ifndef __ASSEMBLER__
-struct tss
- {
- uint16_t back_link, :16;
- uint32_t esp0;
- uint16_t ss0, :16;
- uint32_t esp1;
- uint16_t ss1, :16;
- uint32_t esp2;
- uint16_t ss2, :16;
- uint32_t cr3;
- uint32_t eip;
- uint32_t eflags;
- uint32_t eax, ecx, edx, ebx;
- uint32_t esp, ebp, esi, edi;
- uint16_t es, :16;
- uint16_t cs, :16;
- uint16_t ss, :16;
- uint16_t ds, :16;
- uint16_t fs, :16;
- uint16_t gs, :16;
- uint16_t ldt, :16;
- uint16_t trace, bitmap;
- };
-
-enum seg_system
- {
- SYS_SYSTEM = 0, /* System segment. */
- SYS_CODE_DATA = 1 /* Code or data segment. */
- };
-
-enum seg_granularity
- {
- GRAN_BYTE = 0, /* Limit has 1-byte granularity. */
- GRAN_PAGE = 1 /* Limit has 4 kB granularity. */
- };
-
-enum seg_type
- {
- /* System segment types. */
- TYPE_TSS_16_A = 1, /* 16-bit TSS (available). */
- TYPE_LDT = 2, /* LDT. */
- TYPE_TSS_16_B = 3, /* 16-bit TSS (busy). */
- TYPE_CALL_16 = 4, /* 16-bit call gate. */
- TYPE_TASK = 5, /* Task gate. */
- TYPE_INT_16 = 6, /* 16-bit interrupt gate. */
- TYPE_TRAP_16 = 7, /* 16-bit trap gate. */
- TYPE_TSS_32_A = 9, /* 32-bit TSS (available). */
- TYPE_TSS_32_B = 11, /* 32-bit TSS (busy). */
- TYPE_CALL_32 = 12, /* 32-bit call gate. */
- TYPE_INT_32 = 14, /* 32-bit interrupt gate. */
- TYPE_TRAP_32 = 15, /* 32-bit trap gate. */
-
- /* Code/data segment types. */
- TYPE_CODE = 8, /* 1=Code segment, 0=data segment. */
- TYPE_ACCESSED = 1, /* Set if accessed. */
-
- /* Data segment types. */
- TYPE_EXPAND_DOWN = 4, /* 1=Expands up, 0=expands down. */
- TYPE_WRITABLE = 2, /* 1=Read/write, 0=read-only. */
-
- /* Code segment types. */
- TYPE_CONFORMING = 4, /* 1=Conforming, 0=nonconforming. */
- TYPE_READABLE = 2 /* 1=Exec/read, 0=exec-only. */
- };
-
-static inline uint64_t
-make_dtr_operand (uint16_t limit, void *base)
-{
- return limit | ((uint64_t) (uint32_t) base << 16);
+\f
+/* Page directories and page tables.
+
+ For more information see [IA32-v3] pages 3-23 to 3-28.
+
+ PDEs and PTEs share a common format:
+
+ 32 12 0
+ +------------------------------------+------------------------+
+ | Physical Address | Flags |
+ +------------------------------------+------------------------+
+
+ In a PDE, the physical address points to a page table.
+ In a PTE, the physical address points to a data or code page.
+ The important flags are listed below.
+ When a PDE or PTE is not "present", the other flags are
+ ignored.
+ A PDE or PTE that is initialized to 0 will be interpreted as
+ "not present", which is just fine. */
+#define PG_P 0x1 /* 1=present, 0=not present. */
+#define PG_W 0x2 /* 1=read/write, 0=read-only. */
+#define PG_U 0x4 /* 1=user/kernel, 0=kernel only. */
+#define PG_A 0x20 /* 1=accessed, 0=not accessed. */
+#define PG_D 0x40 /* 1=dirty, 0=not dirty (PTEs only). */
+
+/* Obtains page directory index from a virtual address. */
+static inline uintptr_t pd_no (const void *va) {
+ return (uintptr_t) va >> PDSHIFT;
+}
+
+/* Returns a PDE that points to page table PT. */
+static inline uint32_t pde_create (uint32_t *pt) {
+ ASSERT (pg_ofs (pt) == 0);
+ return vtop (pt) | PG_U | PG_P | PG_W;
+}
+
+/* Returns a pointer to the page table that page directory entry
+ PDE, which must be "present", points to. */
+static inline uint32_t *pde_get_pt (uint32_t pde) {
+ ASSERT (pde & PG_P);
+ return ptov (pde & ~PGMASK);
+}
+
+/* Obtains page table index from a virtual address. */
+static inline unsigned pt_no (void *va) {
+ return ((uintptr_t) va & PTMASK) >> PTSHIFT;
+}
+
+/* Returns a PTE that points to PAGE.
+ The PTE's page is readable.
+ If WRITABLE is true then it will be writable as well.
+ The page will be usable only by ring 0 code (the kernel). */
+static inline uint32_t pte_create_kernel (uint32_t *page, bool writable) {
+ ASSERT (pg_ofs (page) == 0);
+ return vtop (page) | PG_P | (writable ? PG_W : 0);
+}
+
+/* Returns a PTE that points to PAGE.
+ The PTE's page is readable.
+ If WRITABLE is true then it will be writable as well.
+ The page will be usable by both user and kernel code. */
+static inline uint32_t pte_create_user (uint32_t *page, bool writable) {
+ return pte_create_kernel (page, writable) | PG_U;
+}
+
+/* Returns a pointer to the page that page table entry PTE, which
+ must be "present", points to. */
+static inline void *pte_get_page (uint32_t pte) {
+ ASSERT (pte & PG_P);
+ return ptov (pte & ~PGMASK);
}
-#endif
-#endif /* !_MMU_H_ */
+#endif /* threads/mmu.h */