diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 505c8a1ccbe0cd043d672a8e1192e052ac73d84a..c871db4f8ffbcba4c605abde6c0975940a75e093 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -256,6 +256,14 @@ config ARM64_PAGE_SHIFT default 14 if ARM64_16K_PAGES default 12 +config KERNEL_PAGE_SHIFT + int + default 12 if KERNEL_4K_PAGES + default 13 if KERNEL_8K_PAGES + default 14 if KERNEL_16K_PAGES + default 15 if KERNEL_32K_PAGES + default 16 if KERNEL_64K_PAGES + config ARM64_CONT_PTE_SHIFT int default 5 if ARM64_64K_PAGES @@ -1136,7 +1144,7 @@ config SOCIONEXT_SYNQUACER_PREITS endmenu # "ARM errata workarounds via the alternatives framework" choice - prompt "Page size" + prompt "Hardware Page Size" default ARM64_4K_PAGES help Page size (translation granule) configuration. @@ -1163,6 +1171,36 @@ config ARM64_64K_PAGES endchoice +choice + prompt "Kernel Page Size" + default KERNEL_4K_PAGES if ARM64_4K_PAGES + default KERNEL_16K_PAGES if ARM64_16K_PAGES + default KERNEL_64K_PAGES if ARM64_64K_PAGES + help + Allows choosing a kernel page size that is an integer multiple of the + hardware page size. By default, kernel and hardware page sizes match. + +config KERNEL_4K_PAGES + bool "4KB" + depends on ARM64_4K_PAGES + +config KERNEL_8K_PAGES + bool "8KB" + depends on ARM64_4K_PAGES + +config KERNEL_16K_PAGES + bool "16KB" + depends on ARM64_4K_PAGES || ARM64_16K_PAGES + +config KERNEL_32K_PAGES + bool "32KB" + depends on ARM64_4K_PAGES || ARM64_16K_PAGES + +config KERNEL_64K_PAGES + bool "64KB" + +endchoice + choice prompt "Virtual address space size" default ARM64_VA_BITS_39 if ARM64_4K_PAGES diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h index 32d14f481f0c3f375e6ff011e6be840f51ed641d..92cdbc67044326a507ff9af6c9811a9108128127 100644 --- a/arch/arm64/include/asm/kernel-pgtable.h +++ b/arch/arm64/include/asm/kernel-pgtable.h @@ -85,13 +85,13 @@ + EARLY_PGDS((vstart), (vend), add) /* each PGDIR needs a next level page table */ \ + EARLY_PUDS((vstart), (vend), add) /* each PUD needs a next level page table */ \ + EARLY_PMDS((vstart), (vend), add)) /* each PMD needs a next level page table */ -#define INIT_DIR_SIZE (PAGE_SIZE * EARLY_PAGES(KIMAGE_VADDR, _end, EARLY_KASLR)) +#define INIT_DIR_SIZE (SUBPAGE_SIZE * EARLY_PAGES(KIMAGE_VADDR, _end, EARLY_KASLR)) /* the initial ID map may need two extra pages if it needs to be extended */ #if VA_BITS < 48 -#define INIT_IDMAP_DIR_SIZE ((INIT_IDMAP_DIR_PAGES + 2) * PAGE_SIZE) +#define INIT_IDMAP_DIR_SIZE ((INIT_IDMAP_DIR_PAGES + 2) * SUBPAGE_SIZE) #else -#define INIT_IDMAP_DIR_SIZE (INIT_IDMAP_DIR_PAGES * PAGE_SIZE) +#define INIT_IDMAP_DIR_SIZE (INIT_IDMAP_DIR_PAGES * SUBPAGE_SIZE) #endif #define INIT_IDMAP_DIR_PAGES EARLY_PAGES(KIMAGE_VADDR, _end + MAX_FDT_SIZE + SWAPPER_BLOCK_SIZE, 1) diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 8aa8492dafc0f43c4ff6b5d4fa93315ef3f4bc1a..f741eedf45d6c08d2983e45a3fd51e86f122edd8 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -164,7 +164,7 @@ * * The table roughly translates to : * - * SL0(PAGE_SIZE, Entry_level) = TGRAN_SL0_BASE - Entry_Level + * SL0(SUBPAGE_SIZE, Entry_level) = TGRAN_SL0_BASE - Entry_Level * * Where TGRAN_SL0_BASE is a magic number depending on the page size: * TGRAN_SL0_BASE(4K) = 2 @@ -206,21 +206,21 @@ * descriptors in section D4.2.8 in ARM DDI 0487C.a. 
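The new Kconfig split keeps the hardware translation granule (CONFIG_ARM64_PAGE_SHIFT) and the kernel page size (CONFIG_KERNEL_PAGE_SHIFT) as two independent knobs, with the kernel page constrained to a power-of-two multiple of the granule. A minimal sketch of that relationship, assuming a 4K granule with 16K kernel pages; SUBPAGES_PER_PAGE is an illustrative name, not something defined by this patch:

    /*
     * Illustrative only: e.g. CONFIG_ARM64_PAGE_SHIFT=12 (4K granule) with
     * CONFIG_KERNEL_PAGE_SHIFT=14 (16K kernel pages) gives four hardware
     * pages per kernel page.
     */
    _Static_assert(CONFIG_KERNEL_PAGE_SHIFT >= CONFIG_ARM64_PAGE_SHIFT,
                   "kernel pages must not be smaller than hardware pages");
    #define SUBPAGES_PER_PAGE \
            (1UL << (CONFIG_KERNEL_PAGE_SHIFT - CONFIG_ARM64_PAGE_SHIFT))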
* * The algorithm defines the expectations on the translation table - * addresses for each level, based on PAGE_SIZE, entry level + * addresses for each level, based on SUBPAGE_SIZE, entry level * and the translation table size (T0SZ). The variable "x" in the * algorithm determines the alignment of a table base address at a given * level and thus determines the alignment of VTTBR:BADDR for stage2 * page table entry level. * Since the number of bits resolved at the entry level could vary * depending on the T0SZ, the value of "x" is defined based on a - * Magic constant for a given PAGE_SIZE and Entry Level. The - * intermediate levels must be always aligned to the PAGE_SIZE (i.e, - * x = PAGE_SHIFT). + * Magic constant for a given SUBPAGE_SIZE and Entry Level. The + * intermediate levels must be always aligned to the SUBPAGE_SIZE (i.e, + * x = SUBPAGE_SHIFT). * * The value of "x" for entry level is calculated as : * x = Magic_N - T0SZ * - * where Magic_N is an integer depending on the page size and the entry + * where Magic_N is an integer depending on the (sub) page size and the entry * level of the page table as below: * * -------------------------------------------- @@ -237,34 +237,34 @@ * * We have a magic formula for the Magic_N below: * - * Magic_N(PAGE_SIZE, Level) = 64 - ((PAGE_SHIFT - 3) * Number_of_levels) + * Magic_N(SUBPAGE_SIZE, Level) = 64 - ((SUBPAGE_SHIFT - 3) * Number_of_levels) * * where Number_of_levels = (4 - Level). We are only interested in the * value for Entry_Level for the stage2 page table. * * So, given that T0SZ = (64 - IPA_SHIFT), we can compute 'x' as follows: * - * x = (64 - ((PAGE_SHIFT - 3) * Number_of_levels)) - (64 - IPA_SHIFT) - * = IPA_SHIFT - ((PAGE_SHIFT - 3) * Number of levels) + * x = (64 - ((SUBPAGE_SHIFT - 3) * Number_of_levels)) - (64 - IPA_SHIFT) + * = IPA_SHIFT - ((SUBPAGE_SHIFT - 3) * Number of levels) * * Here is one way to explain the Magic Formula: * * x = log2(Size_of_Entry_Level_Table) * - * Since, we can resolve (PAGE_SHIFT - 3) bits at each level, and another - * PAGE_SHIFT bits in the PTE, we have : + * Since, we can resolve (SUBPAGE_SHIFT - 3) bits at each level, and another + * SUBPAGE_SHIFT bits in the PTE, we have : * - * Bits_Entry_level = IPA_SHIFT - ((PAGE_SHIFT - 3) * (n - 1) + PAGE_SHIFT) - * = IPA_SHIFT - (PAGE_SHIFT - 3) * n - 3 + * Bits_Entry_level = IPA_SHIFT - ((SUBPAGE_SHIFT - 3) * (n - 1) + SUBPAGE_SHIFT) + * = IPA_SHIFT - (SUBPAGE_SHIFT - 3) * n - 3 * where n = number of levels, and since each pointer is 8bytes, we have: * * x = Bits_Entry_Level + 3 - * = IPA_SHIFT - (PAGE_SHIFT - 3) * n + * = IPA_SHIFT - (SUBPAGE_SHIFT - 3) * n * * The only constraint here is that, we have to find the number of page table * levels for a given IPA size (which we do, see stage2_pt_levels()) */ -#define ARM64_VTTBR_X(ipa, levels) ((ipa) - ((levels) * (PAGE_SHIFT - 3))) +#define ARM64_VTTBR_X(ipa, levels) ((ipa) - ((levels) * (SUBPAGE_SHIFT - 3))) #define VTTBR_CNP_BIT (UL(1)) #define VTTBR_VMID_SHIFT (UL(48)) diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index 3252eb50ecfe59e2df67caf19d4bf790eb22a4b8..899626bcc559d16346f5c93d6a1ae40426c84458 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -39,7 +39,7 @@ typedef u64 kvm_pte_t; #define KVM_PTE_VALID BIT(0) -#define KVM_PTE_ADDR_MASK GENMASK(47, PAGE_SHIFT) +#define KVM_PTE_ADDR_MASK GENMASK(47, SUBPAGE_SHIFT) #define KVM_PTE_ADDR_51_48 GENMASK(15, 12) static inline bool kvm_pte_valid(kvm_pte_t pte) @@ 
-51,7 +51,7 @@ static inline u64 kvm_pte_to_phys(kvm_pte_t pte) { u64 pa = pte & KVM_PTE_ADDR_MASK; - if (PAGE_SHIFT == 16) + if (SUBPAGE_SHIFT == 16) pa |= FIELD_GET(KVM_PTE_ADDR_51_48, pte) << 48; return pa; diff --git a/arch/arm64/include/asm/kvm_pkvm.h b/arch/arm64/include/asm/kvm_pkvm.h index 9f4ad2a8df59c046e267e0b6ff0c3f788d11977b..64b4992efd286c35e7fe5f70f85b72bde44ced7c 100644 --- a/arch/arm64/include/asm/kvm_pkvm.h +++ b/arch/arm64/include/asm/kvm_pkvm.h @@ -20,7 +20,7 @@ static inline unsigned long __hyp_pgtable_max_pages(unsigned long nr_pages) /* Provision the worst case scenario */ for (i = 0; i < KVM_PGTABLE_MAX_LEVELS; i++) { - nr_pages = DIV_ROUND_UP(nr_pages, PTRS_PER_PTE); + nr_pages = DIV_ROUND_UP(nr_pages, SUBPTES_PER_PTE); total += nr_pages; } @@ -34,7 +34,7 @@ static inline unsigned long __hyp_pgtable_total_pages(void) /* Cover all of memory with page-granularity */ for (i = 0; i < kvm_nvhe_sym(hyp_memblock_nr); i++) { struct memblock_region *reg = &kvm_nvhe_sym(hyp_memory)[i]; - res += __hyp_pgtable_max_pages(reg->size >> PAGE_SHIFT); + res += __hyp_pgtable_max_pages(reg->size >> SUBPAGE_SHIFT); } return res; @@ -47,7 +47,7 @@ static inline unsigned long hyp_s1_pgtable_pages(void) res = __hyp_pgtable_total_pages(); /* Allow 1 GiB for private mappings */ - res += __hyp_pgtable_max_pages(SZ_1G >> PAGE_SHIFT); + res += __hyp_pgtable_max_pages(SZ_1G >> SUBPAGE_SHIFT); return res; } @@ -63,7 +63,7 @@ static inline unsigned long host_s2_pgtable_pages(void) res = __hyp_pgtable_total_pages() + 16; /* Allow 1 GiB for MMIO mappings */ - res += __hyp_pgtable_max_pages(SZ_1G >> PAGE_SHIFT); + res += __hyp_pgtable_max_pages(SZ_1G >> SUBPAGE_SHIFT); return res; } diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 9dd08cd339c3f0286c6d361bae0569f0035c9a8e..d7dd5a9eb567a26ebd62d018abfde6dc7bd2dc5d 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -166,13 +166,13 @@ * Open-coded (swapper_pg_dir - reserved_pg_dir) as this cannot be calculated * until link time. */ -#define RESERVED_SWAPPER_OFFSET (PAGE_SIZE) +#define RESERVED_SWAPPER_OFFSET (SUBPAGE_SIZE) /* * Open-coded (swapper_pg_dir - tramp_pg_dir) as this cannot be calculated * until link time. */ -#define TRAMP_SWAPPER_OFFSET (2 * PAGE_SIZE) +#define TRAMP_SWAPPER_OFFSET (2 * SUBPAGE_SIZE) #ifndef __ASSEMBLY__ diff --git a/arch/arm64/include/asm/page-def.h b/arch/arm64/include/asm/page-def.h index 2403f7b4cdbfb63d038b5b3a86cc6570061928fc..2b95cbdf57667665032cc12a7cff33161bddb639 100644 --- a/arch/arm64/include/asm/page-def.h +++ b/arch/arm64/include/asm/page-def.h @@ -11,8 +11,12 @@ #include /* PAGE_SHIFT determines the page size */ -#define PAGE_SHIFT CONFIG_ARM64_PAGE_SHIFT +#define PAGE_SHIFT CONFIG_KERNEL_PAGE_SHIFT #define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT) #define PAGE_MASK (~(PAGE_SIZE-1)) +#define SUBPAGE_SHIFT CONFIG_ARM64_PAGE_SHIFT +#define SUBPAGE_SIZE (_AC(1, UL) << SUBPAGE_SHIFT) +#define SUBPAGE_MASK (~(SUBPAGE_SIZE-1)) + #endif /* __ASM_PAGE_DEF_H */ diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index 5ab8d163198fd99a75d6f4bfce4202c09f02df88..975f7a1b9ebb475973b7ee2c7d6c5a7035ffdc0b 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -9,38 +9,39 @@ /* * Number of page-table levels required to address 'va_bits' wide - * address, without section mapping. 
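The pKVM sizing helpers above now count hardware descriptors per table page (SUBPTES_PER_PTE) and hardware pages per memblock. A rough worked example, assuming a 4K granule (SUBPTES_PER_PTE == 512) and KVM_PGTABLE_MAX_LEVELS == 4; the function name and hard-coded constants are illustrative only:

    static unsigned long example_hyp_pages_for_1g(void)
    {
            unsigned long nr = 0x40000000UL >> 12;  /* 1 GiB = 262144 hw pages */
            unsigned long total = 0;
            int i;

            for (i = 0; i < 4; i++) {               /* KVM_PGTABLE_MAX_LEVELS  */
                    nr = (nr + 511) / 512;          /* DIV_ROUND_UP(nr, SUBPTES_PER_PTE) */
                    total += nr;
            }
            return total;                           /* 512 + 1 + 1 + 1 = 515   */
    }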
We resolve the top (va_bits - PAGE_SHIFT) - * bits with (PAGE_SHIFT - 3) bits at each page table level. Hence: + * address, without section mapping. We resolve the top (va_bits - SUBPAGE_SHIFT) + * bits with (SUBPAGE_SHIFT - 3) bits at each page table level. Hence: * - * levels = DIV_ROUND_UP((va_bits - PAGE_SHIFT), (PAGE_SHIFT - 3)) + * levels = DIV_ROUND_UP((va_bits - SUBPAGE_SHIFT), (SUBPAGE_SHIFT - 3)) * * where DIV_ROUND_UP(n, d) => (((n) + (d) - 1) / (d)) * * We cannot include linux/kernel.h which defines DIV_ROUND_UP here * due to build issues. So we open code DIV_ROUND_UP here: * - * ((((va_bits) - PAGE_SHIFT) + (PAGE_SHIFT - 3) - 1) / (PAGE_SHIFT - 3)) + * ((((va_bits) - SUBPAGE_SHIFT) + (SUBPAGE_SHIFT - 3) - 1) / (SUBPAGE_SHIFT - 3)) * * which gets simplified as : */ -#define ARM64_HW_PGTABLE_LEVELS(va_bits) (((va_bits) - 4) / (PAGE_SHIFT - 3)) +#define ARM64_HW_PGTABLE_LEVELS(va_bits) (((va_bits) - 4) / (SUBPAGE_SHIFT - 3)) /* * Size mapped by an entry at level n ( 0 <= n <= 3) - * We map (PAGE_SHIFT - 3) at all translation levels and PAGE_SHIFT bits + * We map (SUBPAGE_SHIFT - 3) at all translation levels and SUBPAGE_SHIFT bits * in the final page. The maximum number of translation levels supported by * the architecture is 4. Hence, starting at level n, we have further * ((4 - n) - 1) levels of translation excluding the offset within the page. * So, the total number of bits mapped by an entry at level n is : * - * ((4 - n) - 1) * (PAGE_SHIFT - 3) + PAGE_SHIFT + * ((4 - n) - 1) * (SUBPAGE_SHIFT - 3) + SUBPAGE_SHIFT * * Rearranging it a bit we get : - * (4 - n) * (PAGE_SHIFT - 3) + 3 + * (4 - n) * (SUBPAGE_SHIFT - 3) + 3 */ -#define ARM64_HW_PGTABLE_LEVEL_SHIFT(n) ((PAGE_SHIFT - 3) * (4 - (n)) + 3) +#define ARM64_HW_PGTABLE_LEVEL_SHIFT(n) ((SUBPAGE_SHIFT - 3) * (4 - (n)) + 3) -#define PTRS_PER_PTE (1 << (PAGE_SHIFT - 3)) +#define SUBPTES_PER_PTE (1 << (SUBPAGE_SHIFT - 3)) +#define PTRS_PER_PTE (SUBPTES_PER_PTE >> (PAGE_SHIFT - SUBPAGE_SHIFT)) /* * PMD_SHIFT determines the size a level 2 page table entry can map. @@ -49,7 +50,7 @@ #define PMD_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(2) #define PMD_SIZE (_AC(1, UL) << PMD_SHIFT) #define PMD_MASK (~(PMD_SIZE-1)) -#define PTRS_PER_PMD (1 << (PAGE_SHIFT - 3)) +#define PTRS_PER_PMD (1 << (SUBPAGE_SHIFT - 3)) #endif /* @@ -59,7 +60,7 @@ #define PUD_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(1) #define PUD_SIZE (_AC(1, UL) << PUD_SHIFT) #define PUD_MASK (~(PUD_SIZE-1)) -#define PTRS_PER_PUD (1 << (PAGE_SHIFT - 3)) +#define PTRS_PER_PUD (1 << (SUBPAGE_SHIFT - 3)) #endif /* @@ -74,9 +75,9 @@ /* * Contiguous page definitions. 
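With the definitions above, the number of kernel-visible pte_t slots per table page shrinks by the kernel/hardware page ratio, since one pte_t now spans several consecutive hardware descriptors. A quick worked check, assuming a 4K granule with 16K kernel pages:

    /* Assumed: SUBPAGE_SHIFT == 12, PAGE_SHIFT == 14. */
    _Static_assert((1 << (12 - 3)) == 512, "hardware descriptors per table page");
    _Static_assert((512 >> (14 - 12)) == 128, "kernel pte_t slots per table page");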
*/ -#define CONT_PTE_SHIFT (CONFIG_ARM64_CONT_PTE_SHIFT + PAGE_SHIFT) -#define CONT_PTES (1 << (CONT_PTE_SHIFT - PAGE_SHIFT)) -#define CONT_PTE_SIZE (CONT_PTES * PAGE_SIZE) +#define CONT_PTE_SHIFT (CONFIG_ARM64_CONT_PTE_SHIFT + SUBPAGE_SHIFT) +#define CONT_PTES (1 << (CONT_PTE_SHIFT - SUBPAGE_SHIFT)) +#define CONT_PTE_SIZE (CONT_PTES * SUBPAGE_SIZE) #define CONT_PTE_MASK (~(CONT_PTE_SIZE - 1)) #define CONT_PMD_SHIFT (CONFIG_ARM64_CONT_PMD_SHIFT + PMD_SHIFT) @@ -155,7 +156,7 @@ #define PTE_PXN (_AT(pteval_t, 1) << 53) /* Privileged XN */ #define PTE_UXN (_AT(pteval_t, 1) << 54) /* User XN */ -#define PTE_ADDR_LOW (((_AT(pteval_t, 1) << (48 - PAGE_SHIFT)) - 1) << PAGE_SHIFT) +#define PTE_ADDR_LOW (((_AT(pteval_t, 1) << (48 - SUBPAGE_SHIFT)) - 1) << SUBPAGE_SHIFT) #ifdef CONFIG_ARM64_PA_BITS_52 #define PTE_ADDR_HIGH (_AT(pteval_t, 0xf) << 12) #define PTE_ADDR_MASK (PTE_ADDR_LOW | PTE_ADDR_HIGH) diff --git a/arch/arm64/include/asm/pgtable-types.h b/arch/arm64/include/asm/pgtable-types.h index b8f158ae25273679aa8c7953344cd5c0df05e2f2..4ff7accdbedc87a46401546aa47a0b479dcfe33e 100644 --- a/arch/arm64/include/asm/pgtable-types.h +++ b/arch/arm64/include/asm/pgtable-types.h @@ -10,6 +10,7 @@ #define __ASM_PGTABLE_TYPES_H #include +#include typedef u64 pteval_t; typedef u64 pmdval_t; @@ -20,9 +21,11 @@ typedef u64 pgdval_t; /* * These are used to make use of C type-checking.. */ -typedef struct { pteval_t pte; } pte_t; -#define pte_val(x) ((x).pte) -#define __pte(x) ((pte_t) { (x) } ) +#define SUBPTES_PER_PTR (1 << (PAGE_SHIFT - SUBPAGE_SHIFT)) + +typedef struct { pteval_t pte[SUBPTES_PER_PTR]; } pte_t; +#define pte_val(x) ((x).pte[0]) +#define __pte(x) ((pte_t) { { (x) } } ) #if CONFIG_PGTABLE_LEVELS > 2 typedef struct { pmdval_t pmd; } pmd_t; diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index edf6625ce9654bcda0e7e281d56c71f3a62c9888..2f51e745403b771fed9ccc60fdd0a618899361f6 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -260,9 +260,37 @@ static inline pte_t pte_mkdevmap(pte_t pte) return set_pte_bit(pte, __pgprot(PTE_DEVMAP | PTE_SPECIAL)); } +static inline pte_t next_subpte(pte_t pte) +{ + phys_addr_t phys = __pte_to_phys(pte); + pteval_t pteaddr = __phys_to_pte_val(phys + SUBPAGE_SIZE); + pteval_t pteattr = (pte_val(pte) & ~PTE_ADDR_MASK); + + return __pte(pteaddr | pteattr); +} + +static inline pte_t clear_async_pte_bits(pte_t pte) +{ + if (pte_present(pte)) { + pte = clear_pte_bit(pte, __pgprot(PTE_AF)); + pte = clear_pte_bit(pte, __pgprot(PTE_DIRTY)); + if (pte_val(pte) & PTE_DBM) + pte = clear_pte_bit(pte, __pgprot(PTE_RDONLY)); + } + + return pte; +} + static inline void set_pte(pte_t *ptep, pte_t pte) { - WRITE_ONCE(*ptep, pte); + int i; + pte_t subpte = pte; + + for (i = 0; i < SUBPTES_PER_PTR; i++) { + WRITE_ONCE(ptep->pte[i], pte_val(subpte)); + if (pte_present(pte)) + subpte = next_subpte(subpte); + } /* * Only if the new pte is valid and kernel, otherwise TLB maintenance @@ -274,6 +302,49 @@ static inline void set_pte(pte_t *ptep, pte_t pte) } } +#define __HAVE_ARCH_PTEP_GET +static inline pte_t ptep_get(pte_t *ptep) +{ + int i; + pte_t pte; + pte_t ptebase; + pte_t subpte; + + pte = __pte(READ_ONCE(ptep->pte[0])); + + /* + * Knock out any don't care bits to form ptebase for consistency checks + * against the sub-ptes. (e.g. AF, DIRTY and RDONLY if DBM enabled). 
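set_pte() above fans a single software PTE out across all of its hardware descriptors: the attribute bits are replicated, and (for present entries) the output address advances by one hardware page per slot via next_subpte(). A self-contained sketch with assumed constants (4K hardware pages, 16K kernel pages, 48-bit output addresses); the EX_* names and example_fan_out() are illustrative, not kernel API:

    #define EX_SUBPAGE_SIZE 0x1000UL                /* 4K hardware page          */
    #define EX_SUBPTES      4                       /* 16K kernel page / 4K      */
    #define EX_ADDR_MASK    0x0000fffffffff000UL    /* descriptor output address */

    static void example_fan_out(unsigned long hwpte[EX_SUBPTES], unsigned long pte)
    {
            int i;

            /* Replicate attributes, step the output address per hardware page
             * (the real code only steps the address for present ptes). */
            for (i = 0; i < EX_SUBPTES; i++)
                    hwpte[i] = (pte & ~EX_ADDR_MASK) |
                               ((pte & EX_ADDR_MASK) + i * EX_SUBPAGE_SIZE);
    }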
+ */ + ptebase = clear_async_pte_bits(pte); + + for (i = 1; i < SUBPTES_PER_PTR; i++) { + subpte = __pte(READ_ONCE(ptep->pte[i])); + + if (pte_present(pte)) { + /* Gather HW AF bits from sub-ptes. */ + if (pte_young(subpte)) + pte = pte_mkyoung(pte); + + /* Gather HW DMB from sub-ptes. */ + if (pte_hw_dirty(subpte)) + pte = clear_pte_bit(pte, __pgprot(PTE_RDONLY)); + + /* Gather SW dirty; it's per-sub-pte due to ptep_set_wrprotect() */ + if (pte_sw_dirty(subpte)) + pte = set_pte_bit(pte, __pgprot(PTE_DIRTY)); + + /* Check consistency amongst sub-ptes. */ + subpte = clear_async_pte_bits(subpte); + ptebase = next_subpte(ptebase); + } + + WARN_ON_ONCE(pte_val(ptebase) != pte_val(subpte)); + } + + return pte; +} + extern void __sync_icache_dcache(pte_t pteval); /* @@ -292,17 +363,13 @@ extern void __sync_icache_dcache(pte_t pteval); * PTE_DIRTY || (PTE_WRITE && !PTE_RDONLY) */ -static inline void __check_racy_pte_update(struct mm_struct *mm, pte_t *ptep, - pte_t pte) +static inline void __check_racy_pte_update(struct mm_struct *mm, pte_t old_pte, + pte_t new_pte) { - pte_t old_pte; - if (!IS_ENABLED(CONFIG_DEBUG_VM)) return; - old_pte = READ_ONCE(*ptep); - - if (!pte_valid(old_pte) || !pte_valid(pte)) + if (!pte_valid(old_pte) || !pte_valid(new_pte)) return; if (mm != current->active_mm && atomic_read(&mm->mm_users) <= 1) return; @@ -312,19 +379,19 @@ static inline void __check_racy_pte_update(struct mm_struct *mm, pte_t *ptep, * (ptep_set_access_flags safely changes valid ptes without going * through an invalid entry). */ - VM_WARN_ONCE(!pte_young(pte), + VM_WARN_ONCE(!pte_young(new_pte), "%s: racy access flag clearing: 0x%016llx -> 0x%016llx", - __func__, pte_val(old_pte), pte_val(pte)); - VM_WARN_ONCE(pte_write(old_pte) && !pte_dirty(pte), + __func__, pte_val(old_pte), pte_val(new_pte)); + VM_WARN_ONCE(pte_write(old_pte) && !pte_dirty(new_pte), "%s: racy dirty state clearing: 0x%016llx -> 0x%016llx", - __func__, pte_val(old_pte), pte_val(pte)); + __func__, pte_val(old_pte), pte_val(new_pte)); } -static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t pte) +static inline void __prep_set_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t old_pte, pte_t new_pte) { - if (pte_present(pte) && pte_user_exec(pte) && !pte_special(pte)) - __sync_icache_dcache(pte); + if (pte_present(new_pte) && pte_user_exec(new_pte) && !pte_special(new_pte)) + __sync_icache_dcache(new_pte); /* * If the PTE would provide user space access to the tags associated @@ -332,9 +399,8 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr, * pte_access_permitted() returns false for exec only mappings, they * don't expose tags (instruction fetches don't check tags). 
*/ - if (system_supports_mte() && pte_access_permitted(pte, false) && - !pte_special(pte)) { - pte_t old_pte = READ_ONCE(*ptep); + if (system_supports_mte() && pte_access_permitted(new_pte, false) && + !pte_special(new_pte)) { /* * We only need to synchronise if the new PTE has tags enabled * or if swapping in (in which case another mapping may have @@ -342,20 +408,21 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr, * (!pte_none() && !pte_present()) is an open coded version of * is_swap_pte() */ - if (pte_tagged(pte) || (!pte_none(old_pte) && !pte_present(old_pte))) - mte_sync_tags(old_pte, pte); + if (pte_tagged(new_pte) || (!pte_none(old_pte) && !pte_present(old_pte))) + mte_sync_tags(old_pte, new_pte); } - __check_racy_pte_update(mm, ptep, pte); - - set_pte(ptep, pte); + __check_racy_pte_update(mm, old_pte, new_pte); } static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) { + pte_t old_pte = ptep_get(ptep); + page_table_check_pte_set(mm, addr, ptep, pte); - return __set_pte_at(mm, addr, ptep, pte); + __prep_set_pte_at(mm, addr, old_pte, pte); + set_pte(ptep, pte); } /* @@ -528,20 +595,6 @@ static inline pmd_t pmd_mkdevmap(pmd_t pmd) #define pud_pfn(pud) ((__pud_to_phys(pud) & PUD_MASK) >> PAGE_SHIFT) #define pfn_pud(pfn,prot) __pud(__phys_to_pud_val((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot)) -static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, - pmd_t *pmdp, pmd_t pmd) -{ - page_table_check_pmd_set(mm, addr, pmdp, pmd); - return __set_pte_at(mm, addr, (pte_t *)pmdp, pmd_pte(pmd)); -} - -static inline void set_pud_at(struct mm_struct *mm, unsigned long addr, - pud_t *pudp, pud_t pud) -{ - page_table_check_pud_set(mm, addr, pudp, pud); - return __set_pte_at(mm, addr, (pte_t *)pudp, pud_pte(pud)); -} - #define __p4d_to_phys(p4d) __pte_to_phys(p4d_pte(p4d)) #define __phys_to_p4d_val(phys) __phys_to_pte_val(phys) @@ -638,6 +691,16 @@ static inline void set_pmd(pmd_t *pmdp, pmd_t pmd) } } +static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, + pmd_t *pmdp, pmd_t pmd) +{ + pmd_t old_pmd = READ_ONCE(*pmdp); + + page_table_check_pmd_set(mm, addr, pmdp, pmd); + __prep_set_pte_at(mm, addr, pmd_pte(old_pmd), pmd_pte(pmd)); + set_pmd(pmdp, pmd); +} + static inline void pmd_clear(pmd_t *pmdp) { set_pmd(pmdp, __pmd(0)); @@ -701,6 +764,16 @@ static inline void set_pud(pud_t *pudp, pud_t pud) } } +static inline void set_pud_at(struct mm_struct *mm, unsigned long addr, + pud_t *pudp, pud_t pud) +{ + pud_t old_pud = READ_ONCE(*pudp); + + page_table_check_pud_set(mm, addr, pudp, pud); + __prep_set_pte_at(mm, addr, pud_pte(old_pud), pud_pte(pud)); + set_pud(pudp, pud); +} + static inline void pud_clear(pud_t *pudp) { set_pud(pudp, __pud(0)); @@ -831,9 +904,26 @@ static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) } #define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS -extern int ptep_set_access_flags(struct vm_area_struct *vma, +extern int __entry_set_access_flags(pteval_t *ptevalp, pteval_t entry); + +static inline int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address, pte_t *ptep, - pte_t entry, int dirty); + pte_t entry, int dirty) +{ + int changed = 0; + int i; + + for (i = 0; i < SUBPTES_PER_PTR; i++) { + changed |= __entry_set_access_flags(&ptep->pte[i], pte_val(entry)); + entry = next_subpte(entry); + } + + /* Invalidate a stale read-only entry */ + if (changed && dirty) + flush_tlb_page(vma, address); + + return changed; +} #ifdef CONFIG_TRANSPARENT_HUGEPAGE 
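ptep_set_access_flags() above walks every hardware slot, stepping the template entry with next_subpte() on each iteration so that each descriptor keeps its own output address. A stand-alone sketch of that stepping, with an assumed 48-bit output-address field and 4K granule (the EX_* names and function name are illustrative):

    #define EX_ADDR_MASK    0x0000fffffffff000UL    /* output address, bits [47:12] */
    #define EX_SUBPAGE_SIZE 0x1000UL                /* 4K hardware page             */

    static unsigned long example_next_subpte(unsigned long pteval)
    {
            unsigned long phys = pteval & EX_ADDR_MASK;

            /* keep the attribute bits, advance the address by one hw page */
            return (pteval & ~EX_ADDR_MASK) |
                   ((phys + EX_SUBPAGE_SIZE) & EX_ADDR_MASK);
    }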
#define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS @@ -841,7 +931,13 @@ static inline int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp, pmd_t entry, int dirty) { - return ptep_set_access_flags(vma, address, (pte_t *)pmdp, pmd_pte(entry), dirty); + int changed = __entry_set_access_flags(&pmd_val(*pmdp), pmd_val(entry)); + + /* Invalidate a stale read-only entry */ + if (changed && dirty) + flush_tlb_page(vma, address); + + return changed; } static inline int pud_devmap(pud_t pud) @@ -876,26 +972,30 @@ static inline bool pud_user_accessible_page(pud_t pud) * Atomic pte/pmd modifications. */ #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG -static inline int __ptep_test_and_clear_young(pte_t *ptep) +static inline int __ptep_test_and_clear_young(pteval_t *ptevalp) { - pte_t old_pte, pte; - - pte = READ_ONCE(*ptep); + pteval_t old_pteval, pteval; + pteval = READ_ONCE(*ptevalp); do { - old_pte = pte; - pte = pte_mkold(pte); - pte_val(pte) = cmpxchg_relaxed(&pte_val(*ptep), - pte_val(old_pte), pte_val(pte)); - } while (pte_val(pte) != pte_val(old_pte)); + old_pteval = pteval; + pteval = pte_val(pte_mkold(__pte(pteval))); + pteval = cmpxchg_relaxed(ptevalp, old_pteval, pteval); + } while (pteval != old_pteval); - return pte_young(pte); + return pte_young(__pte(pteval)); } static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long address, pte_t *ptep) { - return __ptep_test_and_clear_young(ptep); + int i; + int young = 0; + + for (i = 0; i < SUBPTES_PER_PTR; i++) + young |= __ptep_test_and_clear_young(&ptep->pte[i]); + + return young; } #define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH @@ -925,7 +1025,16 @@ static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp) { - return ptep_test_and_clear_young(vma, address, (pte_t *)pmdp); + pmd_t old_pmd, pmd; + pmd = READ_ONCE(*pmdp); + do { + old_pmd = pmd; + pmd = pmd_mkold(pmd); + pmd_val(pmd) = cmpxchg_relaxed(&pmd_val(*pmdp), + pmd_val(old_pmd), pmd_val(pmd)); + } while (pmd_val(pmd) != pmd_val(old_pmd)); + + return pmd_young(pmd); } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ @@ -933,7 +1042,42 @@ static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma, static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long address, pte_t *ptep) { - pte_t pte = __pte(xchg_relaxed(&pte_val(*ptep), 0)); + int i; + pte_t pte; + pte_t ptebase; + pte_t subpte; + + pte = __pte(xchg_relaxed(&ptep->pte[0], 0)); + + /* + * Knock out any don't care bits to form ptebase for consistency checks + * against the sub-ptes. (e.g. AF, DIRTY and RDONLY if DBM enabled). + */ + ptebase = clear_async_pte_bits(pte); + + for (i = 1; i < SUBPTES_PER_PTR; i++) { + subpte = __pte(xchg_relaxed(&ptep->pte[i], 0)); + + if (pte_present(pte)) { + /* Gather HW AF bits from sub-ptes. */ + if (pte_young(subpte)) + pte = pte_mkyoung(pte); + + /* Gather HW DMB from sub-ptes. */ + if (pte_hw_dirty(subpte)) + pte = clear_pte_bit(pte, __pgprot(PTE_RDONLY)); + + /* Gather SW dirty; it's per-sub-pte due to ptep_set_wrprotect() */ + if (pte_sw_dirty(subpte)) + pte = set_pte_bit(pte, __pgprot(PTE_DIRTY)); + + /* Check consistency amongst sub-ptes. 
*/ + subpte = clear_async_pte_bits(subpte); + ptebase = next_subpte(ptebase); + } + + WARN_ON_ONCE(pte_val(ptebase) != pte_val(subpte)); + } page_table_check_pte_clear(mm, address, pte); @@ -958,17 +1102,23 @@ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, * dirty status (PTE_DBM && !PTE_RDONLY) to the software PTE_DIRTY bit. */ #define __HAVE_ARCH_PTEP_SET_WRPROTECT +static inline void __ptep_set_wrprotect(pteval_t *ptevalp) +{ + pteval_t old_pteval, pteval; + pteval = READ_ONCE(*ptevalp); + do { + old_pteval = pteval; + pteval = pte_val(pte_wrprotect(__pte(pteval))); + pteval = cmpxchg_relaxed(ptevalp, old_pteval, pteval); + } while (pteval != old_pteval); +} + static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long address, pte_t *ptep) { - pte_t old_pte, pte; + int i; - pte = READ_ONCE(*ptep); - do { - old_pte = pte; - pte = pte_wrprotect(pte); - pte_val(pte) = cmpxchg_relaxed(&pte_val(*ptep), - pte_val(old_pte), pte_val(pte)); - } while (pte_val(pte) != pte_val(old_pte)); + for (i = 0; i < SUBPTES_PER_PTR; i++) + __ptep_set_wrprotect(&ptep->pte[i]); } #ifdef CONFIG_TRANSPARENT_HUGEPAGE @@ -976,7 +1126,14 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addres static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long address, pmd_t *pmdp) { - ptep_set_wrprotect(mm, address, (pte_t *)pmdp); + pmd_t old_pmd, pmd; + pmd = READ_ONCE(*pmdp); + do { + old_pmd = pmd; + pmd = pmd_wrprotect(pmd); + pmd_val(pmd) = cmpxchg_relaxed(&pmd_val(*pmdp), + pmd_val(old_pmd), pmd_val(pmd)); + } while (pmd_val(pmd) != pmd_val(old_pmd)); } #define pmdp_establish pmdp_establish @@ -1008,7 +1165,7 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma, #define __swp_entry(type,offset) ((swp_entry_t) { ((type) << __SWP_TYPE_SHIFT) | ((offset) << __SWP_OFFSET_SHIFT) }) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) -#define __swp_entry_to_pte(swp) ((pte_t) { (swp).val }) +#define __swp_entry_to_pte(swp) __pte((swp).val) #ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION #define __pmd_to_swp_entry(pmd) ((swp_entry_t) { pmd_val(pmd) }) @@ -1092,7 +1249,7 @@ static inline void update_mmu_cache(struct vm_area_struct *vma, static inline bool pud_sect_supported(void) { - return PAGE_SIZE == SZ_4K; + return SUBPAGE_SIZE == SZ_4K; } diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index 412a3b9a3c25dc0353b43f14058b201aa2980dbd..4230fe28f3518eedbeed63e16608bfa75c080c1d 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -64,7 +64,7 @@ /* * Get translation granule of the system, which is decided by - * PAGE_SIZE. Used by TTL. + * SUBPAGE_SIZE. Used by TTL. 
* - 4KB : 1 * - 16KB : 2 * - 64KB : 3 @@ -75,7 +75,7 @@ static inline unsigned long get_trans_granule(void) { - switch (PAGE_SIZE) { + switch (SUBPAGE_SIZE) { case SZ_4K: return TLBI_TTL_TG_4K; case SZ_16K: @@ -135,7 +135,7 @@ static inline unsigned long get_trans_granule(void) */ #define __TLBI_VADDR_RANGE(addr, asid, scale, num, ttl) \ ({ \ - unsigned long __ta = (addr) >> PAGE_SHIFT; \ + unsigned long __ta = (addr) >> SUBPAGE_SHIFT; \ __ta &= GENMASK_ULL(36, 0); \ __ta |= (unsigned long)(ttl) << 37; \ __ta |= (unsigned long)(num) << 39; \ @@ -258,11 +258,16 @@ static inline void flush_tlb_page_nosync(struct vm_area_struct *vma, unsigned long uaddr) { unsigned long addr; + int i; dsb(ishst); - addr = __TLBI_VADDR(uaddr, ASID(vma->vm_mm)); - __tlbi(vale1is, addr); - __tlbi_user(vale1is, addr); + + for (i = 0; i < SUBPTES_PER_PTR; i++) { + addr = __TLBI_VADDR(uaddr, ASID(vma->vm_mm)); + __tlbi(vale1is, addr); + __tlbi_user(vale1is, addr); + uaddr += SUBPAGE_SIZE; + } } static inline void flush_tlb_page(struct vm_area_struct *vma, @@ -276,7 +281,7 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, * This is meant to avoid soft lock-ups on large TLB flushing ranges and not * necessarily a performance improvement. */ -#define MAX_TLBI_OPS PTRS_PER_PTE +#define MAX_TLBI_OPS SUBPTES_PER_PTE static inline void __flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end, @@ -287,9 +292,16 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma, int scale = 0; unsigned long asid, addr, pages; + /* + * Adjust stride to account for kernel PAGE_SIZE vs hw SUBPAGE_SIZE + * difference. + */ + if (stride == PAGE_SIZE) + stride = SUBPAGE_SIZE; + start = round_down(start, stride); end = round_up(end, stride); - pages = (end - start) >> PAGE_SHIFT; + pages = (end - start) >> SUBPAGE_SHIFT; /* * When not uses TLB range ops, we can handle up to @@ -337,7 +349,7 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma, __tlbi_user_level(vae1is, addr, tlb_level); } start += stride; - pages -= stride >> PAGE_SHIFT; + pages -= stride >> SUBPAGE_SHIFT; continue; } @@ -352,7 +364,7 @@ static inline void __flush_tlb_range(struct vm_area_struct *vma, __tlbi(rvae1is, addr); __tlbi_user(rvae1is, addr); } - start += __TLBI_RANGE_PAGES(num, scale) << PAGE_SHIFT; + start += __TLBI_RANGE_PAGES(num, scale) << SUBPAGE_SHIFT; pages -= __TLBI_RANGE_PAGES(num, scale); } scale++; @@ -368,14 +380,14 @@ static inline void flush_tlb_range(struct vm_area_struct *vma, * table entries as part of collapsing hugepages or moving page tables. * Set the tlb_level to 0 because we can not get enough information here. 
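__flush_tlb_range() above now walks the range in hardware-page strides, so when range TLBIs are not used it issues one per-VA operation per SUBPAGE rather than per kernel page (flushing a single 16K kernel page on a 4K granule therefore takes four TLBIs). A rough self-contained sketch of the operation count, assuming a 4K granule and no range instructions; the name is illustrative:

    static unsigned long example_tlbi_count(unsigned long start, unsigned long end)
    {
            const unsigned long stride = 0x1000;            /* SUBPAGE_SIZE (4K)     */

            start &= ~(stride - 1);                         /* round_down to stride  */
            end = (end + stride - 1) & ~(stride - 1);       /* round_up to stride    */
            return (end - start) / stride;                  /* one TLBI per hw page  */
    }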
*/ - __flush_tlb_range(vma, start, end, PAGE_SIZE, false, 0); + __flush_tlb_range(vma, start, end, SUBPAGE_SIZE, false, 0); } static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end) { unsigned long addr; - if ((end - start) > (MAX_TLBI_OPS * PAGE_SIZE)) { + if ((end - start) > (MAX_TLBI_OPS * SUBPAGE_SIZE)) { flush_tlb_all(); return; } @@ -384,7 +396,7 @@ static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end end = __TLBI_VADDR(end, 0); dsb(ishst); - for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12)) + for (addr = start; addr < end; addr += 1 << (SUBPAGE_SHIFT - 12)) __tlbi(vaale1is, addr); dsb(ish); isb(); @@ -396,10 +408,17 @@ static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end */ static inline void __flush_tlb_kernel_pgtable(unsigned long kaddr) { - unsigned long addr = __TLBI_VADDR(kaddr, 0); + unsigned long addr; + int i; dsb(ishst); - __tlbi(vaae1is, addr); + + for (i = 0; i < SUBPTES_PER_PTR; i++) { + addr = __TLBI_VADDR(kaddr, 0); + __tlbi(vaae1is, addr); + kaddr += SUBPAGE_SIZE; + } + dsb(ish); isb(); } diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index a908a37f03678b6ba819998652a41d7de44288e3..d9ea8c53e951aeefbada3abc6a0567dbf0c0d0c9 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -99,7 +99,7 @@ int __init efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md) static int __init set_permissions(pte_t *ptep, unsigned long addr, void *data) { efi_memory_desc_t *md = data; - pte_t pte = READ_ONCE(*ptep); + pte_t pte = ptep_get(ptep); if (md->attribute & EFI_MEMORY_RO) pte = set_pte_bit(pte, __pgprot(PTE_RDONLY)); diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 2196aad7b55bcef05b2554b5fae086e3cdc84159..d63a915f1729561adc20d27cb4fe03c903a94982 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -194,7 +194,7 @@ SYM_FUNC_END(clear_page_tables) * formed from n pages. 
* * tbl: location of page table - * rtbl: address to be used for first level page table entry (typically tbl + PAGE_SIZE) + * rtbl: address to be used for first level page table entry (typically tbl + SUBPAGE_SIZE) * vstart: virtual address of start of range * vend: virtual address of end of range - we map [vstart, vend - 1] * flags: flags to use to map last level entries @@ -210,38 +210,38 @@ SYM_FUNC_END(clear_page_tables) */ .macro map_memory, tbl, rtbl, vstart, vend, flags, phys, order, istart, iend, tmp, count, sv, extra_shift sub \vend, \vend, #1 - add \rtbl, \tbl, #PAGE_SIZE + add \rtbl, \tbl, #SUBPAGE_SIZE mov \count, #0 .ifnb \extra_shift tst \vend, #~((1 << (\extra_shift)) - 1) b.eq .L_\@ - compute_indices \vstart, \vend, #\extra_shift, #(PAGE_SHIFT - 3), \istart, \iend, \count + compute_indices \vstart, \vend, #\extra_shift, #(SUBPAGE_SHIFT - 3), \istart, \iend, \count mov \sv, \rtbl - populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp + populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #SUBPAGE_SIZE, \tmp mov \tbl, \sv .endif .L_\@: compute_indices \vstart, \vend, #PGDIR_SHIFT, #\order, \istart, \iend, \count mov \sv, \rtbl - populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp + populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #SUBPAGE_SIZE, \tmp mov \tbl, \sv #if SWAPPER_PGTABLE_LEVELS > 3 - compute_indices \vstart, \vend, #PUD_SHIFT, #(PAGE_SHIFT - 3), \istart, \iend, \count + compute_indices \vstart, \vend, #PUD_SHIFT, #(SUBPAGE_SHIFT - 3), \istart, \iend, \count mov \sv, \rtbl - populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp + populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #SUBPAGE_SIZE, \tmp mov \tbl, \sv #endif #if SWAPPER_PGTABLE_LEVELS > 2 - compute_indices \vstart, \vend, #SWAPPER_TABLE_SHIFT, #(PAGE_SHIFT - 3), \istart, \iend, \count + compute_indices \vstart, \vend, #SWAPPER_TABLE_SHIFT, #(SUBPAGE_SHIFT - 3), \istart, \iend, \count mov \sv, \rtbl - populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #PAGE_SIZE, \tmp + populate_entries \tbl, \rtbl, \istart, \iend, #PMD_TYPE_TABLE, #SUBPAGE_SIZE, \tmp mov \tbl, \sv #endif - compute_indices \vstart, \vend, #SWAPPER_BLOCK_SHIFT, #(PAGE_SHIFT - 3), \istart, \iend, \count + compute_indices \vstart, \vend, #SWAPPER_BLOCK_SHIFT, #(SUBPAGE_SHIFT - 3), \istart, \iend, \count bic \rtbl, \phys, #SWAPPER_BLOCK_SIZE - 1 populate_entries \tbl, \rtbl, \istart, \iend, \flags, #SWAPPER_BLOCK_SIZE, \tmp .endm @@ -264,7 +264,7 @@ SYM_FUNC_START_LOCAL(remap_region) // Get the index offset for the start of the last level table lsr x1, x1, x6 - bfi x1, xzr, #0, #PAGE_SHIFT - 3 + bfi x1, xzr, #0, #SUBPAGE_SHIFT - 3 // Derive the start and end indexes into the last level table // associated with the provided region @@ -308,7 +308,7 @@ SYM_FUNC_START_LOCAL(create_idmap) */ #if (VA_BITS < 48) #define IDMAP_PGD_ORDER (VA_BITS - PGDIR_SHIFT) -#define EXTRA_SHIFT (PGDIR_SHIFT + PAGE_SHIFT - 3) +#define EXTRA_SHIFT (PGDIR_SHIFT + SUBPAGE_SHIFT - 3) /* * If VA_BITS < 48, we have to configure an additional table level. 
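The map_memory/create_idmap changes above keep all boot-time tables in hardware-page units: each table occupies one SUBPAGE_SIZE page and each level resolves (SUBPAGE_SHIFT - 3) VA bits. A quick sanity check with an assumed 4K granule:

    /* Assumed 4K granule (SUBPAGE_SHIFT == 12), 8-byte descriptors. */
    _Static_assert((12 - 3) == 9, "VA bits resolved per level");
    _Static_assert((1 << 9) * 8 == 4096,
                   "512 eight-byte descriptors fill one hardware page");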
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index ffc5d76cf69555df322494bf95079d047097c6ea..1006bbbf2b73b565e8595830142367f89469ea5f 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -163,7 +163,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) pr_crit("CPU%u: does not support 52-bit VAs\n", cpu); if (status & CPU_STUCK_REASON_NO_GRAN) { pr_crit("CPU%u: does not support %luK granule\n", - cpu, PAGE_SIZE / SZ_1K); + cpu, SUBPAGE_SIZE / SZ_1K); } cpus_stuck_in_kernel++; break; diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 45131e354e27f1f8fc6607e173638c65c2d40f5b..35d38ebc938d66d93635dc63cc9440f4862c3c88 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -199,18 +199,18 @@ SECTIONS } idmap_pg_dir = .; - . += PAGE_SIZE; + . += SUBPAGE_SIZE; #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 tramp_pg_dir = .; - . += PAGE_SIZE; + . += SUBPAGE_SIZE; #endif reserved_pg_dir = .; - . += PAGE_SIZE; + . += SUBPAGE_SIZE; swapper_pg_dir = .; - . += PAGE_SIZE; + . += SUBPAGE_SIZE; . = ALIGN(SEGMENT_ALIGN); __init_begin = .; diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index cdf8e76b0be141380c95a39e76398a222be75c93..d21ca75d050277946c8857c5159fb9d8f6f8d72c 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -83,7 +83,7 @@ static bool kvm_block_mapping_supported(u64 addr, u64 end, u64 phys, u32 level) static u32 kvm_pgtable_idx(struct kvm_pgtable_walk_data *data, u32 level) { u64 shift = kvm_granule_shift(level); - u64 mask = BIT(PAGE_SHIFT - 3) - 1; + u64 mask = BIT(SUBPAGE_SHIFT - 3) - 1; return (data->addr >> shift) & mask; } @@ -126,7 +126,7 @@ static kvm_pte_t kvm_phys_to_pte(u64 pa) { kvm_pte_t pte = pa & KVM_PTE_ADDR_MASK; - if (PAGE_SHIFT == 16) + if (SUBPAGE_SHIFT == 16) pte |= FIELD_PREP(KVM_PTE_ADDR_51_48, pa >> 48); return pte; @@ -236,7 +236,7 @@ static int __kvm_pgtable_walk(struct kvm_pgtable_walk_data *data, if (WARN_ON_ONCE(level >= KVM_PGTABLE_MAX_LEVELS)) return -EINVAL; - for (idx = kvm_pgtable_idx(data, level); idx < PTRS_PER_PTE; ++idx) { + for (idx = kvm_pgtable_idx(data, level); idx < SUBPTES_PER_PTE; ++idx) { kvm_pte_t *ptep = &pgtable[idx]; if (data->addr >= data->end) @@ -264,7 +264,7 @@ static int _kvm_pgtable_walk(struct kvm_pgtable_walk_data *data) return -EINVAL; for (idx = kvm_pgd_page_idx(data); data->addr < data->end; ++idx) { - kvm_pte_t *ptep = &pgt->pgd[idx * PTRS_PER_PTE]; + kvm_pte_t *ptep = &pgt->pgd[idx * SUBPTES_PER_PTE]; ret = __kvm_pgtable_walk(data, ptep, pgt->start_level); if (ret) diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index 5ae18472205a9f34b73eb087b1ba383edee46ad0..3a5afede12e95bd0a67df7b826784b5b43ae8579 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -362,26 +362,26 @@ int kvm_set_ipa_limit(void) ID_AA64MMFR0_EL1_PARANGE_SHIFT); /* * IPA size beyond 48 bits could not be supported - * on either 4K or 16K page size. Hence let's cap + * on either 4K or 16K subpage size. Hence let's cap * it to 48 bits, in case it's reported as larger * on the system. */ - if (PAGE_SIZE != SZ_64K) + if (SUBPAGE_SIZE != SZ_64K) parange = min(parange, (unsigned int)ID_AA64MMFR0_EL1_PARANGE_48); /* - * Check with ARMv8.5-GTG that our PAGE_SIZE is supported at + * Check with ARMv8.5-GTG that our SUBPAGE_SIZE is supported at * Stage-2. If not, things will stop very quickly. 
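The hyp page-table walker above still indexes each level with a (SUBPAGE_SHIFT - 3)-bit slice of the address; only the iteration bound changed from PTRS_PER_PTE to SUBPTES_PER_PTE. A sketch of the level-3 index, assuming a 4K granule (function name is illustrative):

    static unsigned int example_level3_idx(unsigned long addr)
    {
            /* kvm_granule_shift(3) == 12 at a 4K granule; 9 index bits */
            return (addr >> 12) & ((1u << 9) - 1);
    }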
*/ switch (cpuid_feature_extract_unsigned_field(mmfr0, ID_AA64MMFR0_EL1_TGRAN_2_SHIFT)) { case ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_NONE: - kvm_err("PAGE_SIZE not supported at Stage-2, giving up\n"); + kvm_err("SUBPAGE_SIZE not supported at Stage-2, giving up\n"); return -EINVAL; case ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_DEFAULT: - kvm_debug("PAGE_SIZE supported at Stage-2 (default)\n"); + kvm_debug("SUBPAGE_SIZE supported at Stage-2 (default)\n"); break; case ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_MIN ... ID_AA64MMFR0_EL1_TGRAN_2_SUPPORTED_MAX: - kvm_debug("PAGE_SIZE supported at Stage-2 (advertised)\n"); + kvm_debug("SUBPAGE_SIZE supported at Stage-2 (advertised)\n"); break; default: kvm_err("Unsupported value for TGRAN_2, giving up\n"); diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 5b391490e045be91b9cf1e85a2b964474f4d8c4d..7bb0a613c327e0d3067ceab03e2d6ba6aadb19b4 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -141,8 +141,9 @@ static void show_pte(unsigned long addr) return; } - pr_alert("%s pgtable: %luk pages, %llu-bit VAs, pgdp=%016lx\n", + pr_alert("%s pgtable: %luk pages, %luk subpages, %llu-bit VAs, pgdp=%016lx\n", mm == &init_mm ? "swapper" : "user", PAGE_SIZE / SZ_1K, + SUBPAGE_SIZE / SZ_1K, vabits_actual, mm_to_pgd_phys(mm)); pgdp = pgd_offset(mm, addr); pgd = READ_ONCE(*pgdp); @@ -176,7 +177,7 @@ static void show_pte(unsigned long addr) break; ptep = pte_offset_map(pmdp, addr); - pte = READ_ONCE(*ptep); + pte = ptep_get(ptep); pr_cont(", pte=%016llx", pte_val(pte)); pte_unmap(ptep); } while(0); @@ -194,38 +195,32 @@ static void show_pte(unsigned long addr) * * Returns whether or not the PTE actually changed. */ -int ptep_set_access_flags(struct vm_area_struct *vma, - unsigned long address, pte_t *ptep, - pte_t entry, int dirty) +int __entry_set_access_flags(pteval_t *ptevalp, pteval_t entry) { - pteval_t old_pteval, pteval; - pte_t pte = READ_ONCE(*ptep); + pteval_t old_pteval; + pteval_t pteval = READ_ONCE(*ptevalp); - if (pte_same(pte, entry)) + if (pteval == entry) return 0; /* only preserve the access flags and write permission */ - pte_val(entry) &= PTE_RDONLY | PTE_AF | PTE_WRITE | PTE_DIRTY; + entry &= PTE_RDONLY | PTE_AF | PTE_WRITE | PTE_DIRTY; /* * Setting the flags must be done atomically to avoid racing with the * hardware update of the access/dirty state. The PTE_RDONLY bit must - * be set to the most permissive (lowest value) of *ptep and entry + * be set to the most permissive (lowest value) of *ptevalp and entry * (calculated as: a & b == ~(~a | ~b)). 
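The comment above describes merging PTE_RDONLY to the most permissive (lowest) value via a & b == ~(~a | ~b), while the other flags are simply OR'd in. A self-contained sketch of the same three XOR/OR steps used in the cmpxchg loop that follows, with the arm64 PTE_RDONLY bit position hard-coded (EX_RDONLY and the function name are illustrative):

    #define EX_RDONLY       (1UL << 7)      /* PTE_RDONLY (AP[2]) */

    static unsigned long example_merge(unsigned long old, unsigned long entry)
    {
            entry ^= EX_RDONLY;     /* pre-inverted, as the caller does         */
            old   ^= EX_RDONLY;     /* invert so the AND below becomes an OR    */
            old   |= entry;         /* OR in AF/WRITE/DIRTY and inverted RDONLY */
            old   ^= EX_RDONLY;     /* invert back: RDONLY = old & entry        */
            return old;
    }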
*/ - pte_val(entry) ^= PTE_RDONLY; - pteval = pte_val(pte); + entry ^= PTE_RDONLY; do { old_pteval = pteval; pteval ^= PTE_RDONLY; - pteval |= pte_val(entry); + pteval |= entry; pteval ^= PTE_RDONLY; - pteval = cmpxchg_relaxed(&pte_val(*ptep), old_pteval, pteval); + pteval = cmpxchg_relaxed(ptevalp, old_pteval, pteval); } while (pteval != old_pteval); - /* Invalidate a stale read-only entry */ - if (dirty) - flush_tlb_page(vma, address); return 1; } diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 35e9a468d13e6ac68093c7516350815df5b009b5..b68cfff83a44b1522bd6c1d0add1d903a30185e3 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -21,13 +21,13 @@ /* * HugeTLB Support Matrix * - * --------------------------------------------------- - * | Page Size | CONT PTE | PMD | CONT PMD | PUD | - * --------------------------------------------------- - * | 4K | 64K | 2M | 32M | 1G | - * | 16K | 2M | 32M | 1G | | - * | 64K | 2M | 512M | 16G | | - * --------------------------------------------------- + * ------------------------------------------------------ + * | SubPage Size | CONT PTE | PMD | CONT PMD | PUD | + * ------------------------------------------------------ + * | 4K | 64K | 2M | 32M | 1G | + * | 16K | 2M | 32M | 1G | | + * | 64K | 2M | 512M | 16G | | + * ------------------------------------------------------ */ /* @@ -510,7 +510,7 @@ void huge_ptep_set_wrprotect(struct mm_struct *mm, size_t pgsize; pte_t pte; - if (!pte_cont(READ_ONCE(*ptep))) { + if (!pte_cont(ptep_get(ptep))) { ptep_set_wrprotect(mm, addr, ptep); return; } @@ -535,7 +535,7 @@ pte_t huge_ptep_clear_flush(struct vm_area_struct *vma, size_t pgsize; int ncontig; - if (!pte_cont(READ_ONCE(*ptep))) + if (!pte_cont(ptep_get(ptep))) return ptep_clear_flush(vma, addr, ptep); ncontig = find_num_contig(mm, addr, ptep, &pgsize); diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c index e969e68de005fd2abf0aee91d51a03fc4e2eeebd..d7d480ba0f4dd08f1ef1b21b25700ecbb7ab4b4d 100644 --- a/arch/arm64/mm/kasan_init.c +++ b/arch/arm64/mm/kasan_init.c @@ -113,7 +113,7 @@ static void __init kasan_pte_populate(pmd_t *pmdp, unsigned long addr, memset(__va(page_phys), KASAN_SHADOW_INIT, PAGE_SIZE); next = addr + PAGE_SIZE; set_pte(ptep, pfn_pte(__phys_to_pfn(page_phys), PAGE_KERNEL)); - } while (ptep++, addr = next, addr != end && pte_none(READ_ONCE(*ptep))); + } while (ptep++, addr = next, addr != end && pte_none(ptep_get(ptep))); } static void __init kasan_pmd_populate(pud_t *pudp, unsigned long addr, diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 9a7c38965154081eebea1936146d349989a222cb..94a6555a2bd53b6ab71b54b71ddba052636b5858 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -110,7 +110,7 @@ static phys_addr_t __init early_pgtable_alloc(int shift) phys_addr_t phys; void *ptr; - phys = memblock_phys_alloc_range(PAGE_SIZE, PAGE_SIZE, 0, + phys = memblock_phys_alloc_range(SUBPAGE_SIZE, SUBPAGE_SIZE, 0, MEMBLOCK_ALLOC_NOLEAKTRACE); if (!phys) panic("Failed to allocate page table page\n"); @@ -120,9 +120,10 @@ static phys_addr_t __init early_pgtable_alloc(int shift) * slot will be free, so we can (ab)use the FIX_PTE slot to initialise * any level of table. 
*/ - ptr = pte_set_fixmap(phys); + ptr = pte_set_fixmap(PAGE_ALIGN_DOWN(phys)); + ptr += offset_in_page(phys); - memset(ptr, 0, PAGE_SIZE); + memset(ptr, 0, SUBPAGE_SIZE); /* * Implicit barriers also ensure the zeroed page is visible to the page @@ -174,7 +175,7 @@ static void init_pte(pmd_t *pmdp, unsigned long addr, unsigned long end, ptep = pte_set_fixmap_offset(pmdp, addr); do { - pte_t old_pte = READ_ONCE(*ptep); + pte_t old_pte = ptep_get(ptep); set_pte(ptep, pfn_pte(__phys_to_pfn(phys), prot)); @@ -854,7 +855,7 @@ int kern_addr_valid(unsigned long addr) return pfn_valid(pmd_pfn(pmd)); ptep = pte_offset_kernel(pmdp, addr); - pte = READ_ONCE(*ptep); + pte = ptep_get(ptep); if (pte_none(pte)) return 0; @@ -905,7 +906,7 @@ static void unmap_hotplug_pte_range(pmd_t *pmdp, unsigned long addr, do { ptep = pte_offset_kernel(pmdp, addr); - pte = READ_ONCE(*ptep); + pte = ptep_get(ptep); if (pte_none(pte)) continue; @@ -1038,7 +1039,7 @@ static void free_empty_pte_table(pmd_t *pmdp, unsigned long addr, do { ptep = pte_offset_kernel(pmdp, addr); - pte = READ_ONCE(*ptep); + pte = ptep_get(ptep); /* * This is just a sanity check here which verifies that @@ -1057,7 +1058,7 @@ static void free_empty_pte_table(pmd_t *pmdp, unsigned long addr, */ ptep = pte_offset_kernel(pmdp, 0UL); for (i = 0; i < PTRS_PER_PTE; i++) { - if (!pte_none(READ_ONCE(ptep[i]))) + if (!pte_none(ptep_get(ptep++))) return; } diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c index 5922178d7a064c1c98af43ad97d72fa4a6b8d79b..3c5555184281c801aabcbb6f8f4645e12a71d990 100644 --- a/arch/arm64/mm/pageattr.c +++ b/arch/arm64/mm/pageattr.c @@ -33,7 +33,7 @@ bool can_set_direct_map(void) static int change_page_range(pte_t *ptep, unsigned long addr, void *data) { struct page_change_data *cdata = data; - pte_t pte = READ_ONCE(*ptep); + pte_t pte = ptep_get(ptep); pte = clear_pte_bit(pte, cdata->clear_mask); pte = set_pte_bit(pte, cdata->set_mask); @@ -244,5 +244,5 @@ bool kernel_page_present(struct page *page) return true; ptep = pte_offset_kernel(pmdp, addr); - return pte_valid(READ_ONCE(*ptep)); + return pte_valid(ptep_get(ptep)); } diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index b9ecbbae1e1abca1bdadfdc33f30c56bbb0074cd..93c23f629a84187be6e043b48653610133c8e40e 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -233,14 +233,14 @@ SYM_FUNC_END(idmap_cpu_replace_ttbr1) .macro kpti_map_pgtbl, type, level str xzr, [temp_pte, #8 * (\level + 1)] // break before make dsb nshst - add pte, temp_pte, #PAGE_SIZE * (\level + 1) + add pte, temp_pte, #SUBPAGE_SIZE * (\level + 1) lsr pte, pte, #12 tlbi vaae1, pte dsb nsh isb phys_to_pte pte, cur_\type\()p - add cur_\type\()p, temp_pte, #PAGE_SIZE * (\level + 1) + add cur_\type\()p, temp_pte, #SUBPAGE_SIZE * (\level + 1) orr pte, pte, pte_flags str pte, [temp_pte, #8 * (\level + 1)] dsb nshst @@ -346,7 +346,7 @@ SYM_TYPED_FUNC_START(idmap_kpti_install_ng_mappings) /* PTE */ pte_to_phys cur_ptep, pmd kpti_map_pgtbl pte, 3 - kpti_mk_tbl_ng pte, PTRS_PER_PTE + kpti_mk_tbl_ng pte, SUBPTES_PER_PTE b .Lnext_pmd .unreq cpu diff --git a/arch/arm64/mm/trans_pgd.c b/arch/arm64/mm/trans_pgd.c index 4ea2eefbc053f673ca1575cbf50cf6b61139319b..5584dbacf049f550f2fffa39d0b5b2f0b95683c8 100644 --- a/arch/arm64/mm/trans_pgd.c +++ b/arch/arm64/mm/trans_pgd.c @@ -32,7 +32,7 @@ static void *trans_alloc(struct trans_pgd_info *info) static void _copy_pte(pte_t *dst_ptep, pte_t *src_ptep, unsigned long addr) { - pte_t pte = READ_ONCE(*src_ptep); + pte_t pte = ptep_get(src_ptep); if 
(pte_valid(pte)) { /*