/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Based on arch/arm/lib/clear_user.S
 *
 * Copyright (C) 2012 ARM Ltd.
 */

#include <linux/linkage.h>

#include <asm/asm-uaccess.h>
#include <asm/assembler.h>

	.text

/* Prototype: unsigned long __arch_clear_user(void *addr, size_t sz)
 * Purpose  : clear some user memory
 * Params   : addr - user memory address to clear
 *          : sz   - number of bytes to clear
 * Returns  : number of bytes NOT cleared
 *
 * Alignment fixed up by hardware.
 */
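/*
 * Callers normally reach this through the clear_user()/__clear_user()
 * wrappers rather than directly. A minimal sketch of such a wrapper
 * (simplified illustration of the usual uaccess pattern, not the exact
 * arm64 kernel code):
 *
 *	static inline unsigned long clear_user(void __user *to, unsigned long n)
 *	{
 *		if (access_ok(to, n))
 *			n = __arch_clear_user(to, n);
 *		return n;		// bytes NOT cleared, 0 on full success
 *	}
 */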

/*
 * Keep __arch_clear_user() 16-byte aligned, matching upstream commit
 * 344323e0428b ("arm64: Rewrite __arch_clear_user()"):
 *
 *   Apparently some folks examine large reads from /dev/zero closely
 *   enough to notice the loop being hot, so align it per the other
 *   critical loops (presumably around a typical instruction fetch
 *   granularity).
 *
 * Without this, unrelated code churn (or compiler options that change
 * the kernel's size) can leave the function only 4-byte aligned, which
 * regresses callbench's 'read' result from ~4000 ns to ~7000 ns:
 * read() on /dev/zero makes heavy use of clear_user(), so this loop is
 * hot on that path.
 */
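/*
 * The regression is easy to reproduce with a tight read() loop on
 * /dev/zero, which zeroes the user buffer via clear_user(). A rough
 * userspace sketch (illustrative only; not the callbench source, and
 * the buffer size and iteration count are arbitrary):
 *
 *	int fd = open("/dev/zero", O_RDONLY);
 *	char buf[4096];
 *	for (int i = 0; i < 1000000; i++)
 *		read(fd, buf, sizeof(buf));	// each call ends up in clear_user()
 *	close(fd);
 */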
	.p2align 4
SYM_FUNC_START(__arch_clear_user)
	mov	x2, x1			// save the size for fixup return
	subs	x1, x1, #8
	b.mi	2f			// fewer than 8 bytes to clear
1:	// clear 8 bytes at a time while at least 8 bytes remain
uao_user_alternative 9f, str, sttr, xzr, x0, 8
	subs	x1, x1, #8
	b.pl	1b
2:	adds	x1, x1, #4		// 0-7 bytes left: try a 4-byte store
	b.mi	3f
uao_user_alternative 9f, str, sttr, wzr, x0, 4
	sub	x1, x1, #4
3:	adds	x1, x1, #2		// 0-3 bytes left: try a 2-byte store
	b.mi	4f
uao_user_alternative 9f, strh, sttrh, wzr, x0, 2
	sub	x1, x1, #2
4:	adds	x1, x1, #1		// 0-1 bytes left: try a final byte
	b.mi	5f
uao_user_alternative 9f, strb, sttrb, wzr, x0, 0
5:	mov	x0, #0			// everything cleared
	ret
SYM_FUNC_END(__arch_clear_user)
EXPORT_SYMBOL(__arch_clear_user)
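/*
 * A nonzero return propagates back to userspace as a short read or
 * -EFAULT. For example, the /dev/zero read path does roughly the
 * following (simplified sketch of the drivers/char/mem.c logic, not
 * verbatim):
 *
 *	size_t left = clear_user(buf + cleared, chunk);
 *	cleared += chunk - left;
 *	if (left)
 *		return cleared ? cleared : -EFAULT;
 */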

	.section .fixup,"ax"
	.align	2
	// Faulting user stores are redirected here via the exception table
	// entries that uao_user_alternative installs for each access.
9:	mov	x0, x2			// return the original size
	ret
	.previous