This is the mail archive of the
gcc@gcc.gnu.org
mailing list for the GCC project.
Re: [PATCH] Add 64-bit get_user and __get_user for i386
- From: Jamie Lokier <jamie at shareable dot org>
- To: Linus Torvalds <torvalds at osdl dot org>
- Cc: chris at scary dot beasts dot org, akpm at osdl dot org, linux-kernel at vger dot kernel dot org, gcc at gcc dot gnu dot org
- Date: Tue, 20 Apr 2004 03:09:22 +0100
- Subject: Re: [PATCH] Add 64-bit get_user and __get_user for i386
[GCC list added because there's a GCC question below. Search for "-->".]
Linus Torvalds wrote:
> #define get_user(x, ptr) \
> ({ __typeof__(*(ptr)) __val_gu; \
> int __ret_gu; \
> switch (sizeof(__val_gu)) { \
> case 1: __get_user_x(1,__ret_gu,__val_gu,ptr); break; \
> case 2: __get_user_x(2,__ret_gu,__val_gu,ptr); break; \
> case 4: __get_user_x(4,__ret_gu,__val_gu,ptr); break; \
> case 8: __get_user_8(__ret_gu,__val_gu,ptr); break; \
> default: __get_user_bad(); break; \
> } \
> (x) = __val_gu; \
> __ret_gu; \
> })
>
> Which is _different_ from the other "get_user" cases: it passes the
> address in %ecx, and it returns the error in %ecx too - the return value
> comes in %edx:%eax. Make the __get_user_8 in getuser.S match those
> different rules.
>
> What do you think?
I grokked your idea -- the first time :)
What held me back is this change:
__typeof__(*(ptr)) __val_gu;
At the moment, __val_gu is always an int even if *(ptr) is smaller.
Changing the type of __val_gu changes the type seen by the asm statement.
Now, that is probably ok, but it does potentially change the
1/2-byte cases which is why I didn't do that.
Now, let's see... Just to be sure: if an __asm__ stores a 1-byte or
2-byte value in a register, does the compiler expect the value to be
already zero-extended to 32 bits? The code in arch/i386/mm/getuser.S
makes a point of zero-extending: I wonder if it's important.
For example, when get_user() reads from an unsigned char *, and the
required result is an int, will the compiler zero-extend the __asm__
output, or will the compiler assume the properties it uses for
registers in generated code? The point is that GCC has a PROMOTE_MODE
macro, which means (I'm not entirely clear on this) that 8-bit and
16-bit values are kept in 32-bit form in registers on some targets.
After some experimenting with GCC and dashing dazedly among its twisty
maze of target macros, I see the rules it uses for its i386 generated
code depend on the subtarget. It'll try to keep 16-bit values in
32-bit registers extended on the Pentium Pro, to avoid 16-bit
operations, for example.
However that doesn't seem to be a problem. Experiments show that an
__asm__ which outputs to a char or short type is _always_
sign-extended or zero-extended if needed by the compiler, on various
i386 subtargets. The compiler doesn't assume anything about the
output higher bits from an __asm__.
--> GCC experts, can you confirm the above property please?
(Function calls are similar: GCC won't assume anything about the
higher bits of the result received from a function, although it always
promotes the result before returning a value. Odd combination).
I had to check. It means that the zero-extending in
arch/i386/lib/getuser.S is redundant, although it doesn't hurt except
on the 486 and Pentium. And your __typeof__() is fine.
I've been running it for half an hour now, it is behaving fine.
Haven't done any thorough testing of boundary conditions etc.
By the way, is there a reason why put_user() is inline when get_user() isn't?
Enjoy,
-- Jamie
----
Subject: [PATCH] Add 64-bit get_user and __get_user for i386
Patch: uaccess64-2.6.5-jl3
Added 64-bit get_user and __get_user for i386.
Changed the registers for all __get_user_X so they could all be
consistent: %ecx now contains the address, and %eax or %edx:%eax
contain the result.
There was a boundary bug in getuser.S: get_user(x,(char *)0xbfffffff)
would fail. Fixed.
Added might_sleep() to get_user().
Added the 8-byte constant-length cases for __copy_from_user() and
__copy_to_user().
Changed constant-length short copy_from_user() to be out of line (it was
inline), as this is just another way to write get_user(). It calls the
same routines as get_user() and one test showed equivalent uses now
generate identical code (sendfile64). Constant-length
__copy_from_user() remains inline, just like __get_user().
Very slight cosmetic moving of a few definitions into a more
consistent arrangement.
This patch shaved about 1.2k off my kernel, due to the un-inlining of
copy_from_user() calls which are equivalent to get_user().
diff -urN --exclude-from=dontdiff dual-2.6.5/arch/i386/kernel/i386_ksyms.c uaccess64-2.6.5/arch/i386/kernel/i386_ksyms.c
--- dual-2.6.5/arch/i386/kernel/i386_ksyms.c 2004-04-14 08:28:07.000000000 +0100
+++ uaccess64-2.6.5/arch/i386/kernel/i386_ksyms.c 2004-04-20 01:55:10.000000000 +0100
@@ -103,6 +103,7 @@
EXPORT_SYMBOL_NOVERS(__get_user_1);
EXPORT_SYMBOL_NOVERS(__get_user_2);
EXPORT_SYMBOL_NOVERS(__get_user_4);
+EXPORT_SYMBOL_NOVERS(__get_user_8);
EXPORT_SYMBOL(strpbrk);
EXPORT_SYMBOL(strstr);
diff -urN --exclude-from=dontdiff dual-2.6.5/arch/i386/lib/getuser.S uaccess64-2.6.5/arch/i386/lib/getuser.S
--- dual-2.6.5/arch/i386/lib/getuser.S 2003-12-18 02:57:59.000000000 +0000
+++ uaccess64-2.6.5/arch/i386/lib/getuser.S 2004-04-19 22:32:39.000000000 +0100
@@ -14,10 +14,10 @@
/*
* __get_user_X
*
- * Inputs: %eax contains the address
+ * Inputs: %ecx contains the address
*
- * Outputs: %eax is error code (0 or -EFAULT)
- * %edx contains zero-extended value
+ * Outputs: %ecx is error code (0 or -EFAULT)
+ * %eax contains zero-extended value (%edx:%eax for __get_user_8)
*
* These functions should not modify any other registers,
* as they get called from within inline assembly.
@@ -27,44 +27,61 @@
.align 4
.globl __get_user_1
__get_user_1:
- GET_THREAD_INFO(%edx)
- cmpl TI_ADDR_LIMIT(%edx),%eax
- jae bad_get_user
-1: movzbl (%eax),%edx
- xorl %eax,%eax
+ GET_THREAD_INFO(%eax)
+ cmpl TI_ADDR_LIMIT(%eax),%ecx
+ ja bad_get_user
+1: movzbl (%ecx),%eax
+ xorl %ecx,%ecx
ret
.align 4
.globl __get_user_2
__get_user_2:
- addl $1,%eax
+ addl $1,%ecx
jc bad_get_user
- GET_THREAD_INFO(%edx)
- cmpl TI_ADDR_LIMIT(%edx),%eax
- jae bad_get_user
-2: movzwl -1(%eax),%edx
- xorl %eax,%eax
+ GET_THREAD_INFO(%eax)
+ cmpl TI_ADDR_LIMIT(%eax),%ecx
+ ja bad_get_user
+2: movzwl -1(%ecx),%eax
+ xorl %ecx,%ecx
ret
.align 4
.globl __get_user_4
__get_user_4:
- addl $3,%eax
+ addl $3,%ecx
jc bad_get_user
- GET_THREAD_INFO(%edx)
- cmpl TI_ADDR_LIMIT(%edx),%eax
- jae bad_get_user
-3: movl -3(%eax),%edx
- xorl %eax,%eax
+ GET_THREAD_INFO(%eax)
+ cmpl TI_ADDR_LIMIT(%eax),%ecx
+ ja bad_get_user
+3: movl -3(%ecx),%eax
+ xorl %ecx,%ecx
ret
-bad_get_user:
+.align 4
+.globl __get_user_8
+__get_user_8:
+ addl $7,%ecx
+ jc bad_get_user_8
+ GET_THREAD_INFO(%eax)
+ cmpl TI_ADDR_LIMIT(%eax),%ecx
+ ja bad_get_user_8
+4: movl -7(%ecx),%eax
+5: movl -3(%ecx),%edx
+ xorl %ecx,%ecx
+ ret
+
+bad_get_user_8:
xorl %edx,%edx
- movl $-14,%eax
+bad_get_user:
+ xorl %eax,%eax
+ movl $-14,%ecx
ret
.section __ex_table,"a"
.long 1b,bad_get_user
.long 2b,bad_get_user
.long 3b,bad_get_user
+ .long 4b,bad_get_user_8
+ .long 5b,bad_get_user_8
.previous
diff -urN --exclude-from=dontdiff dual-2.6.5/include/asm-i386/uaccess.h uaccess64-2.6.5/include/asm-i386/uaccess.h
--- dual-2.6.5/include/asm-i386/uaccess.h 2003-12-18 02:58:16.000000000 +0000
+++ uaccess64-2.6.5/include/asm-i386/uaccess.h 2004-04-20 02:12:38.000000000 +0100
@@ -140,17 +140,7 @@
* accesses to the same area of user memory).
*/
-extern void __get_user_1(void);
-extern void __get_user_2(void);
-extern void __get_user_4(void);
-
-#define __get_user_x(size,ret,x,ptr) \
- __asm__ __volatile__("call __get_user_" #size \
- :"=a" (ret),"=d" (x) \
- :"0" (ptr))
-
-/* Careful: we have to cast the result to the type of the pointer for sign reasons */
/**
* get_user: - Get a simple variable from user space.
* @x: Variable to store result.
@@ -168,19 +158,36 @@
* Returns zero on success, or -EFAULT on error.
* On error, the variable @x is set to zero.
*/
+/*
+ * Careful: we have to cast the result to the type of the pointer for
+ * sign reasons.
+ */
#define get_user(x,ptr) \
-({ int __ret_gu,__val_gu; \
+({ \
+ __typeof__(*(ptr)) __gu_val; \
+ int __gu_err; \
+ might_sleep(); \
switch(sizeof (*(ptr))) { \
- case 1: __get_user_x(1,__ret_gu,__val_gu,ptr); break; \
- case 2: __get_user_x(2,__ret_gu,__val_gu,ptr); break; \
- case 4: __get_user_x(4,__ret_gu,__val_gu,ptr); break; \
- default: __get_user_x(X,__ret_gu,__val_gu,ptr); break; \
+ case 1: __get_user_x(1,__gu_val,ptr,__gu_err,"=a"); break; \
+ case 2: __get_user_x(2,__gu_val,ptr,__gu_err,"=a"); break; \
+ case 4: __get_user_x(4,__gu_val,ptr,__gu_err,"=a"); break; \
+ case 8: __get_user_x(8,__gu_val,ptr,__gu_err,"=A"); break; \
+ default: __get_user_bad(); __gu_val = 0; __gu_err = 0; \
} \
- (x) = (__typeof__(*(ptr)))__val_gu; \
- __ret_gu; \
+ (x) = (__typeof__(*(ptr)))__gu_val; \
+ __gu_err; \
})
-extern void __put_user_bad(void);
+extern void __get_user_1(void);
+extern void __get_user_2(void);
+extern void __get_user_4(void);
+extern void __get_user_8(void);
+
+#define __get_user_x(size, x, ptr, err, rtype) \
+ __asm__ __volatile__("call __get_user_" #size \
+ : "=c" (err), rtype (x) \
+ : "0" (ptr));
+
/**
* put_user: - Write a simple value into user space.
@@ -255,33 +262,17 @@
__pu_err; \
})
-
#define __put_user_check(x,ptr,size) \
({ \
long __pu_err = -EFAULT; \
__typeof__(*(ptr)) *__pu_addr = (ptr); \
- might_sleep(); \
+ might_sleep(); \
if (access_ok(VERIFY_WRITE,__pu_addr,size)) \
__put_user_size((x),__pu_addr,(size),__pu_err,-EFAULT); \
__pu_err; \
})
-#define __put_user_u64(x, addr, err) \
- __asm__ __volatile__( \
- "1: movl %%eax,0(%2)\n" \
- "2: movl %%edx,4(%2)\n" \
- "3:\n" \
- ".section .fixup,\"ax\"\n" \
- "4: movl %3,%0\n" \
- " jmp 3b\n" \
- ".previous\n" \
- ".section __ex_table,\"a\"\n" \
- " .align 4\n" \
- " .long 1b,4b\n" \
- " .long 2b,4b\n" \
- ".previous" \
- : "=r"(err) \
- : "A" (x), "r" (addr), "i"(-EFAULT), "0"(err))
+extern void __put_user_bad(void);
#ifdef CONFIG_X86_WP_WORKS_OK
@@ -292,8 +283,8 @@
case 1: __put_user_asm(x,ptr,retval,"b","b","iq",errret);break; \
case 2: __put_user_asm(x,ptr,retval,"w","w","ir",errret);break; \
case 4: __put_user_asm(x,ptr,retval,"l","","ir",errret); break; \
- case 8: __put_user_u64((__typeof__(*ptr))(x),ptr,retval); break;\
- default: __put_user_bad(); \
+ case 8: __put_user_u64(x,ptr,retval,errret); break; \
+ default: __put_user_bad(); \
} \
} while (0)
@@ -301,7 +292,7 @@
#define __put_user_size(x,ptr,size,retval,errret) \
do { \
- __typeof__(*(ptr)) __pus_tmp = x; \
+ __typeof__(*(ptr)) __pus_tmp = (x); \
retval = 0; \
\
if(unlikely(__copy_to_user_ll(ptr, &__pus_tmp, size) != 0)) \
@@ -332,10 +323,29 @@
: "=r"(err) \
: ltype (x), "m"(__m(addr)), "i"(errret), "0"(err))
+#define __put_user_u64(x, addr, err, errret) \
+ __asm__ __volatile__( \
+ "1: movl %%eax,%2\n" \
+ "2: movl %%edx,%3\n" \
+ "3:\n" \
+ ".section .fixup,\"ax\"\n" \
+ "4: movl %4,%0\n" \
+ " jmp 3b\n" \
+ ".previous\n" \
+ ".section __ex_table,\"a\"\n" \
+ " .align 4\n" \
+ " .long 1b,4b\n" \
+ " .long 2b,4b\n" \
+ ".previous" \
+ : "=r" (err) \
+ : "A" (x), "m" (__m(addr)), "m" (__m(4+(char*)addr)), \
+ "i" (errret), "0" (err))
+
#define __get_user_nocheck(x,ptr,size) \
({ \
- long __gu_err, __gu_val; \
+ __typeof__(*(ptr)) __gu_val; \
+ int __gu_err; \
__get_user_size(__gu_val,(ptr),(size),__gu_err,-EFAULT);\
(x) = (__typeof__(*(ptr)))__gu_val; \
__gu_err; \
@@ -350,7 +360,8 @@
case 1: __get_user_asm(x,ptr,retval,"b","b","=q",errret);break; \
case 2: __get_user_asm(x,ptr,retval,"w","w","=r",errret);break; \
case 4: __get_user_asm(x,ptr,retval,"l","","=r",errret);break; \
- default: (x) = __get_user_bad(); \
+ case 8: __get_user_u64(x,ptr,retval,errret);break; \
+ default: __get_user_bad(); (x) = 0; \
} \
} while (0)
@@ -370,6 +381,26 @@
: "=r"(err), ltype (x) \
: "m"(__m(addr)), "i"(errret), "0"(err))
+#define __get_user_u64(x, addr, err, errret) \
+ __asm__ __volatile__( \
+ "1: movl %2,%%eax\n" \
+ "2: movl %3,%%edx\n" \
+ "3:\n" \
+ ".section .fixup,\"ax\"\n" \
+ "4: movl %4,%0\n" \
+ " xorl %%eax,%%eax\n" \
+ " xorl %%edx,%%edx\n" \
+ " jmp 3b\n" \
+ ".previous\n" \
+ ".section __ex_table,\"a\"\n" \
+ " .align 4\n" \
+ " .long 1b,4b\n" \
+ " .long 2b,4b\n" \
+ ".previous" \
+ : "=r" (err), "=A" (x) \
+ : "m" (__m(addr)), "m" (__m(4+(char*)addr)), \
+ "i" (errret), "0" (err));
+
unsigned long __copy_to_user_ll(void __user *to, const void *from, unsigned long n);
unsigned long __copy_from_user_ll(void *to, const void __user *from, unsigned long n);
@@ -411,6 +442,9 @@
case 4:
__put_user_size(*(u32 *)from, (u32 *)to, 4, ret, 4);
return ret;
+ case 8:
+ __put_user_size(*(u64 *)from, (u64 *)to, 8, ret, 8);
+ return ret;
}
}
return __copy_to_user_ll(to, from, n);
@@ -449,6 +483,9 @@
case 4:
__get_user_size(*(u32 *)to, from, 4, ret, 4);
return ret;
+ case 8:
+ __get_user_size(*(u64 *)to, from, 8, ret, 8);
+ return ret;
}
}
return __copy_from_user_ll(to, from, n);
@@ -496,8 +533,24 @@
copy_from_user(void *to, const void __user *from, unsigned long n)
{
might_sleep();
+ if (__builtin_constant_p(n)) {
+ switch (n) {
+ case 1:
+ __get_user_x(1, *(u8 *)to, from, n, "=a");
+ return n ? 1 : 0;
+ case 2:
+ __get_user_x(2, *(u16 *)to, from, n, "=a");
+ return n ? 2 : 0;
+ case 4:
+ __get_user_x(4, *(u32 *)to, from, n, "=a");
+ return n ? 4 : 0;
+ case 8:
+ __get_user_x(8, *(u64 *)to, from, n, "=A");
+ return n ? 8 : 0;
+ }
+ }
if (access_ok(VERIFY_READ, from, n))
- n = __copy_from_user(to, from, n);
+ n = __copy_from_user_ll(to, from, n);
else
memset(to, 0, n);
return n;