This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[cft darwin x86] reimplement ia32 libffi


Improvements are:

  * Support for fastcall and regparm calling conventions.

  * 2-level instead of 3-level functions for setup of arguments.

  * One copy of the assembly, instead of three.

  * Correct popping of the stack arguments in closures.

I've tested this on i686-linux, and am attempting to get it
tested on cygwin.  I need someone to please test darwin for me.


r~



--- Makefile.am	(revision 24606)
+++ Makefile.am	(local)
@@ -24,7 +24,7 @@ EXTRA_DIST = LICENSE ChangeLog.v1 \
 	src/sh64/ffi.c src/sh64/sysv.S src/sh64/ffitarget.h \
 	src/sparc/v8.S src/sparc/v9.S src/sparc/ffitarget.h \
 	src/sparc/ffi.c \
-	src/x86/ffi.c src/x86/sysv.S src/x86/win32.S src/x86/darwin.S \
+	src/x86/ffi.c src/x86/abi32.S src/x86/abi32.h \
 	src/x86/ffi64.c src/x86/unix64.S src/x86/ffitarget.h \
 	src/pa/ffi.c src/pa/linux.S src/pa/hpux32.S \
 	src/frv/eabi.S src/frv/ffitarget.h
@@ -89,13 +89,13 @@ if MIPS_LINUX
 nodist_libffi_la_SOURCES += src/mips/ffi.c src/mips/o32.S
 endif
 if X86
-nodist_libffi_la_SOURCES += src/x86/ffi.c src/x86/sysv.S
+nodist_libffi_la_SOURCES += src/x86/ffi.c src/x86/abi32.S
 endif
 if X86_WIN32
-nodist_libffi_la_SOURCES += src/x86/ffi.c src/x86/win32.S
+nodist_libffi_la_SOURCES += src/x86/ffi.c src/x86/abi32.S
 endif
 if X86_DARWIN
-nodist_libffi_la_SOURCES += src/x86/ffi.c src/x86/darwin.S src/x86/ffi64.c src/x86/darwin64.S
+nodist_libffi_la_SOURCES += src/x86/ffi.c src/x86/abi32.S src/x86/ffi64.c src/x86/darwin64.S
 endif
 if SPARC
 nodist_libffi_la_SOURCES += src/sparc/ffi.c src/sparc/v8.S src/sparc/v9.S
@@ -137,7 +137,7 @@ if S390
 nodist_libffi_la_SOURCES += src/s390/sysv.S src/s390/ffi.c
 endif
 if X86_64
-nodist_libffi_la_SOURCES += src/x86/ffi64.c src/x86/unix64.S src/x86/ffi.c src/x86/sysv.S
+nodist_libffi_la_SOURCES += src/x86/ffi64.c src/x86/unix64.S src/x86/ffi.c src/x86/abi32.S
 endif
 if SH
 nodist_libffi_la_SOURCES += src/sh/sysv.S src/sh/ffi.c
--- Makefile.in	(revision 24606)
+++ Makefile.in	(local)
@@ -39,9 +39,9 @@ host_triplet = @host@
 target_triplet = @target@
 @MIPS_IRIX_TRUE@am__append_1 = src/mips/ffi.c src/mips/o32.S src/mips/n32.S
 @MIPS_LINUX_TRUE@am__append_2 = src/mips/ffi.c src/mips/o32.S
-@X86_TRUE@am__append_3 = src/x86/ffi.c src/x86/sysv.S
-@X86_WIN32_TRUE@am__append_4 = src/x86/ffi.c src/x86/win32.S
-@X86_DARWIN_TRUE@am__append_5 = src/x86/ffi.c src/x86/darwin.S src/x86/ffi64.c src/x86/darwin64.S
+@X86_TRUE@am__append_3 = src/x86/ffi.c src/x86/abi32.S
+@X86_WIN32_TRUE@am__append_4 = src/x86/ffi.c src/x86/abi32.S
+@X86_DARWIN_TRUE@am__append_5 = src/x86/ffi.c src/x86/abi32.S src/x86/ffi64.c src/x86/darwin64.S
 @SPARC_TRUE@am__append_6 = src/sparc/ffi.c src/sparc/v8.S src/sparc/v9.S
 @ALPHA_TRUE@am__append_7 = src/alpha/ffi.c src/alpha/osf.S
 @IA64_TRUE@am__append_8 = src/ia64/ffi.c src/ia64/unix.S
@@ -55,7 +55,7 @@ target_triplet = @target@
 @LIBFFI_CRIS_TRUE@am__append_16 = src/cris/sysv.S src/cris/ffi.c
 @FRV_TRUE@am__append_17 = src/frv/eabi.S src/frv/ffi.c
 @S390_TRUE@am__append_18 = src/s390/sysv.S src/s390/ffi.c
-@X86_64_TRUE@am__append_19 = src/x86/ffi64.c src/x86/unix64.S src/x86/ffi.c src/x86/sysv.S
+@X86_64_TRUE@am__append_19 = src/x86/ffi64.c src/x86/unix64.S src/x86/ffi.c src/x86/abi32.S
 @SH_TRUE@am__append_20 = src/sh/sysv.S src/sh/ffi.c
 @SH64_TRUE@am__append_21 = src/sh64/sysv.S src/sh64/ffi.c
 @PA_LINUX_TRUE@am__append_22 = src/pa/linux.S src/pa/ffi.c
@@ -96,9 +96,9 @@ am_libffi_la_OBJECTS = src/debug.lo src/
 @MIPS_IRIX_TRUE@am__objects_1 = src/mips/ffi.lo src/mips/o32.lo \
 @MIPS_IRIX_TRUE@	src/mips/n32.lo
 @MIPS_LINUX_TRUE@am__objects_2 = src/mips/ffi.lo src/mips/o32.lo
-@X86_TRUE@am__objects_3 = src/x86/ffi.lo src/x86/sysv.lo
-@X86_WIN32_TRUE@am__objects_4 = src/x86/ffi.lo src/x86/win32.lo
-@X86_DARWIN_TRUE@am__objects_5 = src/x86/ffi.lo src/x86/darwin.lo \
+@X86_TRUE@am__objects_3 = src/x86/ffi.lo src/x86/abi32.lo
+@X86_WIN32_TRUE@am__objects_4 = src/x86/ffi.lo src/x86/abi32.lo
+@X86_DARWIN_TRUE@am__objects_5 = src/x86/ffi.lo src/x86/abi32.lo \
 @X86_DARWIN_TRUE@	src/x86/ffi64.lo src/x86/darwin64.lo
 @SPARC_TRUE@am__objects_6 = src/sparc/ffi.lo src/sparc/v8.lo \
 @SPARC_TRUE@	src/sparc/v9.lo
@@ -124,7 +124,7 @@ am_libffi_la_OBJECTS = src/debug.lo src/
 @FRV_TRUE@am__objects_17 = src/frv/eabi.lo src/frv/ffi.lo
 @S390_TRUE@am__objects_18 = src/s390/sysv.lo src/s390/ffi.lo
 @X86_64_TRUE@am__objects_19 = src/x86/ffi64.lo src/x86/unix64.lo \
-@X86_64_TRUE@	src/x86/ffi.lo src/x86/sysv.lo
+@X86_64_TRUE@	src/x86/ffi.lo src/x86/abi32.lo
 @SH_TRUE@am__objects_20 = src/sh/sysv.lo src/sh/ffi.lo
 @SH64_TRUE@am__objects_21 = src/sh64/sysv.lo src/sh64/ffi.lo
 @PA_LINUX_TRUE@am__objects_22 = src/pa/linux.lo src/pa/ffi.lo
@@ -369,7 +369,7 @@ EXTRA_DIST = LICENSE ChangeLog.v1 \
 	src/sh64/ffi.c src/sh64/sysv.S src/sh64/ffitarget.h \
 	src/sparc/v8.S src/sparc/v9.S src/sparc/ffitarget.h \
 	src/sparc/ffi.c \
-	src/x86/ffi.c src/x86/sysv.S src/x86/win32.S src/x86/darwin.S \
+	src/x86/ffi.c src/x86/abi32.S src/x86/abi32.h \
 	src/x86/ffi64.c src/x86/unix64.S src/x86/ffitarget.h \
 	src/pa/ffi.c src/pa/linux.S src/pa/hpux32.S \
 	src/frv/eabi.S src/frv/ffitarget.h
@@ -555,11 +555,7 @@ src/x86/$(DEPDIR)/$(am__dirstamp):
 	@: > src/x86/$(DEPDIR)/$(am__dirstamp)
 src/x86/ffi.lo: src/x86/$(am__dirstamp) \
 	src/x86/$(DEPDIR)/$(am__dirstamp)
-src/x86/sysv.lo: src/x86/$(am__dirstamp) \
-	src/x86/$(DEPDIR)/$(am__dirstamp)
-src/x86/win32.lo: src/x86/$(am__dirstamp) \
-	src/x86/$(DEPDIR)/$(am__dirstamp)
-src/x86/darwin.lo: src/x86/$(am__dirstamp) \
+src/x86/abi32.lo: src/x86/$(am__dirstamp) \
 	src/x86/$(DEPDIR)/$(am__dirstamp)
 src/x86/ffi64.lo: src/x86/$(am__dirstamp) \
 	src/x86/$(DEPDIR)/$(am__dirstamp)
@@ -812,20 +808,16 @@ mostlyclean-compile:
 	-rm -f src/sparc/v9.lo
 	-rm -f src/types.$(OBJEXT)
 	-rm -f src/types.lo
-	-rm -f src/x86/darwin.$(OBJEXT)
-	-rm -f src/x86/darwin.lo
+	-rm -f src/x86/abi32.$(OBJEXT)
+	-rm -f src/x86/abi32.lo
 	-rm -f src/x86/darwin64.$(OBJEXT)
 	-rm -f src/x86/darwin64.lo
 	-rm -f src/x86/ffi.$(OBJEXT)
 	-rm -f src/x86/ffi.lo
 	-rm -f src/x86/ffi64.$(OBJEXT)
 	-rm -f src/x86/ffi64.lo
-	-rm -f src/x86/sysv.$(OBJEXT)
-	-rm -f src/x86/sysv.lo
 	-rm -f src/x86/unix64.$(OBJEXT)
 	-rm -f src/x86/unix64.lo
-	-rm -f src/x86/win32.$(OBJEXT)
-	-rm -f src/x86/win32.lo
 
 distclean-compile:
 	-rm -f *.tab.c
--- src/x86/darwin.S	(revision 24606)
+++ src/x86/darwin.S	(local)
@@ -1,403 +0,0 @@
-/* -----------------------------------------------------------------------
-   sysv.S - Copyright (c) 1996, 1998, 2001, 2002, 2003, 2005  Red Hat, Inc.
-   
-   X86 Foreign Function Interface 
-
-   Permission is hereby granted, free of charge, to any person obtaining
-   a copy of this software and associated documentation files (the
-   ``Software''), to deal in the Software without restriction, including
-   without limitation the rights to use, copy, modify, merge, publish,
-   distribute, sublicense, and/or sell copies of the Software, and to
-   permit persons to whom the Software is furnished to do so, subject to
-   the following conditions:
-
-   The above copyright notice and this permission notice shall be included
-   in all copies or substantial portions of the Software.
-
-   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS
-   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-   IN NO EVENT SHALL CYGNUS SOLUTIONS BE LIABLE FOR ANY CLAIM, DAMAGES OR
-   OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
-   ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-   OTHER DEALINGS IN THE SOFTWARE.
-   ----------------------------------------------------------------------- */
-
-#ifndef __x86_64__
-
-#define LIBFFI_ASM	
-#include <fficonfig.h>
-#include <ffi.h>
-
-.text
-
-.globl _ffi_prep_args
-
-	.align 4
-.globl _ffi_call_SYSV
-
-_ffi_call_SYSV:
-.LFB1:
-        pushl %ebp
-.LCFI0:
-        movl  %esp,%ebp
-.LCFI1:
-        subl $8,%esp
-	/* Make room for all of the new args.  */
-	movl  16(%ebp),%ecx
-	subl  %ecx,%esp
-
-	movl  %esp,%eax
-
-	/* Place all of the ffi_prep_args in position  */
-	subl  $8,%esp
-	pushl 12(%ebp)
-	pushl %eax
-	call  *8(%ebp)
-
-	/* Return stack to previous state and call the function  */
-	addl  $16,%esp	
-
-	call  *28(%ebp)
-
-	/* Remove the space we pushed for the args  */
-	movl  16(%ebp),%ecx
-	addl  %ecx,%esp
-
-	/* Load %ecx with the return type code  */
-	movl  20(%ebp),%ecx	
-
-	/* If the return value pointer is NULL, assume no return value.  */
-	cmpl  $0,24(%ebp)
-	jne   retint
-
-	/* Even if there is no space for the return value, we are 
-	   obliged to handle floating-point values.  */
-	cmpl  $FFI_TYPE_FLOAT,%ecx
-	jne   noretval
-	fstp  %st(0)
-
-        jmp   epilogue
-
-retint:
-	cmpl  $FFI_TYPE_INT,%ecx
-	jne   retfloat
-	/* Load %ecx with the pointer to storage for the return value  */
-	movl  24(%ebp),%ecx	
-	movl  %eax,0(%ecx)
-	jmp   epilogue
-
-retfloat:
-	cmpl  $FFI_TYPE_FLOAT,%ecx
-	jne   retdouble
-	/* Load %ecx with the pointer to storage for the return value  */
-	movl  24(%ebp),%ecx	
-	fstps (%ecx)
-	jmp   epilogue
-
-retdouble:
-	cmpl  $FFI_TYPE_DOUBLE,%ecx
-	jne   retlongdouble
-	/* Load %ecx with the pointer to storage for the return value  */
-	movl  24(%ebp),%ecx	
-	fstpl (%ecx)
-	jmp   epilogue
-
-retlongdouble:
-	cmpl  $FFI_TYPE_LONGDOUBLE,%ecx
-	jne   retint64
-	/* Load %ecx with the pointer to storage for the return value  */
-	movl  24(%ebp),%ecx	
-	fstpt (%ecx)
-	jmp   epilogue
-	
-retint64:	
-	cmpl  $FFI_TYPE_SINT64,%ecx
-        jne   retstruct1b
-	/* Load %ecx with the pointer to storage for the return value  */
-	movl  24(%ebp),%ecx	
-	movl  %eax,0(%ecx)
-	movl  %edx,4(%ecx)
-	jmp   epilogue
-	
-retstruct1b:	
-	cmpl  $FFI_TYPE_SINT8,%ecx
-        jne   retstruct2b
-	/* Load %ecx with the pointer to storage for the return value  */
-	movl  24(%ebp),%ecx	
-	movb  %al,0(%ecx)
-	jmp   epilogue
-
-retstruct2b:	
-	cmpl  $FFI_TYPE_SINT16,%ecx
-        jne   retstruct
-	/* Load %ecx with the pointer to storage for the return value  */
-	movl  24(%ebp),%ecx	
-	movw  %ax,0(%ecx)
-	jmp   epilogue
-
-retstruct:
-	cmpl  $FFI_TYPE_STRUCT,%ecx
-        jne   noretval
-	/* Nothing to do!  */
-        addl $4,%esp
-        popl %ebp
-        ret
-
-noretval:
-epilogue:
-        addl $8,%esp
-        movl %ebp,%esp
-        popl %ebp
-        ret
-.LFE1:
-.ffi_call_SYSV_end:
-
-	.align	4
-FFI_HIDDEN (ffi_closure_SYSV)
-.globl _ffi_closure_SYSV
-
-_ffi_closure_SYSV:
-.LFB2:
-	pushl	%ebp
-.LCFI2:
-	movl	%esp, %ebp
-.LCFI3:
-	subl	$40, %esp
-	leal	-24(%ebp), %edx
-	movl	%edx, -12(%ebp)	/* resp */
-	leal	8(%ebp), %edx
-	movl	%edx, 4(%esp)	/* args = __builtin_dwarf_cfa () */
-	leal	-12(%ebp), %edx
-	movl	%edx, (%esp)	/* &resp */
-	movl	%ebx, 8(%esp)
-.LCFI7:
-	call	L_ffi_closure_SYSV_inner$stub
-	movl	8(%esp), %ebx
-	movl	-12(%ebp), %ecx
-	cmpl	$FFI_TYPE_INT, %eax
-	je	.Lcls_retint
-	cmpl	$FFI_TYPE_FLOAT, %eax
-	je	.Lcls_retfloat
-	cmpl	$FFI_TYPE_DOUBLE, %eax
-	je	.Lcls_retdouble
-	cmpl	$FFI_TYPE_LONGDOUBLE, %eax
-	je	.Lcls_retldouble
-	cmpl	$FFI_TYPE_SINT64, %eax
-	je	.Lcls_retllong
-	cmpl	$FFI_TYPE_SINT8, %eax
-	je	.Lcls_retstruct1
-	cmpl	$FFI_TYPE_SINT16, %eax
-	je	.Lcls_retstruct2
-	cmpl	$FFI_TYPE_STRUCT, %eax
-	je	.Lcls_retstruct
-.Lcls_epilogue:
-	movl	%ebp, %esp
-	popl	%ebp
-	ret
-.Lcls_retint:
-	movl	(%ecx), %eax
-	jmp	.Lcls_epilogue
-.Lcls_retfloat:
-	flds	(%ecx)
-	jmp	.Lcls_epilogue
-.Lcls_retdouble:
-	fldl	(%ecx)
-	jmp	.Lcls_epilogue
-.Lcls_retldouble:
-	fldt	(%ecx)
-	jmp	.Lcls_epilogue
-.Lcls_retllong:
-	movl	(%ecx), %eax
-	movl	4(%ecx), %edx
-	jmp	.Lcls_epilogue
-.Lcls_retstruct1:
-	movsbl	(%ecx), %eax
-	jmp	.Lcls_epilogue
-.Lcls_retstruct2:
-	movswl	(%ecx), %eax
-	jmp	.Lcls_epilogue
-.Lcls_retstruct:
-	lea -8(%ebp),%esp
-	movl	%ebp, %esp
-	popl	%ebp
-	ret $4
-.LFE2:
-
-#if !FFI_NO_RAW_API
-
-#define RAW_CLOSURE_CIF_OFFSET ((FFI_TRAMPOLINE_SIZE + 3) & ~3)
-#define RAW_CLOSURE_FUN_OFFSET (RAW_CLOSURE_CIF_OFFSET + 4)
-#define RAW_CLOSURE_USER_DATA_OFFSET (RAW_CLOSURE_FUN_OFFSET + 4)
-#define CIF_FLAGS_OFFSET 20
-
-	.align	4
-FFI_HIDDEN (ffi_closure_raw_SYSV)
-.globl _ffi_closure_raw_SYSV
-
-_ffi_closure_raw_SYSV:
-.LFB3:
-	pushl	%ebp
-.LCFI4:
-	movl	%esp, %ebp
-.LCFI5:
-	pushl	%esi
-.LCFI6:
-	subl	$36, %esp
-	movl	RAW_CLOSURE_CIF_OFFSET(%eax), %esi	 /* closure->cif */
-	movl	RAW_CLOSURE_USER_DATA_OFFSET(%eax), %edx /* closure->user_data */
-	movl	%edx, 12(%esp)	/* user_data */
-	leal	8(%ebp), %edx	/* __builtin_dwarf_cfa () */
-	movl	%edx, 8(%esp)	/* raw_args */
-	leal	-24(%ebp), %edx
-	movl	%edx, 4(%esp)	/* &res */
-	movl	%esi, (%esp)	/* cif */
-	call	*RAW_CLOSURE_FUN_OFFSET(%eax)		 /* closure->fun */
-	movl	CIF_FLAGS_OFFSET(%esi), %eax		 /* rtype */
-	cmpl	$FFI_TYPE_INT, %eax
-	je	.Lrcls_retint
-	cmpl	$FFI_TYPE_FLOAT, %eax
-	je	.Lrcls_retfloat
-	cmpl	$FFI_TYPE_DOUBLE, %eax
-	je	.Lrcls_retdouble
-	cmpl	$FFI_TYPE_LONGDOUBLE, %eax
-	je	.Lrcls_retldouble
-	cmpl	$FFI_TYPE_SINT64, %eax
-	je	.Lrcls_retllong
-.Lrcls_epilogue:
-	addl	$36, %esp
-	popl	%esi
-	popl	%ebp
-	ret
-.Lrcls_retint:
-	movl	-24(%ebp), %eax
-	jmp	.Lrcls_epilogue
-.Lrcls_retfloat:
-	flds	-24(%ebp)
-	jmp	.Lrcls_epilogue
-.Lrcls_retdouble:
-	fldl	-24(%ebp)
-	jmp	.Lrcls_epilogue
-.Lrcls_retldouble:
-	fldt	-24(%ebp)
-	jmp	.Lrcls_epilogue
-.Lrcls_retllong:
-	movl	-24(%ebp), %eax
-	movl	-20(%ebp), %edx
-	jmp	.Lrcls_epilogue
-.LFE3:
-#endif
-
-.section __IMPORT,__jump_table,symbol_stubs,self_modifying_code+pure_instructions,5
-L_ffi_closure_SYSV_inner$stub:
-	.indirect_symbol _ffi_closure_SYSV_inner
-	hlt ; hlt ; hlt ; hlt ; hlt
-
-
-.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support
-EH_frame1:
-	.set	L$set$0,LECIE1-LSCIE1
-	.long	L$set$0
-LSCIE1:
-	.long	0x0
-	.byte	0x1
-	.ascii "zR\0"
-	.byte	0x1
-	.byte	0x7c
-	.byte	0x8
-	.byte	0x1
-	.byte	0x10
-	.byte	0xc
-	.byte	0x5
-	.byte	0x4
-	.byte	0x88
-	.byte	0x1
-	.align 2
-LECIE1:
-.globl _ffi_call_SYSV.eh
-_ffi_call_SYSV.eh:
-LSFDE1:
-	.set	L$set$1,LEFDE1-LASFDE1
-	.long	L$set$1
-LASFDE1:
-	.long	LASFDE1-EH_frame1
-	.long	.LFB1-.
-	.set L$set$2,.LFE1-.LFB1
-	.long L$set$2
-	.byte	0x0
-	.byte	0x4
-	.set L$set$3,.LCFI0-.LFB1
-	.long L$set$3
-	.byte	0xe
-	.byte	0x8
-	.byte	0x84
-	.byte	0x2
-	.byte	0x4
-	.set L$set$4,.LCFI1-.LCFI0
-	.long L$set$4
-	.byte	0xd
-	.byte	0x4
-	.align 2
-LEFDE1:
-.globl _ffi_closure_SYSV.eh
-_ffi_closure_SYSV.eh:
-LSFDE2:
-	.set	L$set$5,LEFDE2-LASFDE2
-	.long	L$set$5
-LASFDE2:
-	.long	LASFDE2-EH_frame1
-	.long	.LFB2-.
-	.set L$set$6,.LFE2-.LFB2
-	.long L$set$6
-	.byte	0x0
-	.byte	0x4
-	.set L$set$7,.LCFI2-.LFB2
-	.long L$set$7
-	.byte	0xe
-	.byte	0x8
-	.byte	0x84
-	.byte	0x2
-	.byte	0x4
-	.set L$set$8,.LCFI3-.LCFI2
-	.long L$set$8
-	.byte	0xd
-	.byte	0x4
-	.align 2
-LEFDE2:
-
-#if !FFI_NO_RAW_API
-
-.globl _ffi_closure_raw_SYSV.eh
-_ffi_closure_raw_SYSV.eh:
-LSFDE3:
-	.set	L$set$10,LEFDE3-LASFDE3
-	.long	L$set$10
-LASFDE3:
-	.long	LASFDE3-EH_frame1
-	.long	.LFB3-.
-	.set L$set$11,.LFE3-.LFB3
-	.long L$set$11
-	.byte	0x0
-	.byte	0x4
-	.set L$set$12,.LCFI4-.LFB3
-	.long L$set$12
-	.byte	0xe
-	.byte	0x8
-	.byte	0x84
-	.byte	0x2
-	.byte	0x4
-	.set L$set$13,.LCFI5-.LCFI4
-	.long L$set$13
-	.byte	0xd
-	.byte	0x4
-	.byte	0x4
-	.set L$set$14,.LCFI6-.LCFI5
-	.long L$set$14
-	.byte	0x85
-	.byte	0x3
-	.align 2
-LEFDE3:
-
-#endif
-
-#endif /* ifndef __x86_64__ */
--- src/x86/ffi.c	(revision 24606)
+++ src/x86/ffi.c	(local)
@@ -30,287 +30,400 @@
 
 #include <ffi.h>
 #include <ffi_common.h>
-
 #include <stdlib.h>
+#include "asm32.h"
+
+#ifdef X86_WIN32
+#define MS_AGGREGATE_RETURN 1
+#else
+#define MS_AGGREGATE_RETURN 0
+#endif
+
+#define KEEP_AGGREGATE_RETURN_POINTER 0
 
-/* ffi_prep_args is called by the assembly routine once stack space
-   has been allocated for the function's arguments */
 
-void ffi_prep_args(char *stack, extended_cif *ecif)
+/* If this is changed, the offsets in asm32.h must be changed.  */
+struct ffi_call_inner_data
 {
-  register unsigned int i;
-  register void **p_argv;
-  register char *argp;
-  register ffi_type **p_arg;
+  /* Save area for the assembly routine.  */
+  void *save_ra;
+  unsigned save_ebx;
+  unsigned save_ebp;
 
-  argp = stack;
+  /* The function to call.  */
+  void (*fn)();
 
-  if (ecif->cif->flags == FFI_TYPE_STRUCT)
-    {
-      *(void **) argp = ecif->rvalue;
-      argp += 4;
-    }
+  /* Bits controlling how the return value is handled.  */
+  unsigned flags;
 
-  p_argv = ecif->avalue;
+  /* The address of the caller's return value buffer.  */
+  void *rvalue;
 
-  for (i = ecif->cif->nargs, p_arg = ecif->cif->arg_types;
-       i != 0;
-       i--, p_arg++)
-    {
-      size_t z;
+  /* Register arguments, indexed by enum arg_index below.  */
+  unsigned arg[3];
+};
 
-      /* Align if necessary */
-      if ((sizeof(int) - 1) & (unsigned) argp)
-	argp = (char *) ALIGN(argp, sizeof(int));
 
-      z = (*p_arg)->size;
-      if (z < sizeof(int))
-	{
-	  z = sizeof(int);
-	  switch ((*p_arg)->type)
-	    {
-	    case FFI_TYPE_SINT8:
-	      *(signed int *) argp = (signed int)*(SINT8 *)(* p_argv);
-	      break;
-
-	    case FFI_TYPE_UINT8:
-	      *(unsigned int *) argp = (unsigned int)*(UINT8 *)(* p_argv);
-	      break;
-
-	    case FFI_TYPE_SINT16:
-	      *(signed int *) argp = (signed int)*(SINT16 *)(* p_argv);
-	      break;
-
-	    case FFI_TYPE_UINT16:
-	      *(unsigned int *) argp = (unsigned int)*(UINT16 *)(* p_argv);
-	      break;
-
-	    case FFI_TYPE_SINT32:
-	      *(signed int *) argp = (signed int)*(SINT32 *)(* p_argv);
-	      break;
-
-	    case FFI_TYPE_UINT32:
-	      *(unsigned int *) argp = (unsigned int)*(UINT32 *)(* p_argv);
-	      break;
-
-	    case FFI_TYPE_STRUCT:
-	      *(unsigned int *) argp = (unsigned int)*(UINT32 *)(* p_argv);
-	      break;
+/* Describe the argument register handling for the various ABIs.  */
 
-	    default:
-	      FFI_ASSERT(0);
-	    }
-	}
-      else
-	{
-	  memcpy(argp, *p_argv, z);
-	}
-      p_argv++;
-      argp += z;
-    }
-  
-  return;
-}
+enum arg_index { ARG_NONE = -1, ARG_EAX, ARG_EDX, ARG_ECX };
+
+struct ffi_register_args
+{
+  int nregs;
+  enum arg_index idx[3];
+};
 
-/* Perform machine dependent cif processing */
-ffi_status ffi_prep_cif_machdep(ffi_cif *cif)
+static const struct ffi_register_args ffi_x86_abi_args[] = 
 {
+  [FFI_CDECL]     = { 0, { ARG_NONE, ARG_NONE, ARG_NONE } },
+  [FFI_STDCALL]   = { 0, { ARG_NONE, ARG_NONE, ARG_NONE } },
+  [FFI_FASTCALL]  = { 2, { ARG_ECX, ARG_EDX, ARG_NONE } },
+  [FFI_REGPARM_1] = { 1, { ARG_EAX, ARG_NONE, ARG_NONE } },
+  [FFI_REGPARM_2] = { 2, { ARG_EAX, ARG_EDX, ARG_NONE } },
+  [FFI_REGPARM_3] = { 3, { ARG_EAX, ARG_EDX, ARG_ECX } }
+};
+
+extern void ffi_call_inner (void *, struct ffi_call_inner_data *)
+  FFI_HIDDEN __attribute__((regparm(2)));
+
+void ffi_closure_outer_noregs () FFI_HIDDEN;
+void ffi_closure_outer_fastcall () FFI_HIDDEN;
+void ffi_closure_outer_regparm2 () FFI_HIDDEN;
+void ffi_closure_outer_regparm3 () FFI_HIDDEN;
+void ffi_closure_outer_raw_cdecl () FFI_HIDDEN;
+
+unsigned int ffi_closure_inner (ffi_closure *, void *, void *)
+  FFI_HIDDEN __attribute__ ((regparm(3)));
+
+
+ffi_status
+ffi_prep_cif_machdep(ffi_cif *cif)
+{
+  unsigned flags;
+
   /* Set the return type flag */
-  switch (cif->rtype->type)
+  flags = cif->rtype->type;
+  switch (flags)
     {
     case FFI_TYPE_VOID:
-#ifdef X86
-    case FFI_TYPE_STRUCT:
-#endif
     case FFI_TYPE_SINT64:
     case FFI_TYPE_FLOAT:
     case FFI_TYPE_DOUBLE:
     case FFI_TYPE_LONGDOUBLE:
-      cif->flags = (unsigned) cif->rtype->type;
       break;
 
     case FFI_TYPE_UINT64:
-      cif->flags = FFI_TYPE_SINT64;
+      flags = FFI_TYPE_SINT64;
       break;
 
-#ifndef X86
     case FFI_TYPE_STRUCT:
-      if (cif->rtype->size == 1)
-        {
-          cif->flags = FFI_TYPE_SINT8; /* same as char size */
-        }
-      else if (cif->rtype->size == 2)
-        {
-          cif->flags = FFI_TYPE_SINT16; /* same as short size */
-        }
-      else if (cif->rtype->size == 4)
-        {
-          cif->flags = FFI_TYPE_INT; /* same as int type */
-        }
-      else if (cif->rtype->size == 8)
-        {
-          cif->flags = FFI_TYPE_SINT64; /* same as int64 type */
-        }
-      else
-        {
-          cif->flags = FFI_TYPE_STRUCT;
+      flags = FFI_TYPE_STRUCT;
+      if (MS_AGGREGATE_RETURN)
+	{
+	  if (cif->rtype->size == 1)
+	    flags = FFI_TYPE_SINT8;
+	  else if (cif->rtype->size == 2)
+	    flags = FFI_TYPE_SINT16;
+	  else if (cif->rtype->size == 4)
+	    flags = FFI_TYPE_INT;
+	  else if (cif->rtype->size == 8)
+	    flags = FFI_TYPE_SINT64;
+	  else if (cif->rtype->size < 8)
+	    flags = FFI_TYPE_LAST + cif->rtype->size;
         }
       break;
-#endif
 
     default:
-      cif->flags = FFI_TYPE_INT;
+      flags = FFI_TYPE_INT;
       break;
     }
-
-#ifdef X86_DARWIN
-  cif->bytes = (cif->bytes + 15) & ~0xF;
-#endif
+  cif->flags = flags;
 
   return FFI_OK;
 }
 
-extern void ffi_call_SYSV(void (*)(char *, extended_cif *), extended_cif *,
-			  unsigned, unsigned, unsigned *, void (*fn)());
-
-#ifdef X86_WIN32
-extern void ffi_call_STDCALL(void (*)(char *, extended_cif *), extended_cif *,
-			  unsigned, unsigned, unsigned *, void (*fn)());
 
-#endif /* X86_WIN32 */
-
-void ffi_call(ffi_cif *cif, void (*fn)(), void *rvalue, void **avalue)
+void
+ffi_call(ffi_cif *cif, void (*fn)(), void *rvalue, void **avalue)
 {
-  extended_cif ecif;
+  char *stack, *argp;
+  struct ffi_call_inner_data *data;
+  const struct ffi_register_args *abi_regs;
+  int nregs, i, avn;
+  ffi_type **arg_types;
+
+  /* If the return value is NULL, we need to make one.  */
+  if (rvalue == NULL && cif->flags != FFI_TYPE_VOID)
+    rvalue = alloca(cif->rtype->size);
+
+  /* Allocate the space for the arguments, plus the control structure.  */
+  argp = stack = alloca(cif->bytes + sizeof (struct ffi_call_inner_data));
+  data = (struct ffi_call_inner_data *)(stack + cif->bytes);
+
+  data->fn = fn;
+  data->rvalue = rvalue;
+  data->flags = cif->flags;
 
-  ecif.cif = cif;
-  ecif.avalue = avalue;
-  
-  /* If the return value is a struct and we don't have a return	*/
-  /* value address then we need to make one		        */
+  abi_regs = &ffi_x86_abi_args[cif->abi];
+  nregs = 0;
 
-  if ((rvalue == NULL) && 
-      (cif->flags == FFI_TYPE_STRUCT))
+  if (cif->flags == FFI_TYPE_STRUCT)
     {
-      ecif.rvalue = alloca(cif->rtype->size);
-    }
-  else
-    ecif.rvalue = rvalue;
-    
-  
-  switch (cif->abi) 
-    {
-    case FFI_SYSV:
-      ffi_call_SYSV(ffi_prep_args, &ecif, cif->bytes, cif->flags, ecif.rvalue,
-		    fn);
-      break;
-#ifdef X86_WIN32
-    case FFI_STDCALL:
-      ffi_call_STDCALL(ffi_prep_args, &ecif, cif->bytes, cif->flags,
-		       ecif.rvalue, fn);
-      break;
-#endif /* X86_WIN32 */
-    default:
-      FFI_ASSERT(0);
-      break;
+      if (nregs < abi_regs->nregs)
+	data->arg[abi_regs->idx[nregs++]] = (unsigned) rvalue;
+      else
+	{
+	  *(void **)argp = rvalue;
+	  argp += 4;
+	}
     }
-}
 
+  avn = cif->nargs;
+  arg_types = cif->arg_types;
 
-/** private members **/
+  for (i = 0; i < avn; ++i)
+    {
+      union {
+	char c[8];
+	UINT32 u32;
+	UINT64 u64;
+      } u;
+
+      UINT32 promoted;
+      UINT64 promoted64;
+      size_t size = arg_types[i]->size;
 
-static void ffi_prep_incoming_args_SYSV (char *stack, void **ret,
-					 void** args, ffi_cif* cif);
-void FFI_HIDDEN ffi_closure_SYSV (ffi_closure *)
-     __attribute__ ((regparm(1)));
-unsigned int FFI_HIDDEN ffi_closure_SYSV_inner (ffi_closure *, void **, void *)
-     __attribute__ ((regparm(1)));
-void FFI_HIDDEN ffi_closure_raw_SYSV (ffi_raw_closure *)
-     __attribute__ ((regparm(1)));
-
-/* This function is jumped to by the trampoline */
-
-unsigned int FFI_HIDDEN
-ffi_closure_SYSV_inner (closure, respp, args)
-     ffi_closure *closure;
-     void **respp;
-     void *args;
-{
-  // our various things...
-  ffi_cif       *cif;
-  void         **arg_area;
+      switch (arg_types[i]->type)
+	{
+	case FFI_TYPE_SINT8:
+	  promoted = *(SINT8 *)avalue[i];
+	  goto do_int32;
+	case FFI_TYPE_UINT8:
+	  promoted = *(UINT8 *)avalue[i];
+	  goto do_int32;
+	case FFI_TYPE_SINT16:
+	  promoted = *(SINT16 *)avalue[i];
+	  goto do_int32;
+	case FFI_TYPE_UINT16:
+	  promoted = *(UINT16 *)avalue[i];
+	  goto do_int32;
+	case FFI_TYPE_SINT32:
+	case FFI_TYPE_UINT32:
+	case FFI_TYPE_POINTER:
+	  promoted = *(UINT32 *)avalue[i];
+	do_int32:
+	  if (nregs < abi_regs->nregs)
+	    data->arg[abi_regs->idx[nregs++]] = promoted;
+	  else
+	    {
+	      *(unsigned *)argp = promoted;
+	      argp += 4;
+	    }
+	  break;
 
-  cif         = closure->cif;
-  arg_area    = (void**) alloca (cif->nargs * sizeof (void*));  
+	case FFI_TYPE_SINT64:
+	case FFI_TYPE_UINT64:
+	  promoted64 = *(UINT64 *)avalue[i];
+	do_int64:
+	  if (nregs + 1 < abi_regs->nregs && cif->abi != FFI_FASTCALL)
+	    {
+	      data->arg[abi_regs->idx[nregs++]] = promoted64;
+	      data->arg[abi_regs->idx[nregs++]] = promoted64 >> 32;
+	    }
+	  else
+	    {
+	      *(UINT64 *)argp = promoted64;
+	      argp += 8;
+	    }
+	  break;
 
-  /* this call will initialize ARG_AREA, such that each
-   * element in that array points to the corresponding 
-   * value on the stack; and if the function returns
-   * a structure, it will re-set RESP to point to the
-   * structure return address.  */
+	case FFI_TYPE_FLOAT:
+	case FFI_TYPE_DOUBLE:
+	case FFI_TYPE_LONGDOUBLE:
+	  memcpy (argp, avalue[i], size);
+	  argp += size;
+	  break;
+
+	case FFI_TYPE_STRUCT:
+	  /* The FASTCALL ABI, according to GCC, passes things in registers
+	     that are 4 bytes or smaller and that are non-BLKmode.  Which
+	     excludes structs of size 3.  */
+	  if (cif->abi == FFI_FASTCALL && (size == 3 || size >= 4))
+	    ;
+	  else if (size <= 8)
+	    {
+	      memcpy (u.c, avalue[i], size);
+	      if (size <= 4)
+		{
+		  promoted = u.u32;
+		  goto do_int32;
+		}
+	      else
+		{
+		  promoted64 = u.u64;
+		  goto do_int64;
+		}
+	    }
 
-  ffi_prep_incoming_args_SYSV(args, respp, arg_area, cif);
+	  memcpy (argp, avalue[i], size);
+	  argp += ALIGN(size, 4);
+	  break;
 
-  (closure->fun) (cif, *respp, arg_area, closure->user_data);
+	default:
+	  abort ();
+	}
+    }
 
-  return cif->flags;
+  ffi_call_inner (stack, data);
 }
 
-static void
-ffi_prep_incoming_args_SYSV(char *stack, void **rvalue, void **avalue,
-			    ffi_cif *cif)
-{
-  register unsigned int i;
-  register void **p_argv;
-  register char *argp;
-  register ffi_type **p_arg;
 
-  argp = stack;
+unsigned int FFI_HIDDEN __attribute__ ((regparm(3)))
+ffi_closure_inner (ffi_closure *closure, void *stack, void *rvalue)
+{
+  ffi_cif *cif;
+  void **avalue, *argp = stack;
+  const struct ffi_register_args *abi_regs;
+  int nregs, i, avn;
+  unsigned *reg_args;
+  ffi_type **arg_types;
+  unsigned flags;
+
+  cif = closure->cif;
+  avn = cif->nargs;
+  avalue = alloca (avn * sizeof (void *));  
+
+  abi_regs = &ffi_x86_abi_args[cif->abi];
+  nregs = 0;
+
+  reg_args = (unsigned *)(rvalue + 16);
+
+  /* Copy the caller's structure return address so that the closure
+     returns the data directly to the caller.  */
+  if (cif->flags == FFI_TYPE_STRUCT)
+    {
+      if (nregs < abi_regs->nregs)
+	rvalue = (void *) reg_args[abi_regs->idx[nregs++]];
+      else
+	{
+	  rvalue = *(void **)argp;
+	  argp += 4;
+	}
+    }
 
-  if ( cif->flags == FFI_TYPE_STRUCT ) {
-    *rvalue = *(void **) argp;
-    argp += 4;
-  }
+  arg_types = cif->arg_types;
+  for (i = 0; i < avn; ++i)
+    {
+      size_t size = arg_types[i]->size;
 
-  p_argv = avalue;
+      switch (arg_types[i]->type)
+	{
+	case FFI_TYPE_SINT8:
+	case FFI_TYPE_UINT8:
+	case FFI_TYPE_SINT16:
+	case FFI_TYPE_UINT16:
+	case FFI_TYPE_SINT32:
+	case FFI_TYPE_UINT32:
+	case FFI_TYPE_POINTER:
+	do_int32:
+	  if (nregs < abi_regs->nregs)
+	    avalue[i] = reg_args + abi_regs->idx[nregs++];
+	  else
+	    {
+	      avalue[i] = argp;
+	      argp += 4;
+	    }
+	  break;
 
-  for (i = cif->nargs, p_arg = cif->arg_types; (i != 0); i--, p_arg++)
-    {
-      size_t z;
+	case FFI_TYPE_SINT64:
+	case FFI_TYPE_UINT64:
+	do_int64:
+	  if (nregs + 1 < abi_regs->nregs && cif->abi != FFI_FASTCALL)
+	    {
+	      avalue[i] = reg_args + abi_regs->idx[nregs];
+	      nregs += 2;
+	    }
+	  else
+	    {
+	      avalue[i] = argp;
+	      argp += 8;
+	    }
+	  break;
 
-      /* Align if necessary */
-      if ((sizeof(int) - 1) & (unsigned) argp) {
-	argp = (char *) ALIGN(argp, sizeof(int));
-      }
+	case FFI_TYPE_FLOAT:
+	case FFI_TYPE_DOUBLE:
+	case FFI_TYPE_LONGDOUBLE:
+	  avalue[i] = argp;
+	  argp += size;
+	  break;
+
+	case FFI_TYPE_STRUCT:
+	  if (cif->abi == FFI_FASTCALL && (size == 3 || size >= 4))
+	    ;
+	  else if (size <= 4)
+	    goto do_int32;
+	  else if (size <= 8)
+	    goto do_int64;
+	  avalue[i] = argp;
+	  argp += ALIGN(size, 4);
+	  break;
+	}
+    }
 
-      z = (*p_arg)->size;
+  closure->fun (cif, rvalue, avalue, closure->user_data);
 
-      /* because we're little endian, this is what it turns into.   */
+  /* Tell ffi_closure_outer how many bytes to pop on return.  */
+  flags = 0;
+  switch (cif->abi)
+    {
+    case FFI_STDCALL:
+    case FFI_FASTCALL:
+      flags = (argp - stack) << 8;
+      break;
 
-      *p_argv = (void*) argp;
+    case FFI_CDECL:
+      if (!KEEP_AGGREGATE_RETURN_POINTER
+	  && cif->flags == FFI_TYPE_STRUCT)
+	flags = 4 << 8;
+      break;
 
-      p_argv++;
-      argp += z;
+    default:
+      break;
     }
   
-  return;
+  /* Tell ffi_closure_outer how to perform return type promotions.  */
+  if (cif->flags > FFI_TYPE_LAST)
+    flags |= FFI_TYPE_SINT64;
+  else
+    flags |= cif->rtype->type;
+
+  return flags;
 }
 
+
 /* How to make a trampoline.  Derived from gcc/config/i386/i386.c. */
 
-#define FFI_INIT_TRAMPOLINE(TRAMP,FUN,CTX) \
-({ unsigned char *__tramp = (unsigned char*)(TRAMP); \
-   unsigned int  __fun = (unsigned int)(FUN); \
-   unsigned int  __ctx = (unsigned int)(CTX); \
-   unsigned int  __dis = __fun - (__ctx + FFI_TRAMPOLINE_SIZE); \
-   *(unsigned char*) &__tramp[0] = 0xb8; \
-   *(unsigned int*)  &__tramp[1] = __ctx; /* movl __ctx, %eax */ \
-   *(unsigned char *)  &__tramp[5] = 0xe9; \
-   *(unsigned int*)  &__tramp[6] = __dis; /* jmp __fun  */ \
- })
+static void
+ffi_init_trampoline (void *tramp, void (*xfn)(), void *xct,
+		     enum arg_index reg)
+{
+  /* Opcode for the move, based on REG+1, or push if reg is ARG_NONE.  */
+  static const unsigned char opc[4] = { 0x68, 0xb8, 0xba, 0xb9 };
 
+  unsigned int fn = (unsigned int)xfn;
+  unsigned int ct = (unsigned int)xct;
+  unsigned int dis = fn - (ct + FFI_TRAMPOLINE_SIZE);
+
+  FFI_ASSERT (reg >= ARG_NONE && reg <= ARG_ECX);
+  *(unsigned char *)(tramp + 0) = opc[reg + 1];
+  *(unsigned int *)(tramp + 1) = ct;
+
+  /* jmp fn  */
+  *(unsigned char *)(tramp + 5) = 0xe9;
+  *(unsigned int *)(tramp + 6) = dis;
+}
 
-/* the cif must already be prep'ed */
 
 ffi_status
 ffi_prep_closure_loc (ffi_closure* closure,
@@ -319,15 +432,36 @@ ffi_prep_closure_loc (ffi_closure* closu
 		      void *user_data,
 		      void *codeloc)
 {
-  FFI_ASSERT (cif->abi == FFI_SYSV);
+  enum arg_index reg;
+  void (*asm_code)();
+
+  /* Choose an available register in which to pass the context value.  */
+  switch (cif->abi)
+    {
+    case FFI_FASTCALL:
+      reg = ARG_EAX;
+      asm_code = ffi_closure_outer_fastcall;
+      break;
+    case FFI_REGPARM_1:
+    case FFI_REGPARM_2:
+      reg = ARG_ECX;
+      asm_code = ffi_closure_outer_regparm2;
+      break;
+    case FFI_REGPARM_3:
+      reg = ARG_NONE;
+      asm_code = ffi_closure_outer_regparm3;
+      break;
+    default:
+      reg = ARG_EAX;
+      asm_code = ffi_closure_outer_noregs;
+      break;
+    }
 
-  FFI_INIT_TRAMPOLINE (&closure->tramp[0], \
-		       &ffi_closure_SYSV,  \
-		       codeloc);
+  ffi_init_trampoline (&closure->tramp[0], asm_code, codeloc, reg);
     
-  closure->cif  = cif;
+  closure->cif = cif;
   closure->user_data = user_data;
-  closure->fun  = fun;
+  closure->fun = fun;
 
   return FFI_OK;
 }
@@ -345,90 +479,56 @@ ffi_prep_raw_closure_loc (ffi_raw_closur
 {
   int i;
 
-  FFI_ASSERT (cif->abi == FFI_SYSV);
-
-  // we currently don't support certain kinds of arguments for raw
-  // closures.  This should be implemented by a separate assembly language
-  // routine, since it would require argument processing, something we
-  // don't do now for performance.
+  FFI_ASSERT (cif->abi == FFI_CDECL);
 
+  /* We currently don't support certain kinds of arguments for raw
+     closures.  This should be implemented by a separate assembly
+     language routine, since it would require argument processing,
+     something we don't do now for performance.  */
   for (i = cif->nargs-1; i >= 0; i--)
     {
       FFI_ASSERT (cif->arg_types[i]->type != FFI_TYPE_STRUCT);
       FFI_ASSERT (cif->arg_types[i]->type != FFI_TYPE_LONGDOUBLE);
     }
-  
 
-  FFI_INIT_TRAMPOLINE (&closure->tramp[0], &ffi_closure_raw_SYSV,
-		       codeloc);
+  ffi_init_trampoline (&closure->tramp[0], ffi_closure_outer_raw_cdecl,
+		       codeloc, ARG_EAX);
     
-  closure->cif  = cif;
+  closure->cif = cif;
   closure->user_data = user_data;
-  closure->fun  = fun;
+  closure->fun = fun;
 
   return FFI_OK;
 }
 
-static void 
-ffi_prep_args_raw(char *stack, extended_cif *ecif)
-{
-  memcpy (stack, ecif->avalue, ecif->cif->bytes);
-}
-
-/* we borrow this routine from libffi (it must be changed, though, to
- * actually call the function passed in the first argument.  as of
- * libffi-1.20, this is not the case.)
- */
-
-extern void
-ffi_call_SYSV(void (*)(char *, extended_cif *), extended_cif *, unsigned, 
-	      unsigned, unsigned *, void (*fn)());
-
-#ifdef X86_WIN32
-extern void
-ffi_call_STDCALL(void (*)(char *, extended_cif *), extended_cif *, unsigned,
-		 unsigned, unsigned *, void (*fn)());
-#endif /* X86_WIN32 */
-
 void
-ffi_raw_call(ffi_cif *cif, void (*fn)(), void *rvalue, ffi_raw *fake_avalue)
+ffi_raw_call(ffi_cif *cif, void (*fn)(), void *rvalue, ffi_raw *raw_avalue)
 {
-  extended_cif ecif;
-  void **avalue = (void **)fake_avalue;
+  struct ffi_call_inner_data *data;
+  size_t bytes = cif->bytes;
+  char *stack;
+
+  /* If the caller passed no return buffer for a non-void call, make one.  */
+  if (rvalue == NULL && cif->flags != FFI_TYPE_VOID)
+    rvalue = alloca(cif->rtype->size);
+
+  /* Allocate the space for the arguments, plus the control structure.  */
+  stack = alloca(bytes + sizeof (struct ffi_call_inner_data));
+  data = (struct ffi_call_inner_data *)(stack + bytes);
+
+  data->fn = fn;
+  data->rvalue = rvalue;
+  data->flags = cif->flags;
+
+  /* The raw routines only work if no arguments get passed in registers,
+     and we also don't handle inserting the structure return pointer.  */
+  FFI_ASSERT (ffi_x86_abi_args[cif->abi].nregs == 0);
+  FFI_ASSERT (cif->flags != FFI_TYPE_STRUCT);
 
-  ecif.cif = cif;
-  ecif.avalue = avalue;
-  
-  /* If the return value is a struct and we don't have a return	*/
-  /* value address then we need to make one		        */
+  memcpy (stack, raw_avalue, bytes);
 
-  if ((rvalue == NULL) && 
-      (cif->rtype->type == FFI_TYPE_STRUCT))
-    {
-      ecif.rvalue = alloca(cif->rtype->size);
-    }
-  else
-    ecif.rvalue = rvalue;
-    
-  
-  switch (cif->abi) 
-    {
-    case FFI_SYSV:
-      ffi_call_SYSV(ffi_prep_args_raw, &ecif, cif->bytes, cif->flags,
-		    ecif.rvalue, fn);
-      break;
-#ifdef X86_WIN32
-    case FFI_STDCALL:
-      ffi_call_STDCALL(ffi_prep_args_raw, &ecif, cif->bytes, cif->flags,
-		       ecif.rvalue, fn);
-      break;
-#endif /* X86_WIN32 */
-    default:
-      FFI_ASSERT(0);
-      break;
-    }
+  ffi_call_inner (stack, data);
 }
 
-#endif
-
-#endif /* __x86_64__  */
+#endif /* !FFI_NO_RAW_API */
+#endif /* !__x86_64__  */
--- src/x86/ffitarget.h	(revision 24606)
+++ src/x86/ffitarget.h	(local)
@@ -1,5 +1,5 @@
 /* -----------------------------------------------------------------*-C-*-
-   ffitarget.h - Copyright (c) 1996-2003  Red Hat, Inc.
+   ffitarget.h - Copyright (c) 1996-2003, 2007  Red Hat, Inc.
    Target configuration macros for x86 and x86-64.
 
    Permission is hereby granted, free of charge, to any person obtaining
@@ -26,13 +26,6 @@
 #ifndef LIBFFI_TARGET_H
 #define LIBFFI_TARGET_H
 
-/* ---- System specific configurations ----------------------------------- */
-
-#if defined (X86_64) && defined (__i386__)
-#undef X86_64
-#define X86
-#endif
-
 /* ---- Generic type definitions ----------------------------------------- */
 
 #ifndef LIBFFI_ASM
@@ -42,26 +35,26 @@ typedef signed long            ffi_sarg;
 typedef enum ffi_abi {
   FFI_FIRST_ABI = 0,
 
-  /* ---- Intel x86 Win32 ---------- */
-#ifdef X86_WIN32
-  FFI_SYSV,
-  FFI_STDCALL,
-  /* TODO: Add fastcall support for the sake of completeness */
-  FFI_DEFAULT_ABI = FFI_SYSV,
-#endif
+#if defined (__x86_64__)
+  /* The 64-bit ABIs.  */
+  FFI_UNIX64,
+  FFI_WIN64,
+  FFI_LAST_ABI,
 
-  /* ---- Intel x86 and AMD x86-64 - */
-#if !defined(X86_WIN32) && (defined(__i386__) || defined(__x86_64__))
-  FFI_SYSV,
-  FFI_UNIX64,   /* Unix variants all use the same ABI for x86-64  */
-#ifdef __i386__
-  FFI_DEFAULT_ABI = FFI_SYSV,
+  FFI_DEFAULT_ABI = FFI_UNIX64
 #else
-  FFI_DEFAULT_ABI = FFI_UNIX64,
-#endif
-#endif
+  /* The 32-bit ABIs.  */
+  FFI_CDECL,
+  FFI_STDCALL,
+  FFI_FASTCALL,
+  FFI_REGPARM_1,
+  FFI_REGPARM_2,
+  FFI_REGPARM_3,
+  FFI_LAST_ABI,
 
-  FFI_LAST_ABI = FFI_DEFAULT_ABI + 1
+  FFI_SYSV = FFI_CDECL,
+  FFI_DEFAULT_ABI = FFI_CDECL
+#endif
 } ffi_abi;
 #endif
 
@@ -69,7 +62,7 @@ typedef enum ffi_abi {
 
 #define FFI_CLOSURES 1
 
-#if defined (X86_64) || (defined (__x86_64__) && defined (X86_DARWIN))
+#if defined (__x86_64__)
 #define FFI_TRAMPOLINE_SIZE 24
 #define FFI_NATIVE_RAW_API 0
 #else
@@ -77,5 +70,4 @@ typedef enum ffi_abi {
 #define FFI_NATIVE_RAW_API 1	/* x86 has native raw api support */
 #endif
 
-#endif
-
+#endif /* LIBFFI_TARGET_H */
--- src/x86/sysv.S	(revision 24606)
+++ src/x86/sysv.S	(local)
@@ -1,378 +0,0 @@
-/* -----------------------------------------------------------------------
-   sysv.S - Copyright (c) 1996, 1998, 2001, 2002, 2003, 2005  Red Hat, Inc.
-   
-   X86 Foreign Function Interface 
-
-   Permission is hereby granted, free of charge, to any person obtaining
-   a copy of this software and associated documentation files (the
-   ``Software''), to deal in the Software without restriction, including
-   without limitation the rights to use, copy, modify, merge, publish,
-   distribute, sublicense, and/or sell copies of the Software, and to
-   permit persons to whom the Software is furnished to do so, subject to
-   the following conditions:
-
-   The above copyright notice and this permission notice shall be included
-   in all copies or substantial portions of the Software.
-
-   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS
-   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-   IN NO EVENT SHALL CYGNUS SOLUTIONS BE LIABLE FOR ANY CLAIM, DAMAGES OR
-   OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
-   ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-   OTHER DEALINGS IN THE SOFTWARE.
-   ----------------------------------------------------------------------- */
-
-#ifndef __x86_64__
-
-#define LIBFFI_ASM	
-#include <fficonfig.h>
-#include <ffi.h>
-
-.text
-
-.globl ffi_prep_args
-
-	.align 4
-.globl ffi_call_SYSV
-        .type    ffi_call_SYSV,@function
-
-ffi_call_SYSV:
-.LFB1:
-        pushl %ebp
-.LCFI0:
-        movl  %esp,%ebp
-.LCFI1:
-	/* Make room for all of the new args.  */
-	movl  16(%ebp),%ecx
-	subl  %ecx,%esp
-
-	movl  %esp,%eax
-
-	/* Place all of the ffi_prep_args in position  */
-	pushl 12(%ebp)
-	pushl %eax
-	call  *8(%ebp)
-
-	/* Return stack to previous state and call the function  */
-	addl  $8,%esp	
-
-	call  *28(%ebp)
-
-	/* Remove the space we pushed for the args  */
-	movl  16(%ebp),%ecx
-	addl  %ecx,%esp
-
-	/* Load %ecx with the return type code  */
-	movl  20(%ebp),%ecx	
-
-	/* If the return value pointer is NULL, assume no return value.  */
-	cmpl  $0,24(%ebp)
-	jne   retint
-
-	/* Even if there is no space for the return value, we are 
-	   obliged to handle floating-point values.  */
-	cmpl  $FFI_TYPE_FLOAT,%ecx
-	jne   noretval
-	fstp  %st(0)
-
-        jmp   epilogue
-
-retint:
-	cmpl  $FFI_TYPE_INT,%ecx
-	jne   retfloat
-	/* Load %ecx with the pointer to storage for the return value  */
-	movl  24(%ebp),%ecx	
-	movl  %eax,0(%ecx)
-	jmp   epilogue
-
-retfloat:
-	cmpl  $FFI_TYPE_FLOAT,%ecx
-	jne   retdouble
-	/* Load %ecx with the pointer to storage for the return value  */
-	movl  24(%ebp),%ecx	
-	fstps (%ecx)
-	jmp   epilogue
-
-retdouble:
-	cmpl  $FFI_TYPE_DOUBLE,%ecx
-	jne   retlongdouble
-	/* Load %ecx with the pointer to storage for the return value  */
-	movl  24(%ebp),%ecx	
-	fstpl (%ecx)
-	jmp   epilogue
-
-retlongdouble:
-	cmpl  $FFI_TYPE_LONGDOUBLE,%ecx
-	jne   retint64
-	/* Load %ecx with the pointer to storage for the return value  */
-	movl  24(%ebp),%ecx	
-	fstpt (%ecx)
-	jmp   epilogue
-	
-retint64:	
-	cmpl  $FFI_TYPE_SINT64,%ecx
-        jne   retstruct
-	/* Load %ecx with the pointer to storage for the return value  */
-	movl  24(%ebp),%ecx	
-	movl  %eax,0(%ecx)
-	movl  %edx,4(%ecx)
-	
-retstruct:
-	/* Nothing to do!  */
-
-noretval:
-epilogue:
-        movl %ebp,%esp
-        popl %ebp
-        ret
-.LFE1:
-.ffi_call_SYSV_end:
-        .size    ffi_call_SYSV,.ffi_call_SYSV_end-ffi_call_SYSV
-
-	.align	4
-FFI_HIDDEN (ffi_closure_SYSV)
-.globl ffi_closure_SYSV
-	.type	ffi_closure_SYSV, @function
-
-ffi_closure_SYSV:
-.LFB2:
-	pushl	%ebp
-.LCFI2:
-	movl	%esp, %ebp
-.LCFI3:
-	subl	$40, %esp
-	leal	-24(%ebp), %edx
-	movl	%edx, -12(%ebp)	/* resp */
-	leal	8(%ebp), %edx
-	movl	%edx, 4(%esp)	/* args = __builtin_dwarf_cfa () */
-	leal	-12(%ebp), %edx
-	movl	%edx, (%esp)	/* &resp */
-#if defined HAVE_HIDDEN_VISIBILITY_ATTRIBUTE || !defined __PIC__
-	call	ffi_closure_SYSV_inner
-#else
-	movl	%ebx, 8(%esp)
-.LCFI7:
-	call	1f
-1:	popl	%ebx
-	addl	$_GLOBAL_OFFSET_TABLE_+[.-1b], %ebx
-	call	ffi_closure_SYSV_inner@PLT
-	movl	8(%esp), %ebx
-#endif
-	movl	-12(%ebp), %ecx
-	cmpl	$FFI_TYPE_INT, %eax
-	je	.Lcls_retint
-	cmpl	$FFI_TYPE_FLOAT, %eax
-	je	.Lcls_retfloat
-	cmpl	$FFI_TYPE_DOUBLE, %eax
-	je	.Lcls_retdouble
-	cmpl	$FFI_TYPE_LONGDOUBLE, %eax
-	je	.Lcls_retldouble
-	cmpl	$FFI_TYPE_SINT64, %eax
-	je	.Lcls_retllong
-.Lcls_epilogue:
-	movl	%ebp, %esp
-	popl	%ebp
-	ret
-.Lcls_retint:
-	movl	(%ecx), %eax
-	jmp	.Lcls_epilogue
-.Lcls_retfloat:
-	flds	(%ecx)
-	jmp	.Lcls_epilogue
-.Lcls_retdouble:
-	fldl	(%ecx)
-	jmp	.Lcls_epilogue
-.Lcls_retldouble:
-	fldt	(%ecx)
-	jmp	.Lcls_epilogue
-.Lcls_retllong:
-	movl	(%ecx), %eax
-	movl	4(%ecx), %edx
-	jmp	.Lcls_epilogue
-.LFE2:
-	.size	ffi_closure_SYSV, .-ffi_closure_SYSV
-
-#if !FFI_NO_RAW_API
-
-#define RAW_CLOSURE_CIF_OFFSET ((FFI_TRAMPOLINE_SIZE + 3) & ~3)
-#define RAW_CLOSURE_FUN_OFFSET (RAW_CLOSURE_CIF_OFFSET + 4)
-#define RAW_CLOSURE_USER_DATA_OFFSET (RAW_CLOSURE_FUN_OFFSET + 4)
-#define CIF_FLAGS_OFFSET 20
-
-	.align	4
-FFI_HIDDEN (ffi_closure_raw_SYSV)
-.globl ffi_closure_raw_SYSV
-	.type	ffi_closure_raw_SYSV, @function
-
-ffi_closure_raw_SYSV:
-.LFB3:
-	pushl	%ebp
-.LCFI4:
-	movl	%esp, %ebp
-.LCFI5:
-	pushl	%esi
-.LCFI6:
-	subl	$36, %esp
-	movl	RAW_CLOSURE_CIF_OFFSET(%eax), %esi	 /* closure->cif */
-	movl	RAW_CLOSURE_USER_DATA_OFFSET(%eax), %edx /* closure->user_data */
-	movl	%edx, 12(%esp)	/* user_data */
-	leal	8(%ebp), %edx	/* __builtin_dwarf_cfa () */
-	movl	%edx, 8(%esp)	/* raw_args */
-	leal	-24(%ebp), %edx
-	movl	%edx, 4(%esp)	/* &res */
-	movl	%esi, (%esp)	/* cif */
-	call	*RAW_CLOSURE_FUN_OFFSET(%eax)		 /* closure->fun */
-	movl	CIF_FLAGS_OFFSET(%esi), %eax		 /* rtype */
-	cmpl	$FFI_TYPE_INT, %eax
-	je	.Lrcls_retint
-	cmpl	$FFI_TYPE_FLOAT, %eax
-	je	.Lrcls_retfloat
-	cmpl	$FFI_TYPE_DOUBLE, %eax
-	je	.Lrcls_retdouble
-	cmpl	$FFI_TYPE_LONGDOUBLE, %eax
-	je	.Lrcls_retldouble
-	cmpl	$FFI_TYPE_SINT64, %eax
-	je	.Lrcls_retllong
-.Lrcls_epilogue:
-	addl	$36, %esp
-	popl	%esi
-	popl	%ebp
-	ret
-.Lrcls_retint:
-	movl	-24(%ebp), %eax
-	jmp	.Lrcls_epilogue
-.Lrcls_retfloat:
-	flds	-24(%ebp)
-	jmp	.Lrcls_epilogue
-.Lrcls_retdouble:
-	fldl	-24(%ebp)
-	jmp	.Lrcls_epilogue
-.Lrcls_retldouble:
-	fldt	-24(%ebp)
-	jmp	.Lrcls_epilogue
-.Lrcls_retllong:
-	movl	-24(%ebp), %eax
-	movl	-20(%ebp), %edx
-	jmp	.Lrcls_epilogue
-.LFE3:
-	.size	ffi_closure_raw_SYSV, .-ffi_closure_raw_SYSV
-#endif
-
-	.section	.eh_frame,EH_FRAME_FLAGS,@progbits
-.Lframe1:
-	.long	.LECIE1-.LSCIE1	/* Length of Common Information Entry */
-.LSCIE1:
-	.long	0x0	/* CIE Identifier Tag */
-	.byte	0x1	/* CIE Version */
-#ifdef __PIC__
-	.ascii "zR\0"	/* CIE Augmentation */
-#else
-	.ascii "\0"	/* CIE Augmentation */
-#endif
-	.byte	0x1	/* .uleb128 0x1; CIE Code Alignment Factor */
-	.byte	0x7c	/* .sleb128 -4; CIE Data Alignment Factor */
-	.byte	0x8	/* CIE RA Column */
-#ifdef __PIC__
-	.byte	0x1	/* .uleb128 0x1; Augmentation size */
-	.byte	0x1b	/* FDE Encoding (pcrel sdata4) */
-#endif
-	.byte	0xc	/* DW_CFA_def_cfa */
-	.byte	0x4	/* .uleb128 0x4 */
-	.byte	0x4	/* .uleb128 0x4 */
-	.byte	0x88	/* DW_CFA_offset, column 0x8 */
-	.byte	0x1	/* .uleb128 0x1 */
-	.align 4
-.LECIE1:
-.LSFDE1:
-	.long	.LEFDE1-.LASFDE1	/* FDE Length */
-.LASFDE1:
-	.long	.LASFDE1-.Lframe1	/* FDE CIE offset */
-#ifdef __PIC__
-	.long	.LFB1-.	/* FDE initial location */
-#else
-	.long	.LFB1	/* FDE initial location */
-#endif
-	.long	.LFE1-.LFB1	/* FDE address range */
-#ifdef __PIC__
-	.byte	0x0	/* .uleb128 0x0; Augmentation size */
-#endif
-	.byte	0x4	/* DW_CFA_advance_loc4 */
-	.long	.LCFI0-.LFB1
-	.byte	0xe	/* DW_CFA_def_cfa_offset */
-	.byte	0x8	/* .uleb128 0x8 */
-	.byte	0x85	/* DW_CFA_offset, column 0x5 */
-	.byte	0x2	/* .uleb128 0x2 */
-	.byte	0x4	/* DW_CFA_advance_loc4 */
-	.long	.LCFI1-.LCFI0
-	.byte	0xd	/* DW_CFA_def_cfa_register */
-	.byte	0x5	/* .uleb128 0x5 */
-	.align 4
-.LEFDE1:
-.LSFDE2:
-	.long	.LEFDE2-.LASFDE2	/* FDE Length */
-.LASFDE2:
-	.long	.LASFDE2-.Lframe1	/* FDE CIE offset */
-#ifdef __PIC__
-	.long	.LFB2-.	/* FDE initial location */
-#else
-	.long	.LFB2
-#endif
-	.long	.LFE2-.LFB2	/* FDE address range */
-#ifdef __PIC__
-	.byte	0x0	/* .uleb128 0x0; Augmentation size */
-#endif
-	.byte	0x4	/* DW_CFA_advance_loc4 */
-	.long	.LCFI2-.LFB2
-	.byte	0xe	/* DW_CFA_def_cfa_offset */
-	.byte	0x8	/* .uleb128 0x8 */
-	.byte	0x85	/* DW_CFA_offset, column 0x5 */
-	.byte	0x2	/* .uleb128 0x2 */
-	.byte	0x4	/* DW_CFA_advance_loc4 */
-	.long	.LCFI3-.LCFI2
-	.byte	0xd	/* DW_CFA_def_cfa_register */
-	.byte	0x5	/* .uleb128 0x5 */
-#if !defined HAVE_HIDDEN_VISIBILITY_ATTRIBUTE && defined __PIC__
-	.byte	0x4	/* DW_CFA_advance_loc4 */
-	.long	.LCFI7-.LCFI3
-	.byte	0x83	/* DW_CFA_offset, column 0x3 */
-	.byte	0xa	/* .uleb128 0xa */
-#endif
-	.align 4
-.LEFDE2:
-
-#if !FFI_NO_RAW_API
-
-.LSFDE3:
-	.long	.LEFDE3-.LASFDE3	/* FDE Length */
-.LASFDE3:
-	.long	.LASFDE3-.Lframe1	/* FDE CIE offset */
-#ifdef __PIC__
-	.long	.LFB3-.	/* FDE initial location */
-#else
-	.long	.LFB3
-#endif
-	.long	.LFE3-.LFB3	/* FDE address range */
-#ifdef __PIC__
-	.byte	0x0	/* .uleb128 0x0; Augmentation size */
-#endif
-	.byte	0x4	/* DW_CFA_advance_loc4 */
-	.long	.LCFI4-.LFB3
-	.byte	0xe	/* DW_CFA_def_cfa_offset */
-	.byte	0x8	/* .uleb128 0x8 */
-	.byte	0x85	/* DW_CFA_offset, column 0x5 */
-	.byte	0x2	/* .uleb128 0x2 */
-	.byte	0x4	/* DW_CFA_advance_loc4 */
-	.long	.LCFI5-.LCFI4
-	.byte	0xd	/* DW_CFA_def_cfa_register */
-	.byte	0x5	/* .uleb128 0x5 */
-	.byte	0x4	/* DW_CFA_advance_loc4 */
-	.long	.LCFI6-.LCFI5
-	.byte	0x86	/* DW_CFA_offset, column 0x6 */
-	.byte	0x3	/* .uleb128 0x3 */
-	.align 4
-.LEFDE3:
-
-#endif
-
-#endif /* ifndef __x86_64__ */
--- src/x86/win32.S	(revision 24606)
+++ src/x86/win32.S	(local)
@@ -1,373 +0,0 @@
-/* -----------------------------------------------------------------------
-   win32.S - Copyright (c) 1996, 1998, 2001, 2002  Red Hat, Inc.
-	     Copyright (c) 2001  John Beniton
-	     Copyright (c) 2002  Ranjit Mathew
-			
- 
-   X86 Foreign Function Interface
- 
-   Permission is hereby granted, free of charge, to any person obtaining
-   a copy of this software and associated documentation files (the
-   ``Software''), to deal in the Software without restriction, including
-   without limitation the rights to use, copy, modify, merge, publish,
-   distribute, sublicense, and/or sell copies of the Software, and to
-   permit persons to whom the Software is furnished to do so, subject to
-   the following conditions:
- 
-   The above copyright notice and this permission notice shall be included
-   in all copies or substantial portions of the Software.
- 
-   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS
-   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-   IN NO EVENT SHALL CYGNUS SOLUTIONS BE LIABLE FOR ANY CLAIM, DAMAGES OR
-   OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
-   ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-   OTHER DEALINGS IN THE SOFTWARE.
-   ----------------------------------------------------------------------- */
- 
-#define LIBFFI_ASM
-#include <fficonfig.h>
-#include <ffi.h>
- 
-.text
- 
-.globl ffi_prep_args
- 
-        # This assumes we are using gas.
-        .balign 16
-.globl _ffi_call_SYSV
- 
-_ffi_call_SYSV:
-        pushl %ebp
-        movl  %esp,%ebp
- 
-        # Make room for all of the new args.
-        movl  16(%ebp),%ecx                                                     
-        subl  %ecx,%esp
- 
-        movl  %esp,%eax
- 
-        # Place all of the ffi_prep_args in position
-        pushl 12(%ebp)
-        pushl %eax
-        call  *8(%ebp)
- 
-        # Return stack to previous state and call the function
-        addl  $8,%esp
- 
-        # FIXME: Align the stack to a 128-bit boundary to avoid
-        # potential performance hits.
-
-	call  *28(%ebp)
- 
-        # Remove the space we pushed for the args
-        movl  16(%ebp),%ecx
-        addl  %ecx,%esp
- 
-        # Load %ecx with the return type code
-        movl  20(%ebp),%ecx
- 
-        # If the return value pointer is NULL, assume no return value.
-        cmpl  $0,24(%ebp)
-        jne   retint
- 
-        # Even if there is no space for the return value, we are
-        # obliged to handle floating-point values.
-        cmpl  $FFI_TYPE_FLOAT,%ecx
-        jne   noretval
-        fstp  %st(0)
- 
-        jmp   epilogue
- 
-retint:
-        cmpl  $FFI_TYPE_INT,%ecx
-        jne   retfloat
-        # Load %ecx with the pointer to storage for the return value
-        movl  24(%ebp),%ecx
-        movl  %eax,0(%ecx)
-        jmp   epilogue
- 
-retfloat:
-        cmpl  $FFI_TYPE_FLOAT,%ecx
-        jne   retdouble   
-         # Load %ecx with the pointer to storage for the return value
-        movl  24(%ebp),%ecx
-        fstps (%ecx)
-        jmp   epilogue
- 
-retdouble:
-        cmpl  $FFI_TYPE_DOUBLE,%ecx
-        jne   retlongdouble
-        # Load %ecx with the pointer to storage for the return value
-        movl  24(%ebp),%ecx
-        fstpl (%ecx)
-        jmp   epilogue
- 
-retlongdouble:
-        cmpl  $FFI_TYPE_LONGDOUBLE,%ecx
-        jne   retint64
-        # Load %ecx with the pointer to storage for the return value
-        movl  24(%ebp),%ecx
-        fstpt (%ecx)
-        jmp   epilogue
- 
-retint64:
-        cmpl  $FFI_TYPE_SINT64,%ecx
-        jne   retstruct1b
-        # Load %ecx with the pointer to storage for the return value
-        movl  24(%ebp),%ecx
-        movl  %eax,0(%ecx)
-        movl  %edx,4(%ecx)
- 
-retstruct1b:
-        cmpl  $FFI_TYPE_SINT8,%ecx
-        jne   retstruct2b
-        # Load %ecx with the pointer to storage for the return value
-        movl  24(%ebp),%ecx
-        movb  %al,0(%ecx)
-        jmp   epilogue
- 
-retstruct2b:
-        cmpl  $FFI_TYPE_SINT16,%ecx
-        jne   retstruct
-        # Load %ecx with the pointer to storage for the return value
-        movl  24(%ebp),%ecx
-        movw  %ax,0(%ecx)
-        jmp   epilogue
- 
-retstruct:
-        # Nothing to do!
- 
-noretval:
-epilogue:
-        movl %ebp,%esp
-        popl %ebp
-        ret
- 
-.ffi_call_SYSV_end:
-
-        # This assumes we are using gas.
-        .balign 16
-.globl _ffi_call_STDCALL
-
-_ffi_call_STDCALL:
-        pushl %ebp
-        movl  %esp,%ebp
-
-        # Make room for all of the new args.
-        movl  16(%ebp),%ecx 
-        subl  %ecx,%esp
-
-        movl  %esp,%eax
-
-        # Place all of the ffi_prep_args in position
-        pushl 12(%ebp)
-        pushl %eax
-        call  *8(%ebp)
-
-        # Return stack to previous state and call the function
-        addl  $8,%esp
-
-        # FIXME: Align the stack to a 128-bit boundary to avoid
-        # potential performance hits.
-
-        call  *28(%ebp)
-
-        # stdcall functions pop arguments off the stack themselves
-
-        # Load %ecx with the return type code
-        movl  20(%ebp),%ecx
-
-        # If the return value pointer is NULL, assume no return value.
-        cmpl  $0,24(%ebp)
-        jne   sc_retint
-
-        # Even if there is no space for the return value, we are
-        # obliged to handle floating-point values.
-        cmpl  $FFI_TYPE_FLOAT,%ecx
-        jne   sc_noretval
-        fstp  %st(0)
-
-        jmp   sc_epilogue
-
-sc_retint:
-        cmpl  $FFI_TYPE_INT,%ecx
-        jne   sc_retfloat
-        # Load %ecx with the pointer to storage for the return value
-        movl  24(%ebp),%ecx
-        movl  %eax,0(%ecx)
-        jmp   sc_epilogue
-
-sc_retfloat:
-        cmpl  $FFI_TYPE_FLOAT,%ecx
-        jne   sc_retdouble
-         # Load %ecx with the pointer to storage for the return value
-        movl  24(%ebp),%ecx
-        fstps (%ecx)
-        jmp   sc_epilogue
-
-sc_retdouble:
-        cmpl  $FFI_TYPE_DOUBLE,%ecx
-        jne   sc_retlongdouble
-        # Load %ecx with the pointer to storage for the return value
-        movl  24(%ebp),%ecx
-        fstpl (%ecx)
-        jmp   sc_epilogue
-
-sc_retlongdouble:
-        cmpl  $FFI_TYPE_LONGDOUBLE,%ecx
-        jne   sc_retint64
-        # Load %ecx with the pointer to storage for the return value
-        movl  24(%ebp),%ecx
-        fstpt (%ecx)
-        jmp   sc_epilogue
-
-sc_retint64:
-        cmpl  $FFI_TYPE_SINT64,%ecx
-        jne   sc_retstruct1b
-        # Load %ecx with the pointer to storage for the return value
-        movl  24(%ebp),%ecx
-        movl  %eax,0(%ecx)
-        movl  %edx,4(%ecx)
-
-sc_retstruct1b:
-        cmpl  $FFI_TYPE_SINT8,%ecx
-        jne   sc_retstruct2b
-        # Load %ecx with the pointer to storage for the return value
-        movl  24(%ebp),%ecx
-        movb  %al,0(%ecx)
-        jmp   sc_epilogue
-
-sc_retstruct2b:
-        cmpl  $FFI_TYPE_SINT16,%ecx
-        jne   sc_retstruct
-        # Load %ecx with the pointer to storage for the return value
-        movl  24(%ebp),%ecx
-        movw  %ax,0(%ecx)
-        jmp   sc_epilogue
-
-sc_retstruct:
-        # Nothing to do!
-
-sc_noretval:
-sc_epilogue:
-        movl %ebp,%esp
-        popl %ebp
-        ret
-
-.ffi_call_STDCALL_end:
-
-	.globl _ffi_closure_SYSV
-_ffi_closure_SYSV:
-	pushl	%ebp
-	movl	%esp, %ebp
-	subl	$40, %esp
-	leal	-24(%ebp), %edx
-	movl	%edx, -12(%ebp)	/* resp */
-	leal	8(%ebp), %edx
-	movl	%edx, 4(%esp)	/* args = __builtin_dwarf_cfa () */
-	leal	-12(%ebp), %edx
-	movl	%edx, (%esp)	/* &resp */
-	call	_ffi_closure_SYSV_inner
-	movl	-12(%ebp), %ecx
-	cmpl	$FFI_TYPE_INT, %eax
-	je	.Lcls_retint
-	cmpl	$FFI_TYPE_FLOAT, %eax
-	je	.Lcls_retfloat
-	cmpl	$FFI_TYPE_DOUBLE, %eax
-	je	.Lcls_retdouble
-	cmpl	$FFI_TYPE_LONGDOUBLE, %eax
-	je	.Lcls_retldouble
-	cmpl	$FFI_TYPE_SINT64, %eax
-	je	.Lcls_retllong
-	cmpl	$FFI_TYPE_SINT8, %eax	/* 1-byte struct */
-	je	.Lcls_retstruct1
-	cmpl	$FFI_TYPE_SINT16, %eax	/* 2-bytes struct */
-	je	.Lcls_retstruct2
-.Lcls_epilogue:
-	movl	%ebp, %esp
-	popl	%ebp
-	ret
-.Lcls_retint:
-	movl	(%ecx), %eax
-	jmp	.Lcls_epilogue
-.Lcls_retfloat:
-	flds	(%ecx)
-	jmp	.Lcls_epilogue
-.Lcls_retdouble:
-	fldl	(%ecx)
-	jmp	.Lcls_epilogue
-.Lcls_retldouble:
-	fldt	(%ecx)
-	jmp	.Lcls_epilogue
-.Lcls_retllong:
-	movl	(%ecx), %eax
-	movl	4(%ecx), %edx
-	jmp	.Lcls_epilogue
-.Lcls_retstruct1:
-	movsbl	(%ecx), %eax
-	jmp	.Lcls_epilogue
-.Lcls_retstruct2:
-	movswl	(%ecx), %eax
-	jmp	.Lcls_epilogue
-.ffi_closure_SYSV_end:
-
-#if !FFI_NO_RAW_API
-
-#define RAW_CLOSURE_CIF_OFFSET ((FFI_TRAMPOLINE_SIZE + 3) & ~3)
-#define RAW_CLOSURE_FUN_OFFSET (RAW_CLOSURE_CIF_OFFSET + 4)
-#define RAW_CLOSURE_USER_DATA_OFFSET (RAW_CLOSURE_FUN_OFFSET + 4)
-#define CIF_FLAGS_OFFSET 20
-
-	.balign	16
-	.globl _ffi_closure_raw_SYSV
-_ffi_closure_raw_SYSV:
-	pushl	%ebp
-	movl	%esp, %ebp
-	pushl	%esi
-	subl	$36, %esp
-	movl	RAW_CLOSURE_CIF_OFFSET(%eax), %esi	 /* closure->cif */
-	movl	RAW_CLOSURE_USER_DATA_OFFSET(%eax), %edx /* closure->user_data */
-	movl	%edx, 12(%esp)	/* user_data */
-	leal	8(%ebp), %edx	/* __builtin_dwarf_cfa () */
-	movl	%edx, 8(%esp)	/* raw_args */
-	leal	-24(%ebp), %edx
-	movl	%edx, 4(%esp)	/* &res */
-	movl	%esi, (%esp)	/* cif */
-	call	*RAW_CLOSURE_FUN_OFFSET(%eax)		 /* closure->fun */
-	movl	CIF_FLAGS_OFFSET(%esi), %eax		 /* rtype */
-	cmpl	$FFI_TYPE_INT, %eax
-	je	.Lrcls_retint
-	cmpl	$FFI_TYPE_FLOAT, %eax
-	je	.Lrcls_retfloat
-	cmpl	$FFI_TYPE_DOUBLE, %eax
-	je	.Lrcls_retdouble
-	cmpl	$FFI_TYPE_LONGDOUBLE, %eax
-	je	.Lrcls_retldouble
-	cmpl	$FFI_TYPE_SINT64, %eax
-	je	.Lrcls_retllong
-.Lrcls_epilogue:
-	addl	$36, %esp
-	popl	%esi
-	popl	%ebp
-	ret
-.Lrcls_retint:
-	movl	-24(%ebp), %eax
-	jmp	.Lrcls_epilogue
-.Lrcls_retfloat:
-	flds	-24(%ebp)
-	jmp	.Lrcls_epilogue
-.Lrcls_retdouble:
-	fldl	-24(%ebp)
-	jmp	.Lrcls_epilogue
-.Lrcls_retldouble:
-	fldt	-24(%ebp)
-	jmp	.Lrcls_epilogue
-.Lrcls_retllong:
-	movl	-24(%ebp), %eax
-	movl	-20(%ebp), %edx
-	jmp	.Lrcls_epilogue
-.ffi_closure_raw_SYSV_end:
-
-#endif
--- src/x86/abi32.S	(revision 24606)
+++ src/x86/abi32.S	(local)
@@ -0,0 +1,615 @@
+/* -----------------------------------------------------------------------
+   abi32.S - Copyright (c) 1996, 1998, 2001, 2002, 2003, 2005,
+	2007 Red Hat, Inc.
+   
+   X86 Foreign Function Interface 
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS
+   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+   IN NO EVENT SHALL CYGNUS SOLUTIONS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+   OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+   ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+   OTHER DEALINGS IN THE SOFTWARE.
+   ----------------------------------------------------------------------- */
+
+#ifndef __x86_64__
+
+#define LIBFFI_ASM	
+#include <fficonfig.h>
+#include <ffi.h>
+#include "asm32.h"
+
+#ifdef X86_WIN32	/* L() = local label, E() = exported symbol name, TYPE()/SIZE() = symbol annotations, RODATA = read-only data section directive */
+# define L(X)		L ## X
+# define E(X)		_ ## X
+# define TYPE(X)
+# define SIZE(X)
+# define RODATA		.rdata
+#elif defined(X86_DARWIN)
+# define L(X)		.L ## X
+# define E(X)		_ ## X	/* bare name, no ':' -- E() is used as a .globl/call operand and is followed by an explicit ':' at definitions */
+# define TYPE(X)
+# define SIZE(X)	. ## X ## _end:
+# define RODATA		.const
+#else
+# define L(X)		.L ## X
+# define E(X)		X
+# define TYPE(X)	.type E(X),@function
+# define SIZE(X)	.size E(X), . - E(X)
+# define RODATA		.section .rodata
+#endif
+
+#ifdef __PIC__
+# define D(X, Y)	L(X) - L(Y)
+#else
+# define D(X, Y)	L(X)
+#endif
+
+	.text
+
+/* ffi_call_inner (void *stack, struct ffi_call_inner_data *data)
+     __attribute__((regparm(2)));
+   On entry %eax = STACK and %edx = DATA; the result is stored via OFS_RVALUE.  */
+
+	.globl	E(ffi_call_inner)
+        TYPE(ffi_call_inner)
+	FFI_HIDDEN(E(ffi_call_inner))
+
+	.balign	16
+E(ffi_call_inner):
+L(FB1):
+	/* A bit of trickiness here.  DATA is our local stack frame, and
+	   STACK is the callee's argument frame, both allocated by our
+	   caller.  We deallocate some of that alloca'd space.  */
+	movl	(%esp), %ecx			/* our own return address */
+	movl	%ecx, OFS_SAVE_RA(%edx)
+	movl	%ebx, OFS_SAVE_EBX(%edx)
+	movl	%ebp, OFS_SAVE_EBP(%edx)
+	movl	%edx, %ebp			/* %ebp = DATA from here on */
+L(CFI1_1):
+
+	movl	%eax, %esp			/* switch to the callee's argument frame */
+
+	/* Load up all of the (potential) argument registers.  */
+	movl	OFS_ARG_EAX(%ebp), %eax
+	movl	OFS_ARG_EDX(%ebp), %edx
+	movl	OFS_ARG_ECX(%ebp), %ecx
+
+	/* Call the function.  */
+	call	*OFS_FN(%ebp)
+
+	movl	OFS_FLAGS(%ebp), %ecx
+	cmpl	$FFI_TYPE_LAST, %ecx
+	ja	L(st_small_struct)		/* flags above FFI_TYPE_LAST: byte-wise struct store */
+
+#ifdef __PIC__
+	call	E(getpc_ebx)			/* %ebx = address of label 0 below */
+0:	addl	$L(store_table) - 0b, %ebx	/* %ebx = address of store_table */
+	movl	(%ebx, %ecx, 4), %ecx		/* entry is an offset relative to store_table */
+	addl	%ebx, %ecx
+	movl	OFS_RVALUE(%ebp), %ebx		/* %ebx = destination for the return value */
+	jmp	*%ecx
+#else
+	movl	OFS_RVALUE(%ebp), %ebx		/* %ebx = destination for the return value */
+	jmp	*L(store_table)(, %ecx, 4)
+#endif
+
+	RODATA
+	.balign	4
+L(store_table):
+	.long	D(st_epilogue, store_table)	/* FFI_TYPE_VOID */
+	.long	D(st_int4, store_table)		/* FFI_TYPE_INT */
+	.long	D(st_float, store_table)	/* FFI_TYPE_FLOAT */
+	.long	D(st_double, store_table)	/* FFI_TYPE_DOUBLE */
+	.long	D(st_ldouble, store_table)	/* FFI_TYPE_LONGDOUBLE */
+	.long	D(st_int1, store_table)		/* FFI_TYPE_UINT8 */
+	.long	D(st_int1, store_table)		/* FFI_TYPE_SINT8 */
+	.long	D(st_int2, store_table)		/* FFI_TYPE_UINT16 */
+	.long	D(st_int2, store_table)		/* FFI_TYPE_SINT16 */
+	.long	D(st_int4, store_table)		/* FFI_TYPE_UINT32 */
+	.long	D(st_int4, store_table)		/* FFI_TYPE_SINT32 */
+	.long	D(st_int8, store_table)		/* FFI_TYPE_UINT64 */
+	.long	D(st_int8, store_table)		/* FFI_TYPE_SINT64 */
+	.long	D(st_epilogue, store_table)	/* FFI_TYPE_STRUCT */
+	.long	D(st_int4, store_table)		/* FFI_TYPE_POINTER */
+
+	.text
+L(st_small_struct):
+	/* Store all 8 bytes into preallocated storage.  */
+	movl	%eax, OFS_ARG_EAX(%ebp)
+	movl	%edx, OFS_ARG_EDX(%ebp)
+
+	/* Copy it to the destination, one byte at a time.  This is
+	   of size 3, 5, 6 or 7; no sense optimizing past this.  */
+	subl	$FFI_TYPE_LAST, %ecx		/* %ecx = number of bytes to copy */
+	movl	OFS_RVALUE(%ebp), %ebx
+0:	decl	%ecx
+	movb	OFS_ARG_EAX(%ebp, %ecx), %al
+	movb	%al, (%ebx, %ecx)
+	jnz	0b				/* tests the flags from decl; movb does not modify them */
+	jmp	L(st_epilogue)
+
+L(st_int1):
+	movb	%al, (%ebx)
+	jmp	L(st_epilogue)
+
+L(st_int2):
+	movw	%ax, (%ebx)
+	jmp	L(st_epilogue)
+
+L(st_float):
+	fstps	(%ebx)
+	jmp	L(st_epilogue)
+
+L(st_double):
+	fstpl	(%ebx)
+	jmp	L(st_epilogue)
+
+L(st_ldouble):
+	fstpt	(%ebx)
+	jmp	L(st_epilogue)
+
+	.balign	4
+L(st_int8):
+	movl	%edx, 4(%ebx)
+	/* FALLTHRU */
+
+	.balign	4
+L(st_int4):
+	movl	%eax, (%ebx)
+	/* FALLTHRU */
+
+L(st_epilogue):
+	movl	%ebp, %esp			/* %esp = DATA; ret pops from there, so presumably OFS_SAVE_RA is 0 (asm32.h not visible) */
+L(CFI1_2):
+
+	movl	OFS_SAVE_EBX(%ebp), %ebx
+	movl	OFS_SAVE_EBP(%ebp), %ebp
+        ret
+
+L(FE1):
+	SIZE(ffi_call_inner)
+
+/* ffi_closure_outer(void *ctx) */
+
+	.globl	E(ffi_closure_outer_noregs)
+	.globl	E(ffi_closure_outer_fastcall)
+	.globl	E(ffi_closure_outer_regparm2)
+	.globl	E(ffi_closure_outer_regparm3)
+	TYPE(ffi_closure_outer_noregs)
+	TYPE(ffi_closure_outer_fastcall)
+	TYPE(ffi_closure_outer_regparm2)
+	TYPE(ffi_closure_outer_regparm3)
+	FFI_HIDDEN(E(ffi_closure_outer_noregs))
+	FFI_HIDDEN(E(ffi_closure_outer_fastcall))
+	FFI_HIDDEN(E(ffi_closure_outer_regparm2))
+	FFI_HIDDEN(E(ffi_closure_outer_regparm3))
+
+	/* 16 bytes for return value,
+	   12 bytes for incoming register arguments,
+	   4 bytes for ebx save
+	   12 bytes of padding to make esp remain aligned.  */
+#define FRAME	44
+
+	.balign	16
+E(ffi_closure_outer_fastcall):		/* spills %ecx/%edx, then joins the shared do_call path; %eax is left untouched (presumably the closure context) */
+L(FB2):
+	subl	$FRAME, %esp
+L(CFI2):
+	movl	%ecx, 16(%esp)			/* incoming register arguments are saved from 16(%esp) up */
+	movl	%edx, 20(%esp)
+	movl	%ebx, 28(%esp)			/* %ebx save slot, restored in ld_epilogue */
+	jmp	L(do_call)
+L(FE2):
+	SIZE(ffi_closure_outer_fastcall)
+
+	.balign	16
+E(ffi_closure_outer_regparm2):		/* spills %eax/%edx, then joins the shared do_call path */
+L(FB3):
+	subl	$FRAME, %esp
+L(CFI3):
+	movl	%eax, 16(%esp)			/* incoming register arguments are saved from 16(%esp) up */
+	movl	%edx, 20(%esp)
+	movl	%ebx, 28(%esp)			/* %ebx save slot, restored in ld_epilogue */
+	movl	%ecx, %eax			/* move the value arriving in %ecx (presumably the closure context) into %eax */
+	jmp	L(do_call)
+L(FE3):
+	SIZE(ffi_closure_outer_regparm2)
+
+	.balign	16
+E(ffi_closure_outer_regparm3):		/* all three argument registers are live; one extra word sits on the stack at entry */
+L(FB4):
+	subl	$FRAME-4, %esp			/* 4 bytes less: the extra word already occupies part of the frame */
+L(CFI4):
+	movl	%eax, 16(%esp)
+	movl	%edx, 20(%esp)
+	movl	%ecx, 24(%esp)
+	movl	FRAME-4(%esp), %eax		/* fetch the extra word (presumably the closure context pushed by the trampoline) */
+	movl	%ebx, 28(%esp)			/* %ebx save slot, restored in ld_epilogue */
+	jmp	L(do_call)
+L(FE4):
+	SIZE(ffi_closure_outer_regparm3)
+
+	.balign	16
+E(ffi_closure_outer_noregs):		/* no register arguments to spill; falls straight into do_call */
+L(FB5):
+	subl	$FRAME, %esp
+L(CFI5_1):
+	movl	%ebx, 28(%esp)			/* %ebx save slot, restored in ld_epilogue */
+L(CFI5_2):
+
+L(do_call):
+	leal	FRAME+4(%esp), %edx		/* %edx = first word above the return address (the stack arguments) */
+	movl	%esp, %ecx			/* %ecx = base of our frame (return-value area at offset 0) */
+#if !defined(__PIC__) || defined(X86_WIN32) \
+    || defined(HAVE_HIDDEN_VISIBILITY_ATTRIBUTE)
+	call	ffi_closure_inner		/* NOTE(review): not wrapped in E(); confirm no symbol prefix is needed on X86_WIN32 */
+#elif defined(X86_DARWIN)
+	call	L_ffi_closure_inner$stub
+#else
+	call	E(getpc_ebx)
+1:	addl	$_GLOBAL_OFFSET_TABLE_+[.-1b], %ebx
+	call	ffi_closure_inner@PLT
+#endif
+
+	/* Extract the return type from the low byte of %eax,
+	   and the pop count from the high part into %ecx.  */
+	movl	%eax, %ecx
+	movzbl	%al, %eax			/* %eax = index into load_table */
+	shrl	$8, %ecx
+
+#ifndef __PIC__
+	jmp	*L(load_table)(, %eax, 4)
+#else
+# if defined(HAVE_HIDDEN_VISIBILITY_ATTRIBUTE) || defined(X86_DARWIN)
+	call	E(getpc_ebx)			/* %ebx = address of label 0 below */
+0:	addl	$L(load_table) - 0b, %ebx
+# else
+	addl	$L(load_table)@GOTOFF, %ebx	/* relies on %ebx = GOT address from the PLT-call branch above */
+# endif
+	movl	(%ebx, %eax, 4), %eax		/* entry is an offset relative to load_table */
+	addl	%ebx, %eax
+	jmp	*%eax
+#endif
+
+	RODATA
+	.balign	4
+L(load_table):
+	.long	D(ld_epilogue, load_table)	/* FFI_TYPE_VOID */
+	.long	D(ld_int4, load_table)		/* FFI_TYPE_INT */
+	.long	D(ld_float, load_table)		/* FFI_TYPE_FLOAT */
+	.long	D(ld_double, load_table)	/* FFI_TYPE_DOUBLE */
+	.long	D(ld_ldouble, load_table)	/* FFI_TYPE_LONGDOUBLE */
+	.long	D(ld_int1u, load_table)		/* FFI_TYPE_UINT8 */
+	.long	D(ld_int1s, load_table)		/* FFI_TYPE_SINT8 */
+	.long	D(ld_int2u, load_table)		/* FFI_TYPE_UINT16 */
+	.long	D(ld_int2s, load_table)		/* FFI_TYPE_SINT16 */
+	.long	D(ld_int4, load_table)		/* FFI_TYPE_UINT32 */
+	.long	D(ld_int4, load_table)		/* FFI_TYPE_SINT32 */
+	.long	D(ld_int8, load_table)		/* FFI_TYPE_UINT64 */
+	.long	D(ld_int8, load_table)		/* FFI_TYPE_SINT64 */
+	.long	D(ld_epilogue, load_table)	/* FFI_TYPE_STRUCT */
+	.long	D(ld_int4, load_table)		/* FFI_TYPE_POINTER */
+
+	.text
+L(ld_int1u):
+	movzbl	(%esp), %eax			/* zero-extend the 8-bit result */
+	jmp	L(ld_epilogue)
+
+L(ld_int1s):
+	movsbl	(%esp), %eax			/* sign-extend the 8-bit result */
+	jmp	L(ld_epilogue)
+
+L(ld_int2u):
+	movzwl	(%esp), %eax			/* zero-extend the 16-bit result */
+	jmp	L(ld_epilogue)
+
+L(ld_int2s):
+	movswl	(%esp), %eax			/* sign-extend the 16-bit result */
+	jmp	L(ld_epilogue)
+
+L(ld_float):
+	flds	(%esp)
+	jmp	L(ld_epilogue)
+
+L(ld_double):
+	fldl	(%esp)
+	jmp	L(ld_epilogue)
+
+L(ld_ldouble):
+	fldt	(%esp)
+	jmp	L(ld_epilogue)
+
+	.balign	4
+L(ld_int8):
+	movl	4(%esp), %edx
+	/* FALLTHRU */
+
+	.balign	4
+L(ld_int4):
+	movl	0(%esp), %eax
+	/* FALLTHRU */
+
+L(ld_epilogue):
+	testl	%ecx, %ecx			/* non-zero pop count: take the stack-popping return at 1: */
+	jne	1f
+
+	movl	28(%esp), %ebx
+	addl	$FRAME, %esp
+L(CFI5_3):
+	ret
+
+	/* We've been directed to pop some stack.  If this were a known
+	   constant, we'd use the "ret $n" instruction.  Since it isn't,
+	   move the return address to just below the target stack value,
+	   deallocate all that stack, and return.  */
+L(CFI5_4):
+1:	movl	FRAME(%esp), %ebx		/* our return address */
+	movl	%ebx, FRAME-4(%esp, %ecx)	/* relocate it to the slot we will return from */
+	movl	28(%esp), %ebx			/* restore the caller's %ebx */
+	leal	FRAME-4(%esp, %ecx), %esp	/* %esp now points at the relocated return address */
+L(CFI5_5):
+	ret
+
+L(FE5):
+	SIZE(ffi_closure_outer_noregs)
+
+
+#if !FFI_NO_RAW_API
+
+#define RAW_CLOSURE_CIF_OFFSET ((FFI_TRAMPOLINE_SIZE + 3) & ~3)
+#define RAW_CLOSURE_FUN_OFFSET (RAW_CLOSURE_CIF_OFFSET + 4)
+#define RAW_CLOSURE_USER_DATA_OFFSET (RAW_CLOSURE_FUN_OFFSET + 4)
+#define CIF_FLAGS_OFFSET 20
+
+	/* 16 bytes of outgoing arguments,
+	   16 bytes of return value
+	   4 bytes to save esi
+           8 bytes of padding to make esp remain aligned.  */
+#undef FRAME
+#define FRAME	44
+
+	.globl	E(ffi_closure_outer_raw_cdecl)
+	TYPE(ffi_closure_outer_raw_cdecl)
+	FFI_HIDDEN (E(ffi_closure_outer_raw_cdecl))
+
+	.balign	16
+E(ffi_closure_outer_raw_cdecl):		/* entered with %eax = raw closure; calls closure->fun (cif, &res, raw_args, user_data) */
+L(FB6):
+	subl	$FRAME, %esp
+L(CFI6_1):
+	movl	%esi, 32(%esp)				/* %esi is call-saved; we use it to keep the cif */
+L(CFI6_2):
+
+	movl	RAW_CLOSURE_CIF_OFFSET(%eax), %esi
+	movl	RAW_CLOSURE_USER_DATA_OFFSET(%eax), %edx
+
+	movl	%edx, 12(%esp)				/* user_data */
+	leal	FRAME+4(%esp), %edx
+	movl	%edx, 8(%esp)				/* raw_args */
+	leal	16(%esp), %edx
+	movl	%edx, 4(%esp)				/* &res */
+	movl	%esi, (%esp)				/* cif */
+
+	call	*RAW_CLOSURE_FUN_OFFSET(%eax)
+
+	movl	CIF_FLAGS_OFFSET(%esi), %eax		/* return type code from the cif */
+	cmpl	$FFI_TYPE_INT, %eax
+	je	L(raw_ld_int4)
+	cmpl	$FFI_TYPE_SINT64, %eax
+	je	L(raw_ld_int8)				/* only a 64-bit result loads %edx:%eax */
+	cmpl	$FFI_TYPE_FLOAT, %eax
+	je	L(raw_ld_float)
+	cmpl	$FFI_TYPE_DOUBLE, %eax
+	je	L(raw_ld_double)
+	cmpl	$FFI_TYPE_LONGDOUBLE, %eax
+	jne	L(raw_ld_int4)				/* any other type: just load 32 bits into %eax */
+
+L(raw_ld_ldouble):
+	fldt	16(%esp)
+	jmp	L(raw_ld_epilogue)
+
+L(raw_ld_double):
+	fldl	16(%esp)
+	jmp	L(raw_ld_epilogue)
+
+L(raw_ld_float):
+	flds	16(%esp)
+	jmp	L(raw_ld_epilogue)
+
+	.balign	4
+L(raw_ld_int8):
+	movl	20(%esp), %edx
+	/* FALLTHRU */
+
+	.balign	4
+L(raw_ld_int4):
+	movl	16(%esp), %eax
+	/* FALLTHRU */
+
+L(raw_ld_epilogue):
+	movl	32(%esp), %esi
+	addl	$FRAME, %esp
+L(CFI6_3):
+	ret
+L(FE6):
+	SIZE(ffi_closure_outer_raw_cdecl)
+#endif
+
+#ifdef __PIC__
+	.balign	16
+	TYPE(getpc_ebx)
+E(getpc_ebx):				/* PIC helper: returns the caller's return address in %ebx */
+L(FB7):
+	movl	(%esp), %ebx		/* (%esp) holds the address of the instruction after the call */
+	ret
+L(FE7):
+	SIZE(getpc_ebx)
+#endif
+
+#ifdef X86_DARWIN
+.section __IMPORT,__jump_table,symbol_stubs,self_modifying_code+pure_instructions,5
+L_ffi_closure_inner$stub:		/* target of the Darwin PIC "call L_ffi_closure_inner$stub" in do_call */
+	.indirect_symbol E(ffi_closure_inner)
+	hlt; hlt; hlt; hlt; hlt		/* five one-byte placeholders, matching the stub size of 5 given in the .section line */
+#endif
+
+#ifndef X86_WIN32
+#ifdef X86_DARWIN
+#define EH(X)		.globl _##X##.eh; _##X##.eh:
+.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support
+#else
+#define EH(X)
+.section .eh_frame,EH_FRAME_FLAGS,@progbits
+#endif
+L(frame1):
+	.long	L(ECIE1)-L(SCIE1)	/* Length of Common Information Entry */
+L(SCIE1):
+	.long	0x0	/* CIE Identifier Tag */
+	.byte	0x1	/* CIE Version */
+	.ascii "zR\0"	/* CIE Augmentation */
+	.byte	0x1	/* .uleb128 0x1; CIE Code Alignment Factor */
+	.byte	0x7c	/* .sleb128 -4; CIE Data Alignment Factor */
+	.byte	0x8	/* CIE RA Column */
+	.byte	0x1	/* .uleb128 0x1; Augmentation size */
+	.byte	0x1b	/* FDE Encoding (pcrel sdata4) */
+	.byte	0xc	/* DW_CFA_def_cfa */
+	.byte	0x4	/* .uleb128 0x4 */
+	.byte	0x4	/* .uleb128 0x4 */
+	.byte	0x88	/* DW_CFA_offset, column 0x8 */
+	.byte	0x1	/* .uleb128 0x1 */
+	.align 4
+L(ECIE1):
+EH(ffi_call_inner)
+L(SFDE1):
+	.long	L(EFDE1)-L(ASFDE1)	/* FDE Length */
+L(ASFDE1):
+	.long	L(ASFDE1)-L(frame1)	/* FDE CIE offset */
+	.long	L(FB1)-.		/* FDE initial location */
+	.long	L(FE1)-L(FB1)		/* FDE address range */
+	.byte	0x0			/* .uleb128 0x0; Augmentation size */
+	.byte	0x4			/* DW_CFA_advance_loc4 */
+	.long	L(CFI1_1)-L(FB1)
+	.byte	0xc, 5, 12		/* DW_CFA_def_cfa: r5 (%ebp, now = DATA) ofs 12 */
+	.byte	0x88, 3			/* DW_CFA_offset: r8 (return address column) at cfa-12 */
+	.byte	0x83, 2			/* DW_CFA_offset: r3 (%ebx) at cfa-8 */
+	.byte	0x85, 1			/* DW_CFA_offset: r5 (%ebp) at cfa-4 */
+	.byte	0x4			/* DW_CFA_advance_loc4 */
+	.long	L(CFI1_2)-L(CFI1_1)
+	.byte	0xd, 4			/* DW_CFA_def_cfa_reg: r4 (%esp, after "movl %ebp, %esp") */
+	.align 4
+L(EFDE1):
+EH(ffi_closure_outer_fastcall)
+L(SFDE2):
+	.long	L(EFDE2)-L(ASFDE2)	/* FDE Length */
+L(ASFDE2):
+	.long	L(ASFDE2)-L(frame1)	/* FDE CIE offset */
+	.long	L(FB2)-.		/* FDE initial location */
+	.long	L(FE2)-L(FB2)		/* FDE address range */
+	.byte	0x0			/* .uleb128 0x0; Augmentation size */
+	.byte	0x4			/* DW_CFA_advance_loc4 */
+	.long	L(CFI2)-L(FB2)
+	.byte	0xe, 48			/* DW_CFA_def_cfa_offset: 48 */
+	.align 4
+L(EFDE2):
+EH(ffi_closure_outer_regparm2)
+L(SFDE3):
+	.long	L(EFDE3)-L(ASFDE3)	/* FDE Length */
+L(ASFDE3):
+	.long	L(ASFDE3)-L(frame1)	/* FDE CIE offset */
+	.long	L(FB3)-.		/* FDE initial location */
+	.long	L(FE3)-L(FB3)		/* FDE address range */
+	.byte	0x0			/* .uleb128 0x0; Augmentation size */
+	.byte	0x4			/* DW_CFA_advance_loc4 */
+	.long	L(CFI3)-L(FB3)
+	.byte	0xe, 48			/* DW_CFA_def_cfa_offset: 48 */
+	.align 4
+L(EFDE3):
+EH(ffi_closure_outer_regparm3)
+L(SFDE4):
+	.long	L(EFDE4)-L(ASFDE4)	/* FDE Length */
+L(ASFDE4):
+	.long	L(ASFDE4)-L(frame1)	/* FDE CIE offset */
+	.long	L(FB4)-.		/* FDE initial location */
+	.long	L(FE4)-L(FB4)		/* FDE address range */
+	.byte	0x0			/* .uleb128 0x0; Augmentation size */
+	.byte	0xe, 8			/* DW_CFA_def_cfa_offset: 8 */
+	.byte	0x4			/* DW_CFA_advance_loc4 */
+	.long	L(CFI4)-L(FB4)
+	.byte	0xe, 48			/* DW_CFA_def_cfa_offset: 48 */
+	.align 4
+L(EFDE4):
+EH(ffi_closure_outer_noregs)
+L(SFDE5):
+	.long	L(EFDE5)-L(ASFDE5)	/* FDE Length */
+L(ASFDE5):
+	.long	L(ASFDE5)-L(frame1)	/* FDE CIE offset */
+	.long	L(FB5)-.		/* FDE initial location */
+	.long	L(FE5)-L(FB5)		/* FDE address range */
+	.byte	0x0			/* .uleb128 0x0; Augmentation size */
+	.byte	0x4			/* DW_CFA_advance_loc4 */
+	.long	L(CFI5_1)-L(FB5)
+	.byte	0xe, 48			/* DW_CFA_def_cfa_offset: 48 */
+	.byte	0x4			/* DW_CFA_advance_loc4 */
+	.long	L(CFI5_2)-L(CFI5_1)
+	.byte	0x83, 5			/* DW_CFA_offset: r3 at cfa-20 */
+	.byte	0x4			/* DW_CFA_advance_loc4 */
+	.long	L(CFI5_3)-L(CFI5_2)
+	.byte	0xe, 4			/* DW_CFA_def_cfa_offset: 4 */
+	.byte	0x4			/* DW_CFA_advance_loc4 */
+	.long	L(CFI5_4)-L(CFI5_3)
+	.byte	0xe, 48			/* DW_CFA_def_cfa_offset: 48 */
+	.byte	0x4			/* DW_CFA_advance_loc4 */
+	.long	L(CFI5_5)-L(CFI5_4)
+	.byte	0xe, 4			/* DW_CFA_def_cfa_offset: 4 */
+	.align 4
+L(EFDE5):
+#if !FFI_NO_RAW_API
+EH(ffi_closure_outer_raw_cdecl)
+L(SFDE6):
+	.long	L(EFDE6)-L(ASFDE6)	/* FDE Length */
+L(ASFDE6):
+	.long	L(ASFDE6)-L(frame1)	/* FDE CIE offset */
+	.long	L(FB6)-.		/* FDE initial location */
+	.long	L(FE6)-L(FB6)		/* FDE address range */
+	.byte	0x0			/* .uleb128 0x0; Augmentation size */
+	.byte	0xe, 4			/* DW_CFA_def_cfa_offset: 4 (only the return address is on the stack at entry) */
+	.byte	0x4			/* DW_CFA_advance_loc4 */
+	.long	L(CFI6_1)-L(FB6)
+	.byte	0xe, 48			/* DW_CFA_def_cfa_offset: 48 (after "subl $FRAME, %esp") */
+	.byte	0x4			/* DW_CFA_advance_loc4 */
+	.long	L(CFI6_2)-L(CFI6_1)
+	.byte	0x86, 4			/* DW_CFA_offset: r6 (%esi, saved at 32(%esp)) at cfa-16 */
+	.byte	0x4			/* DW_CFA_advance_loc4 */
+	.long	L(CFI6_3)-L(CFI6_2)
+	.byte	0xe, 4			/* DW_CFA_def_cfa_offset: 4 */
+	.align 4
+L(EFDE6):
+#endif
+#ifdef __PIC__
+EH(getpc_ebx)
+L(SFDE7):
+	.long	L(EFDE7)-L(ASFDE7)	/* FDE Length */
+L(ASFDE7):
+	.long	L(ASFDE7)-L(frame1)	/* FDE CIE offset */
+	.long	L(FB7)-.		/* FDE initial location */
+	.long	L(FE7)-L(FB7)		/* FDE address range */
+	.byte	0x0			/* .uleb128 0x0; Augmentation size */
+	.align 4
+L(EFDE7):
+#endif
+#endif /* !X86_WIN32 */
+
+#endif /* !__x86_64__ */
--- testsuite/libffi.call/many_win32.c	(revision 24606)
+++ testsuite/libffi.call/many_win32.c	(local)
@@ -4,7 +4,7 @@
    PR:		none.
    Originator:	From the original ffitest.c  */
 
-/* { dg-do run { target i?86-*-cygwin* i?86-*-mingw* } } */
+/* { dg-do run { target i?86-*-* } } */
 
 #include "ffitest.h"
 #include <float.h>
--- testsuite/libffi.call/strlen_win32.c	(revision 24606)
+++ testsuite/libffi.call/strlen_win32.c	(local)
@@ -4,7 +4,7 @@
    PR:		none.
    Originator:	From the original ffitest.c  */
 
-/* { dg-do run { target i?86-*-cygwin* i?86-*-mingw* } } */
+/* { dg-do run { target i?86-*-* } } */
 
 #include "ffitest.h"
 
--- testsuite/libffi.call/regparm_1.c	(revision 24606)
+++ testsuite/libffi.call/regparm_1.c	(local)
@@ -0,0 +1,96 @@
+/* Area:	ffi_call
+   Purpose:	Check regparm parameter passing.
+   Limitations:	none.
+   PR:		none.
+   Originator:	Richard Henderson  */
+
+/* { dg-do run { target i?86-*-* } } */
+
+#include "ffitest.h"
+
+static unsigned rec[4];
+
+static unsigned __attribute__((regparm(1)))	/* callee declared regparm(1) */
+fn_regparm1 (unsigned int a, unsigned int b, unsigned int c, unsigned int d)
+{
+  rec[0] = a, rec[1] = b, rec[2] = c, rec[3] = d;	/* record each argument as received so main can check it */
+  return a + b + c + d;
+}
+
+static unsigned __attribute__((regparm(2)))	/* callee declared regparm(2) */
+fn_regparm2 (unsigned int a, unsigned int b, unsigned int c, unsigned int d)
+{
+  rec[0] = a, rec[1] = b, rec[2] = c, rec[3] = d;	/* record each argument as received so main can check it */
+  return a + b + c + d;
+}
+
+static unsigned __attribute__((regparm(3)))	/* callee declared regparm(3) */
+fn_regparm3 (unsigned int a, unsigned int b, unsigned int c, unsigned int d)
+{
+  rec[0] = a, rec[1] = b, rec[2] = c, rec[3] = d;	/* record each argument as received so main can check it */
+  return a + b + c + d;
+}
+
+static unsigned __attribute__((fastcall))	/* callee declared fastcall */
+fn_fastcall(unsigned int a, unsigned int b, unsigned int c, unsigned int d)
+{
+  rec[0] = a, rec[1] = b, rec[2] = c, rec[3] = d;	/* record each argument as received so main can check it */
+  return a + b + c + d;
+}
+
+int main (void)	/* calls each callee through ffi_call and checks both the sum and the per-argument values */
+{
+  ffi_cif cif;
+  ffi_type *args[MAX_ARGS];
+  void *values[MAX_ARGS];
+  ffi_arg rint;
+  unsigned a, b, c, d;
+
+  args[0] = &ffi_type_uint;
+  args[1] = &ffi_type_uint;
+  args[2] = &ffi_type_uint;
+  args[3] = &ffi_type_uint;
+  values[0] = (void*) &a;
+  values[1] = (void*) &b;
+  values[2] = (void*) &c;
+  values[3] = (void*) &d;
+
+  a = 0x01010101;	/* four distinct patterns, so a swapped or dropped argument shows up in rec[] */
+  b = 0x00020002;
+  c = 0x04000400;
+  d = 0x80808080;	/* a + b + c + d == 0x85838583 */
+
+  CHECK(ffi_prep_cif(&cif, FFI_REGPARM_1, 4, &ffi_type_uint, args) == FFI_OK);	/* pairs with fn_regparm1 */
+  ffi_call(&cif, FFI_FN(fn_regparm1), &rint, values);
+  CHECK((unsigned)rint == 0x85838583);
+  CHECK(rec[0] == a);
+  CHECK(rec[1] == b);
+  CHECK(rec[2] == c);
+  CHECK(rec[3] == d);
+
+  CHECK(ffi_prep_cif(&cif, FFI_REGPARM_2, 4, &ffi_type_uint, args) == FFI_OK);	/* pairs with fn_regparm2 */
+  ffi_call(&cif, FFI_FN(fn_regparm2), &rint, values);
+  CHECK((unsigned)rint == 0x85838583);
+  CHECK(rec[0] == a);
+  CHECK(rec[1] == b);
+  CHECK(rec[2] == c);
+  CHECK(rec[3] == d);
+
+  CHECK(ffi_prep_cif(&cif, FFI_REGPARM_3, 4, &ffi_type_uint, args) == FFI_OK);	/* pairs with fn_regparm3 */
+  ffi_call(&cif, FFI_FN(fn_regparm3), &rint, values);
+  CHECK((unsigned)rint == 0x85838583);
+  CHECK(rec[0] == a);
+  CHECK(rec[1] == b);
+  CHECK(rec[2] == c);
+  CHECK(rec[3] == d);
+
+  CHECK(ffi_prep_cif(&cif, FFI_FASTCALL, 4, &ffi_type_uint, args) == FFI_OK);	/* pairs with fn_fastcall */
+  ffi_call(&cif, FFI_FN(fn_fastcall), &rint, values);
+  CHECK((unsigned)rint == 0x85838583);
+  CHECK(rec[0] == a);
+  CHECK(rec[1] == b);
+  CHECK(rec[2] == c);
+  CHECK(rec[3] == d);
+
+  exit(0);
+}


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]