This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: vararg calls with libffi on x86_64


Richard Henderson <rth@redhat.com> writes:

[...]
> Two more small changes...
[... trampoline setup /  replace subq with leaq]

Done, and the patch is appended. (As you might have noticed, I'm not an
expert in assembly language, nor in English; thank you very much for
your help, I have learned quite a bit.) No more changes, I'm off until
next week ;-)

ciao
Andreas


Index: src/x86/ffi64.c
===================================================================
RCS file: /cvsroot/gcc/gcc/libffi/src/x86/ffi64.c,v
retrieving revision 1.8
diff -u -r1.8 ffi64.c
--- src/x86/ffi64.c	25 Dec 2004 09:54:40 -0000	1.8
+++ src/x86/ffi64.c	4 May 2005 20:52:30 -0000
@@ -42,7 +42,7 @@
 };
 
 extern void ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags,
-			     void *raddr, void (*fnaddr)());
+			     void *raddr, void (*fnaddr)(), unsigned ssecount);
 
 /* All reference to register classes here is identical to the code in
    gcc/config/i386/i386.c. Do *not* change one without the other.  */
@@ -303,10 +303,9 @@
 	  else if (sse0 && sse1)
 	    flags |= 1 << 10;
 	  /* Mark the true size of the structure.  */
-	  flags |= cif->rtype->size << 11;
+	  flags |= cif->rtype->size << 12;
 	}
     }
-  cif->flags = flags;
 
   /* Go over all arguments and determine the way they should be passed.
      If it's in a register and there is space for it, let that be so. If
@@ -331,6 +330,9 @@
 	  ssecount += nsse;
 	}
     }
+  if (ssecount)
+    flags |= 1 << 11;
+  cif->flags = flags;
   cif->bytes = bytes;
 
   return FFI_OK;
@@ -353,7 +355,7 @@
      address then we need to make one.  Note the setting of flags to
      VOID above in ffi_prep_cif_machdep.  */
   ret_in_memory = (cif->rtype->type == FFI_TYPE_STRUCT
-		   && cif->flags == FFI_TYPE_VOID);
+		   && (cif->flags & 0xff) == FFI_TYPE_VOID);
   if (rvalue == NULL && ret_in_memory)
     rvalue = alloca (cif->rtype->size);
 
@@ -424,7 +426,7 @@
     }
 
   ffi_call_unix64 (stack, cif->bytes + sizeof (struct register_args),
-		   cif->flags, rvalue, fn);
+		   cif->flags, rvalue, fn, ssecount);
 }
 
 
@@ -439,13 +441,18 @@
   volatile unsigned short *tramp;
 
   tramp = (volatile unsigned short *) &closure->tramp[0];
+
   tramp[0] = 0xbb49;		/* mov <code>, %r11	*/
-  tramp[5] = 0xba49;		/* mov <data>, %r10	*/
-  tramp[10] = 0xff49;		/* jmp *%r11	*/
-  tramp[11] = 0x00e3;
   *(void * volatile *) &tramp[1] = ffi_closure_unix64;
+  tramp[5] = 0xba49;		/* mov <data>, %r10	*/
   *(void * volatile *) &tramp[6] = closure;
 
+  /* Set the carry flag iff the function uses any SSE registers: stc (0xf9)
+     or clc (0xf8), followed by 0x49, the first (REX) byte of the jmp.  */
+  tramp[10] = cif->flags & (1 << 11) ? 0x49f9 : 0x49f8;
+
+  tramp[11] = 0xe3ff;			/* jmp *%r11    */
+
   closure->cif = cif;
   closure->fun = fun;
   closure->user_data = user_data;
Index: src/x86/unix64.S
===================================================================
RCS file: /cvsroot/gcc/gcc/libffi/src/x86/unix64.S,v
retrieving revision 1.7
diff -u -r1.7 unix64.S
--- src/x86/unix64.S	27 Dec 2004 09:20:10 -0000	1.7
+++ src/x86/unix64.S	4 May 2005 20:52:30 -0000
@@ -53,6 +53,7 @@
 .LUW1:
 	movq	%rdi, %r10		/* Save a copy of the register area. */
 	movq	%r8, %r11		/* Save a copy of the target fn.  */
+	movl	%r9d, %eax		/* Set number of SSE registers.  */
 
 	/* Load up all argument registers.  */
 	movq	(%r10), %rdi
@@ -61,14 +62,9 @@
 	movq	24(%r10), %rcx
 	movq	32(%r10), %r8
 	movq	40(%r10), %r9
-	movdqa	48(%r10), %xmm0
-	movdqa	64(%r10), %xmm1
-	movdqa	80(%r10), %xmm2
-	movdqa	96(%r10), %xmm3
-	movdqa	112(%r10), %xmm4
-	movdqa	128(%r10), %xmm5
-	movdqa	144(%r10), %xmm6
-	movdqa	160(%r10), %xmm7
+	testl	%eax, %eax
+	jnz	.Lload_sse
+.Lret_from_load_sse:
 
 	/* Deallocate the reg arg area.  */
 	leaq	176(%r10), %rsp
@@ -91,6 +87,21 @@
 	addq	%r11, %r10
 	jmp	*%r10
 
+	/* Many times we can avoid loading any SSE registers at all.
+	   It's not worth an indirect jump to load the exact set of
+	   SSE registers needed; zero or all is a good compromise.  */
+	.align 2
+.Lload_sse:
+        movdqa	48(%r10), %xmm0
+        movdqa	64(%r10), %xmm1
+        movdqa	80(%r10), %xmm2
+        movdqa	96(%r10), %xmm3
+        movdqa	112(%r10), %xmm4
+        movdqa	128(%r10), %xmm5
+        movdqa	144(%r10), %xmm6
+        movdqa	160(%r10), %xmm7
+        jmp	.Lret_from_load_sse
+
 	.section .rodata
 .Lstore_table:
 	.long	.Lst_void-.Lstore_table		/* FFI_TYPE_VOID */
@@ -181,9 +192,9 @@
 	movq	%rax, (%rsi)
 	movq	%rdx, 8(%rsi)
 
-	/* Bits 11-31 contain the true size of the structure.  Copy from
+	/* Bits 12-31 contain the true size of the structure.  Copy from
 	   the scratch area to the true destination.  */
-	shrl	$11, %ecx
+	shrl	$12, %ecx
 	rep movsb
 	ret
 .LUW3:
@@ -195,23 +206,18 @@
 
 ffi_closure_unix64:
 .LUW4:
-	subq	$200, %rsp
+	/* The carry flag is set by the trampoline iff SSE registers are  */
+	/* used.  leaq (unlike subq) leaves it intact for the jc below.   */
+	leaq    -200(%rsp), %rsp
 .LUW5:
-
 	movq	%rdi, (%rsp)
         movq    %rsi, 8(%rsp)
         movq    %rdx, 16(%rsp)
         movq    %rcx, 24(%rsp)
         movq    %r8, 32(%rsp)
         movq    %r9, 40(%rsp)
-	movdqa	%xmm0, 48(%rsp)
-	movdqa	%xmm1, 64(%rsp)
-	movdqa	%xmm2, 80(%rsp)
-	movdqa	%xmm3, 96(%rsp)
-	movdqa	%xmm4, 112(%rsp)
-	movdqa	%xmm5, 128(%rsp)
-	movdqa	%xmm6, 144(%rsp)
-	movdqa	%xmm7, 160(%rsp)
+        jc      .Lsave_sse
+.Lret_from_save_sse:
 
 	movq	%r10, %rdi
 	leaq	176(%rsp), %rsi
@@ -230,6 +236,19 @@
 	addq	%r11, %r10
 	jmp	*%r10
 
+	/* As with loading the SSE registers in .Lload_sse above: save all or none.  */
+	.align 2
+.Lsave_sse:
+	movdqa	%xmm0, 48(%rsp)
+	movdqa	%xmm1, 64(%rsp)
+	movdqa	%xmm2, 80(%rsp)
+	movdqa	%xmm3, 96(%rsp)
+	movdqa	%xmm4, 112(%rsp)
+	movdqa	%xmm5, 128(%rsp)
+	movdqa	%xmm6, 144(%rsp)
+	movdqa	%xmm7, 160(%rsp)
+	jmp	.Lret_from_save_sse
+
 	.section .rodata
 .Lload_table:
 	.long	.Lld_void-.Lload_table		/* FFI_TYPE_VOID */


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]