This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

i386 longlong.h fixes (Was: ... -masm=intel problems)


On Mon, Oct 22, 2007 at 07:42:20AM +0200, Uros Bizjak wrote:
> Hello!
> 
> >   I also tested bootstrap with BOOT_CFLAGS='-O2 -g -fomit-frame-pointer
> > -masm=intel'. To to that, I used an additional patch to longlong.h to
> >  disable the inline asm:
> 
> I think that in the short term, longlong.h issue should be fixed by
> introducing asm dialects into the asm template. Otherwise, the reason
> that we still implement it in asm is described in PR 31985 (the
> situation is much bettern now, but still not equivalent to asm
> version).

   The add_ssaaaa and sub_ddmmss macros are defined differently in
longlong.h than in PR 31985 and we ought to be able to produce optimum code
without inline asm. Testcase:

#include <stdint.h>
#define W_TYPE_SIZE 32
#define UWtype	unsigned int
#define UHWtype	unsigned short int
#define UDWtype	unsigned long long int
#define UQItype	uint8_t
#define SItype	int32_t
#define USItype	uint32_t
#define DItype	int64_t
#define UDItype	uint64_t

#include <longlong.h>
         
UDWtype test_add_ssaaaa (UWtype ah, UWtype al, UWtype bh, UWtype bl)
{
  UWtype reth, retl;

  add_ssaaaa (reth, retl, ah, al, bh, bl);
  return (((UDWtype) reth << W_TYPE_SIZE) | retl);
}

   I tried deleting the x86 version of that macro. Unfortunately, we're
missing a pattern for combine:

Index: gcc/config/i386/i386.md
===================================================================
--- gcc/config/i386/i386.md	(revision 129548)
+++ gcc/config/i386/i386.md	(working copy)
@@ -5532,6 +5532,39 @@
    (set_attr "pent_pair" "pu")
    (set_attr "mode" "SI")])
 
+; Combine creates such insns for add_ssaaaa() from longlong.h.
+; Add the constant 0 that we need.
+(define_split
+  [(set (match_operand:SWI 0 "nonimmediate_operand")
+	(plus:SWI (match_operand:SWI 2 "ix86_carry_flag_operator")
+		  (match_operand:SWI 1 "nonimmediate_operand")))
+   (clobber (reg:CC FLAGS_REG))]
+  "(!MEM_P (operands[0]) && !MEM_P (operands[1]))
+   || rtx_equal_p (operands[0], operands[1])"
+  [(parallel
+  [(set (match_dup 0)
+	(plus:SWI (plus:SWI (match_dup 2) (match_dup 1))
+		  (const_int 0)))
+   (clobber (reg:CC FLAGS_REG))]
+  )]
+)
+
+(define_split
+  [(set (match_operand:DI 0 "register_operand")
+	(zero_extend:DI
+	    (plus:SI (match_operand:SI 2 "ix86_carry_flag_operator")
+		     (match_operand:SI 1 "register_operand"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT"
+  [(parallel
+  [(set (match_dup 0)
+	(zero_extend:DI
+	    (plus:SI (plus:SI (match_dup 2) (match_dup 1))
+		     (const_int 0))))
+   (clobber (reg:CC FLAGS_REG))]
+  )]
+)
+
 (define_insn "*addsi3_cc"
   [(set (reg:CC FLAGS_REG)
 	(unspec:CC [(match_operand:SI 1 "nonimmediate_operand" "%0,0")

   With that, the code is just as good as the asm version (-O2
-fomit-frame-pointer -m32):

test_add_ssaaaa:
	movl	8(%esp), %eax	# 3	*movsi_1/1	[length = 4]
	movl	12(%esp), %edx	# 4	*movsi_1/1	[length = 4]
	addl	16(%esp), %eax	# 11	*addsi3_cc_overflow/2	[length = 4]
	adcl	4(%esp), %edx	# 14	addsi3_carry/2	[length = 4]
	ret	# 53	return_internal	[length = 1]

   When trying the same with the "sub_ddmmss" macro, there's initially the
same missing (const_int 0) and a similar fix, but after that, the insn
canonicalization doesn't work out to match the existing subsi3_carry
instruction. Adding a new one fixes it and the resulting code is again no
worse than the asm version:

UDWtype test_sub_ddmmss (UWtype ah, UWtype al, UWtype bh, UWtype bl)
{
  UWtype reth, retl;

  sub_ddmmss (reth, retl, ah, al, bh, bl);
  return (((UDWtype) reth << W_TYPE_SIZE) | retl);
}

@@ -7278,6 +7311,64 @@
    (set_attr "pent_pair" "pu")
    (set_attr "mode" "SI")])
 
+; Combine creates such insns for sub_ddmmss() from longlong.h.
+; Add the constant 0 that we need.
+(define_split
+  [(set (match_operand:SWI 0 "nonimmediate_operand")
+	(minus:SWI (match_operand:SWI 1 "nonimmediate_operand")
+		   (match_operand:SWI 2 "ix86_carry_flag_operator")))
+   (clobber (reg:CC FLAGS_REG))]
+  "(!MEM_P (operands[0]) && !MEM_P (operands[1]))
+   || rtx_equal_p (operands[0], operands[1])"
+  [(parallel
+  [(set (match_dup 0)
+	(minus:SWI (match_dup 1)
+		   (plus:SWI (match_dup 2) (const_int 0))))
+   (clobber (reg:CC FLAGS_REG))]
+  )]
+)
+
+(define_split
+  [(set (match_operand:DI 0 "register_operand")
+	(zero_extend:DI
+	    (minus:SI (match_operand:SI 1 "register_operand")
+		      (match_operand:SI 2 "ix86_carry_flag_operator"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT"
+  [(parallel
+  [(set (match_dup 0)
+	(zero_extend:DI
+	    (minus:SI (match_dup 1)
+		      (plus:SI (match_dup 2) (const_int 0)))))
+   (clobber (reg:CC FLAGS_REG))]
+  )]
+)
+
+(define_insn "*sub<mode>3_carry_new"
+  [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
+	(minus:SWI (minus:SWI (match_operand:SWI 1 "nonimmediate_operand" "0,0")
+			      (match_operand:SWI 3 "ix86_carry_flag_operator"))
+		   (match_operand:SWI 2 "<general_operand>" "<r><e>,<r>m")))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)"
+  "sbb{<imodesuffix>}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "pent_pair" "pu")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "*subsi3_carry_zext_new"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+     (zero_extend:DI
+	(minus:SI (minus:SI (match_operand:SI 1 "register_operand" "0")
+			    (match_operand:SI 3 "ix86_carry_flag_operator"))
+		  (match_operand:SI 2 "general_operand" "g"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (MINUS, SImode, operands)"
+  "sbb{l}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "pent_pair" "pu")
+   (set_attr "mode" "SI")])
+
 (define_expand "subsi3"
   [(parallel [(set (match_operand:SI 0 "nonimmediate_operand" "")
 		   (minus:SI (match_operand:SI 1 "nonimmediate_operand" "")

test_sub_ddmmss:
	movl	4(%esp), %edx	# 2	*movsi_1/1	[length = 4]
	movl	8(%esp), %eax	# 3	*movsi_1/1	[length = 4]
	subl	16(%esp), %eax	# 11	*subsi3_cc_overflow/2	[length = 4]
	sbbl	12(%esp), %edx	# 14	*subsi3_carry_new/2	[length = 4]
	ret	# 53	return_internal	[length = 1]

   However, if you followed the above, you'll probably have a nasty feeling
that there are issues with insn canonicalization lurking under the surface,
and sure enough, everything falls apart when some clod passes in a constant
for the subtrahend:
         
UDWtype test_sub_ddmm (UWtype ah, UWtype al)
{
  UWtype reth, retl;

  sub_ddmmss (reth, retl, ah, al, 1, 1);
  return (((UDWtype) reth << W_TYPE_SIZE) | retl);
}

test_sub_ddmm:
	movl	8(%esp), %edx	# 3	*movsi_1/1	[length = 4]
	movl	4(%esp), %ecx	# 49	*movsi_1/1	[length = 4]
	leal	-1(%edx), %eax	# 50	*lea_1	[length = 3]
	subl	$1, %ecx	# 8	*addsi_1/1	[length = 3]
	cmpl	%edx, %eax	# 9	*cmpsi_1_insn/1	[length = 2]
	seta	%dl	# 10	*setcc_1	[length = 3]
	movzbl	%dl, %edx	# 51	*zero_extendqisi2_movzbw	[length = 3]
	subl	%edx, %ecx	# 12	*subsi_1/1	[length = 2]
	movl	%ecx, %edx	# 45	*movsi_1/1	[length = 2]
	ret	# 54	return_internal	[length = 1]

   The asm version produces the intended code in this case:

test_sub_ddmm:
	movl	4(%esp), %edx	# 42	*movsi_1/1	[length = 4]
	movl	8(%esp), %eax	# 43	*movsi_1/1	[length = 4]
#APP
# 34 "/home/rask/longlongcheck.c" 1
	subl $1,%eax
	sbbl $1,%edx
# 0 "" 2
#NO_APP
	ret	# 47	return_internal	[length = 1]

   I can easily add the remaining i386.md magic to deal with the constant
case also, but I think it's already past stage 3 material, so I'll submit a
straightforward fix for longlong.h instead of a further patched i386.md.
It would have looked good in GCC 4.3 though. :-(

-- 
Rask Ingemann Lambertsen
Danish law requires addresses in e-mail to be logged and stored for a year


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]