This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
i386 longlong.h fixes (Was: ... -masm=intel problems)
- From: Rask Ingemann Lambertsen <rask at sygehus dot dk>
- To: Uros Bizjak <ubizjak at gmail dot com>
- Cc: GCC Patches <gcc-patches at gcc dot gnu dot org>
- Date: Mon, 22 Oct 2007 23:54:10 +0200
- Subject: i386 longlong.h fixes (Was: ... -masm=intel problems)
- References: <5787cf470710212242q169a9b29yd5103d66f4e0a97a@mail.gmail.com>
On Mon, Oct 22, 2007 at 07:42:20AM +0200, Uros Bizjak wrote:
> Hello!
>
> > I also tested bootstrap with BOOT_CFLAGS='-O2 -g -fomit-frame-pointer
> > -masm=intel'. To to that, I used an additional patch to longlong.h to
> > disable the inline asm:
>
> I think that in the short term, longlong.h issue should be fixed by
> introducing asm dialects into the asm template. Otherwise, the reason
> that we still implement it in asm is described in PR 31985 (the
> situation is much bettern now, but still not equivalent to asm
> version).
The add_ssaaaa and sub_ddmmss macros are defined differently in
longlong.h than in PR 31985 and we ought to be able to produce optimum code
without inline asm. Testcase:
#include <stdint.h>
#define W_TYPE_SIZE 32
#define UWtype unsigned int
#define UHWtype unsigned short int
#define UDWtype unsigned long long int
#define UQItype uint8_t
#define SItype int32_t
#define USItype uint32_t
#define DItype int64_t
#define UDItype uint64_t
#include <longlong.h>
UDWtype test_add_ssaaaa (UWtype ah, UWtype al, UWtype bh, UWtype bl)
{
UWtype reth, retl;
add_ssaaaa (reth, retl, ah, al, bh, bl);
return (((UDWtype) reth << W_TYPE_SIZE) | retl);
}
I tried deleting the x86 version of that macro. Unfortunately, we're
missing a pattern for combine:
Index: gcc/config/i386/i386.md
===================================================================
--- gcc/config/i386/i386.md (revision 129548)
+++ gcc/config/i386/i386.md (working copy)
@@ -5532,6 +5532,39 @@
(set_attr "pent_pair" "pu")
(set_attr "mode" "SI")])
+; Combine creates such insns for add_ssaaaa() from longlong.h.
+; Add the constant 0 that we need.
+(define_split
+ [(set (match_operand:SWI 0 "nonimmediate_operand")
+ (plus:SWI (match_operand:SWI 2 "ix86_carry_flag_operator")
+ (match_operand:SWI 1 "nonimmediate_operand")))
+ (clobber (reg:CC FLAGS_REG))]
+ "(!MEM_P (operands[0]) && !MEM_P (operands[1]))
+ || rtx_equal_p (operands[0], operands[1])"
+ [(parallel
+ [(set (match_dup 0)
+ (plus:SWI (plus:SWI (match_dup 2) (match_dup 1))
+ (const_int 0)))
+ (clobber (reg:CC FLAGS_REG))]
+ )]
+)
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand")
+ (zero_extend:DI
+ (plus:SI (match_operand:SI 2 "ix86_carry_flag_operator")
+ (match_operand:SI 1 "register_operand"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT"
+ [(parallel
+ [(set (match_dup 0)
+ (zero_extend:DI
+ (plus:SI (plus:SI (match_dup 2) (match_dup 1))
+ (const_int 0))))
+ (clobber (reg:CC FLAGS_REG))]
+ )]
+)
+
(define_insn "*addsi3_cc"
[(set (reg:CC FLAGS_REG)
(unspec:CC [(match_operand:SI 1 "nonimmediate_operand" "%0,0")
With that, the code is just as good as the asm version (-O2
-fomit-frame-pointer -m32):
test_add_ssaaaa:
movl 8(%esp), %eax # 3 *movsi_1/1 [length = 4]
movl 12(%esp), %edx # 4 *movsi_1/1 [length = 4]
addl 16(%esp), %eax # 11 *addsi3_cc_overflow/2 [length = 4]
adcl 4(%esp), %edx # 14 addsi3_carry/2 [length = 4]
ret # 53 return_internal [length = 1]
When trying the same with the "sub_ddmmss" macro, there's initially the
same missing (const_int 0) and a similar fix, but after that, the insn
canonicalization doesn't work out to match the existing subsi3_carry
instruction. Adding a new one fixes it and the resulting code is again no
worse than the asm version:
UDWtype test_sub_ddmmss (UWtype ah, UWtype al, UWtype bh, UWtype bl)
{
UWtype reth, retl;
sub_ddmmss (reth, retl, ah, al, bh, bl);
return (((UDWtype) reth << W_TYPE_SIZE) | retl);
}
@@ -7278,6 +7311,64 @@
(set_attr "pent_pair" "pu")
(set_attr "mode" "SI")])
+; Combine creates such insns for sub_ddmmss() from longlong.h.
+; Add the constant 0 that we need.
+(define_split
+ [(set (match_operand:SWI 0 "nonimmediate_operand")
+ (minus:SWI (match_operand:SWI 1 "nonimmediate_operand")
+ (match_operand:SWI 2 "ix86_carry_flag_operator")))
+ (clobber (reg:CC FLAGS_REG))]
+ "(!MEM_P (operands[0]) && !MEM_P (operands[1]))
+ || rtx_equal_p (operands[0], operands[1])"
+ [(parallel
+ [(set (match_dup 0)
+ (minus:SWI (match_dup 1)
+ (plus:SWI (match_dup 2) (const_int 0))))
+ (clobber (reg:CC FLAGS_REG))]
+ )]
+)
+
+(define_split
+ [(set (match_operand:DI 0 "register_operand")
+ (zero_extend:DI
+ (minus:SI (match_operand:SI 1 "register_operand")
+ (match_operand:SI 2 "ix86_carry_flag_operator"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_64BIT"
+ [(parallel
+ [(set (match_dup 0)
+ (zero_extend:DI
+ (minus:SI (match_dup 1)
+ (plus:SI (match_dup 2) (const_int 0)))))
+ (clobber (reg:CC FLAGS_REG))]
+ )]
+)
+
+(define_insn "*sub<mode>3_carry_new"
+ [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
+ (minus:SWI (minus:SWI (match_operand:SWI 1 "nonimmediate_operand" "0,0")
+ (match_operand:SWI 3 "ix86_carry_flag_operator"))
+ (match_operand:SWI 2 "<general_operand>" "<r><e>,<r>m")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (MINUS, <MODE>mode, operands)"
+ "sbb{<imodesuffix>}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "pent_pair" "pu")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*subsi3_carry_zext_new"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (zero_extend:DI
+ (minus:SI (minus:SI (match_operand:SI 1 "register_operand" "0")
+ (match_operand:SI 3 "ix86_carry_flag_operator"))
+ (match_operand:SI 2 "general_operand" "g"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (MINUS, SImode, operands)"
+ "sbb{l}\t{%2, %0|%0, %2}"
+ [(set_attr "type" "alu")
+ (set_attr "pent_pair" "pu")
+ (set_attr "mode" "SI")])
+
(define_expand "subsi3"
[(parallel [(set (match_operand:SI 0 "nonimmediate_operand" "")
(minus:SI (match_operand:SI 1 "nonimmediate_operand" "")
test_sub_ddmmss:
movl 4(%esp), %edx # 2 *movsi_1/1 [length = 4]
movl 8(%esp), %eax # 3 *movsi_1/1 [length = 4]
subl 16(%esp), %eax # 11 *subsi3_cc_overflow/2 [length = 4]
sbbl 12(%esp), %edx # 14 *subsi3_carry_new/2 [length = 4]
ret # 53 return_internal [length = 1]
However, if you followed the above, you'll probably have a nasty feeling
that there are issues with insn canonicalization lurking under the surface,
and sure enough, everything falls apart when some clod passes in a constant
for the subtrahend:
UDWtype test_sub_ddmm (UWtype ah, UWtype al)
{
UWtype reth, retl;
sub_ddmmss (reth, retl, ah, al, 1, 1);
return (((UDWtype) reth << W_TYPE_SIZE) | retl);
}
test_sub_ddmm:
movl 8(%esp), %edx # 3 *movsi_1/1 [length = 4]
movl 4(%esp), %ecx # 49 *movsi_1/1 [length = 4]
leal -1(%edx), %eax # 50 *lea_1 [length = 3]
subl $1, %ecx # 8 *addsi_1/1 [length = 3]
cmpl %edx, %eax # 9 *cmpsi_1_insn/1 [length = 2]
seta %dl # 10 *setcc_1 [length = 3]
movzbl %dl, %edx # 51 *zero_extendqisi2_movzbw [length = 3]
subl %edx, %ecx # 12 *subsi_1/1 [length = 2]
movl %ecx, %edx # 45 *movsi_1/1 [length = 2]
ret # 54 return_internal [length = 1]
The asm version produces the intended code in this case:
test_sub_ddmm:
movl 4(%esp), %edx # 42 *movsi_1/1 [length = 4]
movl 8(%esp), %eax # 43 *movsi_1/1 [length = 4]
#APP
# 34 "/home/rask/longlongcheck.c" 1
subl $1,%eax
sbbl $1,%edx
# 0 "" 2
#NO_APP
ret # 47 return_internal [length = 1]
I can easily add the remaining i386.md magic to deal with the constant
case also, but I think it's already past stage 3 material, so I'll submit a
straightforward fix for longlong.h instead of a further patched i386.md.
It would have looked good in GCC 4.3 though. :-(
--
Rask Ingemann Lambertsen
Danish law requires addresses in e-mail to be logged and stored for a year