[hjl@gnu-tools-1 delta-2006.08.03]$ cat x.c typedef int int32_t; typedef unsigned int uint32_t; typedef int32_t Elf32_Sword; typedef struct { Elf32_Sword d_tag; } Elf32_Dyn; struct link_map { Elf32_Dyn *l_ld; Elf32_Dyn *l_info[34]; }; extern struct link_map _dl_rtld_map __attribute__ ((visibility ("hidden"))); static void elf_get_dynamic_info (struct link_map *l) { Elf32_Dyn *dyn = l->l_ld; Elf32_Dyn **info; info = l->l_info; while (dyn->d_tag != 0) { if (dyn->d_tag < 11) info[0x6ffffeff - dyn->d_tag + 12] = dyn; ++dyn; } } void dl_start (void) { elf_get_dynamic_info (&_dl_rtld_map); } [hjl@gnu-tools-1 delta-2006.08.03]$ /export/build/gnu/gcc-x32/release/usr/gcc-4.8.0-x32/bin/gcc -mx32 -mtune=generic -march=x86-64 x.c -S -O2 -fPIC -maddress-mode=long x.c: In function ‘dl_start’: x.c:22:37: internal compiler error: in plus_constant, at explow.c:88 info[0x6ffffeff - dyn->d_tag + 12] = dyn; ^ 0x6a656a plus_constant(machine_mode, rtx_def*, long) /export/gnu/import/git/gcc/gcc/explow.c:88 0xb107e7 ix86_expand_move(machine_mode, rtx_def**) /export/gnu/import/git/gcc/gcc/config/i386/i386.c:15973 0xb85c3e gen_movsi(rtx_def*, rtx_def*) /export/gnu/import/git/gcc/gcc/config/i386/i386.md:1813 0x6c11f4 emit_move_insn_1(rtx_def*, rtx_def*) /export/gnu/import/git/gcc/gcc/expr.c:3418 0x6c14f0 emit_move_insn(rtx_def*, rtx_def*) /export/gnu/import/git/gcc/gcc/expr.c:3512 0x6a6d2e copy_to_mode_reg(machine_mode, rtx_def*) /export/gnu/import/git/gcc/gcc/explow.c:645 0x825bc2 maybe_legitimize_operand /export/gnu/import/git/gcc/gcc/optabs.c:8080 0x825bc2 maybe_legitimize_operands(insn_code, unsigned int, unsigned int, expand_operand*) /export/gnu/import/git/gcc/gcc/optabs.c:8142 0x825cf8 maybe_gen_insn(insn_code, unsigned int, expand_operand*) /export/gnu/import/git/gcc/gcc/optabs.c:8160 0x829364 expand_binop_directly /export/gnu/import/git/gcc/gcc/optabs.c:1461 0x82773d expand_binop(machine_mode, optab_tag, rtx_def*, rtx_def*, rtx_def*, int, optab_methods) /export/gnu/import/git/gcc/gcc/optabs.c:1530 0x6c5d78 force_operand(rtx_def*, rtx_def*) /export/gnu/import/git/gcc/gcc/expr.c:7057 0x6a6d52 copy_to_mode_reg(machine_mode, rtx_def*) /export/gnu/import/git/gcc/gcc/explow.c:641 0x825bc2 maybe_legitimize_operand /export/gnu/import/git/gcc/gcc/optabs.c:8080 0x825bc2 maybe_legitimize_operands(insn_code, unsigned int, unsigned int, expand_operand*) /export/gnu/import/git/gcc/gcc/optabs.c:8142 0x825cf8 maybe_gen_insn(insn_code, unsigned int, expand_operand*) /export/gnu/import/git/gcc/gcc/optabs.c:8160 0x825f69 maybe_emit_unop_insn(insn_code, rtx_def*, rtx_def*, rtx_code) /export/gnu/import/git/gcc/gcc/optabs.c:3774 0x825ff8 emit_unop_insn(insn_code, rtx_def*, rtx_def*, rtx_code) /export/gnu/import/git/gcc/gcc/optabs.c:3796 0x6c27b0 convert_modes(machine_mode, machine_mode, rtx_def*, int) /export/gnu/import/git/gcc/gcc/expr.c:784 0x6a7161 memory_address_addr_space(machine_mode, rtx_def*, unsigned char) /export/gnu/import/git/gcc/gcc/explow.c:429 Please submit a full bug report, with preprocessed source if appropriate. Please include the complete backtrace with any bug report. See <http://gcc.gnu.org/bugs.html> for instructions. [hjl@gnu-tools-1 delta-2006.08.03]$
This diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 2284703..8569418 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -12902,6 +12902,8 @@ legitimize_pic_address (rtx orig, rtx reg) else { base = legitimize_pic_address (XEXP (addr, 0), reg); + if (GET_MODE (base) != Pmode) + base = convert_to_mode (Pmode, base, 1); new_rtx = legitimize_pic_address (XEXP (addr, 1), base == reg ? NULL_RTX : reg); fixes the testcase.
Patch doesn't work and elf_get_dynamic_info is miscompiled: 0xf7dddc88 <+5224>: neg %eax 0xf7dddc8a <+5226>: lea (%rcx,%rax,4),%eax ---Type <return> to continue, or q <return> to quit--- => 0xf7dddc8d <+5229>: mov %edx,-0x40000300(%rax) (gdb) p *(int *) $rax Cannot access memory at address 0x37ffe064 (gdb) bt #0 0xf7dddc8d in elf_get_dynamic_info (temp=0x0, l=0xf7ffdc18) at get-dynamic-info.h:61 #1 dl_main (phdr=<optimized out>, phnum=9, user_entry=<optimized out>, auxv=<optimized out>) at rtld.c:1311 #2 0xf7df12a0 in _dl_sysdep_start ( start_argptr=start_argptr@entry=0xffffd0c0, dl_main=<optimized out>) at ../elf/dl-sysdep.c:241 #3 0xf7ddfaa8 in _dl_start_final (arg=0xffffd0c0) at rtld.c:331 #4 _dl_start (arg=<optimized out>) at rtld.c:557 #5 0xf7ddc057 in _start () from /export/build/gnu/glibc-x32-long/build-x86_64-linux/elf/ld.so
The code looks like: while (dyn->d_tag != 0) { if ((d_tag_utype) dyn->d_tag < 34) info[dyn->d_tag] = dyn; else if (dyn->d_tag >= 0x70000000 && dyn->d_tag < 0x70000000 + 0) info[dyn->d_tag - 0x70000000 + 34] = dyn; else if ((d_tag_utype) (0x6fffffff - (dyn->d_tag)) < 16) info[(34 + 0 + (0x6fffffff - (dyn->d_tag)))] = dyn; else if ((d_tag_utype) ((Elf32_Word)-((Elf32_Sword) (dyn->d_tag) <<1>>1)-1) < 3) info[((Elf32_Word)-((Elf32_Sword) (dyn->d_tag) <<1>>1)-1) + 34 + 0 + 16] = dyn; else if ((d_tag_utype) (0x6ffffdff - (dyn->d_tag)) < 12) info[(0x6ffffdff - (dyn->d_tag)) + 34 + 0 + 16 + 3] = dyn; else if ((d_tag_utype) (0x6ffffeff - (dyn->d_tag)) < 11) info[(0x6ffffeff - (dyn->d_tag)) + 34 + 0 + 16 + 3 + 12] = dyn; ++dyn; }
It is caused by revision 188118: http://gcc.gnu.org/ml/gcc-cvs/2012-06/msg00028.html
> It is caused by revision 188118: > > http://gcc.gnu.org/ml/gcc-cvs/2012-06/msg00028.html Are you really sure? This change is only a refinement of another one. You're saying it's a regression, but the known-to-work field is empty.
(In reply to comment #5) > > It is caused by revision 188118: > > > > http://gcc.gnu.org/ml/gcc-cvs/2012-06/msg00028.html > > Are you really sure? This change is only a refinement of another one. You're > saying it's a regression, but the known-to-work field is empty. I am 100% sure that it is a 4.8 regression. You can verify it yourself.
Breakpoint 7, fold_binary_loc (loc=2696, code=PLUS_EXPR, type=0x7ffff199e000, op0=0x7ffff1ab8398, op1=0x7ffff1aa6660) at /export/gnu/import/git/gcc/gcc/fold-const.c:10058 10058 return tem; (gdb) call debug_tree (arg0) <mult_expr 0x7ffff1ab6900 type <integer_type 0x7ffff199e690 unsigned int public unsigned SI size <integer_cst 0x7ffff1989d40 constant 32> unit size <integer_cst 0x7ffff1989d60 constant 4> align 32 symtab 0 alias set -1 canonical type 0x7ffff199e690 precision 32 min <integer_cst 0x7ffff19a40c0 0> max <integer_cst 0x7ffff19a40a0 4294967295> pointer_to_this <pointer_type 0x7ffff19b60a8>> arg 0 <nop_expr 0x7ffff1ab8348 type <integer_type 0x7ffff199e690 unsigned int> arg 0 <minus_expr 0x7ffff1ab6870 type <integer_type 0x7ffff1aa73f0 Elf32_Sword> arg 0 <integer_cst 0x7ffff1aa65a0 constant 1879047935> arg 1 <component_ref 0x7ffff1aba0e0 type <integer_type 0x7ffff1aa73f0 Elf32_Sword> arg 0 <indirect_ref 0x7ffff1ab82d0 type <record_type 0x7ffff1aa7540 Elf32_Dyn> arg 0 <var_decl 0x7ffff1996280 dyn> x.i:22:23> arg 1 <field_decl 0x7ffff19b3260 d_tag> x.i:22:23> x.i:22:18>> arg 1 <integer_cst 0x7ffff1aa6620 type <integer_type 0x7ffff199e690 unsigned int> constant 4> x.i:22:6> (gdb) call debug_tree (arg1) <integer_cst 0x7ffff1aa6660 type <integer_type 0x7ffff199e000 sizetype> constant 48> (gdb) call debug_tree (tem) <plus_expr 0x7ffff1ab6990 type <integer_type 0x7ffff199e000 sizetype public unsigned SI size <integer_cst 0x7ffff1989d40 constant 32> unit size <integer_cst 0x7ffff1989d60 constant 4> align 32 symtab 0 alias set -1 canonical type 0x7ffff199e000 precision 32 min <integer_cst 0x7ffff1989d80 0> max <integer_cst 0x7ffff1989000 4294967295>> arg 0 <mult_expr 0x7ffff1ab6960 type <integer_type 0x7ffff199e000 sizetype> arg 0 <nop_expr 0x7ffff1ab8438 type <integer_type 0x7ffff199e000 sizetype> arg 0 <component_ref 0x7ffff1aba0e0 type <integer_type 0x7ffff1aa73f0 Elf32_Sword> arg 0 <indirect_ref 0x7ffff1ab82d0 type <record_type 0x7ffff1aa7540 Elf32_Dyn> arg 0 <var_decl 0x7ffff1996280 dyn> x.i:22:23> arg 1 <field_decl 0x7ffff19b3260 d_tag> x.i:22:23> x.i:22:6> arg 1 <integer_cst 0x7ffff1aa66c0 constant 4294967292>> arg 1 <integer_cst 0x7ffff1aa66a0 type <integer_type 0x7ffff199e000 sizetype> constant 3221224492>> (gdb) Integer constant multiply operand is signed.
Does this make any senses? diff --git a/gcc/fold-const.c b/gcc/fold-const.c index 5ea5110..50879d6 100644 --- a/gcc/fold-const.c +++ b/gcc/fold-const.c @@ -7038,6 +7038,24 @@ fold_to_nonsharp_ineq_using_bound (location_t loc, tree ineq, tree bound) return fold_build2_loc (loc, GE_EXPR, type, a, y); } +static unsigned int +unsigned_expr_operands (tree op) +{ + switch (TREE_CODE (op)) + { + case MULT_EXPR: + if (!unsigned_expr_operands (TREE_OPERAND (op, 0))) + return 0; + return unsigned_expr_operands (TREE_OPERAND (op, 1)); + + case NOP_EXPR: + return unsigned_expr_operands (TREE_OPERAND (op, 0)); + + default: + return TYPE_UNSIGNED (TREE_TYPE (op)); + } +} + /* Fold a sum or difference of at least one multiplication. Returns the folded tree or NULL if no simplification could be made. */ @@ -7048,6 +7066,17 @@ fold_plusminus_mult_expr (location_t loc, enum tree_code code, tree type, tree arg00, arg01, arg10, arg11; tree alt0 = NULL_TREE, alt1 = NULL_TREE, same; + /* Make sure the type is not saturating and has the signedness of + the stripped operands, as fold_plusminus_mult_expr will re-associate. + ??? The latter condition should use TYPE_OVERFLOW_* flags instead. */ + if (!((TREE_CODE (arg0) == MULT_EXPR + || TREE_CODE (arg1) == MULT_EXPR) + && !TYPE_SATURATING (type) + && TYPE_UNSIGNED (type) == unsigned_expr_operands (arg0) + && TYPE_UNSIGNED (type) == unsigned_expr_operands (arg1) + && (!FLOAT_TYPE_P (type) || flag_associative_math))) + return NULL_TREE; + /* (A * C) +- (B * C) -> (A+-B) * C. (A * C) +- A -> A * (C+-1). We are most concerned about the case where C is a constant, @@ -10042,17 +10071,7 @@ fold_binary_loc (location_t loc, } } - /* Handle (A1 * C1) + (A2 * C2) with A1, A2 or C1, C2 being the same or - one. Make sure the type is not saturating and has the signedness of - the stripped operands, as fold_plusminus_mult_expr will re-associate. - ??? The latter condition should use TYPE_OVERFLOW_* flags instead. */ - if ((TREE_CODE (arg0) == MULT_EXPR - || TREE_CODE (arg1) == MULT_EXPR) - && !TYPE_SATURATING (type) - && TYPE_UNSIGNED (type) == TYPE_UNSIGNED (TREE_TYPE (arg0)) - && TYPE_UNSIGNED (type) == TYPE_UNSIGNED (TREE_TYPE (arg1)) - && (!FLOAT_TYPE_P (type) || flag_associative_math)) - { + { tree tem = fold_plusminus_mult_expr (loc, code, type, arg0, arg1); if (tem) return tem; @@ -10668,17 +10687,7 @@ fold_binary_loc (location_t loc, && (tem = distribute_real_division (loc, code, type, arg0, arg1))) return tem; - /* Handle (A1 * C1) - (A2 * C2) with A1, A2 or C1, C2 being the same or - one. Make sure the type is not saturating and has the signedness of - the stripped operands, as fold_plusminus_mult_expr will re-associate. - ??? The latter condition should use TYPE_OVERFLOW_* flags instead. */ - if ((TREE_CODE (arg0) == MULT_EXPR - || TREE_CODE (arg1) == MULT_EXPR) - && !TYPE_SATURATING (type) - && TYPE_UNSIGNED (type) == TYPE_UNSIGNED (TREE_TYPE (arg0)) - && TYPE_UNSIGNED (type) == TYPE_UNSIGNED (TREE_TYPE (arg1)) - && (!FLOAT_TYPE_P (type) || flag_associative_math)) - { + { tree tem = fold_plusminus_mult_expr (loc, code, type, arg0, arg1); if (tem) return tem;
> I am 100% sure that it is a 4.8 regression. You can verify it yourself. Precisely I cannot, that's why I'm asking: eric@polaris:~/build/gcc-4_7-branch/native> gcc/xgcc -Bgcc -S -mx32 -mtune=generic -march=x86-64 pr55142.c -S -O2 -fPIC -maddress-mode=long xgcc: error: unrecognized command line option '-maddress-mode=long' eric@polaris:~/build/gcc-4_7-branch/native> gcc/xgcc -v Using built-in specs. COLLECT_GCC=gcc/xgcc Target: x86_64-suse-linux Configured with: /home/eric/svn/gcc-4_7-branch/configure --build=x86_64-suse-linux --prefix=/home/eric/install/gcc-4_7-branch --enable-languages=c,c++,objc,obj-c++,java,fortran,ada --enable-__cxa_atexit --disable-nls --disable-libmudflap Thread model: posix gcc version 4.7.3 20121021 (prerelease) [gcc-4_7-branch revision 192648] (GCC)
-maddress-mode=long is new in 4.8. GCC 4.7 only implements -maddress-mode=long equivalent. I backported -maddress-mode=long to hjl/x32/gcc-4_7-branch branch: http://gcc.gnu.org/git/?p=gcc.git;a=summary
Investigating.
Recategorizing.
Created attachment 28591 [details] Tentative fix This generates (essentially) the same RTL as in non-PIC mode, so the generated code should be correct if it is correct in non-PIC mode. Tested only in LP64 mode though.
(In reply to comment #13) > Created attachment 28591 [details] > Tentative fix > > This generates (essentially) the same RTL as in non-PIC mode, so the generated > code should be correct if it is correct in non-PIC mode. > > Tested only in LP64 mode though. The patch compiles testcase, but totally miscompiles glibc. I think the bug is in unsigned array index computation as shown in Comment 7. dyn->d_tag is signed type and Pmode != ptr_mode here.
All binaries die before main: Starting program: /export/build/gnu/glibc-x32-long/build-x86_64-linux/libio/bug-fclose1 Program received signal SIGSEGV, Segmentation fault. 0xf7dddc8d in elf_get_dynamic_info (temp=0x0, l=0xf7ffdc18) at get-dynamic-info.h:61 61 + DT_VERSIONTAGNUM + DT_EXTRANUM + DT_VALNUM] = dyn; (gdb) disass $pc -19, +30 Dump of assembler code from 0xf7dddc7a to 0xf7dddc98: 0xf7dddc7a <dl_main+5210>: mov %r8d,%esi 0xf7dddc7d <dl_main+5213>: sub %eax,%esi 0xf7dddc7f <dl_main+5215>: cmp $0xa,%esi 0xf7dddc82 <dl_main+5218>: ja 0xf7ddd431 <dl_main+3089> 0xf7dddc88 <dl_main+5224>: neg %eax 0xf7dddc8a <dl_main+5226>: lea (%rcx,%rax,4),%eax => 0xf7dddc8d <dl_main+5229>: mov %edx,-0x40000300(%rax) 0xf7dddc93 <dl_main+5235>: jmpq 0xf7ddd431 <dl_main+3089> End of assembler dump. (gdb) p/x $rcx $1 = 0xf7ffdc38 (gdb) p/x $rax $2 = 0x37ffe064 (gdb) list 56 else if ((d_tag_utype) DT_VALTAGIDX (dyn->d_tag) < DT_VALNUM) 57 info[DT_VALTAGIDX (dyn->d_tag) + DT_NUM + DT_THISPROCNUM 58 + DT_VERSIONTAGNUM + DT_EXTRANUM] = dyn; 59 else if ((d_tag_utype) DT_ADDRTAGIDX (dyn->d_tag) < DT_ADDRNUM) 60 info[DT_ADDRTAGIDX (dyn->d_tag) + DT_NUM + DT_THISPROCNUM 61 + DT_VERSIONTAGNUM + DT_EXTRANUM + DT_VALNUM] = dyn; 62 ++dyn; 63 } 64 65 #define DL_RO_DYN_TEMP_CNT 8 (gdb) p info $3 = (Elf32_Dyn **) 0xf7ffdc38 (gdb) bt #0 0xf7dddc8d in elf_get_dynamic_info (temp=0x0, l=0xf7ffdc18) at get-dynamic-info.h:61 #1 dl_main (phdr=<optimized out>, phnum=9, user_entry=<optimized out>, auxv=<optimized out>) at rtld.c:1311 #2 0xf7df12c0 in _dl_sysdep_start ( start_argptr=start_argptr@entry=0xffffd100, dl_main=<optimized out>) at ../elf/dl-sysdep.c:241 #3 0xf7ddfaa8 in _dl_start_final (arg=0xffffd100) at rtld.c:331 #4 _dl_start (arg=<optimized out>) at rtld.c:557 #5 0xf7ddc057 in _start () from /export/build/gnu/glibc-x32-long/build-x86_64-linux/elf/ld.so #6 0x00000001 in ?? () #7 0x00000000 in ?? () (gdb)
> The patch compiles testcase, but totally miscompiles glibc. Ouch. Then this means that the existing non-PIC code is wrong as well. > I think the bug is in unsigned array index computation as > shown in Comment 7. dyn->d_tag is signed type and Pmode > != ptr_mode here. Possibly, you must be extra careful with these kinds of awkward setups. We already had similar issues in the past (MEM_REF expansion on SPARC64, array indexes on IA-64/VMS) but the bugs were in the RTL expansion. I'll dig.
T(In reply to comment #16) > > I think the bug is in unsigned array index computation as > > shown in Comment 7. dyn->d_tag is signed type and Pmode > > != ptr_mode here. > > Possibly, you must be extra careful with these kinds of awkward setups. We > already had similar issues in the past (MEM_REF expansion on SPARC64, array > indexes on IA-64/VMS) but the bugs were in the RTL expansion. I'll dig. I agree. It is a pain to work with Pmode == DImode and ptr_mode == SImode for x32. On the other hand, it does expose many issues in middle-end as well as in backend.
(gdb) disass $pc - 19, +25 Dump of assembler code from 0xf7dddc7a to 0xf7dddc93: 0xf7dddc7a <dl_main+5210>: mov %r8d,%esi 0xf7dddc7d <dl_main+5213>: sub %eax,%esi 0xf7dddc7f <dl_main+5215>: cmp $0xa,%esi 0xf7dddc82 <dl_main+5218>: ja 0xf7ddd431 <dl_main+3089> 0xf7dddc88 <dl_main+5224>: neg %eax 0xf7dddc8a <dl_main+5226>: lea (%rcx,%rax,4),%eax => 0xf7dddc8d <dl_main+5229>: mov %edx,-0x40000300(%rax) End of assembler dump. (gdb) p info $4 = (Elf32_Dyn **) 0xf7ffdc38 (gdb) p/x $rax $5 = 0x37ffe064 (gdb) p/x $rax -0x40000300 $6 = 0xfffffffff7ffdd64 (gdb) -0x40000300(%rax) should be zero-extended from SImode to DImode.
This patch: [hjl@gnu-tools-1 tmp]$ cat /tmp/x diff --git a/gcc/expr.c b/gcc/expr.c index 3e8e004..da35488 100644 --- a/gcc/expr.c +++ b/gcc/expr.c @@ -8115,7 +8115,7 @@ expand_expr_real_2 (sepops ops, rtx target, enum machine_mode tmode, And force_operand won't know whether to sign-extend or zero-extend. */ if ((modifier != EXPAND_SUM && modifier != EXPAND_INITIALIZER) - || mode != ptr_mode) + || mode != Pmode) { expand_operands (treeop0, treeop1, subtarget, &op0, &op1, EXPAND_NORMAL); [hjl@gnu-tools-1 tmp]$ removes most of glibc failures. Does it make any senses? If it does, there is another place in expand_expr_real_2: [hjl@gnu-tools-1 tmp]$ cat /tmp/y diff --git a/gcc/expr.c b/gcc/expr.c index 3e8e004..816fdb8 100644 --- a/gcc/expr.c +++ b/gcc/expr.c @@ -8157,7 +8157,7 @@ expand_expr_real_2 (sepops ops, rtx target, enum machine_mode tmode, And force_operand won't know whether to sign-extend or zero-extend. */ if ((modifier != EXPAND_SUM && modifier != EXPAND_INITIALIZER) - || mode != ptr_mode) + || mode != Pmode) goto binop; expand_operands (treeop0, treeop1, [hjl@gnu-tools-1 tmp]$
If you compile the testcase with the unmodified compiler but without -fPIC, you get in the assembly file: movl %edx, _dl_rtld_map-1073742800(,%eax,4) I presume that's wrong, correct? Yet the RTL instruction is: (insn:TI 43 38 44 4 (set (mem/f:SI (zero_extend:DI (plus:SI (ashift:SI (reg:SI 0 ax [87]) (const_int 2 [0x2])) (const:SI (plus:SI (symbol_ref:SI ("_dl_rtld_map") [flags 0x42] <var_decl 0x7ffff6d2b7b8 _dl_rtld_map>) (const_int -1073742800 [0xffffffffbffffc30]))))) [3 *_8+0 S4 A32]) (reg:SI 1 dx [orig:82 dyn ] [82])) pr55142.c:32 65 {*movsi_internal} (expr_list:REG_DEAD (reg:SI 0 ax [87]) (nil))) and there is a zero_extend.
(In reply to comment #20) > If you compile the testcase with the unmodified compiler but without -fPIC, you > get in the assembly file: > > movl %edx, _dl_rtld_map-1073742800(,%eax,4) > > I presume that's wrong, correct? Yet the RTL instruction is: That is not wrong. > (insn:TI 43 38 44 4 (set (mem/f:SI (zero_extend:DI (plus:SI (ashift:SI (reg:SI > 0 ax [87]) > (const_int 2 [0x2])) > (const:SI (plus:SI (symbol_ref:SI ("_dl_rtld_map") [flags > 0x42] <var_decl 0x7ffff6d2b7b8 _dl_rtld_map>) > (const_int -1073742800 [0xffffffffbffffc30]))))) [3 > *_8+0 S4 A32]) > (reg:SI 1 dx [orig:82 dyn ] [82])) pr55142.c:32 65 {*movsi_internal} > (expr_list:REG_DEAD (reg:SI 0 ax [87]) > (nil))) > > and there is a zero_extend. (,%eax,4) generates a 0x67 address-size prefix, which zero-extends 32-bit address to 64-bit.
> (,%eax,4) generates a 0x67 address-size prefix, which zero-extends > 32-bit address to 64-bit. OK, I see. Then it would be interesting to have a testcase that generates the problematic mov %edx,-0x40000300(%rax) in order to really understand the issue.
Created attachment 28632 [details] A complete testcase I applied i386 change at http://gcc.gnu.org/bugzilla/attachment.cgi?id=28591 to compile it: [hjl@gnu-tools-1 tmp]$ /export/build/gnu/gcc/release/usr/gcc-4.8.0/bin/gcc -mtune=generic -march=x86-64 -maddress-mode=long -mx32 -O2 -fPIC rtld.c -std=gnu99 -fgnu89-inline -S [hjl@gnu-tools-1 tmp]$ egrep ", -[0-9]*\(%rax" rtld.s movl %ecx, -1073743664(%rax) movl %ecx, -1073742592(%rax) movl %edx, -1073743664(%rax) movl %edx, -1073742592(%rax) [hjl@gnu-tools-1 tmp]$ All those stores should be zero-extended.
> I applied i386 change at > > http://gcc.gnu.org/bugzilla/attachment.cgi?id=28591 > > to compile it: > > [hjl@gnu-tools-1 tmp]$ /export/build/gnu/gcc/release/usr/gcc-4.8.0/bin/gcc > -mtune=generic -march=x86-64 -maddress-mode=long -mx32 -O2 -fPIC rtld.c > -std=gnu99 -fgnu89-inline -S > [hjl@gnu-tools-1 tmp]$ egrep ", -[0-9]*\(%rax" rtld.s > movl %ecx, -1073743664(%rax) > movl %ecx, -1073742592(%rax) > movl %edx, -1073743664(%rax) > movl %edx, -1073742592(%rax) > [hjl@gnu-tools-1 tmp]$ > > All those stores should be zero-extended. OK, thanks. Everything is correctly zero-extended until: case PLUS: case MULT: /* FIXME: For addition, we used to permute the conversion and addition operation only if one operand is a constant and converting the constant does not change it or if one operand is a constant and we are using a ptr_extend instruction (POINTERS_EXTEND_UNSIGNED < 0) even if the resulting address may overflow/underflow. We relax the condition to include zero-extend (POINTERS_EXTEND_UNSIGNED > 0) since the other parts of the compiler depend on it. See PR 49721. We can always safely permute them if we are making the address narrower. */ if (GET_MODE_SIZE (to_mode) < GET_MODE_SIZE (from_mode) || (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1)) && (POINTERS_EXTEND_UNSIGNED != 0 || XEXP (x, 1) == convert_memory_address_addr_space (to_mode, XEXP (x, 1), as)))) return gen_rtx_fmt_ee (GET_CODE (x), to_mode, convert_memory_address_addr_space (to_mode, XEXP (x, 0), as), XEXP (x, 1)); break; It's clear that the distribution is invalid, whatever POINTERS_EXTEND_UNSIGNED is defined to. It becomes valid only under conditions of non-overflow that depend upon the value of POINTERS_EXTEND_UNSIGNED, as stated in the head comment of the function. If POINTERS_EXTEND_UNSIGNED, it's valid if there is no overflow in the address. If POINTERS_EXTEND_UNSIGNED > 0, it's valid if either the constant is positive (and there is no overflow in the address) or the constant is negative and the variable part is sufficiently large. The condition aren't fulfilled here since the constant is very large negative and the variable part small. I think the most robust solution would be to always zero-extend the addresses for -mx32, i.e. output movl %ecx, -1073743664(%eax) even if the address is a PLUS in DImode. Otherwise, we're left with kludges...
(In reply to comment #24) > I think the most robust solution would be to always zero-extend the addresses > for -mx32, i.e. output > movl %ecx, -1073743664(%eax) > even if the address is a PLUS in DImode. Otherwise, we're left with kludges... No, this would be one giant kludge by itself. The failure just shows that the controversial patch [1] for PR 49721 was wrong. Quote from [1]: --quote-- I am checking in this patch, which only affects x32 and nothing else. This one character change, from POINTERS_EXTEND_UNSIGNED < 0 to POINTERS_EXTEND_UNSIGNED != 0 creates a working x32 GCC. This isn't perfect. I have tried many different approaches without any success. I will revisit it if we run into any problems with x32 applications. --/qoute-- So, we run into problem. [1] http://gcc.gnu.org/ml/gcc-patches/2011-08/msg01618.html
> I think the most robust solution would be to always zero-extend the addresses > for -mx32, i.e. output > movl %ecx, -1073743664(%eax) > even if the address is a PLUS in DImode. Otherwise, we're left with kludges... Also, please note that the failure is with -maddress-mode=long. This flag was introduced with the intention to get rid of as many 0x67 address size prefixes as possible. The problem is even listed at the bottom of x32-abi project page [1]. [1] https://sites.google.com/site/x32abi/
> No, this would be one giant kludge by itself. The failure just shows that the > controversial patch [1] for PR 49721 was wrong. > > Quote from [1]: > > --quote-- > I am checking in this patch, which only affects x32 > and nothing else. This one character change, from > > POINTERS_EXTEND_UNSIGNED < 0 > > to > > POINTERS_EXTEND_UNSIGNED != 0 > > creates a working x32 GCC. This isn't perfect. I have > tried many different approaches without any success. > I will revisit it if we run into any problems with x32 > applications. > --/qoute-- > > So, we run into problem. It's not totally wrong, given the context of convert_memory_address_addr_space which is already optimistically correct only. The problem is that the case POINTERS_EXTEND_UNSIGNED > 0 is trickier than POINTERS_EXTEND_UNSIGNED == 0 because RTL constants are sign-extended: in the latter case, everything is sign-extended so this is symmetric and simple; in the former case, one part is zero-extended and the other part sign-extended and, in order to make this work under the same hypothesis of non-overflow, one would need to know which part is bigger. In the case at hand, the code would be correct if the constant was zero-extended and the register sign-extended, not the reverse as currently.
So for POINTERS_EXTEND_UNSIGNED > 0, we should transform (zero_extend:DI (plus:SI (FOO:SI) (const_int Y))) in such a way that it won't cause ICE and zero-extend the result. For X32, we just need to generate (plus:SI (REG:SI) (const_int Y))
> So for POINTERS_EXTEND_UNSIGNED > 0, we should transform > > (zero_extend:DI (plus:SI (FOO:SI) (const_int Y))) > > in such a way that it won't cause ICE and zero-extend the > result. For X32, we just need to generate > > (plus:SI (REG:SI) (const_int Y)) The ICE is a minor detail, the real issue is convert_memory_address_addr_space and PR middle-end/49721. The unmodified compiler generates the same problematic instructions for the full testcase without -fPIC. It's clear that Richard's change, aka the un-sign-extension of sizetype constants, is an earthquake here.
(In reply to comment #24) > > I think the most robust solution would be to always zero-extend the addresses > for -mx32, i.e. output > movl %ecx, -1073743664(%eax) > even if the address is a PLUS in DImode. Otherwise, we're left with kludges... Since x32 runs in 64-bit mode, for address -0x40000300(%rax), hardware sign-extends displacement from 32-bits to 64-bits and adds it to %rax. But x32 wants 32-bit -0x40000300, not 64-bit -0x40000300. I believe it is correct for GCC to use 32-bit registers instead of 64-bit registers when displacement is negative.
Created attachment 28644 [details] A patch This patch prints SImode register names to force addr32 prefix if displacement < -16*1024*1024 so that 32-bit displacement isn't sign-extended to 64-bit. It also removes "code = 'l'" since 'l' isn't supported.
Can we limit (zero_extend:DI (plus:SI (FOO:SI) (const_int Y))) to (plus:DI (zero_extend:DI (FOO:SI)) (const_int Y)) transformation to Y > TARGET SPECIFIC VALUE? For x86-64, it is -16*1023*1024.
> Can we limit > > (zero_extend:DI (plus:SI (FOO:SI) (const_int Y))) > > to > > (plus:DI (zero_extend:DI (FOO:SI)) (const_int Y)) > > transformation to Y > TARGET SPECIFIC VALUE? For x86-64, it is -16*1023*1024. That would really be hackish... I think that the (slightly less hackish) change in ix86_print_operand_address would be preferable.
A patch is posted at http://gcc.gnu.org/ml/gcc-patches/2012-11/msg00991.html
This regression is triggered by revision 188008: http://gcc.gnu.org/ml/gcc-cvs/2012-05/msg00038.html aka the un-sign-extension of sizetype constants.
Author: hjl Date: Tue Nov 13 18:35:32 2012 New Revision: 193483 URL: http://gcc.gnu.org/viewcvs?root=gcc&view=rev&rev=193483 Log: Workaround PR middle-end/55142 gcc/ 2012-11-13 Eric Botcazou <ebotcazou@adacore.com> H.J. Lu <hongjiu.lu@intel.com> PR middle-end/55142 * config/i386/i386.c (legitimize_pic_address): Properly handle REG + CONST. (ix86_print_operand_address): Set code to 'k' when forcing addr32 prefix. For x32, zero-extend negative displacement if it < -16*1024*1024. gcc/testsuite/ 2012-11-13 H.J. Lu <hongjiu.lu@intel.com> PR middle-end/55142 * gcc.target/i386/pr55142-1.c: New file. * gcc.target/i386/pr55142-2.c: Likewise. Added: trunk/gcc/testsuite/gcc.target/i386/pr55142-1.c trunk/gcc/testsuite/gcc.target/i386/pr55142-2.c Modified: trunk/gcc/ChangeLog trunk/gcc/config/i386/i386.c trunk/gcc/testsuite/ChangeLog
Hopefully.
Author: hjl Date: Mon Nov 19 19:17:05 2012 New Revision: 193635 URL: http://gcc.gnu.org/viewcvs?root=gcc&view=rev&rev=193635 Log: Workaround PR middle-end/55142 gcc/ 2012-11-19 H.J. Lu <hongjiu.lu@intel.com> Backported from mainline 2012-11-13 Eric Botcazou <ebotcazou@adacore.com> H.J. Lu <hongjiu.lu@intel.com> PR middle-end/55142 * config/i386/i386.c (legitimize_pic_address): Properly handle REG + CONST. (ix86_print_operand_address): Set code to 'k' when forcing addr32 prefix. For x32, zero-extend negative displacement if it < -16*1024*1024. gcc/testsuite/ 2012-11-19 H.J. Lu <hongjiu.lu@intel.com> Backported from mainline 2012-11-13 H.J. Lu <hongjiu.lu@intel.com> PR middle-end/55142 * gcc.target/i386/pr55142-1.c: New file. * gcc.target/i386/pr55142-2.c: Likewise. Added: branches/gcc-4_7-branch/gcc/testsuite/gcc.target/i386/pr55142-1.c branches/gcc-4_7-branch/gcc/testsuite/gcc.target/i386/pr55142-2.c Modified: branches/gcc-4_7-branch/gcc/ChangeLog branches/gcc-4_7-branch/gcc/config/i386/i386.c branches/gcc-4_7-branch/gcc/testsuite/ChangeLog
(In reply to Eric Botcazou from comment #37) > Hopefully. But not correctly as I reported in bug 49721 as the workaround does not work for other ptr_mode==SImode, Pmode==DImode and POINTERS_EXTEND_UNSIGNED>0 targets. Let me try to fix this correctly.
Dup of bug 49721 really. *** This bug has been marked as a duplicate of bug 49721 ***