$ cat ~/ice2.i typedef unsigned int _GCC_ATTR_ALIGN_u64t __attribute__((__mode__(__DI__))); typedef _GCC_ATTR_ALIGN_u64t _Uint64t __attribute__((__aligned__(8))); typedef unsigned int _GCC_ATTR_ALIGN_u8t __attribute__((__mode__(__QI__))); typedef _GCC_ATTR_ALIGN_u8t _Uint8t __attribute__((__aligned__(1))); typedef _Uint8t uint8_t; typedef _Uint64t uint64_t; typedef __builtin_neon_udi uint64x1_t; typedef struct uint64x1x4_t { uint64x1_t val[4]; } uint64x1x4_t; __extension__ static __inline uint64x1x4_t __attribute__ ((__always_inline__)) vld4_u64 (const uint64_t * __a) { union { uint64x1x4_t __i; __builtin_neon_oi __o; } __rv; __rv.__o = __builtin_neon_vld4v8hi ((const __builtin_neon_hi *) __a); return __rv.__i; } void test( uint8_t * dst, uint8_t * src, unsigned nbytes) { uint64x1x4_t tmp; while (nbytes >= 32) { tmp = vld4_u64((uint64_t *)src); vst4_u64((uint64_t *)dst, tmp); } } $ ./xgcc -v Using built-in specs. Target: arm-unknown-linux-gnueabi Configured with: ../configure --target=arm-unknown-linux-gnueabi --prefix=/home/ryan/x-tools/arm-unknown-linux-gnueabi --with-sysroot=/home/ryan/x-tools/arm-unknown-linux-gnueabi/arm-unknown-linux-gnueabi/sys-root --enable-languages=c,c++ --disable-multilib --with-float=soft --disable-sjlj-exceptions --enable-__cxa_atexit --with-local-prefix=/home/ryan/x-tools/arm-unknown-linux-gnueabi/arm-unknown-linux-gnueabi/sys-root --disable-nls --enable-threads=posix --enable-symvers=gnu --enable-c99 --enable-long-long --enable-target-optspac Thread model: posix gcc version 4.5.0 20100329 (experimental) [trunk revision 157802] (GCC) $ ./xgcc -B. 
~/ice2.i -O3 -mfpu=neon -mfloat-abi=softfp -c /home/ryan/ice2.i: In function 'test': /home/ryan/ice2.i:22:1: error: unable to find a register to spill in class 'GENERAL_REGS' /home/ryan/ice2.i:22:1: error: this is the insn: (insn 96 10 12 3 /home/ryan/ice2.i:12 (set (reg:XI 136 [ D.3641 ]) (const_int 0 [0x0])) 732 {*neon_movxi} (expr_list:REG_EQUAL (const_int 0 [0x0]) (nil))) /home/ryan/ice2.i:22:1: internal compiler error: in spill_failure, at reload1.c:2158 Please submit a full bug report, with preprocessed source if appropriate. See <http://gcc.gnu.org/bugs.html> for instructions. On the 4.3 branch (4.3.5 20100330 (prerelease)), the same testcase causes a different ICE: $ ./xgcc -B. ~/ice2.i -O3 -mfpu=neon -mfloat-abi=softfp -c /home/ryan/ice2.i: In function 'test': /home/ryan/ice2.i:22: internal compiler error: in record_store, at dse.c:1360 Please submit a full bug report, with preprocessed source if appropriate. See <http://gcc.gnu.org/bugs.html> for instructions.
Hmm - so why is it that we add an initialization for reg:XI 136 with a const_int 0 in .175r.init_regs? The dump says: adding initialization in test of reg 136 at in block 3 for insn 12. (insn 91 11 12 3 /tmp/n.c:13 (set (reg:XI 136 [ D.3722 ]) (const_int 0 [0x0])) -1 (nil)) There's something fundamentally funny going on here.
(In reply to comment #1) > Hmm - so why is it that we add an initialization for reg:XI 136 with a > const_int 0 in .175r.init_regs Because init_regs thinks the pseudo register 136 is used uninitialized. In fact that is the whole point of init_regs :).
Bah, I know the problem. The base pattern is flawed. Testing a patch. Ramana
I think this is one more case of the ARM backend lying to the general infrastructure. We expand into ld4qav8hi which happens to be in the following form. Note if you look at init_regs, there is no use of reg:XI 136 before this point, thus data flow assumes that insn 12 is using an uninitialized value in this process. (insn 12 10 13 3 /tmp/n.c:13 (parallel [ (set (reg:XI 136 [ D.3722 ]) (unspec:XI [ (mem:XI (reg/v/f:SI 141 [ src ]) [0 S64 A64]) (reg:XI 136 [ D.3722 ]) (unspec:V8HI [ (const_int 0 [0x0]) ] 191) ] 111)) Why do we need the reg:XI 136 inside the UNSPEC here? We could very well do without that in the UNSPEC because all we are saying is that an XImode value is written to by the load from the memory location described. I'm not an expert on the Neon backend and thus someone else who knows more about the Neon backend should comment about this one. Thus the patch that I am testing is as below which at least seems to fix the ICE and didn't show any other issues. Index: neon.md =================================================================== --- neon.md (revision 158138) +++ neon.md (working copy) @@ -4711,26 +4711,25 @@ (define_insn "neon_vld4<mode>" ) (define_expand "neon_vld4<mode>" - [(match_operand:XI 0 "s_register_operand" "=w") - (match_operand:SI 1 "s_register_operand" "+r") + [(match_operand:XI 0 "s_register_operand" "") + (match_operand:SI 1 "s_register_operand" "") (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] "TARGET_NEON" { - emit_insn (gen_neon_vld4qa<mode> (operands[0], operands[0], + emit_insn (gen_neon_vld4qa<mode> (operands[0], operands[1], operands[1])); - emit_insn (gen_neon_vld4qb<mode> (operands[0], operands[0], + emit_insn (gen_neon_vld4qb<mode> (operands[0], operands[1], operands[1])); DONE; }) (define_insn "neon_vld4qa<mode>" [(set (match_operand:XI 0 "s_register_operand" "=w") - (unspec:XI [(mem:XI (match_operand:SI 3 "s_register_operand" "2")) - (match_operand:XI 1 "s_register_operand" "0") + (unspec:XI [(mem:XI 
(match_operand:SI 2 "s_register_operand" "1")) (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_VLD4A)) - (set (match_operand:SI 2 "s_register_operand" "=r") - (plus:SI (match_dup 3) + (set (match_operand:SI 1 "s_register_operand" "=r") + (plus:SI (match_dup 2) (const_int 32)))] "TARGET_NEON" { @@ -4740,7 +4739,7 @@ (define_insn "neon_vld4qa<mode>" ops[1] = gen_rtx_REG (DImode, regno + 4); ops[2] = gen_rtx_REG (DImode, regno + 8); ops[3] = gen_rtx_REG (DImode, regno + 12); - ops[4] = operands[2]; + ops[4] = operands[1]; output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, [%4]!", ops); return ""; } @@ -4749,12 +4748,11 @@ (define_insn "neon_vld4qa<mode>" (define_insn "neon_vld4qb<mode>" [(set (match_operand:XI 0 "s_register_operand" "=w") - (unspec:XI [(mem:XI (match_operand:SI 3 "s_register_operand" "2")) - (match_operand:XI 1 "s_register_operand" "0") + (unspec:XI [(mem:XI (match_operand:SI 2 "s_register_operand" "1")) (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_VLD4B)) - (set (match_operand:SI 2 "s_register_operand" "=r") - (plus:SI (match_dup 3) + (set (match_operand:SI 1 "s_register_operand" "=r") + (plus:SI (match_dup 2) (const_int 32)))] "TARGET_NEON" { @@ -4764,7 +4762,7 @@ (define_insn "neon_vld4qb<mode>" ops[1] = gen_rtx_REG (DImode, regno + 6); ops[2] = gen_rtx_REG (DImode, regno + 10); ops[3] = gen_rtx_REG (DImode, regno + 14); - ops[4] = operands[2]; + ops[4] = operands[1]; output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, [%4]!", ops); return ""; } Also the constraints in this expander routine are probably superfluous and could be corrected. (define_expand "neon_vld4<mode>" [(match_operand:XI 0 "s_register_operand" "=w") (match_operand:SI 1 "s_register_operand" "+r") (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] "TARGET_NEON"
Author: rsandifo Date: Wed Mar 30 14:52:38 2011 New Revision: 171729 URL: http://gcc.gnu.org/viewcvs?root=gcc&view=rev&rev=171729 Log: gcc/ 2011-03-30 Richard Sandiford <richard.sandiford@linaro.org> Ramana Radhakrishnan <ramana.radhakrishnan@linaro.org> PR target/43590 * config/arm/neon.md (neon_vld3qa<mode>, neon_vld4qa<mode>): Remove operand 1 and reshuffle the operands to match. (neon_vld3<mode>, neon_vld4<mode>): Update accordingly. Modified: trunk/gcc/ChangeLog trunk/gcc/config/arm/neon.md
Fixed on trunk.
Fixed from 4.7.0 onwards.