With current trunk (revision 140117): (sid)1122:tbm@em64t: ..4.3-2008-09-08-r140117/gcc] ./cc1plus -quiet -m64 -msse3 ~/framewave-Set.ii .... domain/fwImage/include/Set.h: In function 'void SSE2::Set_AC4R_SETUP_I(long long int __vector__&, XMM128&, const Fw16s*)': domain/fwImage/include/Set.h:134: error: unrecognizable insn: (insn 73 72 74 2 /usr/lib/gcc-snapshot/lib/gcc/x86_64-linux-gnu/4.4.0/include/emmintrin.h:596 (set (reg:V8HI 117) (vec_merge:V8HI (vec_duplicate:V8HI (const_int 0 [0x0])) (reg:V8HI 117) (const_int 2 [0x2]))) -1 (nil)) domain/fwImage/include/Set.h:134: internal compiler error: in extract_insn, at recog.c:2027
Created attachment 16257 [details] Preprocessed code
Reducing ...
Reduced testcase: typedef short __v8hi __attribute__ ((__vector_size__ (16))); typedef long long __m128i __attribute__ ((__vector_size__ (16))); __m128i Set_AC4R_SETUP_I( const short *val ) { short D2073 = *val; short D2076 = *(val + 2); short D2079 = *(val + 4); __v8hi D2094 = {D2073, D2076, D2079, 0, D2073, D2076, D2079, 0}; return (__m128i)D2094; }
It is caused by my patch: http://gcc.gnu.org/ml/gcc-cvs/2008-05/msg00569.html
This patch fixes the crash: --- i386.c.sse2 2008-09-08 21:17:15.000000000 -0700 +++ i386.c 2008-09-08 21:17:32.000000000 -0700 @@ -26886,7 +26886,7 @@ static void ix86_expand_vector_init_interleave (enum machine_mode mode, rtx target, rtx *ops, int n) { - enum machine_mode first_imode, second_imode, third_imode; + enum machine_mode first_imode, second_imode, third_imode, inner_mode; int i, j; rtx op0, op1; rtx (*gen_load_even) (rtx, rtx, rtx); @@ -26899,6 +26899,7 @@ ix86_expand_vector_init_interleave (enum gen_load_even = gen_vec_setv8hi; gen_interleave_first_low = gen_vec_interleave_lowv4si; gen_interleave_second_low = gen_vec_interleave_lowv2di; + inner_mode = HImode; first_imode = V4SImode; second_imode = V2DImode; third_imode = VOIDmode; @@ -26907,6 +26908,7 @@ ix86_expand_vector_init_interleave (enum gen_load_even = gen_vec_setv16qi; gen_interleave_first_low = gen_vec_interleave_lowv8hi; gen_interleave_second_low = gen_vec_interleave_lowv4si; + inner_mode = QImode; first_imode = V8HImode; second_imode = V4SImode; third_imode = V2DImode; @@ -26935,7 +26937,9 @@ ix86_expand_vector_init_interleave (enum emit_move_insn (op0, gen_lowpart (mode, op1)); /* Load even elements into the second positon. */ - emit_insn ((*gen_load_even) (op0, ops [i + i + 1], + emit_insn ((*gen_load_even) (op0, + force_reg (inner_mode, + ops [i + i + 1]), const1_rtx)); /* Cast vector to FIRST_IMODE vector. */ However, the generated code doesn't look good, for two reasons: 1. By default, gcc won't generate inter-unit moves between GPR and SSE registers. 2. ix86_expand_vector_init doesn't take advantage of the fact that the second half of the vector is the same as the first half: __v8hi D2094 = {D2073, D2076, D2079, 0, D2073, D2076, D2079, 0};
This patch disables the SSE4.1 (V16QImode) and SSE2 (V8HImode) interleave optimization if inter-unit moves are disabled or if the second half of the vector elements duplicates the first half: --- i386.c.sse2 2008-09-08 21:17:15.000000000 -0700 +++ i386.c 2008-09-08 21:36:38.000000000 -0700 @@ -26886,7 +26886,7 @@ static void ix86_expand_vector_init_interleave (enum machine_mode mode, rtx target, rtx *ops, int n) { - enum machine_mode first_imode, second_imode, third_imode; + enum machine_mode first_imode, second_imode, third_imode, inner_mode; int i, j; rtx op0, op1; rtx (*gen_load_even) (rtx, rtx, rtx); @@ -26899,6 +26899,7 @@ ix86_expand_vector_init_interleave (enum gen_load_even = gen_vec_setv8hi; gen_interleave_first_low = gen_vec_interleave_lowv4si; gen_interleave_second_low = gen_vec_interleave_lowv2di; + inner_mode = HImode; first_imode = V4SImode; second_imode = V2DImode; third_imode = VOIDmode; @@ -26907,6 +26908,7 @@ ix86_expand_vector_init_interleave (enum gen_load_even = gen_vec_setv16qi; gen_interleave_first_low = gen_vec_interleave_lowv8hi; gen_interleave_second_low = gen_vec_interleave_lowv4si; + inner_mode = QImode; first_imode = V8HImode; second_imode = V4SImode; third_imode = V2DImode; @@ -26935,7 +26937,9 @@ ix86_expand_vector_init_interleave (enum emit_move_insn (op0, gen_lowpart (mode, op1)); /* Load even elements into the second positon. */ - emit_insn ((*gen_load_even) (op0, ops [i + i + 1], + emit_insn ((*gen_load_even) (op0, + force_reg (inner_mode, + ops [i + i + 1]), const1_rtx)); /* Cast vector to FIRST_IMODE vector. 
*/ @@ -26998,7 +27002,8 @@ ix86_expand_vector_init_general (bool mm { rtx ops[32], op0, op1; enum machine_mode half_mode = VOIDmode; - int n, i; + int n, i, h; + bool duplicated; switch (mode) { @@ -27045,18 +27050,27 @@ half: return; case V16QImode: - if (!TARGET_SSE4_1) + if (!TARGET_SSE4_1 || !TARGET_INTER_UNIT_MOVES) break; /* FALLTHRU */ case V8HImode: - if (!TARGET_SSE2) + if (!TARGET_SSE2 || !TARGET_INTER_UNIT_MOVES) break; n = GET_MODE_NUNITS (mode); + h = n >> 1; + duplicated = true; for (i = 0; i < n; i++) - ops[i] = XVECEXP (vals, 0, i); - ix86_expand_vector_init_interleave (mode, target, ops, n >> 1); + { + ops[i] = XVECEXP (vals, 0, i); + if (i >= h && !rtx_equal_p (ops[i], ops[i - h])) + duplicated = false; + } + if (duplicated) + break; + + ix86_expand_vector_init_interleave (mode, target, ops, h); return; case V4HImode:
A patch is posted at http://gcc.gnu.org/ml/gcc-patches/2008-09/msg00727.html
Subject: Bug 37434 Author: hjl Date: Wed Sep 10 14:14:28 2008 New Revision: 140231 URL: http://gcc.gnu.org/viewcvs?root=gcc&view=rev&rev=140231 Log: gcc/ 2008-09-10 H.J. Lu <hongjiu.lu@intel.com> PR target/37434: * config/i386/i386.c (ix86_expand_vector_init_interleave): Force the even element into register. (ix86_expand_vector_init_general): Don't use ix86_expand_vector_init_interleave on V16QImode and V8HImode if we can't move from GPR to SSE register directly. gcc/testsuite/ 2008-09-10 H.J. Lu <hongjiu.lu@intel.com> PR target/37434: * gcc.target/i386/pr37434-1.c: New. * gcc.target/i386/pr37434-2.c: Likewise. * gcc.target/i386/pr37434-3.c: Likewise. * gcc.target/i386/pr37434-4.c: Likewise. * gcc.target/i386/sse2-set-v8hi-1a.c: Likewise. * gcc.target/i386/sse2-set-v8hi-2a.c: Likewise. * gcc.target/i386/sse4_1-set-v16qi-1a.c: Likewise. * gcc.target/i386/sse4_1-set-v16qi-2a.c: Likewise. * gcc.target/i386/sse4_1-set-v16qi-3a.c: Likewise. Added: trunk/gcc/testsuite/gcc.target/i386/pr37434-1.c trunk/gcc/testsuite/gcc.target/i386/pr37434-2.c trunk/gcc/testsuite/gcc.target/i386/pr37434-3.c trunk/gcc/testsuite/gcc.target/i386/pr37434-4.c trunk/gcc/testsuite/gcc.target/i386/sse2-set-v8hi-1a.c trunk/gcc/testsuite/gcc.target/i386/sse2-set-v8hi-2a.c trunk/gcc/testsuite/gcc.target/i386/sse4_1-set-v16qi-1a.c trunk/gcc/testsuite/gcc.target/i386/sse4_1-set-v16qi-2a.c trunk/gcc/testsuite/gcc.target/i386/sse4_1-set-v16qi-3a.c Modified: trunk/gcc/ChangeLog trunk/gcc/config/i386/i386.c trunk/gcc/testsuite/ChangeLog
Fixed.