I find no description for "triplet", so guessing at its use, and see no method of attaching .ii so attempting to fit all info in this form. Problem seems to be from reinterpret_cast between vectors and unions of vectors with arrays of scalars. Occurred in machine-generated code, have not been able to substantially simplify. Error persists with -march=pentium4 -msse2 switches so is independent of whether vectors are generated or just simulated as array. Identical error message occurs in gcc built from 4.1.1 sources on a PowerPC G4. Error does not occur if I change to C-style casts and change extension to .c. Unfortunately, semantics of C++ casting seem to change the C-style cast in error.cpp to a static_cast, which then encounters a semantic error: error2.cpp:144: error: no matching function for call to ‘__v4F::__v4F(float __vector__&)’ error2.cpp:17: note: candidates are: __v4F::__v4F() error2.cpp:17: note: __v4F::__v4F(const __v4F&) Thus the attempt to generate a reinterpret_cast leading to discovery of bug. gcc -v Using built-in specs. Target: i386-redhat-linux Configured with: ../configure --prefix=/usr --mandir=/usr/share/man --infodir=/usr/share/info --enable-shared --enable-threads=posix --enable-checking=release --with-system-zlib --enable-__cxa_atexit --disable-libunwind-exceptions --enable-libgcj-multifile --enable-languages=c,c++,objc,obj-c++,java,fortran,ada --enable-java-awt=gtk --disable-dssi --with-java-home=/usr/lib/jvm/java-1.4.2-gcj-1.4.2.0/jre --with-cpu=generic --host=i386-redhat-linux Thread model: posix gcc version 4.1.1 20060525 (Red Hat 4.1.1-1) gcc -O3 -o error.o error.cpp error.cpp: In function ‘void work_Fused_Pre_iDC_Pos_iDC_158__2(int)’: error.cpp:245: internal compiler error: in convert_move, at expr.c:362 Please submit a full bug report, with preprocessed source if appropriate. See <URL:http://bugzilla.redhat.com/bugzilla> for instructions. Preprocessed source stored into /tmp/ccqrXNUl.out file, please attach this to your bugreport. 
// Preprocessed reproducer (error.cpp) for the internal compiler error reported
// in this thread: "in convert_move, at expr.c:362", raised while compiling
// work_Fused_Pre_iDC_Pos_iDC_158__2 with g++ 4.1.1 at -O3.
//
// NOTE(review): the paste has lost its original line structure -- the
// `# N "file"` linemarkers now sit in the middle of lines and the first
// physical line starts with `//`.  Presumably each linemarker and the cc1plus
// command comment originally stood on a line of their own; restore that before
// trying to compile this text.
//
// Do NOT restyle or "clean up" this code: it is machine-generated, and the
// construct under test is the repeated `reinterpret_cast<__v4F>(vec).a[i]`,
// i.e. a reinterpret_cast from a GCC vector type (__v_4F) to a union of that
// vector with float[4] (__v4F), used both as an lvalue and as an rvalue.
//
// Contents, in order of appearance:
//   * __print_sink__            volatile int written by the file-writer stage.
//   * __v_4F / __v4F            16-byte float vector, and a union of it with
//                               float a[4].
//   * BUFFER_0_1/1_2/2_3        1024-element arrays with HEAD_*/TAIL_* indices
//                               used by the __push__*/__pop__*/__peek__* helpers
//                               (no bounds checks, no wrap-around).
//   * main                      calls init_Fused_Pre_iDC_Pos_iDC_158__2 once,
//                               then for __max_iteration iterations resets the
//                               HEAD/TAIL indices and runs
//                               work_AnonFilter_a0__261_44__1(1024),
//                               work_Fused_Pre_iDC_Pos_iDC_158__2(1) and
//                               FileWriter__301_78__work__4(1024).
//                               init_AnonFilter_a0__261_44__1 is declared but
//                               neither defined nor called in this text.
//   * work_AnonFilter_...       pushes v++ into BUFFER_1_2, ____n times.
//   * init_Fused_...            fills both 16x16 coefficient tables; every one
//                               of the 4 lanes of entry [x][u] is set to
//                               0.5 * Cu * (u * 3.1415927 * (2*x + 1) / 32),
//                               with Cu = 0.70710677 when u == 0 and 1.0
//                               otherwise.  Lanes are written through the
//                               reinterpret_cast-to-union construct.
//   * work_Fused_...            per iteration: gathers 256 vectors from
//                               BUFFER_1_2 (lanes read at offsets +0, +256,
//                               +512, +768), advances TAIL_1_2 by 256, runs a
//                               16x16 multiply-accumulate against
//                               coeff__268__309__2, copies the result reading
//                               index _k + 16*_i, runs a second
//                               multiply-accumulate against coeff__285__329__2,
//                               pushes lane 0 to BUFFER_2_3 and parks lanes
//                               1..3 in __POKEBUFFER____2 (offsets +0, +256,
//                               +512), then pushes those 768 parked values and
//                               advances TAIL_1_2 by a further 768.
//   * FileWriter__301_78__...   pops one float from BUFFER_2_3 and stores it,
//                               converted to int, in __print_sink__; its ____n
//                               argument is not used.
// /usr/libexec/gcc/i386-redhat-linux/4.1.1/cc1plus -quiet -D_GNU_SOURCE error.cpp -quiet -dumpbase error.cpp -mtune=generic -auxbase error -O3 -o - -frandom-seed=0 # 1 "error.cpp" # 1 "<built-in>" # 1 "<command line>" # 1 "error.cpp" volatile int __print_sink__; # 16 "error.cpp" typedef float __v_4F __attribute__ ((vector_size (16))); typedef union {__v_4F v; float a[4];} __v4F; int __max_iteration; int BUFFER_0_1[1023 + 1]; int HEAD_0_1 = 0; int TAIL_0_1 = 0; float BUFFER_1_2[1023 + 1]; int HEAD_1_2 = 0; int TAIL_1_2 = 0; float BUFFER_2_3[1023 + 1]; int HEAD_2_3 = 0; int TAIL_2_3 = 0; void init_AnonFilter_a0__261_44__1(); void work_AnonFilter_a0__261_44__1(int); void init_Fused_Pre_iDC_Pos_iDC_158__2(); void work_Fused_Pre_iDC_Pos_iDC_158__2(int); void FileWriter__301_78__work__4(int); int main(int argc, char **argv) { int a; int n; # 48 "error.cpp" init_Fused_Pre_iDC_Pos_iDC_158__2(); for (n = 0; n < (__max_iteration ); n++) { HEAD_1_2 = 0; TAIL_1_2 = 0; work_AnonFilter_a0__261_44__1(1024 ); HEAD_2_3 = 0; TAIL_2_3 = 0; work_Fused_Pre_iDC_Pos_iDC_158__2(1 ); FileWriter__301_78__work__4(1024 ); } return 0; } inline void __push__0(int data) { BUFFER_0_1[HEAD_0_1]=data; HEAD_0_1++; } inline void __push__1(float data) { BUFFER_1_2[HEAD_1_2]=data; HEAD_1_2++; } float v = 0.0f; void work_AnonFilter_a0__261_44__1(int ____n){ for ( ; (0 < ____n); (____n--)) {{ __push__1(v++); } } } __v_4F coeff__268__309__2[16][16]; __v_4F coeff__285__329__2[16][16]; inline float __pop__2n(int n) { float res=BUFFER_1_2[TAIL_1_2]; TAIL_1_2+=n; return res; } inline float __peek__2(int offs) { return BUFFER_1_2[TAIL_1_2+offs]; } inline void __push__2(float data) { BUFFER_2_3[HEAD_2_3]=data; HEAD_2_3++; } void init_Fused_Pre_iDC_Pos_iDC_158__2(){ float Cu__275__318 = 0.0f; int u__276__319 = 0; int x__277__320 = 0; float __tmp92 = 0.0f; (x__277__320 = 0) ; while ((x__277__320 < 16)) { (u__276__319 = 0) ; while ((u__276__319 < 16)) { (Cu__275__318 = ((float)1.0)) ; if ((u__276__319 == 0)) 
{(Cu__275__318 = ((float)0.70710677)) ; } else {} (__tmp92 = ((((float)0.5) * Cu__275__318) * ((float)( (((double)((((((float)(u__276__319)) * ((float)3.1415927)) * ((((float)2.0) * ((float)(x__277__320))) + ((float)1.0))) / ((float)32.0))))))))) ; (((reinterpret_cast<__v4F>(((coeff__268__309__2)[(int)x__277__320])[(int)u__276__319]).a)[(int)0]) = __tmp92); (((reinterpret_cast<__v4F>(((coeff__268__309__2)[(int)x__277__320])[(int)u__276__319]).a)[(int)1]) = __tmp92); (((reinterpret_cast<__v4F>(((coeff__268__309__2)[(int)x__277__320])[(int)u__276__319]).a)[(int)2]) = __tmp92); (((reinterpret_cast<__v4F>(((coeff__268__309__2)[(int)x__277__320])[(int)u__276__319]).a)[(int)3]) = __tmp92); (u__276__319++); } (x__277__320++); } float Cu__292__338 = 0.0f; int u__293__339 = 0; int x__294__340 = 0; float __tmp93 = 0.0f; (x__294__340 = 0) ; while ((x__294__340 < 16)) { (u__293__339 = 0) ; while ((u__293__339 < 16)) { (Cu__292__338 = ((float)1.0)) ; if ((u__293__339 == 0)) {(Cu__292__338 = ((float)0.70710677)) ; } else {} (__tmp93 = ((((float)0.5) * Cu__292__338) * ((float)( (((double)((((((float)(u__293__339)) * ((float)3.1415927)) * ((((float)2.0) * ((float)(x__294__340))) + ((float)1.0))) / ((float)32.0))))))))) ; (((reinterpret_cast<__v4F>(((coeff__285__329__2)[(int)x__294__340])[(int)u__293__339]).a)[(int)0]) = __tmp93); (((reinterpret_cast<__v4F>(((coeff__285__329__2)[(int)x__294__340])[(int)u__293__339]).a)[(int)1]) = __tmp93); (((reinterpret_cast<__v4F>(((coeff__285__329__2)[(int)x__294__340])[(int)u__293__339]).a)[(int)2]) = __tmp93); (((reinterpret_cast<__v4F>(((coeff__285__329__2)[(int)x__294__340])[(int)u__293__339]).a)[(int)3]) = __tmp93); (u__293__339++); } (x__294__340++); } } float __POKEBUFFER____2[768] = {0}; int __POKEBUFFERHEAD____2 = 0; void work_Fused_Pre_iDC_Pos_iDC_158__2(int ____n){ for ( ; (0 < ____n); (____n--)) {{ static const __v_4F __tmp94 = {((float)0.0), ((float)0.0), ((float)0.0), ((float)0.0)}; __v_4F ___POP_BUFFER_1_1[256]; int 
___POP_INDEX_1_1 = 0; int ___PUSH_INDEX_1_1 = 0; int ___COUNTER_WORK_1_1 = 0; __v_4F ___POP_BUFFER_2_1[256]; int ___PUSH_INDEX_1_2 = 0; __v_4F ___POP_BUFFER_3_1[256]; int ___POP_INDEX_1_3 = 0; int ___PUSH_INDEX_1_3 = 0; int ___COUNTER_WORK_1_3 = 0; int _k__264__304 = 0; int iTimesSumOfWeights_Plus_PartialSum_k__265__305 = 0; int _i__266__306 = 0; __v_4F __tmp74__307; __v_4F tempsum__271__312; int u__272__313 = 0; int x__273__314 = 0; __v_4F __tmp76__315; __v_4F __tmp77__316; __v_4F __tmp78__317; int _k__281__324 = 0; int partialSum_i__282__325 = 0; int _i__283__326 = 0; __v_4F __tmp80__327; __v_4F tempsum__288__332; int u__289__333 = 0; int x__290__334 = 0; __v_4F __tmp82__335; __v_4F __tmp83__336; __v_4F __tmp84__337; int streamItVar81 = 0; ((__POKEBUFFERHEAD____2) = 0) ; (___POP_INDEX_1_1 = -1) ; (___PUSH_INDEX_1_1 = -1) ; (___PUSH_INDEX_1_2 = -1) ; (___POP_INDEX_1_3 = -1) ; (___PUSH_INDEX_1_3 = -1) ; for ((_k__264__304 = 0) ; (_k__264__304 < 16); (_k__264__304++)) {{ (iTimesSumOfWeights_Plus_PartialSum_k__265__305 = _k__264__304) ; for ((_i__266__306 = 0) ; (_i__266__306 < 16); (_i__266__306++)) {{ (((reinterpret_cast<__v4F>(__tmp74__307).a)[(int)0]) = __peek__2(((iTimesSumOfWeights_Plus_PartialSum_k__265__305 + 0) + 0))); (((reinterpret_cast<__v4F>(__tmp74__307).a)[(int)1]) = __peek__2(((iTimesSumOfWeights_Plus_PartialSum_k__265__305 + 0) + 256))); (((reinterpret_cast<__v4F>(__tmp74__307).a)[(int)2]) = __peek__2(((iTimesSumOfWeights_Plus_PartialSum_k__265__305 + 0) + 512))); (((reinterpret_cast<__v4F>(__tmp74__307).a)[(int)3]) = __peek__2(((iTimesSumOfWeights_Plus_PartialSum_k__265__305 + 0) + 768))); (((___POP_BUFFER_1_1[(int)(++___PUSH_INDEX_1_1)])) = (__tmp74__307)); (iTimesSumOfWeights_Plus_PartialSum_k__265__305 = (iTimesSumOfWeights_Plus_PartialSum_k__265__305 + 16)) ; } } } } __pop__2n(256); for ((___COUNTER_WORK_1_1 = 0) ; (___COUNTER_WORK_1_1 < 16); (___COUNTER_WORK_1_1++)) {{ (x__273__314 = 0) ; while ((x__273__314 < 16)) { ((tempsum__271__312) = 
__tmp94); (u__272__313 = 0) ; while ((u__272__313 < 16)) { ((__tmp78__317) = ((___POP_BUFFER_1_1[(int)((1 + ___POP_INDEX_1_1) + u__272__313)]))); ((__tmp77__316) = (((((coeff__268__309__2)[(int)x__273__314])[(int)u__272__313])) * (__tmp78__317))); ((__tmp76__315) = ((tempsum__271__312) + (__tmp77__316))); ((tempsum__271__312) = (__tmp76__315)); (u__272__313++); } (((___POP_BUFFER_2_1[(int)(++___PUSH_INDEX_1_2)])) = (tempsum__271__312)); (x__273__314++); } (___POP_INDEX_1_1 = (___POP_INDEX_1_1 + 16)) ; } } for ((_k__281__324 = 0) ; (_k__281__324 < 16); (_k__281__324++)) {{ (partialSum_i__282__325 = 0) ; for ((_i__283__326 = 0) ; (_i__283__326 < 16); (_i__283__326++)) {{ ((__tmp80__327) = ((___POP_BUFFER_2_1[(int)(0 + (_k__281__324 + (partialSum_i__282__325 + 0)))]))); (((___POP_BUFFER_3_1[(int)(++___PUSH_INDEX_1_3)])) = (__tmp80__327)); (partialSum_i__282__325 = (partialSum_i__282__325 + 16)) ; } } } } for ((___COUNTER_WORK_1_3 = 0) ; (___COUNTER_WORK_1_3 < 16); (___COUNTER_WORK_1_3++)) {{ (x__290__334 = 0) ; while ((x__290__334 < 16)) { ((tempsum__288__332) = __tmp94); (u__289__333 = 0) ; while ((u__289__333 < 16)) { ((__tmp84__337) = ((___POP_BUFFER_3_1[(int)((1 + ___POP_INDEX_1_3) + u__289__333)]))); ((__tmp83__336) = (((((coeff__285__329__2)[(int)x__290__334])[(int)u__289__333])) * (__tmp84__337))); ((__tmp82__335) = ((tempsum__288__332) + (__tmp83__336))); ((tempsum__288__332) = (__tmp82__335)); (u__289__333++); } __push__2(((reinterpret_cast<__v4F>(tempsum__288__332).a)[(int)0])); (((__POKEBUFFER____2)[(int)((__POKEBUFFERHEAD____2) + 0)]) = ((reinterpret_cast<__v4F>(tempsum__288__332).a)[(int)1])); (((__POKEBUFFER____2)[(int)((__POKEBUFFERHEAD____2) + 256)]) = ((reinterpret_cast<__v4F>(tempsum__288__332).a)[(int)2])); (((__POKEBUFFER____2)[(int)((__POKEBUFFERHEAD____2) + 512)]) = ((reinterpret_cast<__v4F>(tempsum__288__332).a)[(int)3])); ((__POKEBUFFERHEAD____2)++); (x__290__334++); } (___POP_INDEX_1_3 = (___POP_INDEX_1_3 + 16)) ; } } ((__POKEBUFFERHEAD____2) 
= 0) ; for ((streamItVar81 = 0) ; (streamItVar81 < 768); (streamItVar81++)) {{ __push__2(((__POKEBUFFER____2)[(int)(__POKEBUFFERHEAD____2)])); ((__POKEBUFFERHEAD____2)++); } } __pop__2n(768); } } } inline float __pop__3() { float res=BUFFER_2_3[TAIL_2_3]; TAIL_2_3++; return res; } int output_count = 0; void FileWriter__301_78__work__4(int ____n) { __print_sink__ = (int)__pop__3(); }
With checking we get a different ICE: t1.cc: In function ‘void f()’: t1.cc:3: error: statement makes a memory store, but has no V_MAY_DEFS nor V_MUST_DEFS VIEW_CONVERT_EXPR<union __v4F>(b_1).a[1] = 1.0e+0; t1.cc:3: internal compiler error: verify_ssa failed Please submit a full bug report, with preprocessed source if appropriate. See <URL:http://gcc.gnu.org/bugs.html> for instructions And here is a reduced testcase for that: typedef float __v_4F __attribute__ ((vector_size (16))); typedef union {__v_4F v; float a[4];} __v4F; void f(void) { __v_4F b; (reinterpret_cast<__v4F>(b).a)[1] = 1; }
I think there are two issues. First, this code should be rejected, as it was in 3.2.3: t.cc:38: invalid reinterpret_cast from type `vector float' to type `__v4F' Second, there is the VIEW_CONVERT_EXPR issue, which is a tree-ssa issue that actually blocks other work.
(In reply to comment #2) > ... first this code should be rejected which it was in 3.2.3: > t.cc:38: invalid reinterpret_cast from type `vector float' to type `__v4F' (0) I don't have 3.2.3 handy, but have 3.4.3 which also rejects the code. But 3.4.3 also rejects a lot of vector code which does not use reinterpret_cast on vectors. In fact it is difficult to write useful code for manipulating vectors in versions of gcc before 4.0. (1) I don't have the spec in front of me, but working from "C++ in a nutshell" I believe that the code can not be rejected by a conforming C++ implementation. You can have a compile-time error on reinterpret_cast only if it is used to blur the distinction between member functions of a class and other functions, either by casting directly or to an object. Since behavior after a reinterpret_cast is undefined, you could issue a warning and produce non-intuitive code, but the fact that the submitted error case ran with C-style casts as a .c file and crashed the compiler with reinterpret_cast as a .cpp file makes me believe that the gcc C++ compiler should also produce working code for this case. Once again: I do not have a spec in front of me, someone should check the C++ spec. (2a) [portability and performance] The standard way of handling the vector extensions in gcc is to make a union of the vector and an array of the same size so that the vector can be loaded or unloaded without making use of machine-specific (non-portable) intrinsics or builtins. I noticed that in my machine-generated code which used unions everywhere, that gcc was able to better optimize code if I took out unions where they were not needed (removing unused unions produced different .s files on -mcpu=G4 on a PowerPC, and the code with unions removed ran faster. Performance not checked on pentium/SSE since my real target is PPE/SPE.) (2b) Intuitively, casting between two types with the same memory representation and alignment should work. 
Also, it does work on most occasions. The following example casts vectors as both l-values and as r-values and produces the expected output under gcc 4.1.1: #include <stdio.h> typedef float __v_4F __attribute__ ((vector_size (16))); union __v_4Fa {float a[4]; __v_4F v; }; __v_4F x = {1.0f,2.0f,3.0f,4.0f}; int main(int argc, char** argv) { reinterpret_cast<__v_4Fa>(x).a[2] = -1.0f; printf ("%f %f\n", reinterpret_cast<__v_4Fa>(x).a[1], reinterpret_cast<__v_4Fa>(x).a[2]); return 0; } $ gcc -O3 -o cpp-casting.out cpp-casting.cpp -lstdc++ $ ./cpp-casting.out 2.000000 -1.000000 The above code also works if I have struct __v_4Fa {float a[4] __attribute__ ((aligned (16)));}; although that struct form still triggers the crash in the originally submitted program (but I use unions since I am trying to generate similar code for C and for C++, and the gcc 4.1.1 C compiler would reject casting to the struct, or to a union not containing a __v_4F component). (3) By the way: Your reduced test case also compiles without crashing on my copy of gcc version 4.1.1 20060525 (Red Hat 4.1.1-1). You seem to have caught a similar bug, but either your version of gcc is different from the one that I reported on, or your switches are different.
Subject: Re: [4.0/4.1/4.2/4.3 Regression] internal compiler error: in convert_move, at expr.c:362 On Thu, 2006-11-30 at 15:55 +0000, dimock at csail dot mit dot edu wrote: > > (2a) [portability and performance] The standard way of handling the vector > extensions in gcc is to make a union of the vector and an array of the same > size so that the vector can be loaded or unloaded without making use of > machine-specific (non-portable) intrinsics or builtins. I noticed that in my > machine-generated code which used unions everywhere, that gcc was able to > better optimize code if I took out unions where they were not needed (removing > unused unions produced different .s files on -mcpu=G4 on a PowerPC, and the > code with unions removed ran faster. Performance not checked on pentium/SSE > since my real target is PPE/SPE.) The most portable way (which is also better for performance) is to use a temporary variable; unions are not that good for performance. For SPU, you can use spu_extract/spu_insert to get better performance. Thanks, Andrew Pinski a SPU maintainer (and a Cell guy in general)
The fix for PR 18073 was not fully correct: it allowed a conversion from a vector type to any type, even if the target was not an integer type. I have a fix which gets us back to 3.4.0's behavior.
Here is the patch which I am going to test in the new year: Index: typeck.c =================================================================== --- typeck.c (revision 120211) +++ typeck.c (working copy) @@ -5285,7 +5285,7 @@ } else if (TREE_CODE (type) == VECTOR_TYPE) return fold_if_not_in_template (convert_to_vector (type, expr)); - else if (TREE_CODE (intype) == VECTOR_TYPE) + else if (TREE_CODE (intype) == VECTOR_TYPE && INTEGRAL_TYPE_P (type)) return fold_if_not_in_template (convert_to_integer (type, expr)); else {
Won't fix in GCC 4.0.x; adjusting milestone.
I am testing this patch now.
Subject: Bug 30016 Author: pinskia Date: Fri Apr 27 02:31:25 2007 New Revision: 124208 URL: http://gcc.gnu.org/viewcvs?root=gcc&view=rev&rev=124208 Log: 2007-04-26 Andrew Pinski <andrew_pinski@playstation.sony.com> PR C++/30016 * typeck.c (build_reinterpret_cast_1): Only allow conversion to integeral types from vectors types. 2007-04-26 Andrew Pinski <andrew_pinski@playstation.sony.com> PR C++/30016 * g++.dg/ext/vector6.C: New test. Added: trunk/gcc/testsuite/g++.dg/ext/vector6.C Modified: trunk/gcc/cp/ChangeLog trunk/gcc/cp/typeck.c trunk/gcc/testsuite/ChangeLog
Subject: Bug 30016 Author: pinskia Date: Fri Apr 27 02:37:02 2007 New Revision: 124209 URL: http://gcc.gnu.org/viewcvs?root=gcc&view=rev&rev=124209 Log: 2007-04-26 Andrew Pinski <andrew_pinski@playstation.sony.com> PR C++/30016 * typeck.c (build_reinterpret_cast_1): Only allow conversion to integeral types from vectors types. 2007-04-26 Andrew Pinski <andrew_pinski@playstation.sony.com> PR C++/30016 * g++.dg/ext/vector6.C: New test. Added: branches/gcc-4_2-branch/gcc/testsuite/g++.dg/ext/vector6.C - copied unchanged from r124208, trunk/gcc/testsuite/g++.dg/ext/vector6.C Modified: branches/gcc-4_2-branch/gcc/cp/ChangeLog branches/gcc-4_2-branch/gcc/cp/typeck.c branches/gcc-4_2-branch/gcc/testsuite/ChangeLog
Subject: Bug 30016 Author: pinskia Date: Fri Apr 27 02:37:42 2007 New Revision: 124210 URL: http://gcc.gnu.org/viewcvs?root=gcc&view=rev&rev=124210 Log: 2007-04-26 Andrew Pinski <andrew_pinski@playstation.sony.com> PR C++/30016 * typeck.c (build_reinterpret_cast_1): Only allow conversion to integeral types from vectors types. 2007-04-26 Andrew Pinski <andrew_pinski@playstation.sony.com> PR C++/30016 * g++.dg/ext/vector6.C: New test. Added: branches/gcc-4_1-branch/gcc/testsuite/g++.dg/ext/vector6.C - copied unchanged from r124208, trunk/gcc/testsuite/g++.dg/ext/vector6.C Modified: branches/gcc-4_1-branch/gcc/cp/ChangeLog branches/gcc-4_1-branch/gcc/cp/typeck.c branches/gcc-4_1-branch/gcc/testsuite/ChangeLog
Fixed.