/* * compile with GCC 3.3.2 on s390-ibm-linux * options: -O3 * * instruction 61 and 71 will be switched by the scheduler, although they * access overlapping memory * (insn 71 317 72 2 ff1f1500 (parallel [ (set (reg/v:SI 53) (zero_extract:SI (mem/f:QI (plus:SI (reg/f:SI 34 %fp) (const_int 96 [0x60])) [9 S1 A16]) (const_int 15 [0xf]) (const_int 0 [0x0]))) (clobber (reg:CC 33 %cc)) ]) 89 {*extracthi} (insn_list 317 (nil)) (expr_list:REG_UNUSED (reg:CC 33 %cc) (nil))) (insn 61 67 63 2 ff1f1500 (set (mem/s:QI (plus:SI (reg/f:SI 34 %fp) (const_int 97 [0x61])) [0 <variable>.string_2+1 S1 A8]) (mem/s:QI (plus:SI (reg/v/f:SI 49) (const_int 3 [0x3])) [0 <variable>.mem2.string_2+1 S1 A8])) 59 {movqi} (insn_list 56 (insn_list:REG_DEP_OUTPUT 317 (insn_list:REG_DEP_ANTI 312 (insn_list:REG_DEP_ANTI 315 (nil))))) (nil)) * The reason is probably in sched_analyze_2 which does not handle zero_extract * correctly. It just hands the (mem:QI ...) subexpression to another * incarnation of sched_analyze_2. The fact that 2 bytes were accessed is lost. 
* */
/* Two-byte and four-byte character containers used throughout the test. */
typedef struct { char string_2 [2]; } Char2;
typedef struct { char string_4 [4]; } Char4;
/* Key structure: Erwin() reads only the mem7 and mem8 members. */
typedef struct { void *mem4; void *mem5; Char4 mem6; Char2 mem7; Char2 mem8; } Type2;
/* Layout that Erwin() overlays on the scanned memory: mem1 is compared
 * with par1->mem7 and mem2 is copied into a 16-bit counter (var8).  */
typedef struct { Char2 mem1; Char2 mem2; Char2 mem3; } Type1;
typedef void *Pointer;

/*
 * Erwin - scan the memory from par2 up to par3 for an entry that matches
 * the two keys par1->mem7 and par1->mem8.
 *
 * The outer loop moves var2 forward from par2 while it is below par3 and
 * no hit has been recorded.  At each position the first two bytes are
 * compared with par1->mem7:
 *   - on a match, the two bytes of the mem2 member are copied into var8
 *     and a bisection over the two-byte elements that follow mem2 looks
 *     for par1->mem8;
 *   - otherwise var2 is advanced by var8 + sizeof (Type1) - sizeof (Char2)
 *     bytes (presumably var8 is the byte length of the entry's payload;
 *     this cannot be confirmed from this file alone).
 *
 * par1  keys to look for (mem7, mem8)
 * par2  start of the scanned memory
 * par3  upper bound of the scan; also compared with the address just
 *       past the element that matched mem8
 * par4  out: receives the position whose first two bytes matched mem7
 * par5  out: receives the position of the element that matched mem8
 * par6  out: written only when mem8 matched; !0 if the matched element
 *       ends exactly at par3, 0 otherwise
 * par7  out: written only when *par6 is set to !0; bytes 0-1 receive
 *       var9 and bytes 2-3 receive var10
 *
 * Returns !0 when an element matching par1->mem8 was found, 0 otherwise.
 *
 * NOTE(review): memcmp, memset and memcpy are called without a visible
 * declaration; no #include appears in this excerpt.
 * NOTE(review): var8 is written through (Char2 *) casts; the bug thread
 * discusses this as an aliasing violation.  The code is left untouched
 * because it is a compiler-bug reproducer.
 */
unsigned char Erwin ( Type2 *par1,
                      Type1 *par2,
                      Type1 *par3,
                      Type1 **par4,
                      Char2 **par5,
                      unsigned char *par6,
                      Char4 *par7 )
{
  int var1;             /* result of the latest memcmp against par1->mem8 */
  Pointer var2;         /* current scan / probe position */
  Pointer var3;         /* base address of the bisected elements */
  Pointer var4;         /* start of the most recently skipped entry (par2 until one is skipped) */
  int var5;             /* lower bound of the bisection */
  int var6;             /* upper bound of the bisection */
  int var7;             /* probe index of the bisection */
  unsigned short var8;  /* two bytes copied from an entry's mem2 member */
  Char2 var9;           /* becomes bytes 0-1 of *par7 */
  Char2 var10;          /* becomes bytes 2-3 of *par7 */
  unsigned char var11;  /* return value: !0 once mem8 has been found */

  var2 = (Pointer) par2;
  var4 = (Pointer) par2;
  var11 = 0;
  while ((var2 < par3) && (!var11)) {
    if (memcmp( (Char2 *) var2, &par1->mem7, sizeof(Char2)) == 0) {
      *par4 = (Type1 *) var2;
      /* Byte-wise copy of mem2 into var8.  These stores and the read of
       * var8 a few statements below appear to be the accesses shown in the
       * RTL dump of the header comment (presumed, not verified here).  */
      ((Char2 * ) &var8)->string_2[0] = ((Type1 *) var2)->mem2.string_2[0];;
      ((Char2 * ) &var8)->string_2[1] = ((Type1 *) var2)->mem2.string_2[1];;
      /* Step over mem1 and mem2 so that var2 addresses the mem3 position. */
      var2 = (Pointer) ((char *) var2 + ((sizeof (Type1) - sizeof (Char2))));
      var3 = var2;
      var5 = 0;
      var6 = ((int) var8) / sizeof(Char2);
      /* Bisection over the elements starting at var3.
       * NOTE(review): var1 is assigned only inside this loop; when var6 is
       * 0 on entry the body never runs and the test after the loop reads
       * var1 without a prior assignment.  */
      while (var5 != var6) {
        /* Assuming sizeof(Char2) is 2, this is the midpoint of var5 and var6. */
        var7 = (var5 + var6) / sizeof(Char2);
        var2 = ( (Pointer) ( (char *) (var3) + ((var7 * sizeof (Char2))) ) );
        var1 = memcmp ( (Char2 *) var2, &par1->mem8, sizeof(Char2));
        if (var1 < 0) {
          var5 = var7 + 1;
        } else if (var1 > 0) {
          var6 = var7;
        } else {
          /* Exact hit: collapse the interval so the loop terminates. */
          var6 = var7;
          var5 = var7;
        }
      }
      if ( var1 == 0) {
        var11 = (!0);
        *par5 = ((Char2 *) var2);
        /* Does the matched element end exactly at par3? */
        if (( (Pointer) ( (char *) (var2) + (sizeof(Char2)) ) ) == par3) {
          *par6 = (!0);
          /* Is the matched element the one directly behind mem1/mem2? */
          if (( (Pointer) ( (char *) (*par4) + ((sizeof (Type1) - sizeof (Char2))) ) ) == var2) {
            if (var4 == *par4) {
              /* No entry was skipped before the match: report zeros. */
              memset(&var9, 0x00, sizeof(Char2));
              memset(&var10, 0x00, sizeof(Char2));
            } else {
              /* Take var9 from the first two bytes of the skipped entry at
               * var4 and var10 from the two bytes at offset
               * var8 + sizeof (Type1) - 2 * sizeof (Char2) within it,
               * where var8 is reloaded from that entry's mem2.  */
              memcpy (&var9, var4, sizeof(Char2));
              *((Char2 * ) &var8) = ((Type1 *) var4)->mem2;;
              var2 = ((Pointer) ((char *) var4 + ((var8 + (sizeof (Type1) - sizeof (Char2)) - sizeof (Char2))) ) );
              memcpy (&var10, var2, sizeof(Char2));
            }
          } else {
            /* var10 is the element just before the match; var9 is the mem7 key. */
            var2 = ( (Pointer) ( (char *) (var2) - (sizeof(Char2)) ) );
            memcpy (&var10, var2, sizeof(Char2));
            var9 = par1->mem7;
          }
          /* Publish the pair: var9 into bytes 0-1, var10 into bytes 2-3. */
          memcpy(&(par7->string_4[0]), &var9, sizeof(Char2));
          memcpy(&(par7->string_4[2]), &var10, sizeof(Char2));
        } else {
          *par6 = 0;
        }
      } else {
        /* mem7 matched but mem8 was not found: give up. */
        var11 = 0;
        break;
      }
    } else {
      /* mem7 did not match: remember this entry and skip past it. */
      var4 = var2;
      *((Char2 * ) &var8) = ((Type1 *) var2)->mem2;;
      var2 = ( (Pointer) ( (char *) (var2) + ((var8 + (sizeof (Type1) - sizeof (Char2)) )) ) );
    }
  }
  return(var11);
}
You are violating C aliasing rules; either use a union (which is a GCC extension) or use -fno-strict-aliasing. The offending construct is (Char2 * ) &var8. *** This bug has been marked as a duplicate of 21920 ***
With option -fno-strict-aliasing the generated code is still incorrect.
Can you please provide a testcase that can be executed and does abort() if the outcome is wrong and has zero exit code if it is right? Thanks, Richard.
Having a zero_extract that takes 16 bytes out of a QImode mem seems extremely wrong. Can you find out which pass generates that thing? Just grep for zero_extract:SI..mem[a-z/]:QI in all the files dumped by -da.
Also note that GCC 3.3.2 is not exactly state of the art; it is unmaintained, and a fix would go into 3.4.5 at the earliest, where the problem may already be fixed.
The zero_extract is generated by the combiner pass (x.c.17.combine). Note that the official documentation of zero_extract requires a mem:QI (see http://gcc.gnu.org/onlinedocs/gccint/Bit_002dFields.html). I just tested the case with a 4.0.1 crosscompiler. The error is still present.
Created attachment 9773 [details] testcase supplement Here is some additional code, which calls the function given in the bug report. The exit code is 1 in case of error, zero otherwise. I was able to verify my original analysis with this test.
Created attachment 9774 [details] complete testcase Combining comment #1 and the testcase, declaring some functions and exchanging printf for abort(), I cannot reproduce the bug, as the testcase aborts at every optimization level, even with -O0 -fno-strict-aliasing, and shows the same behavior even with the Intel compiler. Can you re-check whether the testcase really works in any configuration?
Created attachment 9780 [details] complete testcase (corrected) My first try at the testcase depends on the endianness. S/390 is big endian, Intel is little endian. This revised testcase avoids this dependency. It runs correctly on Intel (-O0 or -O3).
Is this still an issue?
No feedback in over a year, so closing.