23650 – Scheduler does not handle zero_extract correctly

Bug 23650 - Scheduler does not handle zero_extract correctly

Summary: Scheduler does not handle zero_extract correctly

Status:	RESOLVED INVALID

Alias:	None

Product:	gcc
Classification:	Unclassified
Component:	rtl-optimization (show other bugs)
Version:	3.3.2

Importance:	P2 normal
Target Milestone:	---
Assignee:	Not yet assigned to anyone

URL:
Keywords:

Depends on:
Blocks:

Reported:	2005-08-31 09:33 UTC by Erwin Unruh
Modified:	2013-03-20 00:48 UTC (History)
CC List:	1 user (show)

See Also:
Host:
Target:	s390-ibm-linux
Build:
Known to work:
Known to fail:
Last reconfirmed:	2012-01-11 00:00:00

Attachments
testcase supplement (468 bytes, text/plain) 2005-09-19 12:25 UTC, Erwin Unruh	Details
complete testcase (1.07 KB, text/plain) 2005-09-19 17:57 UTC, Richard Biener	Details
complete testcase (corrected) (1.10 KB, text/plain) 2005-09-20 07:22 UTC, Erwin Unruh	Details
Show Obsolete (1) View All Add an attachment (proposed patch, testcase, etc.)

Note You need to log in before you can comment on or make changes to this bug.

Description Erwin Unruh 2005-08-31 09:33:05 UTC

/*
 * compile with GCC 3.3.2 on s390-ibm-linux
 * options: -O3
 *
 * instruction 61 and 71 will be switched by the scheduler, although they
 * access overlapping memory
 *

(insn 71 317 72 2 ff1f1500 (parallel [
            (set (reg/v:SI 53) 
                (zero_extract:SI (mem/f:QI (plus:SI (reg/f:SI 34 %fp)
                            (const_int 96 [0x60])) [9 S1 A16])
                    (const_int 15 [0xf])
                    (const_int 0 [0x0])))
            (clobber (reg:CC 33 %cc))
        ]) 89 {*extracthi} (insn_list 317 (nil))
    (expr_list:REG_UNUSED (reg:CC 33 %cc)
        (nil)))

(insn 61 67 63 2 ff1f1500 (set (mem/s:QI (plus:SI (reg/f:SI 34 %fp)
                (const_int 97 [0x61])) [0 <variable>.string_2+1 S1 A8])
        (mem/s:QI (plus:SI (reg/v/f:SI 49)
                (const_int 3 [0x3])) [0 <variable>.mem2.string_2+1 S1 A8])) 59 
        
        {movqi} (insn_list 56 (insn_list:REG_DEP_OUTPUT 317 
        (insn_list:REG_DEP_ANTI 312 (insn_list:REG_DEP_ANTI 315 (nil)))))
    (nil))

 * The reason is probably in sched_analyze_2 which does not handle zero_extract
 * correctly. It just hands the (mem:QI ...) subexpression to another
 * incarnation of sched_analyze_2. The fact that 2 bytes were accessed is lost.
 *
 */


typedef struct {
    char string_2 [2];
} Char2;


typedef struct { 
    char string_4 [4];
} Char4;


typedef struct
{
   void *mem4;
   void *mem5;
   Char4 mem6; 
   Char2 mem7;
   Char2 mem8;

} Type2;


typedef struct {
   Char2 mem1;
   Char2 mem2;
   Char2 mem3;
} Type1;

typedef void *Pointer;

unsigned char Erwin
(
   Type2 *par1,
   Type1 *par2,
   Type1 *par3,

   Type1 **par4,
   Char2 **par5,
   unsigned char *par6,
   Char4 *par7
)
{
   int var1;
   Pointer var2;
   Pointer var3;
   Pointer var4;
   int var5;
   int var6;
   int var7;

   unsigned short var8;

   Char2 var9;
   Char2 var10;

   unsigned char var11;

   var2 = (Pointer) par2;
   var4 = (Pointer) par2;
   var11 = 0;

   while ((var2 < par3) && (!var11))
   {
      if (memcmp( (Char2 *) var2, &par1->mem7, sizeof(Char2)) == 0)
      {
         *par4 = (Type1 *) var2;
         ((Char2 * ) &var8)->string_2[0] = ((Type1 *) var2)->mem2.string_2[0];;
         ((Char2 * ) &var8)->string_2[1] = ((Type1 *) var2)->mem2.string_2[1];;

         var2 = (Pointer) ((char *) var2 + ((sizeof (Type1) - sizeof 
(Char2))));
         var3 = var2;

         var5 = 0;
         var6 = ((int) var8) / sizeof(Char2);


         while (var5 != var6)
         {
            var7 = (var5 + var6) / sizeof(Char2);
            var2 = ( (Pointer) ( (char *) (var3) + ((var7 * sizeof
(Char2))) ) );

            var1 = memcmp ( (Char2 *) var2, &par1->mem8, sizeof(Char2));

            if (var1 < 0)
            {
               var5 = var7 + 1;
            }
            else if (var1 > 0)
            {
               var6 = var7;
            }
            else
            {
               var6 = var7;
               var5 = var7;
            }
          }
          if ( var1 == 0)
          {
            var11 = (!0);
            *par5 = ((Char2 *) var2);

            if (( (Pointer) ( (char *) (var2) + (sizeof(Char2)) ) ) == par3)
            {

               *par6 = (!0);

               if (( (Pointer) ( (char *) (*par4) +
                               ((sizeof (Type1) - sizeof (Char2))) ) ) == var2)
               {
                  if (var4 == *par4)
                  {
                     memset(&var9, 0x00, sizeof(Char2));
                     memset(&var10, 0x00, sizeof(Char2));
                  }
                  else
                  {
                     memcpy (&var9, var4, sizeof(Char2)); 
                     *((Char2 * ) &var8) = ((Type1 *) var4)->mem2;;
                     var2 = ((Pointer) ((char *) var4 + ((var8 +
                        (sizeof (Type1) - sizeof (Char2)) - sizeof
(Char2))) ) );

                     memcpy (&var10, var2, sizeof(Char2));
                  }
               }
               else
               {
                  var2 = ( (Pointer) ( (char *) (var2) - (sizeof(Char2)) ) );
                  memcpy (&var10, var2, sizeof(Char2));
                  var9 = par1->mem7;
               }
               memcpy(&(par7->string_4[0]), &var9, sizeof(Char2));
               memcpy(&(par7->string_4[2]), &var10, sizeof(Char2));
            }
            else
            {
               *par6 = 0;
            }
         }
         else
         {
            var11 = 0;
            break;
         }
      }
      else
      {
         var4 = var2;
         *((Char2 * ) &var8) = ((Type1 *) var2)->mem2;;
         var2 = ( (Pointer) ( (char *) (var2) + ((var8 +
                        (sizeof (Type1) - sizeof (Char2)) )) ) );
      }
   }
   return(var11);
}

Comment 1 Andrew Pinski 2005-08-31 12:29:03 UTC

You are violating C aliasing rules, either use an union (which is a GCC extension) or use -fno-strict-
alias.

(Char2 * ) &var

*** This bug has been marked as a duplicate of 21920 ***

Comment 2 Erwin Unruh 2005-08-31 12:51:33 UTC

With option -fno-strict-aliasing the generated code is still incorrect.

Comment 3 Richard Biener 2005-08-31 14:39:37 UTC

Can you please provide a testcase that can be executed and does abort() if the
outcome is wrong and has zero exit code if it is right?

Thanks,
Richard.

Comment 4 Paolo Bonzini 2005-08-31 14:45:45 UTC

Having a zero_extract that takes 16 bytes out of a QImode mem seems extremely
wrong.  Can you find out which pass generates that thing?  Just grep for

zero_extract:SI..mem[a-z/]:QI

in all the files dumped by -da.

Comment 5 Richard Biener 2005-08-31 14:58:50 UTC

Also note that gcc 3.3.2 is not exactly state of the art, but unmaintained,
and a fix would go at earliest into 3.4.5, which may be already fixed.

Comment 6 Erwin Unruh 2005-09-02 09:08:07 UTC

The zero_extract is generated by the combiner pass (x.c.17.combine). Note that 
the official documentation of zero_extract requires a mem:QI (see 
http://gcc.gnu.org/onlinedocs/gccint/Bit_002dFields.html).

I just tested the case with a 4.0.1 crosscompiler. The error is still present.

Comment 7 Erwin Unruh 2005-09-19 12:25:24 UTC

Created attachment 9773 [details]
testcase supplement

Here is some additional coding, which calls the function given in the bug
report. The exit code is 1 in case of error, zero otherwise.
I was able to verify my original analysis with this test.

Comment 8 Richard Biener 2005-09-19 17:57:14 UTC

Created attachment 9774 [details]
complete testcase

Combining comment #1 and the testcase, declaring some functions and exchaning
printf for abort() I cannot reproduce the bug as the testcase aborts with
every optimization level, even -O0 -fno-strict-aliasing and shows this same
behavior even with the intel compiler.

Can you re-check, if the testcase is really working in some case?

Comment 9 Erwin Unruh 2005-09-20 07:22:41 UTC

Created attachment 9780 [details]
complete testcase (corrected)

My first try on the testcase depends on the endianess. S/390 is big endian,
intel is little endian. This revised testcase avoids this dependency. It runs
correctly on intel (-O0 or -O3).

Comment 10 Richard Biener 2012-01-11 14:23:13 UTC

Is this still an issue?

Comment 11 Andrew Pinski 2013-03-20 00:48:42 UTC

No feedback in over an year so closing.