This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [RFC] Elimination of zext/sext - type promotion pass



You indeed need to use CONVERT_EXPR here, maybe you can elaborate
on the optimization issues.

2. for inline asm (a reduced test case that might not make much as a
stand alone test-case, but I ran into similar cases with valid programmes)

;; Function fn1 (fn1, funcdef_no=0, decl_uid=4220, cgraph_uid=0,
symbol_order=0)

fn1 (short int p1)
{
   <bb 2>:
   __asm__("" : "=r" p1_2 : "0" p1_1(D));
   return;

}


I am generating something like the following which ICEs. What is the
expected out?

;; Function fn1 (fn1, funcdef_no=0, decl_uid=4220, cgraph_uid=0,
symbol_order=0)

fn1 (short int p1)
{
   int _1;
   int _2;
   short int _5;

   <bb 2>:
   _1 = (int) p1_4(D);
   _5 = (short int) _1;
   __asm__("" : "=r" p1_6 : "0" _5);
   _2 = (int) p1_6;
   return;

}

Parameters are indeed "interesting" to handle ;)  As we now see on ARM
the incoming parameter (the default def) and later assignments to it
can require different promotions (well, different extensions for ARM).

The only sensible way to deal with promoting parameters is to
promote them by changing the function signature.  Thus reflect the
targets ABI for parameters in the GIMPLE representation (which
includes TYPE_ARG_TYPES and DECL_ARGUMENTS).
IMHO we should do this during gimplification of parameters / call
arguments already.

So for your example you'd end up with

fn1 (int p1)
{
   __asm__("" : "=r" p1_6 : "0" p1_4(D));
   return;
}

that is, promotions also apply to asm inputs/outputs (no?)


Thanks for the review and answers. For the time being, I am handling gimple_asm as one that has to be handled in original type. I Will look into improving it after getting the basic framework right.

As it is, attached patch bootstraps on x86_64-linux-gnu, arm-linux-gnu and aarch64-linux-gnu. There are few regressions to look into (Please see below).

There are cases it is working well. There are cases where it can be improved. I am attaching couple test cases (and their results). I am seeing some BIT_AND_EXPR which are inserted by promotion are not being optimized when they are redundant. This is especially the case when I invalidate the VRP range into from VRP1 during the type promotion. I am looking into it.

Please note that attached patch still needs to address:
* Adding gimple_debug stmts.
* Address review comment for expr.c handling SEXT_EXPR.
* Address regression failures

Based on the feedback, I will address the above and split the patch into logical patch set for easy detailed review.

Here are the outputs for the testcases.

--- c5.c.142t.veclower21	2015-08-05 08:50:11.367135339 +1000
+++ c5.c.143t.promotion	2015-08-05 08:50:11.367135339 +1000
@@ -1,34 +1,45 @@

;; Function unPack (unPack, funcdef_no=0, decl_uid=4145, cgraph_uid=0, symbol_order=0)

 unPack (unsigned char c)
 {
-  short int _1;
-  unsigned short _4;
-  unsigned short _5;
-  short int _6;
-  short int _7;
+  int _1;
+  unsigned int _2;
+  unsigned int _3;
+  unsigned int _4;
+  unsigned int _5;
+  int _6;
+  int _7;
+  unsigned int _9;
+  int _11;
+  int _12;
+  short int _13;

   <bb 2>:
-  c_3 = c_2(D) & 15;
-  if (c_3 > 7)
+  _2 = (unsigned int) c_10(D);
+  _3 = _2 & 15;
+  _9 = _3 & 255;
+  if (_9 > 7)
     goto <bb 3>;
   else
     goto <bb 4>;

   <bb 3>:
-  _4 = (unsigned short) c_3;
-  _5 = _4 + 65531;
-  _6 = (short int) _5;
+  _4 = _3 & 65535;
+  _5 = _4 + 4294967291;
+  _11 = (int) _5;
+  _6 = (_11) sext from bit (16);
   goto <bb 5>;

   <bb 4>:
-  _7 = (short int) c_3;
+  _12 = (int) _3;
+  _7 = (_12) sext from bit (16);

   <bb 5>:
   # _1 = PHI <_6(3), _7(4)>
-  return _1;
+  _13 = (short int) _1;
+  return _13;

 }


--- c5.org.s	2015-08-05 08:51:44.619133892 +1000
+++ c5.new.s	2015-08-05 08:51:29.643134124 +1000
@@ -16,16 +16,14 @@
 	.syntax divided
 	.arm
 	.type	unPack, %function
 unPack:
 	@ args = 0, pretend = 0, frame = 0
 	@ frame_needed = 0, uses_anonymous_args = 0
 	@ link register save eliminated.
 	and	r0, r0, #15
 	cmp	r0, #7
 	subhi	r0, r0, #5
-	uxth	r0, r0
-	sxth	r0, r0
 	bx	lr
 	.size	unPack, .-unPack
 	.ident	"GCC: (GNU) 6.0.0 20150724 (experimental)"
 	.section	.note.GNU-stack,"",%progbits
--- crc.c.142t.veclower21	2015-08-05 08:52:43.811132974 +1000
+++ crc.c.143t.promotion	2015-08-05 08:52:43.811132974 +1000
@@ -1,52 +1,78 @@

;; Function crc2 (crc2, funcdef_no=0, decl_uid=4146, cgraph_uid=0, symbol_order=0)

 crc2 (short unsigned int crc, unsigned char data)
 {
   unsigned char carry;
   unsigned char x16;
   unsigned char i;
-  unsigned char ivtmp_5;
-  unsigned char _9;
-  unsigned char _10;
-  unsigned char ivtmp_18;
+  unsigned int _2;
+  unsigned int _3;
+  unsigned int _5;
+  unsigned int _7;
+  unsigned int _8;
+  unsigned int _9;
+  unsigned int _10;
+  unsigned int _11;
+  unsigned int _12;
+  unsigned int _13;
+  unsigned int _15;
+  unsigned int _16;
+  unsigned int _18;
+  unsigned int _19;
+  unsigned int _21;
+  unsigned int _22;
+  unsigned int _24;
+  short unsigned int _25;
+  unsigned int _26;
+  unsigned int _27;
+  unsigned int _28;
+  unsigned int _29;

   <bb 2>:
+  _8 = (unsigned int) data_4(D);
+  _7 = (unsigned int) crc_30(D);

   <bb 3>:
-  # crc_28 = PHI <crc_2(5), crc_7(D)(2)>
-  # data_29 = PHI <data_12(5), data_8(D)(2)>
-  # ivtmp_18 = PHI <ivtmp_5(5), 8(2)>
-  _9 = (unsigned char) crc_28;
-  _10 = _9 ^ data_29;
-  x16_11 = _10 & 1;
-  data_12 = data_29 >> 1;
-  if (x16_11 == 1)
+  # _28 = PHI <_2(5), _7(2)>
+  # _29 = PHI <_12(5), _8(2)>
+  # _18 = PHI <_5(5), 8(2)>
+  _9 = _28 & 255;
+  _10 = _9 ^ _29;
+  _11 = _10 & 1;
+  _3 = _29 & 255;
+  _12 = _3 >> 1;
+  _27 = _11 & 255;
+  if (_27 == 1)
     goto <bb 4>;
   else
     goto <bb 7>;

   <bb 4>:
-  crc_13 = crc_28 ^ 16386;
-  crc_24 = crc_13 >> 1;
-  crc_15 = crc_24 | 32768;
+  _13 = _28 ^ 16386;
+  _26 = _13 & 65535;
+  _24 = _26 >> 1;
+  _15 = _24 | 4294934528;

   <bb 5>:
-  # crc_2 = PHI <crc_15(4), crc_21(7)>
-  ivtmp_5 = ivtmp_18 - 1;
-  if (ivtmp_5 != 0)
+  # _2 = PHI <_15(4), _21(7)>
+  _5 = _18 - 1;
+  _22 = _5 & 255;
+  if (_22 != 0)
     goto <bb 3>;
   else
     goto <bb 6>;

   <bb 6>:
-  # crc_19 = PHI <crc_2(5)>
-  return crc_19;
+  # _19 = PHI <_2(5)>
+  _25 = (short unsigned int) _19;
+  return _25;

   <bb 7>:
-  crc_21 = crc_28 >> 1;
+  _16 = _28 & 65535;
+  _21 = _16 >> 1;
   goto <bb 5>;

 }


--- crc.org.s	2015-08-05 08:54:17.491131520 +1000
+++ crc.new.s	2015-08-05 08:53:12.183132534 +1000
@@ -15,27 +15,28 @@
 	.global	crc2
 	.syntax divided
 	.arm
 	.type	crc2, %function
 crc2:
 	@ args = 0, pretend = 0, frame = 0
 	@ frame_needed = 0, uses_anonymous_args = 0
 	mov	ip, #32768
 	movt	ip, 65535
 	str	lr, [sp, #-4]!
-	mov	r3, #8
+	mov	r2, #8
 	movw	lr, #16386
 .L3:
-	eor	r2, r1, r0
-	sub	r3, r3, #1
-	tst	r2, #1
+	uxtb	r3, r0
+	eor	r3, r3, r1
 	mov	r1, r1, lsr #1
+	tst	r3, #1
 	eorne	r0, r0, lr
-	moveq	r0, r0, lsr #1
-	orrne	r0, ip, r0, lsr #1
-	uxthne	r0, r0
-	ands	r3, r3, #255
+	ubfxeq	r0, r0, #1, #15
+	ubfxne	r0, r0, #1, #15
+	orrne	r0, r0, ip
+	subs	r2, r2, #1
 	bne	.L3
+	uxth	r0, r0
 	ldr	pc, [sp], #4
 	.size	crc2, .-crc2
 	.ident	"GCC: (GNU) 6.0.0 20150724 (experimental)"
 	.section	.note.GNU-stack,"",%progbits



Testsuite regression for x86_64-unknown-linux-gnu:
Tests that now fail, but worked before:
gfortran.dg/graphite/pr42393-1.f90   -O  (test for excess errors)


Testsuite regression for  arm-linux-gnu:
Tests that now fail, but worked before:
arm-sim: gcc.dg/fixed-point/convert-sat.c execution test
arm-sim: gcc.dg/tree-ssa/20030729-1.c scan-tree-dump-times dom2 "\\(unsigned
int\\)" 0
arm-sim: gcc.dg/tree-ssa/pr54245.c scan-tree-dump-times slsr "Inserting
initializer" 0
arm-sim: gcc.dg/tree-ssa/shorten-1.c scan-tree-dump-not optimized
"\\(int\\)"
arm-sim: gcc.dg/tree-ssa/shorten-1.c scan-tree-dump-times optimized
"\\(unsigned char\\)" 8
arm-sim: gcc.target/arm/mla-2.c scan-assembler smlalbb
arm-sim: gcc.target/arm/unsigned-extend-2.c scan-assembler ands
arm-sim: gcc.target/arm/wmul-1.c scan-assembler-times smlabb 2
arm-sim: gcc.target/arm/wmul-2.c scan-assembler-times smulbb 1
arm-sim: gcc.target/arm/wmul-3.c scan-assembler-times smulbb 2
arm-sim: gcc.target/arm/wmul-9.c scan-assembler smlalbb
arm-sim: gfortran.dg/graphite/pr42393-1.f90   -O  (test for excess errors)

Tests that now work, but didn't before:
arm-sim: gcc.dg/tree-prof/time-profiler-2.c scan-ipa-dump-times profile
"Read tp_first_run: 0" 2
arm-sim: gcc.dg/tree-prof/time-profiler-2.c scan-ipa-dump-times profile
"Read tp_first_run: 2" 1
arm-sim: gcc.dg/tree-prof/time-profiler-2.c scan-ipa-dump-times profile
"Read tp_first_run: 3" 1
arm-sim: gcc.target/arm/builtin-bswap-1.c scan-assembler-times rev16ne\\t 1
arm-sim: gcc.target/arm/builtin-bswap-1.c scan-assembler-times revshne\\t 1
arm-sim: gcc.target/arm/smlaltb-1.c scan-assembler smlaltb\\t
arm-sim: gcc.target/arm/smlaltt-1.c scan-assembler smlaltt\\t


Testsuite regression for  aarch64-linux-gnu:
Tests that now fail, but worked before:
c-c++-common/torture/vector-compare-1.c   -O3 -g  (test for excess errors)
c-c++-common/torture/vector-compare-1.c   -O3 -g  (test for excess errors)
gcc.dg/tree-ssa/20030729-1.c scan-tree-dump-times dom2 "\\(unsigned int\\)"
0
gcc.dg/tree-ssa/pr54245.c scan-tree-dump-times slsr "Inserting initializer"
0
gcc.dg/tree-ssa/shorten-1.c scan-tree-dump-not optimized "\\(int\\)"
gcc.dg/tree-ssa/shorten-1.c scan-tree-dump-times optimized "\\(unsigned
char\\)" 8

Thanks,
Kugan

Attachment: p.txt
Description: Text document

Attachment: log.txt
Description: Text document

short unPack( unsigned char c )
{
    /* Only want lower four bit nibble */
    c = c & (unsigned char)0x0F ;

    if( c > 7 ) {
        /* Negative nibble */
        return( ( short )( c - 5 ) ) ;

    }
    else
    {
        /* positive nibble */
        return( ( short )c ) ;
    }
}

unsigned short
crc2(unsigned short crc, unsigned char data)
{
   unsigned char i, x16, carry;
 
   for (i = 0; i < 8; i++)
     {
       x16 = (data ^ crc) & 1;
       data >>= 1;
 
       if (x16 == 1)
         {
           crc ^= 0x4002;
           carry = 1;
         }
       else
         carry = 0;
 
       crc >>= 1;
 
       if (carry)
         crc |= 0x8000;
       else
         crc &= 0x7fff;
     }
 
   return crc;
}

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]