This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH 2/2] Enable elimination of zext/sext


Hello!

> 2014-08-07  Kugan Vivekanandarajah  <kuganv@linaro.org>
>
> * calls.c (precompute_arguments): Check
> promoted_for_signed_and_unsigned_p and set the promoted mode.
> (promoted_for_signed_and_unsigned_p): New function.
> (expand_expr_real_1): Check promoted_for_signed_and_unsigned_p
> and set the promoted mode.
> * expr.h (promoted_for_signed_and_unsigned_p): New function definition.
> * cfgexpand.c (expand_gimple_stmt_1): Call emit_move_insn if
> SUBREG is promoted with SRP_SIGNED_AND_UNSIGNED.

This patch regresses:

Running target unix
FAIL: libgomp.fortran/simd7.f90   -O2  execution test
FAIL: libgomp.fortran/simd7.f90   -Os  execution test

on alphaev6-linux-gnu.

The problem can be illustrated with the attached testcase, using a
cross-compiler to alphaev68-linux-gnu (-O2 -fopenmp). The problem is a
missing SImode extension after a DImode shift of SImode subregs in this
part:

--cut here--
  # test.23_12 = PHI <0(37), 1(36)>
  _242 = ivtmp.181_73 + 2147483645;
  _240 = _242 * 2;
  _63 = (integer(kind=4)) _240;
  if (ubound.6_99 <= 2)
    goto <bb 39>;
  else
    goto <bb 40>;
;;    succ:       39
;;                40

;;   basic block 39, loop depth 1
;;    pred:       38
  pretmp_337 = test.23_12 | l_76;
  goto <bb 45>;
;;    succ:       45

;;   basic block 40, loop depth 1
;;    pred:       38
  _11 = *c_208[0];
  if (_11 != _63)
    goto <bb 45>;
  else
    goto <bb 42>;
--cut here--

this expands to:

(code_label 592 591 593 35 "" [0 uses])

(note 593 592 0 NOTE_INSN_BASIC_BLOCK)

;; _63 = (integer(kind=4)) _240;

(insn 594 593 595 (set (reg:SI 538)
        (const_int 1073741824 [0x40000000])) -1
     (nil))

(insn 595 594 596 (set (reg:SI 539)
        (plus:SI (reg:SI 538)
            (const_int 1073741824 [0x40000000]))) -1
     (nil))

(insn 596 595 597 (set (reg:SI 537)
        (plus:SI (reg:SI 539)
            (const_int -3 [0xfffffffffffffffd]))) -1
     (expr_list:REG_EQUAL (const_int 2147483645 [0x7ffffffd])
        (nil)))

(insn 597 596 598 (set (reg:SI 536 [ D.1700 ])
        (plus:SI (subreg/s/v/u:SI (reg:DI 144 [ ivtmp.181 ]) 0)
            (reg:SI 537))) -1
     (nil))

(insn 598 597 599 (set (reg:DI 540)
        (ashift:DI (subreg:DI (reg:SI 536 [ D.1700 ]) 0)
            (const_int 1 [0x1]))) -1
     (nil))

(insn 599 598 0 (set (reg:DI 145 [ D.1694 ])
        (reg:DI 540)) -1
     (nil))

...

(note 610 609 0 NOTE_INSN_BASIC_BLOCK)

;; _11 = *c_208[0];

(insn 611 610 0 (set (reg:DI 120 [ D.1694 ])
        (sign_extend:DI (mem:SI (reg/v/f:DI 227 [ c ]) [7 *c_208+0 S4 A128]))) simd7.f90:12 -1
     (nil))

;; if (_11 != _63)

(insn 612 611 613 40 (set (reg:DI 545)
        (eq:DI (reg:DI 120 [ D.1694 ])
            (reg:DI 145 [ D.1694 ]))) simd7.f90:12 -1
     (nil))

(jump_insn 613 612 616 40 (set (pc)
        (if_then_else (eq (reg:DI 545)
                (const_int 0 [0]))
            (label_ref 0)
            (pc))) simd7.f90:12 -1
     (int_list:REG_BR_PROB 450 (nil)))

which results in the following asm:

$L35:
    addl $25,$7,$2     # 597    addsi3/1    [length = 4]
    addq $2,$2,$2     # 598    ashldi3/1    [length = 4]     <------ here
    bne $24,$L145     # 601    *bcc_normal    [length = 4]
    lda $4,4($20)     # 627    *adddi_internal/2    [length = 4]
    ldl $8,0($20)     # 611    *extendsidi2_1/2    [length = 4]
    lda $3,3($31)     # 74    *movdi/2    [length = 4]
    cmpeq $8,$2,$2     # 612    *setcc_internal    [length = 4]  <-- compare
    bne $2,$L40     # 613    *bcc_normal    [length = 4]
    br $31,$L88     # 2403    jump    [length = 4]
    .align 4
...

Tracking the values with the debugger shows wrong calculation:

   0x000000012000108c <+1788>:  addl    t10,t12,t1
   0x0000000120001090 <+1792>:  addq    t1,t1,t1
   ...
   0x00000001200010a4 <+1812>:  cmpeq   t6,t1,t1
   0x00000001200010a8 <+1816>:  bne     t1,0x1200010c0 <foo_+1840>

(gdb) si
0x000000012000108c      17          l = l .or. any (b /= 7 + i)
(gdb) i r t10 t12
t10            0x7      7
t12            0x7ffffffd       2147483645

(gdb) si
0x0000000120001090      17          l = l .or. any (b /= 7 + i)
(gdb) i r t1
t1             0xffffffff80000004       -2147483644

(gdb) si
18          l = l .or. any (c /= 8 + 2 * i)
(gdb) i r t1
t1             0xffffffff00000008       -4294967288

At this point, the calculation should zero-extend the SImode value to full
DImode, since the compare operates on DImode values. The problematic insn
is (insn 599), which is now a plain DImode assignment instead of a
zero-extend, due to:

--- a/gcc/cfgexpand.c
+++ b/gcc/cfgexpand.c
@@ -3309,7 +3309,13 @@ expand_gimple_stmt_1 (gimple stmt)
   GET_MODE (target), temp, unsignedp);
   }

- convert_move (SUBREG_REG (target), temp, unsignedp);
+ if ((SUBREG_PROMOTED_GET (target) == SRP_SIGNED_AND_UNSIGNED)
+    && (GET_CODE (temp) == SUBREG)
+    && (GET_MODE (target) == GET_MODE (temp))
+    && (GET_MODE (SUBREG_REG (target)) == GET_MODE (SUBREG_REG (temp))))
+  emit_move_insn (SUBREG_REG (target), SUBREG_REG (temp));
+ else
+  convert_move (SUBREG_REG (target), temp, unsignedp);
       }
     else if (nontemporal && emit_storent_insn (target, temp))
       ;

When compiling this code, we have:

lhs = _63
target = (subreg/s/v/u:SI (reg:DI 145 [ D.1694 ]) 0)
temp = (subreg:SI (reg:DI 540) 0)

So, the code assumes that it is possible to copy (reg:DI 540) directly
to (reg:DI 145). However, this is not the case, since we still have
garbage in the top 32 bits.

Reverting the part above fixes the runtime failure, since (insn 599) is now:

(insn 599 598 0 (set (reg:DI 145 [ D.1694 ])
        (zero_extend:DI (subreg:SI (reg:DI 540) 0))) -1
     (nil))

It looks to me that we also have to check temp with SUBREG_PROMOTED_*.

Uros.
! Reduced OpenMP SIMD testcase.  Runs a 64-iteration "omp simd" loop in which
! every variable named in a linear clause is advanced by its declared step,
! checks b and c against their expected per-iteration values, and calls abort
! if any check fails.
!
! Arguments:
!   d - assumed-shape rank-1 integer array
!   e - integer array with bounds 2:n
!   f - assumed-shape rank-2 integer array with lower bounds 2 and 3
!   g - allocatable rank-1 integer array
!   m - allocatable integer scalar
!   n - integer; upper bound of e and of the local array c(3:n)
!
! NOTE(review): g receives a whole-array scalar assignment below, so it is
! presumably expected to be allocated by the caller; m and the local k are
! never explicitly allocated here and appear to rely on allocation on
! assignment -- confirm.
subroutine foo (d, e, f, g, m, n)
  integer :: i, j, b(2:9), c(3:n), d(:), e(2:n), f(2:,3:), n
  integer, allocatable :: g(:), h(:), k, m
  logical :: l
  ! l records whether any in-loop check has failed.
  l = .false.
  allocate (h(2:7))
  ! Starting values for every variable.  The i = 4 assignment is overwritten
  ! by the do loop below; j is set here but not used again in this routine.
  i = 4; j = 4; b = 7; c = 8; d = 9; e = 10; f = 11; g = 12; h = 13; k = 14; m = 15
  ! Linear steps declared here mirror the increments in the loop body:
  ! b (no step given), c:2, d:3, e:4, f:5, g:6, h:7, k:8, m:9.
!$omp simd linear(b)linear(c:2)linear(d:3)linear(e:4)linear(f:5)linear(g:6) &
!$omp & linear(h:7)linear(k:8)linear(m:9) reduction(.or.:l)
  do i = 0, 63 
    ! At iteration i every element of b must equal 7 + i and every element
    ! of c must equal 8 + 2 * i; any mismatch sets l.
    l = l .or. any (b /= 7 + i)
    l = l .or. any (c /= 8 + 2 * i)
    ! Advance each variable by the step given in its linear clause.
    b = b + 1; c = c + 2
    d = d + 3; e = e + 4; f = f + 5; g = g + 6
    h = h + 7; k = k + 8; m = m + 9
  end do
  ! Fail if any in-loop check tripped or the loop index did not end at 64.
  if (l .or. i /= 64) call abort
  ! After 64 iterations b must be 7 + 64 and c must be 8 + 2 * 64.
  ! d, e, f, g, h, k and m are updated in the loop but not verified here.
  if (any (b /= 7 + 64) .or. any (c /= 8 + 2 * 64)) call abort
end subroutine

  ! Driver (unnamed main program): declares an explicit interface for foo,
  ! sets up the actual arguments and calls foo once.
  ! The interface block repeats foo's dummy-argument declarations, which
  ! include assumed-shape arrays (d, f) and allocatables (g, m).
  interface
    subroutine foo (d, e, f, g, m, n)
      integer :: d(:), e(2:n), f(2:,3:), n
      integer, allocatable :: g(:), m
    end subroutine
  end interface
  integer, parameter :: n = 8
  ! e(3:n+1) has 7 elements, the same count as foo's dummy e(2:n) for n = 8.
  integer :: d(2:18), e(3:n+1), f(5:6,7:9)
  integer, allocatable :: g(:), m
  ! g is allocated before the call; m is passed without being allocated here.
  allocate (g(7:10))
  call foo (d, e, f, g, m, n)
end

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]