[PATCH] Fix various arithmetic patterns with %[abcd]h destination (PR target/82524)

Uros Bizjak ubizjak@gmail.com
Fri Oct 13 06:48:00 GMT 2017


On Thu, Oct 12, 2017 at 9:49 PM, Jakub Jelinek <jakub@redhat.com> wrote:
> Hi!
>
> As mentioned in the PR, there are two bugs in these.  One is that
> the zero_extract destination is effectively another input operand (for the
> remaining bits that are unchanged) and thus the constraint can't be =Q,
> but has to be +Q.
> And the other problem is that then LRA ICEs whenever it has 3 different
> operands, because it is unable to reload it properly.  Uros mentioned
> that it could be reloaded by using *insvqi_2-like insn to move the
> 8 bits from the operand that should use "0" constraint into the destination
> register, but LRA isn't able to do that right now.
> So this patch instead adds insn conditions that either the destination
> is the same as the first input operand or as one of the input operands
> (the latter for commutative patterns).
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk/7.3?
>
> 2017-10-12  Jakub Jelinek  <jakub@redhat.com>
>
>         PR target/82524
>         * config/i386/i386.md (addqi_ext_1, andqi_ext_1,
>         *andqi_ext_1_cc, *<code>qi_ext_1, *xorqi_ext_1_cc): Change
>         =Q constraints to +Q and into insn condition add check
>         that operands[0] and operands[1] are equal.
>         (*addqi_ext_2, *andqi_ext_2, *<code>qi_ext_2): Change
>         =Q constraints to +Q and into insn condition add check
>         that operands[0] is equal to either operands[1] or operands[2].
>
>         * gcc.c-torture/execute/pr82524.c: New test.

OK for mainline and gcc-7 branch.

Thanks,
Uros.

> --- gcc/config/i386/i386.md.jj  2017-10-12 14:05:15.000000000 +0200
> +++ gcc/config/i386/i386.md     2017-10-12 17:07:11.723151868 +0200
> @@ -6264,7 +6264,7 @@ (define_insn "*add<mode>_5"
>     (set_attr "mode" "<MODE>")])
>
>  (define_insn "addqi_ext_1"
> -  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q,Q")
> +  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q,Q")
>                          (const_int 8)
>                          (const_int 8))
>         (subreg:SI
> @@ -6275,7 +6275,8 @@ (define_insn "addqi_ext_1"
>                                (const_int 8)) 0)
>             (match_operand:QI 2 "general_operand" "QnBc,m")) 0))
>     (clobber (reg:CC FLAGS_REG))]
> -  ""
> +  "/* FIXME: without this LRA can't reload this pattern, see PR82524.  */
> +   rtx_equal_p (operands[0], operands[1])"
>  {
>    switch (get_attr_type (insn))
>      {
> @@ -6300,7 +6301,7 @@ (define_insn "addqi_ext_1"
>     (set_attr "mode" "QI")])
>
>  (define_insn "*addqi_ext_2"
> -  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
> +  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q")
>                          (const_int 8)
>                          (const_int 8))
>         (subreg:SI
> @@ -6314,7 +6315,9 @@ (define_insn "*addqi_ext_2"
>                                (const_int 8)
>                                (const_int 8)) 0)) 0))
>    (clobber (reg:CC FLAGS_REG))]
> -  ""
> +  "/* FIXME: without this LRA can't reload this pattern, see PR82524.  */
> +   rtx_equal_p (operands[0], operands[1])
> +   || rtx_equal_p (operands[0], operands[2])"
>    "add{b}\t{%h2, %h0|%h0, %h2}"
>    [(set_attr "type" "alu")
>     (set_attr "mode" "QI")])
> @@ -8998,7 +9001,7 @@ (define_insn "*andqi_2_slp"
>     (set_attr "mode" "QI")])
>
>  (define_insn "andqi_ext_1"
> -  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q,Q")
> +  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q,Q")
>                          (const_int 8)
>                          (const_int 8))
>         (subreg:SI
> @@ -9009,7 +9012,8 @@ (define_insn "andqi_ext_1"
>                                (const_int 8)) 0)
>             (match_operand:QI 2 "general_operand" "QnBc,m")) 0))
>     (clobber (reg:CC FLAGS_REG))]
> -  ""
> +  "/* FIXME: without this LRA can't reload this pattern, see PR82524.  */
> +   rtx_equal_p (operands[0], operands[1])"
>    "and{b}\t{%2, %h0|%h0, %2}"
>    [(set_attr "isa" "*,nox64")
>     (set_attr "type" "alu")
> @@ -9027,7 +9031,7 @@ (define_insn "*andqi_ext_1_cc"
>                                (const_int 8)) 0)
>             (match_operand:QI 2 "general_operand" "QnBc,m"))
>           (const_int 0)))
> -   (set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q,Q")
> +   (set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q,Q")
>                          (const_int 8)
>                          (const_int 8))
>         (subreg:SI
> @@ -9037,14 +9041,16 @@ (define_insn "*andqi_ext_1_cc"
>                                (const_int 8)
>                                (const_int 8)) 0)
>             (match_dup 2)) 0))]
> -  "ix86_match_ccmode (insn, CCNOmode)"
> +  "ix86_match_ccmode (insn, CCNOmode)
> +   /* FIXME: without this LRA can't reload this pattern, see PR82524.  */
> +   && rtx_equal_p (operands[0], operands[1])"
>    "and{b}\t{%2, %h0|%h0, %2}"
>    [(set_attr "isa" "*,nox64")
>     (set_attr "type" "alu")
>     (set_attr "mode" "QI")])
>
>  (define_insn "*andqi_ext_2"
> -  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
> +  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q")
>                          (const_int 8)
>                          (const_int 8))
>         (subreg:SI
> @@ -9058,7 +9064,9 @@ (define_insn "*andqi_ext_2"
>                                (const_int 8)
>                                (const_int 8)) 0)) 0))
>     (clobber (reg:CC FLAGS_REG))]
> -  ""
> +  "/* FIXME: without this LRA can't reload this pattern, see PR82524.  */
> +   rtx_equal_p (operands[0], operands[1])
> +   || rtx_equal_p (operands[0], operands[2])"
>    "and{b}\t{%h2, %h0|%h0, %h2}"
>    [(set_attr "type" "alu")
>     (set_attr "mode" "QI")])
> @@ -9431,7 +9439,7 @@ (define_insn "*<code><mode>_3"
>     (set_attr "mode" "<MODE>")])
>
>  (define_insn "*<code>qi_ext_1"
> -  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q,Q")
> +  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q,Q")
>                          (const_int 8)
>                          (const_int 8))
>         (subreg:SI
> @@ -9442,14 +9450,16 @@ (define_insn "*<code>qi_ext_1"
>                                (const_int 8)) 0)
>             (match_operand:QI 2 "general_operand" "QnBc,m")) 0))
>     (clobber (reg:CC FLAGS_REG))]
> -  "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
> +  "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
> +   /* FIXME: without this LRA can't reload this pattern, see PR82524.  */
> +   && rtx_equal_p (operands[0], operands[1])"
>    "<logic>{b}\t{%2, %h0|%h0, %2}"
>    [(set_attr "isa" "*,nox64")
>     (set_attr "type" "alu")
>     (set_attr "mode" "QI")])
>
>  (define_insn "*<code>qi_ext_2"
> -  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
> +  [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q")
>                          (const_int 8)
>                          (const_int 8))
>         (subreg:SI
> @@ -9463,7 +9473,10 @@ (define_insn "*<code>qi_ext_2"
>                                (const_int 8)
>                                (const_int 8)) 0)) 0))
>     (clobber (reg:CC FLAGS_REG))]
> -  "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
> +  "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
> +   /* FIXME: without this LRA can't reload this pattern, see PR82524.  */
> +   && (rtx_equal_p (operands[0], operands[1])
> +       || rtx_equal_p (operands[0], operands[2]))"
>    "<logic>{b}\t{%h2, %h0|%h0, %h2}"
>    [(set_attr "type" "alu")
>     (set_attr "mode" "QI")])
> @@ -9552,7 +9565,7 @@ (define_insn "*xorqi_ext_1_cc"
>                                (const_int 8)) 0)
>             (match_operand:QI 2 "general_operand" "QnBc,m"))
>           (const_int 0)))
> -   (set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q,Q")
> +   (set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q,Q")
>                          (const_int 8)
>                          (const_int 8))
>         (subreg:SI
> @@ -9562,7 +9575,9 @@ (define_insn "*xorqi_ext_1_cc"
>                                (const_int 8)
>                                (const_int 8)) 0)
>           (match_dup 2)) 0))]
> -  "ix86_match_ccmode (insn, CCNOmode)"
> +  "ix86_match_ccmode (insn, CCNOmode)
> +   /* FIXME: without this LRA can't reload this pattern, see PR82524.  */
> +   && rtx_equal_p (operands[0], operands[1])"
>    "xor{b}\t{%2, %h0|%h0, %2}"
>    [(set_attr "isa" "*,nox64")
>     (set_attr "type" "alu")
> --- gcc/testsuite/gcc.c-torture/execute/pr82524.c.jj    2017-10-12 17:39:28.244825800 +0200
> +++ gcc/testsuite/gcc.c-torture/execute/pr82524.c       2017-10-12 17:39:12.000000000 +0200
> @@ -0,0 +1,37 @@
> +/* PR target/82524 */
> +
> +struct S { unsigned char b, g, r, a; };
> +union U { struct S c; unsigned v; };
> +
> +static inline unsigned char
> +foo (unsigned char a, unsigned char b)
> +{
> +  return ((a + 1) * b) >> 8;
> +}
> +
> +__attribute__((noinline, noclone)) unsigned
> +bar (union U *x, union U *y)
> +{
> +  union U z;
> +  unsigned char v = x->c.a;
> +  unsigned char w = foo (y->c.a, 255 - v);
> +  z.c.r = foo (x->c.r, v) + foo (y->c.r, w);
> +  z.c.g = foo (x->c.g, v) + foo (y->c.g, w);
> +  z.c.b = foo (x->c.b, v) + foo (y->c.b, w);
> +  z.c.a = 0;
> +  return z.v;
> +}
> +
> +int
> +main ()
> +{
> +  union U a, b, c;
> +  if ((unsigned char) ~0 != 255 || sizeof (unsigned) != 4)
> +    return 0;
> +  a.c = (struct S) { 255, 255, 255, 0 };
> +  b.c = (struct S) { 255, 255, 255, 255 };
> +  c.v = bar (&a, &b);
> +  if (c.c.b != 255 || c.c.g != 255 || c.c.r != 255 || c.c.a != 0)
> +    __builtin_abort ();
> +  return 0;
> +}
>
>         Jakub



More information about the Gcc-patches mailing list