[PATCH] Fix various arithmetic patterns with %[abcd]h destination (PR target/82524)
Uros Bizjak
ubizjak@gmail.com
Fri Oct 13 06:48:00 GMT 2017
On Thu, Oct 12, 2017 at 9:49 PM, Jakub Jelinek <jakub@redhat.com> wrote:
> Hi!
>
> As mentioned in the PR, there are two bugs in these. One is that
> the zero_extract destination is effectively another input operand (for the
> remaining bits that are unchanged) and thus the constraint can't be =Q,
> but has to be +Q.
> And the other problem is that then LRA ICEs whenever it has 3 different
> operands, because it is unable to reload it properly. Uros mentioned
> that it could be reloaded by using *insvqi_2-like insn to move the
> 8 bits from the operand that should use "0" constraint into the destination
> register, but LRA isn't able to do that right now.
> So this patch instead adds insn conditions that either the destination
> is the same as the first input operand or as one of the input operands
> (the latter for commutative patterns).
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk/7.3?
>
> 2017-10-12 Jakub Jelinek <jakub@redhat.com>
>
> PR target/82524
> * config/i386/i386.md (addqi_ext_1, andqi_ext_1,
> *andqi_ext_1_cc, *<code>qi_ext_1, *xorqi_ext_1_cc): Change
> =Q constraints to +Q and into insn condition add check
> that operands[0] and operands[1] are equal.
> (*addqi_ext_2, *andqi_ext_2, *<code>qi_ext_2): Change
> =Q constraints to +Q and into insn condition add check
> that operands[0] is equal to either operands[1] or operands[2].
>
> * gcc.c-torture/execute/pr82524.c: New test.
OK for mainline and gcc-7 branch.
Thanks,
Uros.
> --- gcc/config/i386/i386.md.jj 2017-10-12 14:05:15.000000000 +0200
> +++ gcc/config/i386/i386.md 2017-10-12 17:07:11.723151868 +0200
> @@ -6264,7 +6264,7 @@ (define_insn "*add<mode>_5"
> (set_attr "mode" "<MODE>")])
>
> (define_insn "addqi_ext_1"
> - [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q,Q")
> + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q,Q")
> (const_int 8)
> (const_int 8))
> (subreg:SI
> @@ -6275,7 +6275,8 @@ (define_insn "addqi_ext_1"
> (const_int 8)) 0)
> (match_operand:QI 2 "general_operand" "QnBc,m")) 0))
> (clobber (reg:CC FLAGS_REG))]
> - ""
> + "/* FIXME: without this LRA can't reload this pattern, see PR82524. */
> + rtx_equal_p (operands[0], operands[1])"
> {
> switch (get_attr_type (insn))
> {
> @@ -6300,7 +6301,7 @@ (define_insn "addqi_ext_1"
> (set_attr "mode" "QI")])
>
> (define_insn "*addqi_ext_2"
> - [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
> + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q")
> (const_int 8)
> (const_int 8))
> (subreg:SI
> @@ -6314,7 +6315,9 @@ (define_insn "*addqi_ext_2"
> (const_int 8)
> (const_int 8)) 0)) 0))
> (clobber (reg:CC FLAGS_REG))]
> - ""
> + "/* FIXME: without this LRA can't reload this pattern, see PR82524. */
> + rtx_equal_p (operands[0], operands[1])
> + || rtx_equal_p (operands[0], operands[2])"
> "add{b}\t{%h2, %h0|%h0, %h2}"
> [(set_attr "type" "alu")
> (set_attr "mode" "QI")])
> @@ -8998,7 +9001,7 @@ (define_insn "*andqi_2_slp"
> (set_attr "mode" "QI")])
>
> (define_insn "andqi_ext_1"
> - [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q,Q")
> + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q,Q")
> (const_int 8)
> (const_int 8))
> (subreg:SI
> @@ -9009,7 +9012,8 @@ (define_insn "andqi_ext_1"
> (const_int 8)) 0)
> (match_operand:QI 2 "general_operand" "QnBc,m")) 0))
> (clobber (reg:CC FLAGS_REG))]
> - ""
> + "/* FIXME: without this LRA can't reload this pattern, see PR82524. */
> + rtx_equal_p (operands[0], operands[1])"
> "and{b}\t{%2, %h0|%h0, %2}"
> [(set_attr "isa" "*,nox64")
> (set_attr "type" "alu")
> @@ -9027,7 +9031,7 @@ (define_insn "*andqi_ext_1_cc"
> (const_int 8)) 0)
> (match_operand:QI 2 "general_operand" "QnBc,m"))
> (const_int 0)))
> - (set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q,Q")
> + (set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q,Q")
> (const_int 8)
> (const_int 8))
> (subreg:SI
> @@ -9037,14 +9041,16 @@ (define_insn "*andqi_ext_1_cc"
> (const_int 8)
> (const_int 8)) 0)
> (match_dup 2)) 0))]
> - "ix86_match_ccmode (insn, CCNOmode)"
> + "ix86_match_ccmode (insn, CCNOmode)
> + /* FIXME: without this LRA can't reload this pattern, see PR82524. */
> + && rtx_equal_p (operands[0], operands[1])"
> "and{b}\t{%2, %h0|%h0, %2}"
> [(set_attr "isa" "*,nox64")
> (set_attr "type" "alu")
> (set_attr "mode" "QI")])
>
> (define_insn "*andqi_ext_2"
> - [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
> + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q")
> (const_int 8)
> (const_int 8))
> (subreg:SI
> @@ -9058,7 +9064,9 @@ (define_insn "*andqi_ext_2"
> (const_int 8)
> (const_int 8)) 0)) 0))
> (clobber (reg:CC FLAGS_REG))]
> - ""
> + "/* FIXME: without this LRA can't reload this pattern, see PR82524. */
> + rtx_equal_p (operands[0], operands[1])
> + || rtx_equal_p (operands[0], operands[2])"
> "and{b}\t{%h2, %h0|%h0, %h2}"
> [(set_attr "type" "alu")
> (set_attr "mode" "QI")])
> @@ -9431,7 +9439,7 @@ (define_insn "*<code><mode>_3"
> (set_attr "mode" "<MODE>")])
>
> (define_insn "*<code>qi_ext_1"
> - [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q,Q")
> + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q,Q")
> (const_int 8)
> (const_int 8))
> (subreg:SI
> @@ -9442,14 +9450,16 @@ (define_insn "*<code>qi_ext_1"
> (const_int 8)) 0)
> (match_operand:QI 2 "general_operand" "QnBc,m")) 0))
> (clobber (reg:CC FLAGS_REG))]
> - "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
> + "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
> + /* FIXME: without this LRA can't reload this pattern, see PR82524. */
> + && rtx_equal_p (operands[0], operands[1])"
> "<logic>{b}\t{%2, %h0|%h0, %2}"
> [(set_attr "isa" "*,nox64")
> (set_attr "type" "alu")
> (set_attr "mode" "QI")])
>
> (define_insn "*<code>qi_ext_2"
> - [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
> + [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q")
> (const_int 8)
> (const_int 8))
> (subreg:SI
> @@ -9463,7 +9473,10 @@ (define_insn "*<code>qi_ext_2"
> (const_int 8)
> (const_int 8)) 0)) 0))
> (clobber (reg:CC FLAGS_REG))]
> - "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
> + "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
> + /* FIXME: without this LRA can't reload this pattern, see PR82524. */
> + && (rtx_equal_p (operands[0], operands[1])
> + || rtx_equal_p (operands[0], operands[2]))"
> "<logic>{b}\t{%h2, %h0|%h0, %h2}"
> [(set_attr "type" "alu")
> (set_attr "mode" "QI")])
> @@ -9552,7 +9565,7 @@ (define_insn "*xorqi_ext_1_cc"
> (const_int 8)) 0)
> (match_operand:QI 2 "general_operand" "QnBc,m"))
> (const_int 0)))
> - (set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q,Q")
> + (set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q,Q")
> (const_int 8)
> (const_int 8))
> (subreg:SI
> @@ -9562,7 +9575,9 @@ (define_insn "*xorqi_ext_1_cc"
> (const_int 8)
> (const_int 8)) 0)
> (match_dup 2)) 0))]
> - "ix86_match_ccmode (insn, CCNOmode)"
> + "ix86_match_ccmode (insn, CCNOmode)
> + /* FIXME: without this LRA can't reload this pattern, see PR82524. */
> + && rtx_equal_p (operands[0], operands[1])"
> "xor{b}\t{%2, %h0|%h0, %2}"
> [(set_attr "isa" "*,nox64")
> (set_attr "type" "alu")
> --- gcc/testsuite/gcc.c-torture/execute/pr82524.c.jj 2017-10-12 17:39:28.244825800 +0200
> +++ gcc/testsuite/gcc.c-torture/execute/pr82524.c 2017-10-12 17:39:12.000000000 +0200
> @@ -0,0 +1,37 @@
> +/* PR target/82524 */
> +
> +struct S { unsigned char b, g, r, a; };
> +union U { struct S c; unsigned v; };
> +
> +static inline unsigned char
> +foo (unsigned char a, unsigned char b)
> +{
> + return ((a + 1) * b) >> 8;
> +}
> +
> +__attribute__((noinline, noclone)) unsigned
> +bar (union U *x, union U *y)
> +{
> + union U z;
> + unsigned char v = x->c.a;
> + unsigned char w = foo (y->c.a, 255 - v);
> + z.c.r = foo (x->c.r, v) + foo (y->c.r, w);
> + z.c.g = foo (x->c.g, v) + foo (y->c.g, w);
> + z.c.b = foo (x->c.b, v) + foo (y->c.b, w);
> + z.c.a = 0;
> + return z.v;
> +}
> +
> +int
> +main ()
> +{
> + union U a, b, c;
> + if ((unsigned char) ~0 != 255 || sizeof (unsigned) != 4)
> + return 0;
> + a.c = (struct S) { 255, 255, 255, 0 };
> + b.c = (struct S) { 255, 255, 255, 255 };
> + c.v = bar (&a, &b);
> + if (c.c.b != 255 || c.c.g != 255 || c.c.r != 255 || c.c.a != 0)
> + __builtin_abort ();
> + return 0;
> +}
>
> Jakub
More information about the Gcc-patches
mailing list