This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [PATCH (3/7)] Widening multiply-and-accumulate pattern matching
- From: Andrew Stubbs <ams at codesourcery dot com>
- To: Richard Guenther <richard dot guenther at gmail dot com>
- Cc: Michael Matz <matz at suse dot de>, gcc-patches at gcc dot gnu dot org, patches at linaro dot org
- Date: Mon, 11 Jul 2011 17:55:36 +0100
- Subject: Re: [PATCH (3/7)] Widening multiply-and-accumulate pattern matching
- References: <4E034EF2.3070503@codesourcery.com> <4E03504B.9060305@codesourcery.com> <BANLkTi=a8B-DZZdG2bmOWQ8A+-pCSEsAbQ@mail.gmail.com> <4E044559.5000105@linaro.org> <BANLkTimfKyrARS+eRy+MWYVO2gLqp85JsQ@mail.gmail.com> <1A77B5B39081C241A68E6CF16983025F020906F6@EU1-MAIL.mgc.mentorg.com> <BANLkTi=1TRM7uHWxLD3Se=S-ibe0C13T-Q@mail.gmail.com> <4E09B142.4020402@codesourcery.com> <BANLkTim0wLcSTgAL9isO+dVXxgq-U8B4Sw@mail.gmail.com> <Pine.LNX.4.64.1106281741170.17115@wotan.suse.de> <4E09FDEA.3000004@gmail.com> <Pine.LNX.4.64.1106281827230.17115@wotan.suse.de> <1A77B5B39081C241A68E6CF16983025F0209071D@EU1-MAIL.mgc.mentorg.com> <BANLkTimJob8C2L8kYkd-aU6FbM5nckc8Yg@mail.gmail.com> <4E11CCD1.4010505@codesourcery.com> <CAFiYyc3WczNWC19j-dWWRtDdOk=9E7vRtqidDpv8BW3V=W1Fpw@mail.gmail.com>
On 07/07/11 10:58, Richard Guenther wrote:
> I think you should assume that series of widenings, (int)(short)char_variable
> are already combined. Thus I believe you only need to consider a single
> conversion in valid_types_for_madd_p.
Ok, here's my new patch.
This version only allows one conversion between the multiply and
addition, so assumes that VRP has eliminated any needless ones.
That one conversion may either be a truncate, if the mode was too large
for the meaningful data, or an extend, which must be of the right flavour.
This means that this patch now has the same effect as the last patch,
for all valid cases (following your VRP patch), but rejects the cases
where the C language (unhelpfully) requires an intermediate temporary to
be of the 'wrong' signedness.
Hopefully the output will now be the same between both -O0 and -O2, and
programmers will continue to have to be careful about casting unsigned
variables whenever they expect purely unsigned math. :(
Is this one ok?
Andrew
2011-07-11 Andrew Stubbs <ams@codesourcery.com>
gcc/
* tree-ssa-math-opts.c (convert_plusminus_to_widen): Permit a single
conversion statement separating multiply-and-accumulate.
gcc/testsuite/
* gcc.target/arm/wmul-5.c: New file.
* gcc.target/arm/no-wmla-1.c: New file.
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/no-wmla-1.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=armv7-a" } */
+
+int
+foo (int a, short b, short c)
+{
+ int bc = b * c;
+ return a + (short)bc;
+}
+
+/* { dg-final { scan-assembler "mul" } } */
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/wmul-5.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=armv7-a" } */
+
+long long
+foo (long long a, char *b, char *c)
+{
+ return a + *b * *c;
+}
+
+/* { dg-final { scan-assembler "umlal" } } */
--- a/gcc/tree-ssa-math-opts.c
+++ b/gcc/tree-ssa-math-opts.c
@@ -2135,6 +2135,7 @@ convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple stmt,
enum tree_code code)
{
gimple rhs1_stmt = NULL, rhs2_stmt = NULL;
+ gimple conv1_stmt = NULL, conv2_stmt = NULL, conv_stmt;
tree type, type1, type2;
tree lhs, rhs1, rhs2, mult_rhs1, mult_rhs2, add_rhs;
enum tree_code rhs1_code = ERROR_MARK, rhs2_code = ERROR_MARK;
@@ -2175,6 +2176,38 @@ convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple stmt,
else
return false;
+ /* Allow for one conversion statement between the multiply
+ and addition/subtraction statement. If there is more than
+ one conversion then we assume they would invalidate this
+ transformation. If that's not the case then they should have
+ been folded before now. */
+ if (CONVERT_EXPR_CODE_P (rhs1_code))
+ {
+ conv1_stmt = rhs1_stmt;
+ rhs1 = gimple_assign_rhs1 (rhs1_stmt);
+ if (TREE_CODE (rhs1) == SSA_NAME)
+ {
+ rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);
+ if (is_gimple_assign (rhs1_stmt))
+ rhs1_code = gimple_assign_rhs_code (rhs1_stmt);
+ }
+ else
+ return false;
+ }
+ if (CONVERT_EXPR_CODE_P (rhs2_code))
+ {
+ conv2_stmt = rhs2_stmt;
+ rhs2 = gimple_assign_rhs1 (rhs2_stmt);
+ if (TREE_CODE (rhs2) == SSA_NAME)
+ {
+ rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);
+ if (is_gimple_assign (rhs2_stmt))
+ rhs2_code = gimple_assign_rhs_code (rhs2_stmt);
+ }
+ else
+ return false;
+ }
+
/* If code is WIDEN_MULT_EXPR then it would seem unnecessary to call
is_widening_mult_p, but we still need the rhs returns.
@@ -2188,6 +2221,7 @@ convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple stmt,
&type2, &mult_rhs2))
return false;
add_rhs = rhs2;
+ conv_stmt = conv1_stmt;
}
else if (rhs2_code == MULT_EXPR || rhs2_code == WIDEN_MULT_EXPR)
{
@@ -2195,6 +2229,7 @@ convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple stmt,
&type2, &mult_rhs2))
return false;
add_rhs = rhs1;
+ conv_stmt = conv2_stmt;
}
else
return false;
@@ -2202,6 +2237,33 @@ convert_plusminus_to_widen (gimple_stmt_iterator *gsi, gimple stmt,
if (TYPE_UNSIGNED (type1) != TYPE_UNSIGNED (type2))
return false;
+ /* If there was a conversion between the multiply and addition
+ then we need to make sure it fits a multiply-and-accumulate.
+ There should be a single mode change which does not change the
+ value. */
+ if (conv_stmt)
+ {
+ tree from_type = TREE_TYPE (gimple_assign_rhs1 (conv_stmt));
+ tree to_type = TREE_TYPE (gimple_assign_lhs (conv_stmt));
+ int data_size = TYPE_PRECISION (type1) + TYPE_PRECISION (type2);
+ bool is_unsigned = TYPE_UNSIGNED (type1) && TYPE_UNSIGNED (type2);
+
+ if (TYPE_PRECISION (from_type) > TYPE_PRECISION (to_type))
+ {
+ /* Conversion is a truncate. */
+ if (TYPE_PRECISION (to_type) < data_size)
+ return false;
+ }
+ else if (TYPE_PRECISION (from_type) < TYPE_PRECISION (to_type))
+ {
+ /* Conversion is an extend. Check it's the right sort. */
+ if (TYPE_UNSIGNED (from_type) != is_unsigned
+ && !(is_unsigned && TYPE_PRECISION (from_type) > data_size))
+ return false;
+ }
+ /* else convert is a no-op for our purposes. */
+ }
+
/* Verify that the machine can perform a widening multiply
accumulate in this mode/signedness combination, otherwise
this transformation is likely to pessimize code. */