[PATCH] Introducing SAD (Sum of Absolute Differences) operation to GCC vectorizer.
Cong Hou
congh@google.com
Fri Nov 1 02:04:00 GMT 2013
According to your comments, I made the following modifications to this patch:
1. Now SAD pattern does not require the first and second operands to
be unsigned. And two versions (signed/unsigned) of the SAD optabs are
defined: usad_optab and ssad_optab.
2. Use expand_simple_binop instead of gen_rtx_PLUS to generate the
plus expression in sse.md. Also change the type of the second/third
operands to be nonimmediate_operand.
3. Add the document for SAD_EXPR.
4. Verify the operands of SAD_EXPR.
5. Create a new target: vect_usad_char, and use it in the test case.
The updated patch is pasted below.
Thank you!
Cong
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 8a38316..d528307 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,23 @@
+2013-10-29 Cong Hou <congh@google.com>
+
+ * tree-vect-patterns.c (vect_recog_sad_pattern): New function for SAD
+ pattern recognition.
+ (type_conversion_p): PROMOTION is true if it's a type promotion
+ conversion, and false otherwise. Return true if the given expression
+ is a type conversion one.
+ * tree-vectorizer.h: Adjust the number of patterns.
+ * tree.def: Add SAD_EXPR.
+ * optabs.def: Add sad_optab.
+ * cfgexpand.c (expand_debug_expr): Add SAD_EXPR case.
+ * expr.c (expand_expr_real_2): Likewise.
+ * gimple-pretty-print.c (dump_ternary_rhs): Likewise.
+ * gimple.c (get_gimple_rhs_num_ops): Likewise.
+ * optabs.c (optab_for_tree_code): Likewise.
+ * tree-cfg.c (estimate_operator_cost): Likewise.
+ * tree-ssa-operands.c (get_expr_operands): Likewise.
+ * tree-vect-loop.c (get_initial_def_for_reduction): Likewise.
+ * config/i386/sse.md: Add SSE2 and AVX2 expand for SAD.
+
2013-10-14 David Malcolm <dmalcolm@redhat.com>
* dumpfile.h (gcc::dump_manager): New class, to hold state
diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c
index 7ed29f5..9ec761a 100644
--- a/gcc/cfgexpand.c
+++ b/gcc/cfgexpand.c
@@ -2730,6 +2730,7 @@ expand_debug_expr (tree exp)
{
case COND_EXPR:
case DOT_PROD_EXPR:
+ case SAD_EXPR:
case WIDEN_MULT_PLUS_EXPR:
case WIDEN_MULT_MINUS_EXPR:
case FMA_EXPR:
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index c3f6c94..5b97576 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -6052,6 +6052,40 @@
DONE;
})
+(define_expand "usadv16qi"
+ [(match_operand:V4SI 0 "register_operand")
+ (match_operand:V16QI 1 "register_operand")
+ (match_operand:V16QI 2 "nonimmediate_operand")
+ (match_operand:V4SI 3 "nonimmediate_operand")]
+ "TARGET_SSE2"
+{
+ rtx t1 = gen_reg_rtx (V2DImode);
+ rtx t2 = gen_reg_rtx (V4SImode);
+ emit_insn (gen_sse2_psadbw (t1, operands[1], operands[2]));
+ convert_move (t2, t1, 0);
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0],
+ expand_simple_binop (V4SImode, PLUS, t2, operands[3],
+ NULL, 0, OPTAB_DIRECT)));
+ DONE;
+})
+
+(define_expand "usadv32qi"
+ [(match_operand:V8SI 0 "register_operand")
+ (match_operand:V32QI 1 "register_operand")
+ (match_operand:V32QI 2 "nonimmediate_operand")
+ (match_operand:V8SI 3 "nonimmediate_operand")]
+ "TARGET_AVX2"
+{
+ rtx t1 = gen_reg_rtx (V4DImode);
+ rtx t2 = gen_reg_rtx (V8SImode);
+ emit_insn (gen_avx2_psadbw (t1, operands[1], operands[2]));
+ convert_move (t2, t1, 0);
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0],
+ expand_simple_binop (V8SImode, PLUS, t2, operands[3],
+ NULL, 0, OPTAB_DIRECT)));
+ DONE;
+})
+
(define_insn "ashr<mode>3"
[(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x")
(ashiftrt:VI24_AVX2
diff --git a/gcc/doc/generic.texi b/gcc/doc/generic.texi
index ccecd6e..381ee09 100644
--- a/gcc/doc/generic.texi
+++ b/gcc/doc/generic.texi
@@ -1707,6 +1707,7 @@ its sole argument yields the representation for @code{ap}.
@tindex VEC_PACK_TRUNC_EXPR
@tindex VEC_PACK_SAT_EXPR
@tindex VEC_PACK_FIX_TRUNC_EXPR
+@tindex SAD_EXPR
@table @code
@item VEC_LSHIFT_EXPR
@@ -1787,6 +1788,15 @@ value, it is taken from the second operand. It
should never evaluate to
any other value currently, but optimizations should not rely on that
property. In contrast with a @code{COND_EXPR}, all operands are always
evaluated.
+
+@item SAD_EXPR
+This node represents the Sum of Absolute Differences operation. The three
+operands must be vectors of integral types. The first and second operand
+must have the same type. The size of the vector element of the third
+operand must be at least twice the size of the vector element of the
+first and second one. The SAD is calculated between the first and second
+operands, added to the third operand, and returned.
+
@end table
diff --git a/gcc/expr.c b/gcc/expr.c
index 4975a64..1db8a49 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -9026,6 +9026,20 @@ expand_expr_real_2 (sepops ops, rtx target,
enum machine_mode tmode,
return target;
}
+ case SAD_EXPR:
+ {
+ tree oprnd0 = treeop0;
+ tree oprnd1 = treeop1;
+ tree oprnd2 = treeop2;
+ rtx op2;
+
+ expand_operands (oprnd0, oprnd1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);
+ op2 = expand_normal (oprnd2);
+ target = expand_widen_pattern_expr (ops, op0, op1, op2,
+ target, unsignedp);
+ return target;
+ }
+
case REALIGN_LOAD_EXPR:
{
tree oprnd0 = treeop0;
diff --git a/gcc/gimple-pretty-print.c b/gcc/gimple-pretty-print.c
index f0f8166..514ddd1 100644
--- a/gcc/gimple-pretty-print.c
+++ b/gcc/gimple-pretty-print.c
@@ -425,6 +425,16 @@ dump_ternary_rhs (pretty_printer *buffer, gimple
gs, int spc, int flags)
dump_generic_node (buffer, gimple_assign_rhs3 (gs), spc, flags, false);
pp_greater (buffer);
break;
+
+ case SAD_EXPR:
+ pp_string (buffer, "SAD_EXPR <");
+ dump_generic_node (buffer, gimple_assign_rhs1 (gs), spc, flags, false);
+ pp_string (buffer, ", ");
+ dump_generic_node (buffer, gimple_assign_rhs2 (gs), spc, flags, false);
+ pp_string (buffer, ", ");
+ dump_generic_node (buffer, gimple_assign_rhs3 (gs), spc, flags, false);
+ pp_greater (buffer);
+ break;
case VEC_PERM_EXPR:
pp_string (buffer, "VEC_PERM_EXPR <");
diff --git a/gcc/gimple.c b/gcc/gimple.c
index a12dd67..4975959 100644
--- a/gcc/gimple.c
+++ b/gcc/gimple.c
@@ -2562,6 +2562,7 @@ get_gimple_rhs_num_ops (enum tree_code code)
|| (SYM) == WIDEN_MULT_PLUS_EXPR \
|| (SYM) == WIDEN_MULT_MINUS_EXPR \
|| (SYM) == DOT_PROD_EXPR \
+ || (SYM) == SAD_EXPR \
|| (SYM) == REALIGN_LOAD_EXPR \
|| (SYM) == VEC_COND_EXPR \
|| (SYM) == VEC_PERM_EXPR \
diff --git a/gcc/optabs.c b/gcc/optabs.c
index 06a626c..16e8f4f 100644
--- a/gcc/optabs.c
+++ b/gcc/optabs.c
@@ -462,6 +462,9 @@ optab_for_tree_code (enum tree_code code, const_tree type,
case DOT_PROD_EXPR:
return TYPE_UNSIGNED (type) ? udot_prod_optab : sdot_prod_optab;
+ case SAD_EXPR:
+ return TYPE_UNSIGNED (type) ? usad_optab : ssad_optab;
+
case WIDEN_MULT_PLUS_EXPR:
return (TYPE_UNSIGNED (type)
? (TYPE_SATURATING (type)
diff --git a/gcc/optabs.def b/gcc/optabs.def
index 6b924ac..377763e 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -248,6 +248,8 @@ OPTAB_D (sdot_prod_optab, "sdot_prod$I$a")
OPTAB_D (ssum_widen_optab, "widen_ssum$I$a3")
OPTAB_D (udot_prod_optab, "udot_prod$I$a")
OPTAB_D (usum_widen_optab, "widen_usum$I$a3")
+OPTAB_D (usad_optab, "usad$I$a")
+OPTAB_D (ssad_optab, "ssad$I$a")
OPTAB_D (vec_extract_optab, "vec_extract$a")
OPTAB_D (vec_init_optab, "vec_init$a")
OPTAB_D (vec_pack_sfix_trunc_optab, "vec_pack_sfix_trunc_$a")
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 075d071..1698912 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2013-10-29 Cong Hou <congh@google.com>
+
+ * gcc.dg/vect/vect-reduc-sad.c: New.
+ * lib/target-supports.exp (check_effective_target_vect_usad_char): New.
+
2013-10-14 Tobias Burnus <burnus@net-b.de>
PR fortran/58658
diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-sad.c
b/gcc/testsuite/gcc.dg/vect/vect-reduc-sad.c
new file mode 100644
index 0000000..15a625f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-sad.c
@@ -0,0 +1,54 @@
+/* { dg-require-effective-target vect_usad_char } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 64
+#define SAD N*N/2
+
+unsigned char X[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
+unsigned char Y[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
+
+/* Sum of absolute differences between arrays of unsigned char types.
+ Detected as a sad pattern.
+ Vectorized on targets that support sad for unsigned chars. */
+
+__attribute__ ((noinline)) int
+foo (int len)
+{
+ int i;
+ int result = 0;
+
+ for (i = 0; i < len; i++)
+ result += abs (X[i] - Y[i]);
+
+ return result;
+}
+
+
+int
+main (void)
+{
+ int i;
+ int sad;
+
+ check_vect ();
+
+ for (i = 0; i < N; i++)
+ {
+ X[i] = i;
+ Y[i] = N - i;
+ __asm__ volatile ("");
+ }
+
+ sad = foo (N);
+ if (sad != SAD)
+ abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vect_recog_sad_pattern: detected" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
diff --git a/gcc/testsuite/lib/target-supports.exp
b/gcc/testsuite/lib/target-supports.exp
index 7eb4dfe..01ee6f2 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -3672,6 +3672,26 @@ proc check_effective_target_vect_udot_hi { } {
return $et_vect_udot_hi_saved
}
+# Return 1 if the target plus current options supports a vector
+# sad operation of unsigned chars, 0 otherwise.
+#
+# This won't change for different subtargets so cache the result.
+
+proc check_effective_target_vect_usad_char { } {
+ global et_vect_usad_char
+
+ if [info exists et_vect_usad_char_saved] {
+ verbose "check_effective_target_vect_usad_char: using cached result" 2
+ } else {
+ set et_vect_usad_char_saved 0
+ if { ([istarget i?86-*-*]
+ || [istarget x86_64-*-*]) } {
+ set et_vect_usad_char_saved 1
+ }
+ }
+    verbose "check_effective_target_vect_usad_char: returning $et_vect_usad_char_saved" 2
+ return $et_vect_usad_char_saved
+}
# Return 1 if the target plus current options supports a vector
# demotion (packing) of shorts (to chars) and ints (to shorts)
diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c
index 8b66791..c8f3d33 100644
--- a/gcc/tree-cfg.c
+++ b/gcc/tree-cfg.c
@@ -3796,6 +3796,36 @@ verify_gimple_assign_ternary (gimple stmt)
return false;
+ case SAD_EXPR:
+ if (!useless_type_conversion_p (rhs1_type, rhs2_type)
+ || !useless_type_conversion_p (lhs_type, rhs3_type)
+ || 2 * GET_MODE_BITSIZE (GET_MODE_INNER
+ (TYPE_MODE (TREE_TYPE (rhs1_type))))
+ > GET_MODE_BITSIZE (GET_MODE_INNER
+ (TYPE_MODE (TREE_TYPE (lhs_type)))))
+ {
+ error ("type mismatch in sad expression");
+ debug_generic_expr (lhs_type);
+ debug_generic_expr (rhs1_type);
+ debug_generic_expr (rhs2_type);
+ debug_generic_expr (rhs3_type);
+ return true;
+ }
+
+ if (TREE_CODE (rhs1_type) != VECTOR_TYPE
+ || TREE_CODE (rhs2_type) != VECTOR_TYPE
+ || TREE_CODE (rhs3_type) != VECTOR_TYPE)
+ {
+ error ("vector types expected in sad expression");
+ debug_generic_expr (lhs_type);
+ debug_generic_expr (rhs1_type);
+ debug_generic_expr (rhs2_type);
+ debug_generic_expr (rhs3_type);
+ return true;
+ }
+
+ return false;
+
case DOT_PROD_EXPR:
case REALIGN_LOAD_EXPR:
/* FIXME. */
diff --git a/gcc/tree-inline.c b/gcc/tree-inline.c
index 2221b9c..44261a3 100644
--- a/gcc/tree-inline.c
+++ b/gcc/tree-inline.c
@@ -3601,6 +3601,7 @@ estimate_operator_cost (enum tree_code code,
eni_weights *weights,
case WIDEN_SUM_EXPR:
case WIDEN_MULT_EXPR:
case DOT_PROD_EXPR:
+ case SAD_EXPR:
case WIDEN_MULT_PLUS_EXPR:
case WIDEN_MULT_MINUS_EXPR:
case WIDEN_LSHIFT_EXPR:
diff --git a/gcc/tree-ssa-operands.c b/gcc/tree-ssa-operands.c
index 603f797..393efc3 100644
--- a/gcc/tree-ssa-operands.c
+++ b/gcc/tree-ssa-operands.c
@@ -854,6 +854,7 @@ get_expr_operands (gimple stmt, tree *expr_p, int flags)
}
case DOT_PROD_EXPR:
+ case SAD_EXPR:
case REALIGN_LOAD_EXPR:
case WIDEN_MULT_PLUS_EXPR:
case WIDEN_MULT_MINUS_EXPR:
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 638b981..89aa8c7 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -3607,6 +3607,7 @@ get_initial_def_for_reduction (gimple stmt, tree init_val,
{
case WIDEN_SUM_EXPR:
case DOT_PROD_EXPR:
+ case SAD_EXPR:
case PLUS_EXPR:
case MINUS_EXPR:
case BIT_IOR_EXPR:
diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c
index 0a4e812..58a6666 100644
--- a/gcc/tree-vect-patterns.c
+++ b/gcc/tree-vect-patterns.c
@@ -45,6 +45,8 @@ static gimple vect_recog_widen_mult_pattern
(vec<gimple> *, tree *,
tree *);
static gimple vect_recog_dot_prod_pattern (vec<gimple> *, tree *,
tree *);
+static gimple vect_recog_sad_pattern (vec<gimple> *, tree *,
+ tree *);
static gimple vect_recog_pow_pattern (vec<gimple> *, tree *, tree *);
static gimple vect_recog_over_widening_pattern (vec<gimple> *, tree *,
tree *);
@@ -62,6 +64,7 @@ static vect_recog_func_ptr
vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
vect_recog_widen_mult_pattern,
vect_recog_widen_sum_pattern,
vect_recog_dot_prod_pattern,
+ vect_recog_sad_pattern,
vect_recog_pow_pattern,
vect_recog_widen_shift_pattern,
vect_recog_over_widening_pattern,
@@ -140,9 +143,8 @@ vect_single_imm_use (gimple def_stmt)
}
/* Check whether NAME, an ssa-name used in USE_STMT,
- is a result of a type promotion or demotion, such that:
+ is a result of a type promotion, such that:
DEF_STMT: NAME = NOP (name0)
- where the type of name0 (ORIG_TYPE) is smaller/bigger than the type of NAME.
If CHECK_SIGN is TRUE, check that either both types are signed or both are
unsigned. */
@@ -189,10 +191,8 @@ type_conversion_p (tree name, gimple use_stmt,
bool check_sign,
if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
*promotion = true;
- else if (TYPE_PRECISION (*orig_type) >= (TYPE_PRECISION (type) * 2))
- *promotion = false;
else
- return false;
+ *promotion = false;
if (!vect_is_simple_use (oprnd0, *def_stmt, loop_vinfo,
bb_vinfo, &dummy_gimple, &dummy, &dt))
@@ -433,6 +433,240 @@ vect_recog_dot_prod_pattern (vec<gimple> *stmts,
tree *type_in,
}
+/* Function vect_recog_sad_pattern
+
+ Try to find the following Sum of Absolute Difference (SAD) pattern:
+
+ type x_t, y_t;
+ signed TYPE1 diff, abs_diff;
+ TYPE2 sum = init;
+ loop:
+ sum_0 = phi <init, sum_1>
+ S1 x_t = ...
+ S2 y_t = ...
+ S3 x_T = (TYPE1) x_t;
+ S4 y_T = (TYPE1) y_t;
+ S5 diff = x_T - y_T;
+ S6 abs_diff = ABS_EXPR <diff>;
+ [S7 abs_diff = (TYPE2) abs_diff; #optional]
+ S8 sum_1 = abs_diff + sum_0;
+
+ where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
+ same size of 'TYPE1' or bigger. This is a special case of a reduction
+ computation.
+
+ Input:
+
+ * STMTS: Contains a stmt from which the pattern search begins. In the
+ example, when this function is called with S8, the pattern
+ {S3,S4,S5,S6,S7,S8} will be detected.
+
+ Output:
+
+ * TYPE_IN: The type of the input arguments to the pattern.
+
+ * TYPE_OUT: The type of the output of this pattern.
+
+ * Return value: A new stmt that will be used to replace the sequence of
+ stmts that constitute the pattern. In this case it will be:
+ SAD_EXPR <x_t, y_t, sum_0>
+ */
+
+static gimple
+vect_recog_sad_pattern (vec<gimple> *stmts, tree *type_in,
+ tree *type_out)
+{
+ gimple last_stmt = (*stmts)[0];
+ tree sad_oprnd0, sad_oprnd1;
+ stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
+ tree half_type;
+ loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
+ struct loop *loop;
+ bool promotion;
+
+ if (!loop_info)
+ return NULL;
+
+ loop = LOOP_VINFO_LOOP (loop_info);
+
+ if (!is_gimple_assign (last_stmt))
+ return NULL;
+
+ tree sum_type = gimple_expr_type (last_stmt);
+
+ /* Look for the following pattern
+ DX = (TYPE1) X;
+ DY = (TYPE1) Y;
+ DDIFF = DX - DY;
+ DAD = ABS_EXPR <DDIFF>;
+         DAD = (TYPE2) DAD;
+ sum_1 = DAD + sum_0;
+ In which
+ - DX is at least double the size of X
+ - DY is at least double the size of Y
+ - DX, DY, DDIFF, DAD all have the same type
+ - sum is the same size of DAD or bigger
+ - sum has been recognized as a reduction variable.
+
+ This is equivalent to:
+ DDIFF = X w- Y; #widen sub
+ DAD = ABS_EXPR <DDIFF>;
+ sum_1 = DAD w+ sum_0; #widen summation
+ or
+ DDIFF = X w- Y; #widen sub
+ DAD = ABS_EXPR <DDIFF>;
+ sum_1 = DAD + sum_0; #summation
+ */
+
+ /* Starting from LAST_STMT, follow the defs of its uses in search
+ of the above pattern. */
+
+ if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
+ return NULL;
+
+ tree plus_oprnd0, plus_oprnd1;
+
+ if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
+ {
+ /* Has been detected as widening-summation? */
+
+ gimple stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
+ sum_type = gimple_expr_type (stmt);
+ if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
+ return NULL;
+ plus_oprnd0 = gimple_assign_rhs1 (stmt);
+ plus_oprnd1 = gimple_assign_rhs2 (stmt);
+ half_type = TREE_TYPE (plus_oprnd0);
+ }
+ else
+ {
+ gimple def_stmt;
+
+ if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
+ return NULL;
+ plus_oprnd0 = gimple_assign_rhs1 (last_stmt);
+ plus_oprnd1 = gimple_assign_rhs2 (last_stmt);
+ if (!types_compatible_p (TREE_TYPE (plus_oprnd0), sum_type)
+ || !types_compatible_p (TREE_TYPE (plus_oprnd1), sum_type))
+ return NULL;
+
+ /* The type conversion could be promotion, demotion,
+ or just signed -> unsigned. */
+ if (type_conversion_p (plus_oprnd0, last_stmt, false,
+ &half_type, &def_stmt, &promotion))
+ plus_oprnd0 = gimple_assign_rhs1 (def_stmt);
+ else
+ half_type = sum_type;
+ }
+
+ /* So far so good. Since last_stmt was detected as a (summation) reduction,
+ we know that plus_oprnd1 is the reduction variable (defined by a
loop-header
+ phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
+ Then check that plus_oprnd0 is defined by an abs_expr */
+
+ if (TREE_CODE (plus_oprnd0) != SSA_NAME)
+ return NULL;
+
+ tree abs_type = half_type;
+ gimple abs_stmt = SSA_NAME_DEF_STMT (plus_oprnd0);
+
+ /* It could not be the sad pattern if the abs_stmt is outside the loop. */
+ if (!gimple_bb (abs_stmt) || !flow_bb_inside_loop_p (loop,
gimple_bb (abs_stmt)))
+ return NULL;
+
+ /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
+ inside the loop (in case we are analyzing an outer-loop). */
+ if (!is_gimple_assign (abs_stmt))
+ return NULL;
+
+ stmt_vec_info abs_stmt_vinfo = vinfo_for_stmt (abs_stmt);
+ gcc_assert (abs_stmt_vinfo);
+ if (STMT_VINFO_DEF_TYPE (abs_stmt_vinfo) != vect_internal_def)
+ return NULL;
+ if (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR)
+ return NULL;
+
+ tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
+ if (!types_compatible_p (TREE_TYPE (abs_oprnd), abs_type))
+ return NULL;
+ if (TYPE_UNSIGNED (abs_type))
+ return NULL;
+
+ /* We then detect if the operand of abs_expr is defined by a minus_expr. */
+
+ if (TREE_CODE (abs_oprnd) != SSA_NAME)
+ return NULL;
+
+ gimple diff_stmt = SSA_NAME_DEF_STMT (abs_oprnd);
+
+ /* It could not be the sad pattern if the diff_stmt is outside the loop. */
+ if (!gimple_bb (diff_stmt)
+ || !flow_bb_inside_loop_p (loop, gimple_bb (diff_stmt)))
+ return NULL;
+
+ /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
+ inside the loop (in case we are analyzing an outer-loop). */
+ if (!is_gimple_assign (diff_stmt))
+ return NULL;
+
+ stmt_vec_info diff_stmt_vinfo = vinfo_for_stmt (diff_stmt);
+ gcc_assert (diff_stmt_vinfo);
+ if (STMT_VINFO_DEF_TYPE (diff_stmt_vinfo) != vect_internal_def)
+ return NULL;
+ if (gimple_assign_rhs_code (diff_stmt) != MINUS_EXPR)
+ return NULL;
+
+ tree half_type0, half_type1;
+ gimple def_stmt;
+
+ tree minus_oprnd0 = gimple_assign_rhs1 (diff_stmt);
+ tree minus_oprnd1 = gimple_assign_rhs2 (diff_stmt);
+
+ if (!types_compatible_p (TREE_TYPE (minus_oprnd0), abs_type)
+ || !types_compatible_p (TREE_TYPE (minus_oprnd1), abs_type))
+ return NULL;
+ if (!type_conversion_p (minus_oprnd0, diff_stmt, false,
+ &half_type0, &def_stmt, &promotion)
+ || !promotion)
+ return NULL;
+ sad_oprnd0 = gimple_assign_rhs1 (def_stmt);
+
+ if (!type_conversion_p (minus_oprnd1, diff_stmt, false,
+ &half_type1, &def_stmt, &promotion)
+ || !promotion)
+ return NULL;
+ sad_oprnd1 = gimple_assign_rhs1 (def_stmt);
+
+ if (!types_compatible_p (half_type0, half_type1))
+ return NULL;
+ if (TYPE_PRECISION (abs_type) < TYPE_PRECISION (half_type0) * 2
+ || TYPE_PRECISION (sum_type) < TYPE_PRECISION (half_type0) * 2)
+ return NULL;
+
+ *type_in = TREE_TYPE (sad_oprnd0);
+ *type_out = sum_type;
+
+ /* Pattern detected. Create a stmt to be used to replace the pattern: */
+ tree var = vect_recog_temp_ssa_var (sum_type, NULL);
+ gimple pattern_stmt = gimple_build_assign_with_ops
+ (SAD_EXPR, var, sad_oprnd0, sad_oprnd1, plus_oprnd1);
+
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "vect_recog_sad_pattern: detected: ");
+ dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
+ dump_printf (MSG_NOTE, "\n");
+ }
+
+ /* We don't allow changing the order of the computation in the inner-loop
+ when doing outer-loop vectorization. */
+ gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));
+
+ return pattern_stmt;
+}
+
+
/* Handle widening operation by a constant. At the moment we support MULT_EXPR
and LSHIFT_EXPR.
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 8b7b345..0aac75b 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -1044,7 +1044,7 @@ extern void vect_slp_transform_bb (basic_block);
Additional pattern recognition functions can (and will) be added
in the future. */
typedef gimple (* vect_recog_func_ptr) (vec<gimple> *, tree *, tree *);
-#define NUM_PATTERNS 11
+#define NUM_PATTERNS 12
void vect_pattern_recog (loop_vec_info, bb_vec_info);
/* In tree-vectorizer.c. */
diff --git a/gcc/tree.def b/gcc/tree.def
index 88c850a..e15ee61 100644
--- a/gcc/tree.def
+++ b/gcc/tree.def
@@ -1155,6 +1155,12 @@ DEFTREECODE (DOT_PROD_EXPR, "dot_prod_expr",
tcc_expression, 3)
with the second argument. */
DEFTREECODE (WIDEN_SUM_EXPR, "widen_sum_expr", tcc_binary, 2)
+/* Widening sad (sum of absolute differences).
+ The first two arguments are of type t1 which should be integer.
+ The third argument and the result are of type t2, such that t2 is at least
+ twice the size of t1. */
+DEFTREECODE (SAD_EXPR, "sad_expr", tcc_expression, 3)
+
/* Widening multiplication.
The two arguments are of type t1.
The result is of type t2, such that t2 is at least twice
On Thu, Oct 31, 2013 at 3:34 AM, Uros Bizjak <ubizjak@gmail.com> wrote:
> Hello!
>
>> SAD (Sum of Absolute Differences) is a common and important algorithm
>> in image processing and other areas. SSE2 even introduced a new
>> instruction PSADBW for it. A SAD loop can be greatly accelerated by
>> this instruction after being vectorized. This patch introduced a new
>> operation SAD_EXPR and a SAD pattern recognizer in vectorizer.
>>
>> In order to express this new operation, a new expression SAD_EXPR is
>> introduced in tree.def, and the corresponding entry in optabs is
>> added. The patch also added the "define_expand" for SSE2 and AVX2
>> platforms for i386.
>
> +(define_expand "sadv16qi"
> + [(match_operand:V4SI 0 "register_operand")
> + (match_operand:V16QI 1 "register_operand")
> + (match_operand:V16QI 2 "register_operand")
> + (match_operand:V4SI 3 "register_operand")]
> + "TARGET_SSE2"
> +{
> + rtx t1 = gen_reg_rtx (V2DImode);
> + rtx t2 = gen_reg_rtx (V4SImode);
> + emit_insn (gen_sse2_psadbw (t1, operands[1], operands[2]));
> + convert_move (t2, t1, 0);
> + emit_insn (gen_rtx_SET (VOIDmode, operands[0],
> + gen_rtx_PLUS (V4SImode,
> + operands[3], t2)));
> + DONE;
> +})
>
> Please use generic expanders (expand_simple_binop) to generate plus
> expression. Also, please use nonimmediate_operand predicate for
> operand 2 and operand 3.
>
> Please note, that nonimmediate operands should be passed as the second
> input operand to commutative operators, to match their insn pattern
> layout.
>
> Uros.
-------------- next part --------------
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 8a38316..d528307 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,23 @@
+2013-10-29 Cong Hou <congh@google.com>
+
+ * tree-vect-patterns.c (vect_recog_sad_pattern): New function for SAD
+ pattern recognition.
+ (type_conversion_p): PROMOTION is true if it's a type promotion
+ conversion, and false otherwise. Return true if the given expression
+ is a type conversion one.
+ * tree-vectorizer.h: Adjust the number of patterns.
+ * tree.def: Add SAD_EXPR.
+ * optabs.def: Add sad_optab.
+ * cfgexpand.c (expand_debug_expr): Add SAD_EXPR case.
+ * expr.c (expand_expr_real_2): Likewise.
+ * gimple-pretty-print.c (dump_ternary_rhs): Likewise.
+ * gimple.c (get_gimple_rhs_num_ops): Likewise.
+ * optabs.c (optab_for_tree_code): Likewise.
+ * tree-cfg.c (estimate_operator_cost): Likewise.
+ * tree-ssa-operands.c (get_expr_operands): Likewise.
+ * tree-vect-loop.c (get_initial_def_for_reduction): Likewise.
+ * config/i386/sse.md: Add SSE2 and AVX2 expand for SAD.
+
2013-10-14 David Malcolm <dmalcolm@redhat.com>
* dumpfile.h (gcc::dump_manager): New class, to hold state
diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c
index 7ed29f5..9ec761a 100644
--- a/gcc/cfgexpand.c
+++ b/gcc/cfgexpand.c
@@ -2730,6 +2730,7 @@ expand_debug_expr (tree exp)
{
case COND_EXPR:
case DOT_PROD_EXPR:
+ case SAD_EXPR:
case WIDEN_MULT_PLUS_EXPR:
case WIDEN_MULT_MINUS_EXPR:
case FMA_EXPR:
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index c3f6c94..5b97576 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -6052,6 +6052,40 @@
DONE;
})
+(define_expand "usadv16qi"
+ [(match_operand:V4SI 0 "register_operand")
+ (match_operand:V16QI 1 "register_operand")
+ (match_operand:V16QI 2 "nonimmediate_operand")
+ (match_operand:V4SI 3 "nonimmediate_operand")]
+ "TARGET_SSE2"
+{
+ rtx t1 = gen_reg_rtx (V2DImode);
+ rtx t2 = gen_reg_rtx (V4SImode);
+ emit_insn (gen_sse2_psadbw (t1, operands[1], operands[2]));
+ convert_move (t2, t1, 0);
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0],
+ expand_simple_binop (V4SImode, PLUS, t2, operands[3],
+ NULL, 0, OPTAB_DIRECT)));
+ DONE;
+})
+
+(define_expand "usadv32qi"
+ [(match_operand:V8SI 0 "register_operand")
+ (match_operand:V32QI 1 "register_operand")
+ (match_operand:V32QI 2 "nonimmediate_operand")
+ (match_operand:V8SI 3 "nonimmediate_operand")]
+ "TARGET_AVX2"
+{
+ rtx t1 = gen_reg_rtx (V4DImode);
+ rtx t2 = gen_reg_rtx (V8SImode);
+ emit_insn (gen_avx2_psadbw (t1, operands[1], operands[2]));
+ convert_move (t2, t1, 0);
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0],
+ expand_simple_binop (V8SImode, PLUS, t2, operands[3],
+ NULL, 0, OPTAB_DIRECT)));
+ DONE;
+})
+
(define_insn "ashr<mode>3"
[(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x")
(ashiftrt:VI24_AVX2
diff --git a/gcc/doc/generic.texi b/gcc/doc/generic.texi
index ccecd6e..381ee09 100644
--- a/gcc/doc/generic.texi
+++ b/gcc/doc/generic.texi
@@ -1707,6 +1707,7 @@ its sole argument yields the representation for @code{ap}.
@tindex VEC_PACK_TRUNC_EXPR
@tindex VEC_PACK_SAT_EXPR
@tindex VEC_PACK_FIX_TRUNC_EXPR
+@tindex SAD_EXPR
@table @code
@item VEC_LSHIFT_EXPR
@@ -1787,6 +1788,15 @@ value, it is taken from the second operand. It should never evaluate to
any other value currently, but optimizations should not rely on that
property. In contrast with a @code{COND_EXPR}, all operands are always
evaluated.
+
+@item SAD_EXPR
+This node represents the Sum of Absolute Differences operation. The three
+operands must be vectors of integral types. The first and second operand
+must have the same type. The size of the vector element of the third
+operand must be at least twice the size of the vector element of the
+first and second one. The SAD is calculated between the first and second
+operands, added to the third operand, and returned.
+
@end table
diff --git a/gcc/expr.c b/gcc/expr.c
index 4975a64..1db8a49 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -9026,6 +9026,20 @@ expand_expr_real_2 (sepops ops, rtx target, enum machine_mode tmode,
return target;
}
+ case SAD_EXPR:
+ {
+ tree oprnd0 = treeop0;
+ tree oprnd1 = treeop1;
+ tree oprnd2 = treeop2;
+ rtx op2;
+
+ expand_operands (oprnd0, oprnd1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);
+ op2 = expand_normal (oprnd2);
+ target = expand_widen_pattern_expr (ops, op0, op1, op2,
+ target, unsignedp);
+ return target;
+ }
+
case REALIGN_LOAD_EXPR:
{
tree oprnd0 = treeop0;
diff --git a/gcc/gimple-pretty-print.c b/gcc/gimple-pretty-print.c
index f0f8166..514ddd1 100644
--- a/gcc/gimple-pretty-print.c
+++ b/gcc/gimple-pretty-print.c
@@ -425,6 +425,16 @@ dump_ternary_rhs (pretty_printer *buffer, gimple gs, int spc, int flags)
dump_generic_node (buffer, gimple_assign_rhs3 (gs), spc, flags, false);
pp_greater (buffer);
break;
+
+ case SAD_EXPR:
+ pp_string (buffer, "SAD_EXPR <");
+ dump_generic_node (buffer, gimple_assign_rhs1 (gs), spc, flags, false);
+ pp_string (buffer, ", ");
+ dump_generic_node (buffer, gimple_assign_rhs2 (gs), spc, flags, false);
+ pp_string (buffer, ", ");
+ dump_generic_node (buffer, gimple_assign_rhs3 (gs), spc, flags, false);
+ pp_greater (buffer);
+ break;
case VEC_PERM_EXPR:
pp_string (buffer, "VEC_PERM_EXPR <");
diff --git a/gcc/gimple.c b/gcc/gimple.c
index a12dd67..4975959 100644
--- a/gcc/gimple.c
+++ b/gcc/gimple.c
@@ -2562,6 +2562,7 @@ get_gimple_rhs_num_ops (enum tree_code code)
|| (SYM) == WIDEN_MULT_PLUS_EXPR \
|| (SYM) == WIDEN_MULT_MINUS_EXPR \
|| (SYM) == DOT_PROD_EXPR \
+ || (SYM) == SAD_EXPR \
|| (SYM) == REALIGN_LOAD_EXPR \
|| (SYM) == VEC_COND_EXPR \
|| (SYM) == VEC_PERM_EXPR \
diff --git a/gcc/optabs.c b/gcc/optabs.c
index 06a626c..16e8f4f 100644
--- a/gcc/optabs.c
+++ b/gcc/optabs.c
@@ -462,6 +462,9 @@ optab_for_tree_code (enum tree_code code, const_tree type,
case DOT_PROD_EXPR:
return TYPE_UNSIGNED (type) ? udot_prod_optab : sdot_prod_optab;
+ case SAD_EXPR:
+ return TYPE_UNSIGNED (type) ? usad_optab : ssad_optab;
+
case WIDEN_MULT_PLUS_EXPR:
return (TYPE_UNSIGNED (type)
? (TYPE_SATURATING (type)
diff --git a/gcc/optabs.def b/gcc/optabs.def
index 6b924ac..377763e 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -248,6 +248,8 @@ OPTAB_D (sdot_prod_optab, "sdot_prod$I$a")
OPTAB_D (ssum_widen_optab, "widen_ssum$I$a3")
OPTAB_D (udot_prod_optab, "udot_prod$I$a")
OPTAB_D (usum_widen_optab, "widen_usum$I$a3")
+OPTAB_D (usad_optab, "usad$I$a")
+OPTAB_D (ssad_optab, "ssad$I$a")
OPTAB_D (vec_extract_optab, "vec_extract$a")
OPTAB_D (vec_init_optab, "vec_init$a")
OPTAB_D (vec_pack_sfix_trunc_optab, "vec_pack_sfix_trunc_$a")
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 075d071..1698912 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,8 @@
+2013-10-29 Cong Hou <congh@google.com>
+
+ * gcc.dg/vect/vect-reduc-sad.c: New.
+ * lib/target-supports.exp (check_effective_target_vect_usad_char): New.
+
2013-10-14 Tobias Burnus <burnus@net-b.de>
PR fortran/58658
diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-sad.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-sad.c
new file mode 100644
index 0000000..15a625f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-sad.c
@@ -0,0 +1,54 @@
+/* { dg-require-effective-target vect_usad_char } */
+
+#include <stdarg.h>
+#include "tree-vect.h"
+
+#define N 64
+#define SAD N*N/2
+
+unsigned char X[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
+unsigned char Y[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
+
+/* Sum of absolute differences between arrays of unsigned char types.
+ Detected as a sad pattern.
+ Vectorized on targets that support sad for unsigned chars. */
+
+__attribute__ ((noinline)) int
+foo (int len)
+{
+ int i;
+ int result = 0;
+
+ for (i = 0; i < len; i++)
+ result += abs (X[i] - Y[i]);
+
+ return result;
+}
+
+
+int
+main (void)
+{
+ int i;
+ int sad;
+
+ check_vect ();
+
+ for (i = 0; i < N; i++)
+ {
+ X[i] = i;
+ Y[i] = N - i;
+ __asm__ volatile ("");
+ }
+
+ sad = foo (N);
+ if (sad != SAD)
+ abort ();
+
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vect_recog_sad_pattern: detected" 1 "vect" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
+
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 7eb4dfe..01ee6f2 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -3672,6 +3672,26 @@ proc check_effective_target_vect_udot_hi { } {
return $et_vect_udot_hi_saved
}
+# Return 1 if the target plus current options supports a vector
+# sad operation of unsigned chars, 0 otherwise.
+#
+# This won't change for different subtargets so cache the result.
+
+proc check_effective_target_vect_usad_char { } {
+ global et_vect_usad_char_saved
+
+ if [info exists et_vect_usad_char_saved] {
+ verbose "check_effective_target_vect_usad_char: using cached result" 2
+ } else {
+ set et_vect_usad_char_saved 0
+ if { ([istarget i?86-*-*]
+ || [istarget x86_64-*-*]) } {
+ set et_vect_usad_char_saved 1
+ }
+ }
+ verbose "check_effective_target_vect_usad_char: returning $et_vect_usad_char_saved" 2
+ return $et_vect_usad_char_saved
+}
# Return 1 if the target plus current options supports a vector
# demotion (packing) of shorts (to chars) and ints (to shorts)
diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c
index 8b66791..c8f3d33 100644
--- a/gcc/tree-cfg.c
+++ b/gcc/tree-cfg.c
@@ -3796,6 +3796,36 @@ verify_gimple_assign_ternary (gimple stmt)
return false;
+ case SAD_EXPR:
+ if (!useless_type_conversion_p (rhs1_type, rhs2_type)
+ || !useless_type_conversion_p (lhs_type, rhs3_type)
+ || 2 * GET_MODE_BITSIZE (GET_MODE_INNER
+ (TYPE_MODE (TREE_TYPE (rhs1_type))))
+ > GET_MODE_BITSIZE (GET_MODE_INNER
+ (TYPE_MODE (TREE_TYPE (lhs_type)))))
+ {
+ error ("type mismatch in sad expression");
+ debug_generic_expr (lhs_type);
+ debug_generic_expr (rhs1_type);
+ debug_generic_expr (rhs2_type);
+ debug_generic_expr (rhs3_type);
+ return true;
+ }
+
+ if (TREE_CODE (rhs1_type) != VECTOR_TYPE
+ || TREE_CODE (rhs2_type) != VECTOR_TYPE
+ || TREE_CODE (rhs3_type) != VECTOR_TYPE)
+ {
+ error ("vector types expected in sad expression");
+ debug_generic_expr (lhs_type);
+ debug_generic_expr (rhs1_type);
+ debug_generic_expr (rhs2_type);
+ debug_generic_expr (rhs3_type);
+ return true;
+ }
+
+ return false;
+
case DOT_PROD_EXPR:
case REALIGN_LOAD_EXPR:
/* FIXME. */
diff --git a/gcc/tree-inline.c b/gcc/tree-inline.c
index 2221b9c..44261a3 100644
--- a/gcc/tree-inline.c
+++ b/gcc/tree-inline.c
@@ -3601,6 +3601,7 @@ estimate_operator_cost (enum tree_code code, eni_weights *weights,
case WIDEN_SUM_EXPR:
case WIDEN_MULT_EXPR:
case DOT_PROD_EXPR:
+ case SAD_EXPR:
case WIDEN_MULT_PLUS_EXPR:
case WIDEN_MULT_MINUS_EXPR:
case WIDEN_LSHIFT_EXPR:
diff --git a/gcc/tree-ssa-operands.c b/gcc/tree-ssa-operands.c
index 603f797..393efc3 100644
--- a/gcc/tree-ssa-operands.c
+++ b/gcc/tree-ssa-operands.c
@@ -854,6 +854,7 @@ get_expr_operands (gimple stmt, tree *expr_p, int flags)
}
case DOT_PROD_EXPR:
+ case SAD_EXPR:
case REALIGN_LOAD_EXPR:
case WIDEN_MULT_PLUS_EXPR:
case WIDEN_MULT_MINUS_EXPR:
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 638b981..89aa8c7 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -3607,6 +3607,7 @@ get_initial_def_for_reduction (gimple stmt, tree init_val,
{
case WIDEN_SUM_EXPR:
case DOT_PROD_EXPR:
+ case SAD_EXPR:
case PLUS_EXPR:
case MINUS_EXPR:
case BIT_IOR_EXPR:
diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c
index 0a4e812..58a6666 100644
--- a/gcc/tree-vect-patterns.c
+++ b/gcc/tree-vect-patterns.c
@@ -45,6 +45,8 @@ static gimple vect_recog_widen_mult_pattern (vec<gimple> *, tree *,
tree *);
static gimple vect_recog_dot_prod_pattern (vec<gimple> *, tree *,
tree *);
+static gimple vect_recog_sad_pattern (vec<gimple> *, tree *,
+ tree *);
static gimple vect_recog_pow_pattern (vec<gimple> *, tree *, tree *);
static gimple vect_recog_over_widening_pattern (vec<gimple> *, tree *,
tree *);
@@ -62,6 +64,7 @@ static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
vect_recog_widen_mult_pattern,
vect_recog_widen_sum_pattern,
vect_recog_dot_prod_pattern,
+ vect_recog_sad_pattern,
vect_recog_pow_pattern,
vect_recog_widen_shift_pattern,
vect_recog_over_widening_pattern,
@@ -140,9 +143,8 @@ vect_single_imm_use (gimple def_stmt)
}
/* Check whether NAME, an ssa-name used in USE_STMT,
- is a result of a type promotion or demotion, such that:
+ is a result of a type conversion (promotion or demotion), such that:
DEF_STMT: NAME = NOP (name0)
- where the type of name0 (ORIG_TYPE) is smaller/bigger than the type of NAME.
If CHECK_SIGN is TRUE, check that either both types are signed or both are
unsigned. */
@@ -189,10 +191,8 @@ type_conversion_p (tree name, gimple use_stmt, bool check_sign,
if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
*promotion = true;
- else if (TYPE_PRECISION (*orig_type) >= (TYPE_PRECISION (type) * 2))
- *promotion = false;
else
- return false;
+ *promotion = false;
if (!vect_is_simple_use (oprnd0, *def_stmt, loop_vinfo,
bb_vinfo, &dummy_gimple, &dummy, &dt))
@@ -433,6 +433,240 @@ vect_recog_dot_prod_pattern (vec<gimple> *stmts, tree *type_in,
}
+/* Function vect_recog_sad_pattern
+
+ Try to find the following Sum of Absolute Difference (SAD) pattern:
+
+ type x_t, y_t;
+ signed TYPE1 diff, abs_diff;
+ TYPE2 sum = init;
+ loop:
+ sum_0 = phi <init, sum_1>
+ S1 x_t = ...
+ S2 y_t = ...
+ S3 x_T = (TYPE1) x_t;
+ S4 y_T = (TYPE1) y_t;
+ S5 diff = x_T - y_T;
+ S6 abs_diff = ABS_EXPR <diff>;
+ [S7 abs_diff = (TYPE2) abs_diff; #optional]
+ S8 sum_1 = abs_diff + sum_0;
+
+ where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
+ same size of 'TYPE1' or bigger. This is a special case of a reduction
+ computation.
+
+ Input:
+
+ * STMTS: Contains a stmt from which the pattern search begins. In the
+ example, when this function is called with S8, the pattern
+ {S3,S4,S5,S6,S7,S8} will be detected.
+
+ Output:
+
+ * TYPE_IN: The type of the input arguments to the pattern.
+
+ * TYPE_OUT: The type of the output of this pattern.
+
+ * Return value: A new stmt that will be used to replace the sequence of
+ stmts that constitute the pattern. In this case it will be:
+ SAD_EXPR <x_t, y_t, sum_0>
+ */
+
+static gimple
+vect_recog_sad_pattern (vec<gimple> *stmts, tree *type_in,
+ tree *type_out)
+{
+ gimple last_stmt = (*stmts)[0];
+ tree sad_oprnd0, sad_oprnd1;
+ stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
+ tree half_type;
+ loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
+ struct loop *loop;
+ bool promotion;
+
+ if (!loop_info)
+ return NULL;
+
+ loop = LOOP_VINFO_LOOP (loop_info);
+
+ if (!is_gimple_assign (last_stmt))
+ return NULL;
+
+ tree sum_type = gimple_expr_type (last_stmt);
+
+ /* Look for the following pattern
+ DX = (TYPE1) X;
+ DY = (TYPE1) Y;
+ DDIFF = DX - DY;
+ DAD = ABS_EXPR <DDIFF>;
+ DAD = (TYPE2) DAD;
+ sum_1 = DAD + sum_0;
+ In which
+ - DX is at least double the size of X
+ - DY is at least double the size of Y
+ - DX, DY, DDIFF, DAD all have the same type
+ - sum is the same size of DAD or bigger
+ - sum has been recognized as a reduction variable.
+
+ This is equivalent to:
+ DDIFF = X w- Y; #widen sub
+ DAD = ABS_EXPR <DDIFF>;
+ sum_1 = DAD w+ sum_0; #widen summation
+ or
+ DDIFF = X w- Y; #widen sub
+ DAD = ABS_EXPR <DDIFF>;
+ sum_1 = DAD + sum_0; #summation
+ */
+
+ /* Starting from LAST_STMT, follow the defs of its uses in search
+ of the above pattern. */
+
+ if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
+ return NULL;
+
+ tree plus_oprnd0, plus_oprnd1;
+
+ if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
+ {
+ /* Has been detected as widening-summation? */
+
+ gimple stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
+ sum_type = gimple_expr_type (stmt);
+ if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
+ return NULL;
+ plus_oprnd0 = gimple_assign_rhs1 (stmt);
+ plus_oprnd1 = gimple_assign_rhs2 (stmt);
+ half_type = TREE_TYPE (plus_oprnd0);
+ }
+ else
+ {
+ gimple def_stmt;
+
+ if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
+ return NULL;
+ plus_oprnd0 = gimple_assign_rhs1 (last_stmt);
+ plus_oprnd1 = gimple_assign_rhs2 (last_stmt);
+ if (!types_compatible_p (TREE_TYPE (plus_oprnd0), sum_type)
+ || !types_compatible_p (TREE_TYPE (plus_oprnd1), sum_type))
+ return NULL;
+
+ /* The type conversion could be promotion, demotion,
+ or just signed -> unsigned. */
+ if (type_conversion_p (plus_oprnd0, last_stmt, false,
+ &half_type, &def_stmt, &promotion))
+ plus_oprnd0 = gimple_assign_rhs1 (def_stmt);
+ else
+ half_type = sum_type;
+ }
+
+ /* So far so good. Since last_stmt was detected as a (summation) reduction,
+ we know that plus_oprnd1 is the reduction variable (defined by a loop-header
+ phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
+ Then check that plus_oprnd0 is defined by an abs_expr */
+
+ if (TREE_CODE (plus_oprnd0) != SSA_NAME)
+ return NULL;
+
+ tree abs_type = half_type;
+ gimple abs_stmt = SSA_NAME_DEF_STMT (plus_oprnd0);
+
+ /* It could not be the sad pattern if the abs_stmt is outside the loop. */
+ if (!gimple_bb (abs_stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (abs_stmt)))
+ return NULL;
+
+ /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
+ inside the loop (in case we are analyzing an outer-loop). */
+ if (!is_gimple_assign (abs_stmt))
+ return NULL;
+
+ stmt_vec_info abs_stmt_vinfo = vinfo_for_stmt (abs_stmt);
+ gcc_assert (abs_stmt_vinfo);
+ if (STMT_VINFO_DEF_TYPE (abs_stmt_vinfo) != vect_internal_def)
+ return NULL;
+ if (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR)
+ return NULL;
+
+ tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
+ if (!types_compatible_p (TREE_TYPE (abs_oprnd), abs_type))
+ return NULL;
+ if (TYPE_UNSIGNED (abs_type))
+ return NULL;
+
+ /* We then detect if the operand of abs_expr is defined by a minus_expr. */
+
+ if (TREE_CODE (abs_oprnd) != SSA_NAME)
+ return NULL;
+
+ gimple diff_stmt = SSA_NAME_DEF_STMT (abs_oprnd);
+
+ /* It could not be the sad pattern if the diff_stmt is outside the loop. */
+ if (!gimple_bb (diff_stmt)
+ || !flow_bb_inside_loop_p (loop, gimple_bb (diff_stmt)))
+ return NULL;
+
+ /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
+ inside the loop (in case we are analyzing an outer-loop). */
+ if (!is_gimple_assign (diff_stmt))
+ return NULL;
+
+ stmt_vec_info diff_stmt_vinfo = vinfo_for_stmt (diff_stmt);
+ gcc_assert (diff_stmt_vinfo);
+ if (STMT_VINFO_DEF_TYPE (diff_stmt_vinfo) != vect_internal_def)
+ return NULL;
+ if (gimple_assign_rhs_code (diff_stmt) != MINUS_EXPR)
+ return NULL;
+
+ tree half_type0, half_type1;
+ gimple def_stmt;
+
+ tree minus_oprnd0 = gimple_assign_rhs1 (diff_stmt);
+ tree minus_oprnd1 = gimple_assign_rhs2 (diff_stmt);
+
+ if (!types_compatible_p (TREE_TYPE (minus_oprnd0), abs_type)
+ || !types_compatible_p (TREE_TYPE (minus_oprnd1), abs_type))
+ return NULL;
+ if (!type_conversion_p (minus_oprnd0, diff_stmt, false,
+ &half_type0, &def_stmt, &promotion)
+ || !promotion)
+ return NULL;
+ sad_oprnd0 = gimple_assign_rhs1 (def_stmt);
+
+ if (!type_conversion_p (minus_oprnd1, diff_stmt, false,
+ &half_type1, &def_stmt, &promotion)
+ || !promotion)
+ return NULL;
+ sad_oprnd1 = gimple_assign_rhs1 (def_stmt);
+
+ if (!types_compatible_p (half_type0, half_type1))
+ return NULL;
+ if (TYPE_PRECISION (abs_type) < TYPE_PRECISION (half_type0) * 2
+ || TYPE_PRECISION (sum_type) < TYPE_PRECISION (half_type0) * 2)
+ return NULL;
+
+ *type_in = TREE_TYPE (sad_oprnd0);
+ *type_out = sum_type;
+
+ /* Pattern detected. Create a stmt to be used to replace the pattern: */
+ tree var = vect_recog_temp_ssa_var (sum_type, NULL);
+ gimple pattern_stmt = gimple_build_assign_with_ops
+ (SAD_EXPR, var, sad_oprnd0, sad_oprnd1, plus_oprnd1);
+
+ if (dump_enabled_p ())
+ {
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "vect_recog_sad_pattern: detected: ");
+ dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
+ dump_printf (MSG_NOTE, "\n");
+ }
+
+ /* We don't allow changing the order of the computation in the inner-loop
+ when doing outer-loop vectorization. */
+ gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));
+
+ return pattern_stmt;
+}
+
+
/* Handle widening operation by a constant. At the moment we support MULT_EXPR
and LSHIFT_EXPR.
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 8b7b345..0aac75b 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -1044,7 +1044,7 @@ extern void vect_slp_transform_bb (basic_block);
Additional pattern recognition functions can (and will) be added
in the future. */
typedef gimple (* vect_recog_func_ptr) (vec<gimple> *, tree *, tree *);
-#define NUM_PATTERNS 11
+#define NUM_PATTERNS 12
void vect_pattern_recog (loop_vec_info, bb_vec_info);
/* In tree-vectorizer.c. */
diff --git a/gcc/tree.def b/gcc/tree.def
index 88c850a..e15ee61 100644
--- a/gcc/tree.def
+++ b/gcc/tree.def
@@ -1155,6 +1155,12 @@ DEFTREECODE (DOT_PROD_EXPR, "dot_prod_expr", tcc_expression, 3)
with the second argument. */
DEFTREECODE (WIDEN_SUM_EXPR, "widen_sum_expr", tcc_binary, 2)
+/* Widening sad (sum of absolute differences).
+ The first two arguments are of type t1 which should be integer.
+ The third argument and the result are of type t2, such that t2 is at least
+ twice the size of t1. */
+DEFTREECODE (SAD_EXPR, "sad_expr", tcc_expression, 3)
+
/* Widening multiplication.
The two arguments are of type t1.
The result is of type t2, such that t2 is at least twice
More information about the Gcc-patches
mailing list