This is the mail archive of the gcc@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [RFC] Converting end of loop computations to MIN_EXPRs.


Sorry about the delayed response, I've been away for some time.

>
> I don't exactly understand why the general transform is not advisable.
> We already synthesize min/max operations.


>
> Can you elaborate on why you think that better code might be generated
> when not doing this transform?

The reason why I wasn't happy was because of the code we ended up
generating in this case for ARM comparing the simple examples showed
the following difference - while I'm pretty sure I can massage the
backend to generate the right form in this case with splitters I
probably didn't realize this when I wrote the mail. Given this, I
wonder if it is worth in general doing this transformation in a fold
type operation rather than restricting ourselves only to invariant
operands ?


The canonical example is as below :


#define min(x, y) ((x) < (y)) ? (x) : (y)
int foo (int i, int x ,int y)
{
// return ( i < x) && (i < y);
 return i < (min (x, y));
}


Case with min_expr:

	cmp	r2, r1	@ 8	*arm_smin_insn/1	[length = 8]
	movge	r2, r1
	cmp	r2, r0	@ 23	*arm_cmpsi_insn/3	[length = 4]
	movle	r0, #0	@ 24	*p *arm_movsi_insn/2	[length = 4]
	movgt	r0, #1	@ 25	*p *arm_movsi_insn/2	[length = 4]
	bx	lr	@ 28	*arm_return	[length = 12]


This might well be .

      cmp       r2, r0
      cmpge  r1, r0
     movle    r0, #0
     movgt    r0, #1
     bx           lr

Case without min_expr:

	cmp	r0, r2	@ 28	*cmp_and/6	[length = 8]
	cmplt	r0, r1
	movge	r0, #0	@ 29	*mov_scc	[length = 8]
	movlt	r0, #1
	bx	lr	@ 32	*arm_return	[length = 12]



>
>> #define min(x,y) ((x) <= (y) ? (x) : (y))
>>
>> void foo (int x, int y, int * ?a, int * b, int *c)
>> {
>> ?int i;
>>
>> ?for (i = 0;
>> ? ? ? i < x && i < y;
>> ? ? ? /* i < min (x, y); */
>> ? ? ? i++)
>> ? ?a[i] = b[i] * c[i];
>>
>> }
>>
>> The patch below deals with this case and I'm guessing that it could
>> also handle more of the comparison cases and come up with more
>> intelligent choices and should be made quite a lot more robust than
>> what it is right now.
>
> Yes. ?At least if you have i < 5 && i < y we canonicalize it to
> i <= 4 && i < y, so your pattern matching would fail.

Of-course considering overflow semantics you could transform this to
i < min (x +1, y) where the original condition was i <= x && i < y.

Thinking about it , it's probably right to state that

i op1 X && i op2 Y => i op min (X1, Y1)

when op1 and op2 are identical or according to the table below :

op1     op2             op            X1           Y1
  <         <=               <=            X + 1        Y
  >         >=               >                X            Y + 1
 < =        <                <=             X             Y + 1
  >=         >               >               X + 1         Y


Other than being careful about overflow semantics the second table
is probably worthwhile looking at -

>
> Btw, the canonical case this happens in is probably
>
> ? for (i = 0; i < n; ++i)
> ? ? for (j = 0; j < m && j < i; ++j)
> ? ? ? a[i][j] = ...
>
> thus iterating over the lower/upper triangular part of a non-square matrix
> (including or not including the diagonal, thus also j < m && j <= i

Ok thanks - fair enough .


Ramana

>
> Richard.
>
>> regards,
>> Ramana
>>
>>
>>
>> diff --git a/gcc/tree-ssa-loop-im.c b/gcc/tree-ssa-loop-im.c
>> index ce5eb20..a529536 100644
>> --- a/gcc/tree-ssa-loop-im.c
>> +++ b/gcc/tree-ssa-loop-im.c
>> @@ -563,6 +563,7 @@ stmt_cost (gimple stmt)
>>
>> ? switch (gimple_assign_rhs_code (stmt))
>> ? ? {
>> + ? ?case MIN_EXPR:
>> ? ? case MULT_EXPR:
>> ? ? case WIDEN_MULT_EXPR:
>> ? ? case WIDEN_MULT_PLUS_EXPR:
>> @@ -971,6 +972,124 @@ rewrite_reciprocal (gimple_stmt_iterator *bsi)
>> ? return stmt1;
>> ?}
>>
>> +/* We look for a sequence that is :
>> + ? def_stmt1 ?: x = a < b
>> + ? def_stmt2 ?: y = a < c
>> + ? stmt: z = x & y
>> + ? use_stmt_cond: if ( z != 0)
>> +
>> + ? where b, c are loop invariant .
>> +
>> + ? In which case we might as well replace this by :
>> +
>> + ? t = min (b, c)
>> + ? if ( a < t )
>> +*/
>> +
>> +static gimple
>> +rewrite_min_test (gimple_stmt_iterator *bsi)
>> +{
>> + ?gimple stmt, def_stmt_x, def_stmt_y, use_stmt_cond, stmt1;
>> + ?tree x, y, z, a, b, c, var, t, name;
>> + ?use_operand_p use;
>> + ?bool is_lhs_of_comparison = false;
>> +
>> + ?stmt = gsi_stmt (*bsi);
>> + ?z = gimple_assign_lhs (stmt);
>> +
>> + ?/* We start by looking at whether x is used in the
>> + ? ? right set of conditions. ?*/
>> + ?if (TREE_CODE (z) != SSA_NAME
>> + ? ? ?|| !single_imm_use (z, &use, &use_stmt_cond)
>> + ? ? ?|| gimple_code (use_stmt_cond) != GIMPLE_COND)
>> + ? ?return stmt;
>> +
>> + ?x = gimple_assign_rhs1 (stmt);
>> + ?y = gimple_assign_rhs2 (stmt);
>> +
>> + ?if (TREE_CODE (x) != SSA_NAME
>> + ? ? ?|| TREE_CODE (y) != SSA_NAME)
>> + ? ?return stmt;
>> +
>> + ?def_stmt_x = SSA_NAME_DEF_STMT (x);
>> + ?def_stmt_y = SSA_NAME_DEF_STMT (y);
>> +
>> + ?/* def_stmt_x and def_stmt_y should be of the
>> + ? ? form
>> +
>> + ? ? x = a cmp b
>> + ? ? y = a cmp c
>> +
>> + ? ? or
>> +
>> + ? ? x = b cmp a
>> + ? ? y = c cmp a
>> + ?*/
>> + ?if (!is_gimple_assign (def_stmt_x)
>> + ? ? ?|| !is_gimple_assign (def_stmt_y)
>> + ? ? ?|| (gimple_assign_rhs_code (def_stmt_x)
>> + ? ? ? ? != gimple_assign_rhs_code (def_stmt_y)))
>> + ? ?return stmt;
>> +
>> + ?if (gimple_assign_rhs1 (def_stmt_x) == gimple_assign_rhs1 (def_stmt_y)
>> + ? ? ?&& (gimple_assign_rhs_code (def_stmt_x) == LT_EXPR
>> + ? ? ? ? || gimple_assign_rhs_code (def_stmt_x) == LE_EXPR))
>> + ? ?{
>> + ? ? ?a = gimple_assign_rhs1 (def_stmt_x);
>> + ? ? ?b = gimple_assign_rhs2 (def_stmt_x);
>> + ? ? ?c = gimple_assign_rhs2 (def_stmt_y);
>> + ? ? ?is_lhs_of_comparison = true;
>> + ? ?}
>> + ?else
>> + ? ?{
>> + ? ? ?if (gimple_assign_rhs2 (def_stmt_x) == gimple_assign_rhs2 (def_stmt_y)
>> + ? ? ? ? && (gimple_assign_rhs_code (def_stmt_x) == GT_EXPR
>> + ? ? ? ? ? ? || gimple_assign_rhs_code (def_stmt_x) == GE_EXPR))
>> + ? ? ? {
>> + ? ? ? ? a = gimple_assign_rhs2 (def_stmt_x);
>> + ? ? ? ? b = gimple_assign_rhs1 (def_stmt_x);
>> + ? ? ? ? c = gimple_assign_rhs1 (def_stmt_y);
>> + ? ? ? }
>> + ? ? ?else
>> + ? ? ? return stmt;
>> + ? ?}
>> +
>> + ?if (outermost_invariant_loop (b, loop_containing_stmt (def_stmt_x)) != NULL
>> + ? ? ?&& outermost_invariant_loop (c, loop_containing_stmt
>> (def_stmt_y)) != NULL)
>> +
>> + ? ?{
>> + ? ? ?if (dump_file)
>> + ? ? ? fprintf (dump_file, "Found a potential transformation to min\n");
>> +
>> + ? ? ?/* mintmp = min (b , c). ?*/
>> +
>> + ? ? ?var = create_tmp_var (TREE_TYPE (b), "mintmp");
>> + ? ? ?add_referenced_var (var);
>> + ? ? ?t = fold_build2 (MIN_EXPR, TREE_TYPE (b), b, c);
>> + ? ? ?stmt1 = gimple_build_assign (var, t);
>> + ? ? ?name = make_ssa_name (var, stmt1);
>> + ? ? ?gimple_assign_set_lhs (stmt1, name);
>> + ? ? ?gimple_cond_set_code (use_stmt_cond, gimple_assign_rhs_code
>> (def_stmt_x));
>> +
>> +
>> + ? ? ?if (is_lhs_of_comparison)
>> + ? ? ? {
>> + ? ? ? ? gimple_cond_set_lhs (use_stmt_cond, a);
>> + ? ? ? ? gimple_cond_set_rhs (use_stmt_cond, name);
>> + ? ? ? }
>> + ? ? ?else
>> + ? ? ? {
>> + ? ? ? ? gimple_cond_set_lhs (use_stmt_cond, name);
>> + ? ? ? ? gimple_cond_set_rhs (use_stmt_cond, a);
>> + ? ? ? }
>> + ? ? ?update_stmt (use_stmt_cond);
>> + ? ? ?gsi_insert_before (bsi, stmt1, GSI_NEW_STMT);
>> + ? ? ?return stmt1;
>> + ? ?}
>> +
>> + ?return stmt;
>> +}
>> +
>> ?/* Check if the pattern at *BSI is a bittest of the form
>> ? ?(A >> B) & 1 != 0 and in this case rewrite it to A & (1 << B) != 0. ?*/
>>
>> @@ -1171,6 +1290,11 @@ determine_invariantness_stmt (struct
>> dom_walk_data *dw_data ATTRIBUTE_UNUSED,
>> ? ? ? ? ? ? ?&& TREE_CODE (op0) == SSA_NAME
>> ? ? ? ? ? ? ?&& has_single_use (op0))
>> ? ? ? ? ? ?stmt = rewrite_bittest (&bsi);
>> + ? ? ? ? else
>> + ? ? ? ? ? if (pos == MOVE_POSSIBLE
>> + ? ? ? ? ? ? ? && gimple_assign_rhs_code (stmt) == BIT_AND_EXPR)
>> + ? ? ? ? ? ? stmt = rewrite_min_test (&bsi);
>> +
>> ? ? ? ?}
>>
>> ? ? ? lim_data = init_lim_data (stmt);


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]