[RFC] ipa bitwise constant propagation

Fri Aug 5 12:37:00 GMT 2016

Hi,

generally speaking, the ipa-cp.c and ipa-cp.[hc] bits look reasonable,
but I have a few comments:

On Thu, Aug 04, 2016 at 12:06:18PM +0530, Prathamesh Kulkarni wrote:
> Hi,
> This is a prototype patch for propagating known/unknown bits inter-procedurally.
> for integral types which propagates info obtained from get_nonzero_bits ().
> 
> Patch required making following changes:
> a) To make info from get_nonzero_bits() available to ipa

in IPA phase, you should not be looking at SSA_NAMEs, those will not
be available with LTO when you do not have access to function bodies
and thus also not to SSA_NAMES.  In IPA, you should only rely on hat
you have in jump functions.

> , I had to remove
> guard !nonzero_p in ccp_finalize. However that triggered the following ICE
> in get_ptr_info() for default_none.f95 (and several other fortran tests)
> with options: -fopenacc -O2
> ICE: http://pastebin.com/KjD7HMQi
> I confirmed with Richard that this was a latent issue.
> 
> 
> I suppose we would want to gate this on some flag, say -fipa-bit-cp ?

Yes, definitely.

> I haven't yet gated it on the flag, will do in next version of patch.
> I have added some very simple test-cases, I will try to add more
> meaningful ones.

> 
> Patch passes bootstrap+test on x86_64-unknown-linux-gnu
> and cross-tested on arm*-*-* and aarch64*-*-* with the exception
> of some fortran tests failing due to above ICE.
> 
> As next steps, I am planning to extend it to handle alignment propagation,
> and do further testing (lto-bootstrap, chromium).
> I would be grateful for feedback on the current patch.
> 
> Thanks,
> Prathamesh

> diff --git a/gcc/ipa-cp.c b/gcc/ipa-cp.c
> index 5b6cb9a..b770f6a 100644
> --- a/gcc/ipa-cp.c
> +++ b/gcc/ipa-cp.c
> @@ -120,6 +120,7 @@ along with GCC; see the file COPYING3.  If not see
>  #include "params.h"
>  #include "ipa-inline.h"
>  #include "ipa-utils.h"
> +#include "tree-ssa-ccp.h"
>  
>  template <typename valtype> class ipcp_value;
>  
> @@ -266,6 +267,40 @@ private:
>    bool meet_with_1 (unsigned new_align, unsigned new_misalign);
>  };
>  
> +/* Lattice of known bits, only capable of holding one value.
> +   Similar to ccp_prop_value_t, mask represents which bits of value are constant.
> +   If a bit in mask is set to 0, then the corresponding bit in
> +   value is known to be constant.  */
> +
> +class ipcp_bits_lattice
> +{
> +public:
> +  bool bottom_p () { return lattice_val == IPA_BITS_VARYING; }
> +  bool top_p () { return lattice_val == IPA_BITS_UNDEFINED; }
> +  bool constant_p () { return lattice_val == IPA_BITS_CONSTANT; }
> +  bool set_to_bottom ();
> +  bool set_to_constant (widest_int, widest_int, signop, unsigned);
> + 
> +  widest_int get_value () { return value; }
> +  widest_int get_mask () { return mask; }
> +  signop get_sign () { return sgn; }
> +  unsigned get_precision () { return precision; }
> +
> +  bool meet_with (ipcp_bits_lattice& other, enum tree_code, tree);
> +  bool meet_with (widest_int, widest_int, signop, unsigned);
> +
> +  void print (FILE *);
> +
> +private:
> +  enum { IPA_BITS_UNDEFINED, IPA_BITS_CONSTANT, IPA_BITS_VARYING } lattice_val;
> +  widest_int value, mask;
> +  signop sgn;
> +  unsigned precision;
> +
> +  bool meet_with_1 (widest_int, widest_int); 
> +  void get_value_and_mask (tree, widest_int *, widest_int *);
> +}; 
> +
>  /* Structure containing lattices for a parameter itself and for pieces of
>     aggregates that are passed in the parameter or by a reference in a parameter
>     plus some other useful flags.  */
> @@ -281,6 +316,8 @@ public:
>    ipcp_agg_lattice *aggs;
>    /* Lattice describing known alignment.  */
>    ipcp_alignment_lattice alignment;
> +  /* Lattice describing known bits.  */
> +  ipcp_bits_lattice bits_lattice;
>    /* Number of aggregate lattices */
>    int aggs_count;
>    /* True if aggregate data were passed by reference (as opposed to by
> @@ -458,6 +495,21 @@ ipcp_alignment_lattice::print (FILE * f)
>      fprintf (f, "         Alignment %u, misalignment %u\n", align, misalign);
>  }
>  
> +void
> +ipcp_bits_lattice::print (FILE *f)
> +{
> +  if (top_p ())
> +    fprintf (f, "         Bits unknown (TOP)\n");
> +  else if (bottom_p ())
> +    fprintf (f, "         Bits unusable (BOTTOM)\n");
> +  else
> +    {
> +      fprintf (f, "         Bits: value = "); print_hex (get_value (), f);
> +      fprintf (f, ", mask = "); print_hex (get_mask (), f);
> +      fprintf (f, "\n");
> +    }
> +}
> +
>  /* Print all ipcp_lattices of all functions to F.  */
>  
>  static void
> @@ -484,6 +536,7 @@ print_all_lattices (FILE * f, bool dump_sources, bool dump_benefits)
>  	  fprintf (f, "         ctxs: ");
>  	  plats->ctxlat.print (f, dump_sources, dump_benefits);
>  	  plats->alignment.print (f);
> +	  plats->bits_lattice.print (f);
>  	  if (plats->virt_call)
>  	    fprintf (f, "        virt_call flag set\n");
>  
> @@ -911,6 +964,161 @@ ipcp_alignment_lattice::meet_with (const ipcp_alignment_lattice &other,
>    return meet_with_1 (other.align, adjusted_misalign);
>  }
>  
> +/* Set lattice value to bottom, if it already isn't the case.  */
> +
> +bool
> +ipcp_bits_lattice::set_to_bottom ()
> +{
> +  if (bottom_p ())
> +    return false;
> +  lattice_val = IPA_BITS_VARYING;
> +  value = 0;
> +  mask = -1;
> +  return true;
> +}
> +
> +/* Set to constant if it isn't already. Only meant to be called
> +   when switching state from TOP.  */
> +
> +bool
> +ipcp_bits_lattice::set_to_constant (widest_int value, widest_int mask,
> +				    signop sgn, unsigned precision)
> +{
> +  gcc_assert (top_p ());
> +  this->lattice_val = IPA_BITS_CONSTANT;
> +  this->value = value;
> +  this->mask = mask;
> +  this->sgn = sgn;
> +  this->precision = precision;
> +  return true;
> +}
> +
> +/* Convert operand to value, mask form.  */
> +
> +void
> +ipcp_bits_lattice::get_value_and_mask (tree operand, widest_int *valuep, widest_int *maskp)
> +{
> +  wide_int get_nonzero_bits (const_tree);
> +
> +  if (TREE_CODE (operand) == INTEGER_CST)
> +    {
> +      *valuep = wi::to_widest (operand); 
> +      *maskp = 0;
> +    }
> +  else if (TREE_CODE (operand) == SSA_NAME)

IIUC, operand is the operand from pass-through jump function and that
should never be an SSA_NAME.  I have even looked at how we generate
them and it seems fairly safe to say that they never are.  If you have
seen an SSA_NAME here, it is a bug and please let me know because
sooner or later it will cause an assert.

> +    {
> +      *valuep = 0;
> +      *maskp = widest_int::from (get_nonzero_bits (operand), UNSIGNED);
> +    }
> +  else
> +    gcc_unreachable ();

The operand however can be any any other is_gimple_ip_invariant tree.
I assume that you could hit this gcc_unreachable only in a program
with undefined behavior (or with a Fortran CONST_DECL?) but you should
not ICE here.

> +}  
> +
> +/* Meet operation, similar to ccp_lattice_meet, we xor values
> +   if this->value, value have different values at same bit positions, we want
> +   to drop that bit to varying. Return true if mask is changed.
> +   This function assumes that the lattice value is in CONSTANT state  */
> +
> +bool
> +ipcp_bits_lattice::meet_with_1 (widest_int value, widest_int mask)
> +{
> +  gcc_assert (constant_p ());
> +  
> +  widest_int old_mask = this->mask;
> +  this->mask = (this->mask | mask) | (this->value ^ value);
> +
> +  if (wi::sext (this->mask, this->precision) == -1)
> +    return set_to_bottom ();
> +
> +  bool changed = this->mask != old_mask;
> +  return changed;
> +}
> +
> +/* Meet the bits lattice with operand
> +   described by <value, mask, sgn, precision.  */
> +
> +bool
> +ipcp_bits_lattice::meet_with (widest_int value, widest_int mask,
> +			      signop sgn, unsigned precision)
> +{
> +  if (bottom_p ())
> +    return false;
> +
> +  if (top_p ())
> +    {
> +      if (wi::sext (mask, precision) == -1)
> +	return set_to_bottom ();
> +      return set_to_constant (value, mask, sgn, precision);
> +    }
> +
> +  return meet_with_1 (value, mask);

What if precisions do not match?

> +}
> +
> +/* Meet bits lattice with the result of bit_value_binop_1 (other, operand)
> +   if code is binary operation or bit_value_unop_1 (other) if code is unary op.
> +   In the case when code is nop_expr, no adjustment is required. */
> +
> +bool
> +ipcp_bits_lattice::meet_with (ipcp_bits_lattice& other, enum tree_code code, tree operand)
> +{
> +  if (other.bottom_p ())
> +    return set_to_bottom ();
> +
> +  if (bottom_p () || other.top_p ())
> +    return false;
> +
> +  widest_int adjusted_value, adjusted_mask;
> +
> +  if (TREE_CODE_CLASS (code) == tcc_binary)
> +    {
> +      tree type = TREE_TYPE (operand);
> +      gcc_assert (INTEGRAL_TYPE_P (type));
> +      widest_int o_value, o_mask;
> +      get_value_and_mask (operand, &o_value, &o_mask);
> +
> +      signop sgn = other.get_sign ();
> +      unsigned prec = other.get_precision ();
> +
> +      bit_value_binop_1 (code, sgn, prec, &adjusted_value, &adjusted_mask,
> +			 sgn, prec, other.get_value (), other.get_mask (),
> +			 TYPE_SIGN (type), TYPE_PRECISION (type), o_value, o_mask);

It is probably just me not being particularly sharp on a Friday
afternoon and I might not understand the semantics of mask well (also,
you did not document it :-), but... assume that we are looking at a
binary and operation, other comes from an SSA pointer and its mask
would be binary 100 and its value 0 because that's what you set for
ssa names in ipa-prop.h, and the operand is binary value 101, which
means that get_value_and_mask returns mask 0 and value 101.  Now,
bit_value_binop_1 would return value 0 & 101 = 0 and mask according to

(m1 | m2) & ((v1 | m1) & (v2 | m2))

so in our case

(100b & 0) & ((0 | 100b) & (101b | 0)) = 0 & 100b = 0.

So both value and mask would be zero, which, if this was the only
incoming value to the lattice, I am afraid you would later on in
ipcp_update_bits interpret as that no bits can be non-zero.  Yet the
third bit clearly could.

I think that this is the only place where you interpret mask as a mask
and actually use it for and-ing, everywhere else you interpret it
basically only as the result of get_nonzero_bits and use it for
or-ing.

> +
> +      if (wi::sext (adjusted_mask, prec) == -1)
> +	return set_to_bottom ();
> +    }
> +
> +  else if (TREE_CODE_CLASS (code) == tcc_unary)
> +    {
> +      signop sgn = other.get_sign ();
> +      unsigned prec = other.get_precision ();
> +
> +      bit_value_unop_1 (code, sgn, prec, &adjusted_value,
> +			&adjusted_mask, sgn, prec, other.get_value (),
> +			other.get_mask ());
> +
> +      if (wi::sext (adjusted_mask, prec) == -1)
> +	return set_to_bottom ();
> +    }
> +
> +  else if (code == NOP_EXPR)
> +    {
> +      adjusted_value = other.value;
> +      adjusted_mask = other.mask;
> +    }
> +
> +  else
> +    return set_to_bottom ();
> +
> +  if (top_p ())
> +    {
> +      if (wi::sext (adjusted_mask, other.get_precision ()) == -1)
> +	return set_to_bottom ();
> +      return set_to_constant (adjusted_value, adjusted_mask, other.get_sign (), other.get_precision ());
> +    }
> +  else
> +    return meet_with_1 (adjusted_value, adjusted_mask);

Again, What if precisions do not match?

> +}
> +
>  /* Mark bot aggregate and scalar lattices as containing an unknown variable,
>     return true is any of them has not been marked as such so far.  */
>  
> @@ -922,6 +1130,7 @@ set_all_contains_variable (struct ipcp_param_lattices *plats)
>    ret |= plats->ctxlat.set_contains_variable ();
>    ret |= set_agg_lats_contain_variable (plats);
>    ret |= plats->alignment.set_to_bottom ();
> +  ret |= plats->bits_lattice.set_to_bottom ();
>    return ret;
>  }
>  
> @@ -1003,6 +1212,7 @@ initialize_node_lattices (struct cgraph_node *node)
>  	      plats->ctxlat.set_to_bottom ();
>  	      set_agg_lats_to_bottom (plats);
>  	      plats->alignment.set_to_bottom ();
> +	      plats->bits_lattice.set_to_bottom ();
>  	    }
>  	  else
>  	    set_all_contains_variable (plats);
> @@ -1621,6 +1831,57 @@ propagate_alignment_accross_jump_function (cgraph_edge *cs,
>      }
>  }
>  
> +/* Propagate bits across jfunc that is associated with
> +   edge cs and update dest_lattice accordingly.  */
> +
> +bool
> +propagate_bits_accross_jump_function (cgraph_edge *cs, ipa_jump_func *jfunc,
> +				      ipcp_bits_lattice *dest_lattice)
> +{
> +  if (dest_lattice->bottom_p ())
> +    return false;
> +
> +  if (jfunc->type == IPA_JF_PASS_THROUGH)
> +    {
> +      struct ipa_node_params *caller_info = IPA_NODE_REF (cs->caller);
> +      enum tree_code code = ipa_get_jf_pass_through_operation (jfunc);
> +      tree operand = NULL_TREE;
> +
> +      if (code != NOP_EXPR)
> +	operand = ipa_get_jf_pass_through_operand (jfunc);
> +
> +      int src_idx = ipa_get_jf_pass_through_formal_id (jfunc);
> +      struct ipcp_param_lattices *src_lats
> +	= ipa_get_parm_lattices (caller_info, src_idx);
> +
> +      /* Try to proapgate bits if src_lattice is bottom, but jfunc is known.

propagate

> +	 for eg consider:
> +	 int f(int x)
> +	 {
> +	   g (x & 0xff);
> +	 }
> +	 Assume lattice for x is bottom, however we can still propagate
> +	 result of x & 0xff == 0xff, which gets computed during ccp1 pass
> +	 and we store it in jump function during analysis stage.  */
> +
> +      if (src_lats->bits_lattice.bottom_p ()
> +	  && jfunc->bits.known)
> +	return dest_lattice->meet_with (jfunc->bits.value, jfunc->bits.mask,
> +				        jfunc->bits.sgn, jfunc->bits.precision);
> +      else
> +	return dest_lattice->meet_with (src_lats->bits_lattice, code, operand);
> +    }
> +
> +  else if (jfunc->type == IPA_JF_ANCESTOR)
> +    return dest_lattice->set_to_bottom ();
> +
> +  else if (jfunc->bits.known) 
> +    return dest_lattice->meet_with (jfunc->bits.value, jfunc->bits.mask,
> +				    jfunc->bits.sgn, jfunc->bits.precision);
> +  else
> +    return dest_lattice->set_to_bottom ();
> +}
> +

...

> diff --git a/gcc/ipa-prop.h b/gcc/ipa-prop.h
> index e32d078..d69a071 100644
> --- a/gcc/ipa-prop.h
> +++ b/gcc/ipa-prop.h
> @@ -154,6 +154,16 @@ struct GTY(()) ipa_alignment
>    unsigned misalign;
>  };
>  
> +/* Information about zero/non-zero bits.  */
> +struct GTY(()) ipa_bits
> +{
> +  bool known;
> +  widest_int value;
> +  widest_int mask;
> +  enum signop sgn;
> +  unsigned precision;
> +};

Please order the fields according to their size, the largest first and
add a comment describing each one of them.  As I explained above, it
is not immediately clear why the mask would be a mask, for example.

Nevertheless, thanks for looking into this.  It would be nice to have
this for pointers too, not least because we could represent
non-NULL-ness this way, which could be very interesting for cloning
and inlining.  But those are further steps, once normal propagation
works.

Martin