[RFC] Partial vectors for s390

Richard Sandiford richard.sandiford@arm.com
Tue Oct 26 14:18:34 GMT 2021


Robin Dapp <rdapp@linux.ibm.com> writes:
> Hi Richard,
>
>> We already have code to probe the predicates of the underlying
>> define_expands/insns to see whether they support certain constant
>> IFN arguments; see e.g. internal_gather_scatter_fn_supported_p.
>> We could do something similar here: add an extra operand to the optab,
>> and an extra argument to the IFN, that gives a bias amount.
>> The PowerPC version would require 0, the System Z version would
>> require -1.  The vectoriser would probe to see which value
>> it should use.
>> 
>> Doing it that way ensures that the gimple is still self-describing.
>> It avoids gimple semantics depending on target hooks.
>
> As I don't have much previous exposure to the vectoriser code, I cobbled 
> together something pretty ad-hoc (attached).  Does this come somehow 
> close to what you have in mind?

Yeah, looks good.

> internal_len_load_supported_p should rather be called 
> internal_len_load_bias_supported_p or so I guess and the part where we 
> exclude multiple loop_lens is still missing.

Since we only support one bias, it might be better to make the
internal-fn.c function return the bias as an int (with some marker
value for “not supported”), so that the caller doesn't need to probe
both values.

> Would we also check for a viable bias there and then either accept
> multiple lens or not?

Yeah, I think so.

Thanks,
Richard

>
> Regards
>   Robin
>
> commit 2320dbfdfe1477b15a2ac59847d2a52e68de49ab
> Author: Robin Dapp <rdapp@linux.ibm.com>
> Date:   Tue Oct 26 14:36:08 2021 +0200
>
>     bias1
>
> diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
> index 8312d08aab2..bf97d3e471a 100644
> --- a/gcc/internal-fn.c
> +++ b/gcc/internal-fn.c
> @@ -2696,9 +2696,9 @@ expand_call_mem_ref (tree type, gcall *stmt, int index)
>  static void
>  expand_partial_load_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
>  {
> -  class expand_operand ops[3];
> -  tree type, lhs, rhs, maskt;
> -  rtx mem, target, mask;
> +  class expand_operand ops[4];
> +  tree type, lhs, rhs, maskt, biast;
> +  rtx mem, target, mask, bias;
>    insn_code icode;
>  
>    maskt = gimple_call_arg (stmt, 2);
> @@ -2727,7 +2727,18 @@ expand_partial_load_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
>  				 TYPE_UNSIGNED (TREE_TYPE (maskt)));
>    else
>      create_input_operand (&ops[2], mask, TYPE_MODE (TREE_TYPE (maskt)));
> -  expand_insn (icode, 3, ops);
> +  if (optab == len_load_optab)
> +    {
> +      biast = gimple_call_arg (stmt, 3);
> +      bias = expand_normal (biast);
> +      create_input_operand (&ops[3], bias, SImode);
> +    }
> +
> +  if (optab != len_load_optab)
> +    expand_insn (icode, 3, ops);
> +  else
> +    expand_insn (icode, 4, ops);
> +
>    if (!rtx_equal_p (target, ops[0].value))
>      emit_move_insn (target, ops[0].value);
>  }
> @@ -2741,9 +2752,9 @@ expand_partial_load_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
>  static void
>  expand_partial_store_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
>  {
> -  class expand_operand ops[3];
> -  tree type, lhs, rhs, maskt;
> -  rtx mem, reg, mask;
> +  class expand_operand ops[4];
> +  tree type, lhs, rhs, maskt, biast;
> +  rtx mem, reg, mask, bias;
>    insn_code icode;
>  
>    maskt = gimple_call_arg (stmt, 2);
> @@ -2770,7 +2781,17 @@ expand_partial_store_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
>  				 TYPE_UNSIGNED (TREE_TYPE (maskt)));
>    else
>      create_input_operand (&ops[2], mask, TYPE_MODE (TREE_TYPE (maskt)));
> -  expand_insn (icode, 3, ops);
> +  if (optab == len_store_optab)
> +    {
> +      biast = gimple_call_arg (stmt, 4);
> +      bias = expand_normal (biast);
> +      create_input_operand (&ops[3], bias, SImode);
> +    }
> +
> +  if (optab != len_store_optab)
> +    expand_insn (icode, 3, ops);
> +  else
> +    expand_insn (icode, 4, ops);
>  }
>  
>  #define expand_mask_store_optab_fn expand_partial_store_optab_fn
> @@ -4154,6 +4175,25 @@ internal_gather_scatter_fn_supported_p (internal_fn ifn, tree vector_type,
>  	  && insn_operand_matches (icode, 3 + output_ops, GEN_INT (scale)));
>  }
>  
> +bool
> +internal_len_load_supported_p (internal_fn ifn, tree load_type, int bias)
> +{
> +  if (bias > 0 || bias < -1)
> +    return false;
> +
> +  machine_mode mode = TYPE_MODE (load_type);
> +
> +  optab optab = direct_internal_fn_optab (ifn);
> +  insn_code icode = direct_optab_handler (optab, mode);
> +  int output_ops = internal_load_fn_p (ifn) ? 1 : 0;
> +
> +  if (icode != CODE_FOR_nothing
> +      && insn_operand_matches (icode, 2 + output_ops, GEN_INT (bias)))
> +    return true;
> +
> +  return false;
> +}
> +
>  /* Return true if the target supports IFN_CHECK_{RAW,WAR}_PTRS function IFN
>     for pointers of type TYPE when the accesses have LENGTH bytes and their
>     common byte alignment is ALIGN.  */
> diff --git a/gcc/internal-fn.h b/gcc/internal-fn.h
> index 19d0f849a5a..d0bf9941bcc 100644
> --- a/gcc/internal-fn.h
> +++ b/gcc/internal-fn.h
> @@ -225,6 +225,7 @@ extern int internal_fn_mask_index (internal_fn);
>  extern int internal_fn_stored_value_index (internal_fn);
>  extern bool internal_gather_scatter_fn_supported_p (internal_fn, tree,
>  						    tree, tree, int);
> +extern bool internal_len_load_supported_p (internal_fn ifn, tree, int);
>  extern bool internal_check_ptrs_fn_supported_p (internal_fn, tree,
>  						poly_uint64, unsigned int);
>  
> diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
> index d7723b1a92a..50537763ace 100644
> --- a/gcc/tree-vect-stmts.c
> +++ b/gcc/tree-vect-stmts.c
> @@ -8272,12 +8272,14 @@ vectorizable_store (vec_info *vinfo,
>  		  opt_machine_mode new_ovmode
>  		    = get_len_load_store_mode (vmode, false);
>  		  machine_mode new_vmode = new_ovmode.require ();
> +		  tree vtype = vectype;
>  		  /* Need conversion if it's wrapped with VnQI.  */
>  		  if (vmode != new_vmode)
>  		    {
>  		      tree new_vtype
>  			= build_vector_type_for_mode (unsigned_intQI_type_node,
> -						      new_vmode);
> +						  new_vmode);
> +		      vtype = new_vtype;
>  		      tree var
>  			= vect_get_new_ssa_name (new_vtype, vect_simple_var);
>  		      vec_oprnd
> @@ -8289,9 +8291,29 @@ vectorizable_store (vec_info *vinfo,
>  						   gsi);
>  		      vec_oprnd = var;
>  		    }
> +
> +		  /* Check which bias value to use.  Default is 0. */
> +		  tree bias = build_int_cst (intSI_type_node, 0);
> +		  tree new_len = final_len;
> +		  if (!internal_len_load_supported_p (IFN_LEN_LOAD, vtype, 0)
> +		      && internal_len_load_supported_p (IFN_LEN_LOAD,
> +							vtype, -1))
> +		    {
> +		      bias = build_int_cst (intSI_type_node, -1);
> +		      new_len = make_ssa_name (TREE_TYPE (final_len));
> +		      gassign *m1 = gimple_build_assign (new_len,
> +							 MINUS_EXPR,
> +							 final_len,
> +							 build_one_cst
> +							 (TREE_TYPE
> +							  (final_len)));
> +		      vect_finish_stmt_generation (vinfo, stmt_info, m1,
> +						   gsi);
> +		    }
>  		  gcall *call
> -		    = gimple_build_call_internal (IFN_LEN_STORE, 4, dataref_ptr,
> -						  ptr, final_len, vec_oprnd);
> +		    = gimple_build_call_internal (IFN_LEN_STORE, 5, dataref_ptr,
> +						  ptr, new_len, vec_oprnd,
> +						  bias);
>  		  gimple_call_set_nothrow (call, true);
>  		  vect_finish_stmt_generation (vinfo, stmt_info, call, gsi);
>  		  new_stmt = call;
> @@ -9588,24 +9610,50 @@ vectorizable_load (vec_info *vinfo,
>  					       vec_num * j + i);
>  			tree ptr = build_int_cst (ref_type,
>  						  align * BITS_PER_UNIT);
> +
> +			machine_mode vmode = TYPE_MODE (vectype);
> +			opt_machine_mode new_ovmode
> +			  = get_len_load_store_mode (vmode, true);
> +			machine_mode new_vmode = new_ovmode.require ();
> +			tree qi_type = unsigned_intQI_type_node;
> +			tree new_vtype
> +			  = build_vector_type_for_mode (qi_type, new_vmode);
> +
> +			tree vtype = vectype;
> +			if (vmode != new_vmode)
> +			  vtype = new_vtype;
> +
> +			/* Check which bias value to use.  Default is 0. */
> +			tree bias = build_int_cst (intSI_type_node, 0);
> +			tree new_len = final_len;
> +			if (!internal_len_load_supported_p (IFN_LEN_LOAD,
> +							    vtype, 0)
> +			    && internal_len_load_supported_p (IFN_LEN_LOAD,
> +							      vtype, -1))
> +			  {
> +			    bias = build_int_cst (intSI_type_node, -1);
> +			    new_len = make_ssa_name (TREE_TYPE (final_len));
> +			    gassign *m1 = gimple_build_assign (new_len,
> +							       MINUS_EXPR,
> +							       final_len,
> +							       build_one_cst
> +							       (TREE_TYPE
> +								(final_len)));
> +			    vect_finish_stmt_generation (vinfo, stmt_info, m1,
> +							 gsi);
> +			  }
> +
>  			gcall *call
> -			  = gimple_build_call_internal (IFN_LEN_LOAD, 3,
> +			  = gimple_build_call_internal (IFN_LEN_LOAD, 4,
>  							dataref_ptr, ptr,
> -							final_len);
> +							new_len, bias);
>  			gimple_call_set_nothrow (call, true);
>  			new_stmt = call;
>  			data_ref = NULL_TREE;
>  
>  			/* Need conversion if it's wrapped with VnQI.  */
> -			machine_mode vmode = TYPE_MODE (vectype);
> -			opt_machine_mode new_ovmode
> -			  = get_len_load_store_mode (vmode, true);
> -			machine_mode new_vmode = new_ovmode.require ();
>  			if (vmode != new_vmode)
>  			  {
> -			    tree qi_type = unsigned_intQI_type_node;
> -			    tree new_vtype
> -			      = build_vector_type_for_mode (qi_type, new_vmode);
>  			    tree var = vect_get_new_ssa_name (new_vtype,
>  							      vect_simple_var);
>  			    gimple_set_lhs (call, var);


More information about the Gcc-patches mailing list