[PATCH, rs6000] [v3] Folding of vector loads in GIMPLE

Will Schmidt will_schmidt@vnet.ibm.com
Fri Sep 15 15:00:00 GMT 2017


Hi,

Add code to handle gimple folding for the vec_ld builtins.
Remove the now-obsolete vec_ld folding code from rs6000-c.c.  The
surrounding comments have been adjusted slightly so they continue to
read correctly for the existing vec_st code.
    
The resulting code is specifically verified by the powerpc/fold-vec-ld-*.c
tests, which are already in-tree; a sketch of the transformation is below.
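
To make the transformation concrete, here is a minimal hand-written
sketch (the function names are invented for illustration; this is not
one of the in-tree tests verbatim).  vec_ld adds the offset to the
address, masks off the low four bits, and loads the vector from the
resulting 16-byte-aligned address, so the second function is a C-level
equivalent of the folded form:

#include <altivec.h>
#include <stdint.h>

vector signed int
get_vsi (long long offset, vector signed int *p)
{
  return vec_ld (offset, p);
}

/* Hand-written C equivalent of what the gimple fold produces for the
   call above: add the offset, mask the address down to a 16-byte
   boundary, then do an aligned vector load.  */
vector signed int
get_vsi_equiv (long long offset, vector signed int *p)
{
  return *(vector signed int *) (((uintptr_t) p + offset) & ~(uintptr_t) 15);
}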
    
For V2 of this patch, I removed the chunk of code that prohibited the
gimple fold from occurring in BE environments.  That restriction had
fixed an issue for me earlier during development, but it turned out not
to be necessary.  (Removing it introduced a failure in the LE
environment, hence V3.)

For V3 of this patch:
 - Added a reworked statement that prohibits folding a vector load when
   -maltivec=be is specified in an LE environment.
 - Adjusted the arg1_type definition to use ptr_type_node, per feedback,
   discussion, and experimentation with the generated code (see the
   sketch below).
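
On the second point: the pointer argument frequently arrives under a
cast, so building the intermediate address arithmetic on a specific
pointed-to type would drag TBAA into the picture; using ptr_type_node
sidesteps that.  A hypothetical call site of that shape:

#include <altivec.h>

vector signed int
load_ints (long long offset, int *buf)
{
  /* The second argument is cast away from its declared type, so the
     fold should not bake aliasing assumptions into the address math.  */
  return vec_ld (offset, (vector signed int *) buf);
}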

Regtest to be run on power6 and newer.
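
If it is useful to anyone, the relevant tests can be exercised on
their own with something along these lines (invocation from memory,
adjust for your tree and target board as needed):

  make check-gcc RUNTESTFLAGS="powerpc.exp=fold-vec-ld-*.c"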
    
OK for trunk?  (assuming successful completion of regtest).
    
Thanks,
-Will
    
[gcc]
    
2017-09-15  Will Schmidt  <will_schmidt@vnet.ibm.com>
    
    * config/rs6000/rs6000.c (rs6000_gimple_fold_builtin): Add handling
    for early folding of vector loads (ALTIVEC_BUILTIN_LVX_*).
    * config/rs6000/rs6000-c.c (altivec_resolve_overloaded_builtin):
    Remove now-obsolete code for handling ALTIVEC_BUILTIN_VEC_LD.


diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c
index d27f563..a49db97 100644
--- a/gcc/config/rs6000/rs6000-c.c
+++ b/gcc/config/rs6000/rs6000-c.c
@@ -6470,89 +6470,19 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl,
 		     convert (TREE_TYPE (stmt), arg0));
       stmt = build2 (COMPOUND_EXPR, arg1_type, stmt, decl);
       return stmt;
     }
 
-  /* Expand vec_ld into an expression that masks the address and
-     performs the load.  We need to expand this early to allow
+  /* Expand vec_st into an expression that masks the address and
+     performs the store.  We need to expand this early to allow
      the best aliasing, as by the time we get into RTL we no longer
      are able to honor __restrict__, for example.  We may want to
      consider this for all memory access built-ins.
 
      When -maltivec=be is specified, or the wrong number of arguments
      is provided, simply punt to existing built-in processing.  */
-  if (fcode == ALTIVEC_BUILTIN_VEC_LD
-      && (BYTES_BIG_ENDIAN || !VECTOR_ELT_ORDER_BIG)
-      && nargs == 2)
-    {
-      tree arg0 = (*arglist)[0];
-      tree arg1 = (*arglist)[1];
-
-      /* Strip qualifiers like "const" from the pointer arg.  */
-      tree arg1_type = TREE_TYPE (arg1);
-      if (TREE_CODE (arg1_type) == ARRAY_TYPE && c_dialect_cxx ())
-	{
-	  /* Force array-to-pointer decay for C++.  */
-	  arg1 = default_conversion (arg1);
-	  arg1_type = TREE_TYPE (arg1);
-	}
-      if (!POINTER_TYPE_P (arg1_type))
-	goto bad;
-
-      tree inner_type = TREE_TYPE (arg1_type);
-      if (TYPE_QUALS (TREE_TYPE (arg1_type)) != 0)
-	{
-	  arg1_type = build_pointer_type (build_qualified_type (inner_type,
-								0));
-	  arg1 = fold_convert (arg1_type, arg1);
-	}
-
-      /* Construct the masked address.  Let existing error handling take
-	 over if we don't have a constant offset.  */
-      arg0 = fold (arg0);
-
-      if (TREE_CODE (arg0) == INTEGER_CST)
-	{
-	  if (!ptrofftype_p (TREE_TYPE (arg0)))
-	    arg0 = build1 (NOP_EXPR, sizetype, arg0);
-
-	  tree addr = fold_build2_loc (loc, POINTER_PLUS_EXPR, arg1_type,
-				       arg1, arg0);
-	  tree aligned = fold_build2_loc (loc, BIT_AND_EXPR, arg1_type, addr,
-					  build_int_cst (arg1_type, -16));
-
-	  /* Find the built-in to get the return type so we can convert
-	     the result properly (or fall back to default handling if the
-	     arguments aren't compatible).  */
-	  for (desc = altivec_overloaded_builtins;
-	       desc->code && desc->code != fcode; desc++)
-	    continue;
-
-	  for (; desc->code == fcode; desc++)
-	    if (rs6000_builtin_type_compatible (TREE_TYPE (arg0), desc->op1)
-		&& (rs6000_builtin_type_compatible (TREE_TYPE (arg1),
-						    desc->op2)))
-	      {
-		tree ret_type = rs6000_builtin_type (desc->ret_type);
-		if (TYPE_MODE (ret_type) == V2DImode)
-		  /* Type-based aliasing analysis thinks vector long
-		     and vector long long are different and will put them
-		     in distinct alias classes.  Force our return type
-		     to be a may-alias type to avoid this.  */
-		  ret_type
-		    = build_pointer_type_for_mode (ret_type, Pmode,
-						   true/*can_alias_all*/);
-		else
-		  ret_type = build_pointer_type (ret_type);
-		aligned = build1 (NOP_EXPR, ret_type, aligned);
-		tree ret_val = build_indirect_ref (loc, aligned, RO_NULL);
-		return ret_val;
-	      }
-	}
-    }
 
-  /* Similarly for stvx.  */
   if (fcode == ALTIVEC_BUILTIN_VEC_ST
       && (BYTES_BIG_ENDIAN || !VECTOR_ELT_ORDER_BIG)
       && nargs == 3)
     {
       tree arg0 = (*arglist)[0];
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 1338371..89ee431 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -16547,10 +16547,52 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
 	res = gimple_build (&stmts, VIEW_CONVERT_EXPR, TREE_TYPE (lhs), res);
 	gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
 	update_call_from_tree (gsi, res);
 	return true;
       }
+    /* Vector loads.  */
+    case ALTIVEC_BUILTIN_LVX_V16QI:
+    case ALTIVEC_BUILTIN_LVX_V8HI:
+    case ALTIVEC_BUILTIN_LVX_V4SI:
+    case ALTIVEC_BUILTIN_LVX_V4SF:
+    case ALTIVEC_BUILTIN_LVX_V2DI:
+    case ALTIVEC_BUILTIN_LVX_V2DF:
+      {
+	 gimple *g;
+	 /* Do not fold for -maltivec=be on LE targets.  */
+	 if (VECTOR_ELT_ORDER_BIG && !BYTES_BIG_ENDIAN)
+	    return false;
+	 arg0 = gimple_call_arg (stmt, 0);  /* offset */
+	 arg1 = gimple_call_arg (stmt, 1);  /* address */
+	 lhs = gimple_call_lhs (stmt);
+	 location_t loc = gimple_location (stmt);
+	 /* Since arg1 may be cast to a different type, just use ptr_type_node
+	    here instead of trying to enforce TBAA on pointer types.  */
+	 tree arg1_type = ptr_type_node;
+	 tree lhs_type = TREE_TYPE (lhs);
+	 /* POINTER_PLUS_EXPR wants the offset to be of type 'sizetype'.  Create
+	    the tree using the value from arg0.  The resulting type will match
+	    the type of arg1.  */
+	 gimple_seq stmts = NULL;
+	 tree temp_offset = gimple_convert (&stmts, loc, sizetype, arg0);
+	 tree temp_addr = gimple_build (&stmts, loc, POINTER_PLUS_EXPR,
+				       arg1_type, arg1, temp_offset);
+	 /* Mask off any lower bits from the address.  */
+	 tree aligned_addr = gimple_build (&stmts, loc, BIT_AND_EXPR,
+					  arg1_type, temp_addr,
+					  build_int_cst (arg1_type, -16));
+	 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
+	 /* Use the build2 helper to set up the mem_ref.  The MEM_REF could also
+	    take an offset, but since we've already incorporated the offset
+	    above, here we just pass in a zero.  */
+	 g = gimple_build_assign (lhs, build2 (MEM_REF, lhs_type, aligned_addr,
+						build_int_cst (arg1_type, 0)));
+	 gimple_set_location (g, loc);
+	 gsi_replace (gsi, g, true);
+	 return true;
+      }
+
     default:
 	if (TARGET_DEBUG_BUILTIN)
 	   fprintf (stderr, "gimple builtin intrinsic not matched:%d %s %s\n",
 		    fn_code, fn_name1, fn_name2);
       break;
