This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH][RFC] Detect and use implementations of BLAS routines


This adds recognition of partitions computing [sd]axpy and [sd]dot
to loop distribution (as examples of a moderately complex kernel
and of a kernel involving a reduction).

To make this a reality we have to control it with an option
(-fblas?) and we have to settle on an ABI to rely on (trailing
underscore, argument passing conventions and what integer type
to use).  "Official" CBLAS uses f2c and f2c.h to define the ABI.

I suppose other compiler vendors simply ship their own BLAS
routines and thus have complete control over the ABI (and can
also avoid passing scalar parameters by reference ...).

The main use of this transformation is of course to get automagic
access to vendor-optimized BLAS kernels.

Any comments?  (yeah, pattern matching sucks)

Thanks,
Richard.

2013-10-02  Richard Biener  <rguenther@suse.de>

	* tree-loop-distribution.c (enum partition_kind): Add PKIND_BLAS.
	(enum blas_kind): New enum.
	(struct partition_s): Add ops and subkind members.
	(partition_contains_stmt): New function.
	(build_elt_step): Likewise.
	(build_addr_arg_loc): Make nb_bytes parameter optional.
	(force_addr_of): New function.
	(force_addr_of_int): Likewise.
	(generate_blas_builtin): Likewise.
	(generate_code_for_partition): Handle PKIND_BLAS.
	(classify_partition): Detect [sd]axpy and [sd]dot like
	partitions.

	* gcc.dg/tree-ssa/ldist-24.c: New testcase.
	* gcc.dg/tree-ssa/ldist-25.c: Likewise.

Index: gcc/tree-loop-distribution.c
===================================================================
*** gcc/tree-loop-distribution.c.orig	2013-10-02 15:47:08.492960908 +0200
--- gcc/tree-loop-distribution.c	2013-10-02 15:47:21.519110583 +0200
*************** build_rdg (vec<loop_p> loop_nest, contro
*** 558,564 ****
  
  
  enum partition_kind {
!     PKIND_NORMAL, PKIND_MEMSET, PKIND_MEMCPY
  };
  
  typedef struct partition_s
--- 558,570 ----
  
  
  enum partition_kind {
!     PKIND_NORMAL,
!     PKIND_MEMSET, PKIND_MEMCPY,
!     PKIND_BLAS  /* Refined further by enum blas_kind.  */
! };
! 
! enum blas_kind {
!     BLAS_DAXPY, BLAS_DDOT  /* Each covers both the s and the d variant.  */
  };
  
  typedef struct partition_s
*************** typedef struct partition_s
*** 567,576 ****
    bitmap loops;
    bool reduction_p;
    enum partition_kind kind;
!   /* data-references a kind != PKIND_NORMAL partition is about.  */
    data_reference_p main_dr;
    data_reference_p secondary_dr;
!   tree niter;
  } *partition_t;
  
  
--- 573,587 ----
    bitmap loops;
    bool reduction_p;
    enum partition_kind kind;
!   enum blas_kind subkind;
!   /* kind != PKIND_NORMAL data follows.  */
!   /* number of invocations of main_dr.  */
!   tree niter;
!   /* data-references participating.  */
    data_reference_p main_dr;
    data_reference_p secondary_dr;
!   /* auxiliary operands.  */
!   tree ops[2];
  } *partition_t;
  
  
*************** partition_reduction_p (partition_t parti
*** 613,618 ****
--- 624,638 ----
    return partition->reduction_p;
  }
  
+ /* Return true if PARTITION contains STMT, i.e. STMT's uid is in its bitmap.  */
+ 
+ static bool
+ partition_contains_stmt (partition_t partition, gimple stmt)
+ {
+   int uid = gimple_uid (stmt);  /* A uid of -1 never matches.  */
+   return uid != -1 && bitmap_bit_p (partition->stmts, uid);
+ }
+ 
  /* Merge PARTITION into the partition DEST.  */
  
  static void
*************** build_size_arg_loc (location_t loc, data
*** 803,808 ****
--- 823,837 ----
    return fold_convert_loc (loc, size_type_node, size);
  }
  
+ /* Build DR's step divided by its element size (EXACT_DIV_EXPR) at LOC.  */
+ 
+ static tree
+ build_elt_step (location_t loc, data_reference_p dr)
+ {
+   tree sz = fold_convert (ssizetype, TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
+   return size_binop_loc (loc, EXACT_DIV_EXPR, DR_STEP (dr), sz);
+ }
+ 
  /* Build an address argument for a memory operation call.  */
  
  static tree
*************** build_addr_arg_loc (location_t loc, data
*** 814,820 ****
    addr_base = fold_convert_loc (loc, sizetype, addr_base);
  
    /* Test for a negative stride, iterating over every element.  */
!   if (tree_int_cst_sgn (DR_STEP (dr)) == -1)
      {
        addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base,
  				  fold_convert_loc (loc, sizetype, nb_bytes));
--- 843,850 ----
    addr_base = fold_convert_loc (loc, sizetype, addr_base);
  
    /* Test for a negative stride, iterating over every element.  */
!   if (nb_bytes
!       && tree_int_cst_sgn (DR_STEP (dr)) == -1)
      {
        addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base,
  				  fold_convert_loc (loc, sizetype, nb_bytes));
*************** generate_memcpy_builtin (struct loop *lo
*** 955,960 ****
--- 985,1117 ----
      }
  }
  
+ /* Copy OP into a fresh addressable temporary, emitting the needed
+    statements after GSI, and return an ADDR_EXPR of that temporary.  */
+ 
+ static tree
+ force_addr_of (gimple_stmt_iterator *gsi, tree op)
+ {
+   tree tem = create_tmp_var (TREE_TYPE (op), NULL);
+   TREE_ADDRESSABLE (tem) = 1;
+   gimple s = gimple_build_assign
+       (tem,
+        force_gimple_operand_gsi (gsi, op, true, NULL_TREE,
+ 				 false, GSI_CONTINUE_LINKING));
+   gsi_insert_after (gsi, s, GSI_CONTINUE_LINKING);
+   return build1 (ADDR_EXPR, build_pointer_type (TREE_TYPE (tem)), tem);
+ }
+ 
+ /* Like force_addr_of, but first convert OP to integer_type_node.
+    NOTE(review): a wider OP (e.g. niter) may be narrowed here -- confirm.  */
+ 
+ static tree
+ force_addr_of_int (gimple_stmt_iterator *gsi, tree op)
+ {
+   return force_addr_of (gsi, fold_convert (integer_type_node, op));
+ }
+ 
+ /* Emit a call to [sd]axpy_ or [sd]dot_ for PARTITION ahead of LOOP.  */
+ 
+ static void
+ generate_blas_builtin (struct loop *loop, partition_t partition)
+ {
+   gimple_stmt_iterator gsi;
+   gimple stmt, fn_call;
+   location_t loc;
+ 
+   stmt = DR_STMT (partition->main_dr);
+   loc = gimple_location (stmt);
+ 
+   /* New statements are appended to the source block of LOOP's preheader edge.  */
+   gsi = gsi_last_bb (loop_preheader_edge (loop)->src);
+ 
+   tree y = build_addr_arg_loc (loc, partition->main_dr, NULL_TREE);
+   tree x = build_addr_arg_loc (loc, partition->secondary_dr, NULL_TREE);
+   x = force_gimple_operand_gsi (&gsi, x, true, NULL_TREE,
+ 				false, GSI_CONTINUE_LINKING);
+   y = force_gimple_operand_gsi (&gsi, y, true, NULL_TREE,
+ 				false, GSI_CONTINUE_LINKING);
+ 
+   /* Use the d variant iff ops[0] is compatible with double, else s.  */
+   bool dvariant_p
+    = types_compatible_p (double_type_node, TREE_TYPE (partition->ops[0]));
+ 
+   if (partition->subkind == BLAS_DAXPY)  /* Args: n, a, x, incx, y, incy.  */
+     {
+       tree daxpy = build_fn_decl (dvariant_p ? "daxpy_" : "saxpy_",
+ 				  build_function_type_list (void_type_node,
+ 							    ptr_type_node,
+ 							    ptr_type_node,
+ 							    ptr_type_node,
+ 							    ptr_type_node,
+ 							    ptr_type_node,
+ 							    ptr_type_node,
+ 							    NULL_TREE));
+       fn_call = gimple_build_call (daxpy, 6,
+ 				   force_addr_of_int (&gsi, partition->niter),
+ 				   force_addr_of (&gsi, partition->ops[0]),
+ 				   x,
+ 				   force_addr_of_int
+ 				     (&gsi, build_elt_step
+ 				              (loc, partition->secondary_dr)),
+ 				   y,
+ 				   force_addr_of_int (&gsi,
+ 				     build_elt_step (loc, partition->main_dr)));
+       gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);
+ 
+       if (dump_file && (dump_flags & TDF_DETAILS))  /* Also printed for saxpy_.  */
+ 	fprintf (dump_file, "generated call to blas daxpy\n");
+     }
+   else if (partition->subkind == BLAS_DDOT)  /* Args: n, x, incx, y, incy.  */
+     {
+       tree ddot = build_fn_decl (dvariant_p ? "ddot_" : "sdot_",
+ 				 build_function_type_list (dvariant_p
+ 							   ? double_type_node
+ 							   : float_type_node,
+ 							   ptr_type_node,
+ 							   ptr_type_node,
+ 							   ptr_type_node,
+ 							   ptr_type_node,
+ 							   ptr_type_node,
+ 							   NULL_TREE));
+       fn_call = gimple_build_call (ddot, 5,
+ 				   force_addr_of_int (&gsi, partition->niter),
+ 				   x,
+ 				   force_addr_of_int (&gsi,
+ 				     build_elt_step (loc,
+ 						     partition->secondary_dr)),
+ 				   y,
+ 				   force_addr_of_int (&gsi,
+ 				     build_elt_step (loc, partition->main_dr)));
+ 
+       /* Remove the PHI defining ops[0]; RES is redefined below.  */
+       tree res = partition->ops[0];
+       gimple phi = SSA_NAME_DEF_STMT (res);
+       gimple_stmt_iterator gsi2 = gsi_for_stmt (phi);
+       remove_phi_node (&gsi2, false);
+ 
+       /* If the reduction's initial value ops[1] is not real zero, add it
+ 	 to the call result; either way the final value is assigned to RES.  */
+       if (!real_zerop (partition->ops[1]))
+ 	{
+ 	  tree tem = make_ssa_name (TREE_TYPE (res), fn_call);
+ 	  gimple_call_set_lhs (fn_call, tem);
+ 	  gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);
+ 	  fn_call = gimple_build_assign_with_ops (PLUS_EXPR,
+ 						  res,
+ 						  partition->ops[1], tem);
+ 	}
+       else
+ 	gimple_call_set_lhs (fn_call, res);
+       gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);
+ 
+       if (dump_file && (dump_flags & TDF_DETAILS))  /* Also printed for sdot_.  */
+ 	fprintf (dump_file, "generated call to blas ddot\n");
+     }
+   else
+     gcc_unreachable ();
+ }
+ 
  /* Remove and destroy the loop LOOP.  */
  
  static void
*************** generate_code_for_partition (struct loop
*** 1026,1031 ****
--- 1183,1192 ----
        generate_memcpy_builtin (loop, partition);
        break;
  
+     case PKIND_BLAS:
+       generate_blas_builtin (loop, partition);
+       break;
+ 
      default:
        gcc_unreachable ();
      }
*************** classify_partition (loop_p loop, struct
*** 1112,1121 ****
  
        /* If the stmt has uses outside of the loop mark it as reduction.  */
        if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
! 	{
! 	  partition->reduction_p = true;
! 	  return;
! 	}
      }
  
    /* Perform general partition disqualification for builtins.  */
--- 1273,1279 ----
  
        /* If the stmt has uses outside of the loop mark it as reduction.  */
        if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
! 	partition->reduction_p = true;
      }
  
    /* Perform general partition disqualification for builtins.  */
*************** classify_partition (loop_p loop, struct
*** 1124,1129 ****
--- 1282,1288 ----
      return;
  
    /* Detect memset and memcpy.  */
+   unsigned n_loads = 0;
    single_load = NULL;
    single_store = NULL;
    EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
*************** classify_partition (loop_p loop, struct
*** 1148,1156 ****
  	{
  	  if (DR_IS_READ (dr))
  	    {
! 	      if (single_load != NULL)
! 		return;
! 	      single_load = dr;
  	    }
  	  else
  	    {
--- 1307,1317 ----
  	{
  	  if (DR_IS_READ (dr))
  	    {
! 	      n_loads++;
! 	      if (n_loads > 1)
! 		single_load = NULL;
! 	      else
! 		single_load = dr;
  	    }
  	  else
  	    {
*************** classify_partition (loop_p loop, struct
*** 1161,1178 ****
  	}
      }
  
!   if (!single_store)
!     return;
! 
!   if (!dominated_by_p (CDI_DOMINATORS, single_exit (loop)->src,
! 		       gimple_bb (DR_STMT (single_store))))
      nb_iter = number_of_latch_executions (loop);
    else
      nb_iter = number_of_exit_cond_executions (loop);
    if (!nb_iter || nb_iter == chrec_dont_know)
      return;
  
!   if (single_store && !single_load)
      {
        gimple stmt = DR_STMT (single_store);
        tree rhs = gimple_assign_rhs1 (stmt);
--- 1322,1337 ----
  	}
      }
  
!   if (single_store
!       && !dominated_by_p (CDI_DOMINATORS, single_exit (loop)->src,
! 			  gimple_bb (DR_STMT (single_store))))
      nb_iter = number_of_latch_executions (loop);
    else
      nb_iter = number_of_exit_cond_executions (loop);
    if (!nb_iter || nb_iter == chrec_dont_know)
      return;
  
!   if (single_store && n_loads == 0 && !partition_reduction_p (partition))
      {
        gimple stmt = DR_STMT (single_store);
        tree rhs = gimple_assign_rhs1 (stmt);
*************** classify_partition (loop_p loop, struct
*** 1193,1199 ****
        partition->main_dr = single_store;
        partition->niter = nb_iter;
      }
!   else if (single_store && single_load)
      {
        gimple store = DR_STMT (single_store);
        gimple load = DR_STMT (single_load);
--- 1352,1358 ----
        partition->main_dr = single_store;
        partition->niter = nb_iter;
      }
!   else if (single_store && single_load && !partition_reduction_p (partition))
      {
        gimple store = DR_STMT (single_store);
        gimple load = DR_STMT (single_load);
*************** classify_partition (loop_p loop, struct
*** 1250,1255 ****
--- 1409,1578 ----
        partition->secondary_dr = single_load;
        partition->niter = nb_iter;
      }
+   else if (single_store && n_loads == 2 && !partition_reduction_p (partition))
+     {
+       /* Match BLAS [sd]axpy which is *y = a * *x + *y.  */
+       gimple store = DR_STMT (single_store);
+       tree name = gimple_assign_rhs1 (store);
+       if (TREE_CODE (name) != SSA_NAME
+ 	  || !(types_compatible_p (double_type_node, TREE_TYPE (name))
+ 	       || types_compatible_p (float_type_node, TREE_TYPE (name))))
+ 	return;
+       gimple stmt = SSA_NAME_DEF_STMT (name);
+       if (!is_gimple_assign (stmt)
+ 	  || gimple_assign_rhs_code (stmt) != PLUS_EXPR)
+ 	return;
+       name = gimple_assign_rhs1 (stmt);
+       tree name2 = gimple_assign_rhs2 (stmt);
+       if (TREE_CODE (name) != SSA_NAME
+ 	  || TREE_CODE (name2) != SSA_NAME)
+ 	return;
+       stmt = SSA_NAME_DEF_STMT (name);
+       gimple stmt2 = SSA_NAME_DEF_STMT (name2);
+       if (!is_gimple_assign (stmt)
+ 	  || !is_gimple_assign (stmt2))
+ 	return;
+       gimple y_stmt;
+       if (gimple_assign_rhs_code (stmt) == MULT_EXPR
+ 	  && gimple_assign_load_p (stmt2)
+ 	  && partition_contains_stmt (partition, stmt2))
+ 	y_stmt = stmt2;
+       else if (gimple_assign_load_p (stmt)
+ 	       && partition_contains_stmt (partition, stmt)
+ 	       && gimple_assign_rhs_code (stmt2) == MULT_EXPR)
+ 	{
+ 	  y_stmt = stmt;
+ 	  stmt = stmt2;
+ 	}
+       else
+ 	return;
+       /* y is also the destination.  */
+       if (!same_data_refs (single_store,
+ 			   RDG_DATAREFS (rdg,
+ 					 rdg_vertex_for_stmt (rdg, y_stmt))[0]))
+ 	return;
+       name = gimple_assign_rhs1 (stmt);
+       name2 = gimple_assign_rhs2 (stmt);
+       if (TREE_CODE (name) != SSA_NAME
+ 	  || TREE_CODE (name2) != SSA_NAME)
+ 	return;
+       stmt = SSA_NAME_DEF_STMT (name);
+       stmt2 = SSA_NAME_DEF_STMT (name2);
+       gimple x_stmt;
+       tree a_name;
+       if (gimple_assign_load_p (stmt)
+ 	  && loop_containing_stmt (stmt2) != loop)
+ 	{
+ 	  x_stmt = stmt;
+ 	  a_name = name2;
+ 	}
+       else if (loop_containing_stmt (stmt) != loop
+ 	       && gimple_assign_load_p (stmt2))
+ 	{
+ 	  x_stmt = stmt2;
+ 	  a_name = name;
+ 	}
+       else
+ 	return;
+       /* Ok, now we got single_store += a_name * x_stmt.  Now
+          we have to verify that the array we load from does not overlap
+ 	 with the array we store to.  */
+       vec<loop_p> loops = vNULL;
+       ddr_p ddr;
+       loops.safe_push (loop);
+       data_reference_p load_dr
+ 	= RDG_DATAREFS (rdg, rdg_vertex_for_stmt (rdg, x_stmt))[0];
+       ddr = initialize_data_dependence_relation (load_dr, single_store, loops);
+       compute_affine_dependence (ddr, loop);
+       if (DDR_ARE_DEPENDENT (ddr) != chrec_known)
+ 	{
+ 	  free_dependence_relation (ddr);
+ 	  loops.release ();
+ 	  return;
+ 	}
+       free_dependence_relation (ddr);
+       loops.release ();
+ 
+       partition->kind = PKIND_BLAS;
+       partition->subkind = BLAS_DAXPY;
+       partition->niter = nb_iter;
+       partition->main_dr = single_store;
+       partition->secondary_dr = load_dr;
+       partition->ops[0] = a_name;
+     }
+   else if (!single_store && n_loads == 2 && partition_reduction_p (partition))
+     {
+       /* Match BLAS [sd]dot which is res += *x * *y.  */
+       basic_block e = single_exit (loop)->dest;
+       gimple stmt = NULL;
+       tree phires = NULL_TREE;
+       for (gimple_stmt_iterator gsi = gsi_start_phis (e); !gsi_end_p (gsi);
+ 	   gsi_next (&gsi))
+ 	{
+ 	  gimple phi = gsi_stmt (gsi);
+ 	  tree arg = PHI_ARG_DEF (phi, 0);
+ 	  if (virtual_operand_p (arg)
+ 	      || TREE_CODE (arg) != SSA_NAME)
+ 	    continue;
+ 	  if (partition_contains_stmt (partition, SSA_NAME_DEF_STMT (arg)))
+ 	    {
+ 	      if (stmt != NULL)
+ 		return;
+ 	      stmt = SSA_NAME_DEF_STMT (arg);
+ 	      phires = gimple_phi_result (phi);
+ 	    }
+ 	}
+       if (!stmt
+ 	  || !is_gimple_assign (stmt)
+ 	  || gimple_assign_rhs_code (stmt) != PLUS_EXPR)
+ 	return;
+       tree name1 = gimple_assign_rhs1 (stmt);
+       tree name2 = gimple_assign_rhs2 (stmt);
+       if (TREE_CODE (name1) != SSA_NAME
+ 	  || TREE_CODE (name2) != SSA_NAME)
+ 	return;
+       gimple stmt1 = SSA_NAME_DEF_STMT (name1);
+       gimple stmt2 = SSA_NAME_DEF_STMT (name2);
+       if (gimple_code (stmt1) == GIMPLE_PHI
+ 	  && is_gimple_assign (stmt2))
+ 	;
+       else if (is_gimple_assign (stmt1)
+ 	       && gimple_code (stmt2) == GIMPLE_PHI)
+ 	{
+ 	  gimple tem = stmt1;
+ 	  stmt1 = stmt2;
+ 	  stmt2 = tem;
+ 	}
+       else
+ 	return;
+       tree reducinit
+ 	= PHI_ARG_DEF_FROM_EDGE (stmt1, loop_preheader_edge (loop));
+       if (!is_gimple_assign (stmt2)
+ 	  || gimple_assign_rhs_code (stmt2) != MULT_EXPR)
+ 	return;
+       name1 = gimple_assign_rhs1 (stmt2);
+       name2 = gimple_assign_rhs2 (stmt2);
+       if (TREE_CODE (name1) != SSA_NAME
+ 	  || TREE_CODE (name2) != SSA_NAME)
+ 	return;
+       gimple load1 = SSA_NAME_DEF_STMT (name1);
+       gimple load2 = SSA_NAME_DEF_STMT (name2);
+       if (!gimple_assign_load_p (load1)
+ 	  || !partition_contains_stmt (partition, load1)
+ 	  || !gimple_assign_load_p (load2)
+ 	  || !partition_contains_stmt (partition, load2))
+ 	return;
+ 
+       partition->kind = PKIND_BLAS;
+       partition->subkind = BLAS_DDOT;
+       partition->niter = nb_iter;
+       partition->main_dr
+ 	= RDG_DATAREFS (rdg, rdg_vertex_for_stmt (rdg, load1))[0];
+       partition->secondary_dr
+ 	= RDG_DATAREFS (rdg, rdg_vertex_for_stmt (rdg, load2))[0];
+       partition->ops[0] = phires;
+       partition->ops[1] = reducinit;
+     }
  }
  
  /* For a data reference REF, return the declaration of its base
Index: gcc/testsuite/gcc.dg/tree-ssa/ldist-24.c
===================================================================
*** /dev/null	1970-01-01 00:00:00.000000000 +0000
--- gcc/testsuite/gcc.dg/tree-ssa/ldist-24.c	2013-10-02 15:47:21.519110583 +0200
***************
*** 0 ****
--- 1,41 ----
+ /* { dg-do run } */
+ /* { dg-options "-O3 -fdump-tree-ldist" } */
+ 
+ extern void abort (void);
+ 
+ void __attribute__((optimize("no-tree-loop-distribute-patterns")))
+ daxpy_ (int *n, double * __restrict da,
+ 	double * __restrict dx, int * __restrict incx,
+ 	double * __restrict dy, int * __restrict incy)
+ {  /* Local daxpy_: dy[i*incy] += da * dx[i*incx] for 0 <= i < n.  */
+   int i;
+   for (i = 0; i < *n; ++i)  /* Attribute presumably keeps this a loop -- confirm.  */
+     dy[i * *incy] += *da * dx[i * *incx];
+ }
+ 
+ void __attribute__((noinline,noclone))
+ foo (double * __restrict x, int dx, double * __restrict y, int dy,
+      double a, int n)
+ {  /* Strided axpy loop; the dg-final scan expects one "daxpy_" match.  */
+   int i;
+   for (i = 0; i < n; ++i)
+     y[i*dy] = y[i*dy] + a*x[i*dx];
+ }
+ 
+ int main()
+ {  /* Run foo on known data and abort on any wrong element.  */
+   double x[1024], y[1024];
+   int i;
+   for (i = 0; i < 1024; i++)
+     y[i] = 0.;
+   for (i = 0; i < 1024; i++)
+     x[i] = i;
+   foo (x, 2, y, 1, 2., 512);  /* y[i] += 2.0 * x[2*i] for i in [0, 512).  */
+   for (i = 0; i < 512; i++)
+     if (y[i] != 4*i)  /* x[2*i] == 2*i, scaled by a == 2.  */
+       abort ();
+   return 0;
+ }
+ 
+ /* { dg-final { scan-tree-dump-times "daxpy_" 1 "ldist" } } */
+ /* { dg-final { cleanup-tree-dump "ldist" } } */
Index: gcc/testsuite/gcc.dg/tree-ssa/ldist-25.c
===================================================================
*** /dev/null	1970-01-01 00:00:00.000000000 +0000
--- gcc/testsuite/gcc.dg/tree-ssa/ldist-25.c	2013-10-02 15:47:21.520110594 +0200
***************
*** 0 ****
--- 1,47 ----
+ /* { dg-do run } */
+ /* { dg-options "-O3 -fdump-tree-ldist" } */
+ 
+ extern void abort (void);
+ 
+ float __attribute__((optimize("no-tree-loop-distribute-patterns")))
+ sdot_ (int *n,
+        float * __restrict sx, int * __restrict incx,
+        float * __restrict sy, int * __restrict incy)
+ {  /* Local sdot_: returns the sum of sx[i*incx] * sy[i*incy] for i < n.  */
+   int i;
+   float stemp = 0.0f;
+   for (i = 0; i < *n; ++i)  /* Attribute presumably keeps this a loop -- confirm.  */
+     stemp += sx[i * *incx] * sy[i * *incy];
+   return stemp;
+ }
+ 
+ float __attribute__((noinline,noclone))
+ foo (float * __restrict x, int dx, float * __restrict y, int dy, int n)
+ {  /* Strided dot-product loop; the dg-final scan expects one "sdot_" match.  */
+   int i;
+   float res = 1.0f;  /* Non-zero initial value of the reduction.  */
+   for (i = 0; i < n; ++i)
+     res += y[i*dy] * x[i*dx];
+   return res;
+ }
+ 
+ int main()
+ {  /* Compare foo's result against a directly computed sum.  */
+   float x[1024], y[1024];
+   int i;
+   for (i = 0; i < 1024; i++)
+     {
+       y[i] = 1.0f;
+       x[i] = i;
+     }
+   float res = foo (x, 2, y, 1, 512);
+   float tem = 1.0f;  /* Same initial value as res in foo.  */
+   for (i = 0; i < 512; i++)
+     tem += i*2;  /* y[i] == 1 and x[2*i] == 2*i.  */
+   if (res != tem)
+     abort ();
+   return 0;
+ }
+ 
+ /* { dg-final { scan-tree-dump-times "sdot_" 1 "ldist" } } */
+ /* { dg-final { cleanup-tree-dump "ldist" } } */


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]