This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH] data prefetch support and __builtin_prefetch (take 2)


This is an update of the data prefetch patch I submitted November 29:

  http://gcc.gnu.org/ml/gcc-patches/2001-11/msg02007.html

It provides a framework for data prefetch support via a new PREFETCH rtx
code, supports the functionality on ia64, adds a __builtin_prefetch
function, and changes the name of existing SSE-specific prefetch support
for an ia32 builtin.  ix86 support will be in a separate patch.  I have
no plans to add support for other targets.

I've made some changes suggested by Richard Henderson, added a test for
volatile data and pointers, and modified some of the other tests.

Bootstrapped and regression tested on ia64-unknown-linux-gnu and
i686-pc-linux-gnu; also bootstrapped and regression tested on
i686-pc-linux-gnu with an unfinished patch for ix86 prefetch support.
I've run "make info" and "make dvi".

There are a couple of ia64 issues that I intend to fix soon.  There are
bundling constraints for the lfetch instruction that affect performance,
and I need to find the right value for SIMULTANEOUS_PREFETCHES for
Itanium, which will be used for prefetch optimizations.

OK to commit?

2001-12-03  Janis Johnson  <janis187@us.ibm.com>

	* rtl.def (PREFETCH): New rtx code.
	* doc/rtl.texi (PREFETCH): Add documentation.
	* function.c (instantiate_virtual_regs_1): Handle PREFETCH rtx.
	* rtlanal.c (reg_referenced_p, note_uses): Ditto.
	* sched-vis.c (print_exp):  Ditto.
	* ssa-dce.c (find_inherently_necessary):  Ditto.

	* config/ia64/ia64.h (SIMULTANEOUS_PREFETCHES): Define.
	  (PREFETCH_BLOCK): Define.
	* config/ia64/ia64.md (itanium_class): Add lfetch.
	  (prefetch): New.

	* builtin-types.def (BT_FN_VOID_PTR_INT_INT): New.
	* builtins.def (BUILT_IN_PREFETCH): New.
	* builtins.c (expand_builtin_prefetch): New.
	  (expand_builtin): Call it.
	* doc/extend.texi: Document __builtin_prefetch.

	* config/i386/i386.md (prefetch): Rename as prefetch_sse to avoid
	  conflicts with new generic prefetch functionality.
	* config/i386/i386.c (ix86_expand_builtin): Use new name for
	  prefetch_sse.

2001-12-03  Janis Johnson  <janis187@us.ibm.com>

	* gcc.c-torture/execute/builtin-prefetch-1.c: New test.
	* gcc.c-torture/execute/builtin-prefetch-2.c: New test.
	* gcc.c-torture/execute/builtin-prefetch-3.c: New test.
	* gcc.c-torture/execute/builtin-prefetch-4.c: New test.
	* gcc.c-torture/execute/builtin-prefetch-5.c: New test.
	* gcc.c-torture/execute/builtin-prefetch-6.c: New test.
	* gcc.dg/builtin-prefetch-1.c: New test.

--- gcc/function.c.orig	Fri Nov 30 16:57:54 2001
+++ gcc/function.c	Fri Nov 30 16:59:54 2001
@@ -3979,6 +3979,7 @@ instantiate_virtual_regs_1 (loc, object,
 	}
 
       /* Fall through to generic unary operation case.  */
+    case PREFETCH:
     case SUBREG:
     case STRICT_LOW_PART:
     case NEG:          case NOT:
--- gcc/rtlanal.c.orig	Fri Nov 30 16:57:57 2001
+++ gcc/rtlanal.c	Fri Nov 30 16:59:54 2001
@@ -569,6 +569,9 @@ reg_referenced_p (x, body)
     case TRAP_IF:
       return reg_overlap_mentioned_p (x, TRAP_CONDITION (body));
 
+    case PREFETCH:
+      return reg_overlap_mentioned_p (x, XEXP (body, 0));
+
     case UNSPEC:
     case UNSPEC_VOLATILE:
       for (i = XVECLEN (body, 0) - 1; i >= 0; i--)
@@ -1456,6 +1459,10 @@ note_uses (pbody, fun, data)
       (*fun) (&TRAP_CONDITION (body), data);
       return;
 
+    case PREFETCH:
+      (*fun) (&XEXP (body, 0), data);
+      return;
+
     case UNSPEC:
     case UNSPEC_VOLATILE:
       for (i = XVECLEN (body, 0) - 1; i >= 0; i--)
--- gcc/sched-vis.c.orig	Fri Nov 30 16:57:57 2001
+++ gcc/sched-vis.c	Fri Nov 30 16:59:54 2001
@@ -474,6 +474,12 @@ print_exp (buf, x, verbose)
       fun = "trap_if";
       op[0] = TRAP_CONDITION (x);
       break;
+    case PREFETCH:
+      fun = "prefetch";
+      op[0] = XEXP (x, 0);
+      op[1] = XEXP (x, 1);
+      op[2] = XEXP (x, 2);
+      break;
     case UNSPEC:
     case UNSPEC_VOLATILE:
       {
--- gcc/ssa-dce.c.orig	Fri Nov 30 16:57:57 2001
+++ gcc/ssa-dce.c	Fri Nov 30 16:59:54 2001
@@ -373,6 +373,7 @@ find_inherently_necessary (x)
       {  
       case CALL_INSN:
       case BARRIER:
+      case PREFETCH:
 	return !0;
       case CODE_LABEL:
       case NOTE:
--- gcc/rtl.def.orig	Fri Nov 30 16:57:56 2001
+++ gcc/rtl.def	Fri Nov 30 16:59:54 2001
@@ -514,6 +514,17 @@ DEF_RTL_EXPR(ADDR_VEC, "addr_vec", "E", 
      
 DEF_RTL_EXPR(ADDR_DIFF_VEC, "addr_diff_vec", "eEee0", 'x')
 
+/* Memory prefetch, with attributes supported on some targets.
+   Operand 1 is the address of the memory to fetch.
+   Operand 2 is 1 for a write access, 0 otherwise.
+   Operand 3 is the level of temporal locality; 0 means there is no
+   temporal locality and 1, 2, and 3 are for increasing levels of temporal
+   locality.
+
+   The attributes specified by operands 2 and 3 are ignored for targets
+   whose prefetch instructions do not support them.  */
+DEF_RTL_EXPR(PREFETCH, "prefetch", "eee", 'x')
+
 /* ----------------------------------------------------------------------
    At the top level of an instruction (perhaps under PARALLEL).
    ---------------------------------------------------------------------- */
--- gcc/doc/rtl.texi.orig	Fri Nov 30 16:58:12 2001
+++ gcc/doc/rtl.texi	Fri Nov 30 16:59:54 2001
@@ -2326,6 +2326,20 @@ are set up by branch shortening and hold
 maximum address, respectively.  @var{flags} indicates the relative
 position of @var{base}, @var{min} and @var{max} to the containing insn
 and of @var{min} and @var{max} to @var{base}.  See rtl.def for details.
+
+@findex prefetch
+@item (prefetch:@var{m} @var{addr} @var{rw} @var{locality})
+Represents prefetch of memory at address @var{addr}.
+Operand @var{rw} is 1 if the prefetch is for data to be written, 0 otherwise;
+targets that do not support write prefetches should treat this as a normal
+prefetch.
+Operand @var{locality} specifies the amount of temporal locality; 0 if there
+is none or 1, 2, or 3 for increasing levels of temporal locality;
+targets that do not support locality hints should ignore this.
+
+This insn is used to minimize cache-miss latency by moving data into a
+cache before it is accessed.  It should use only non-faulting data prefetch
+instructions.
 @end table
 
 @node Incdec
--- gcc/config/ia64/ia64.h.orig	Fri Nov 30 16:58:05 2001
+++ gcc/config/ia64/ia64.h	Fri Nov 30 16:59:54 2001
@@ -2707,6 +2707,19 @@ do {									\
    #pragma weak.  Note, #pragma weak will only be supported if SUPPORT_WEAK is
    defined.  */
 
+/* If this architecture supports prefetch, define this to be the number of
+   prefetch commands that can be executed in parallel.
+
+   ??? This number is bogus and needs to be replaced before the value is
+   actually used in optimizations.  */
+
+#define SIMULTANEOUS_PREFETCHES 6
+
+/* If this architecture supports prefetch, define this to be the size of
+   the cache line that is prefetched.  */
+
+#define PREFETCH_BLOCK 32
+
 #define HANDLE_SYSV_PRAGMA
 
 /* In rare cases, correct code generation requires extra machine dependent
--- gcc/config/ia64/ia64.md.orig	Fri Nov 30 16:58:06 2001
+++ gcc/config/ia64/ia64.md	Fri Nov 30 16:59:54 2001
@@ -99,7 +99,7 @@
 ;; multiple instructions, patterns which emit 0 instructions, and patterns
 ;; which emit instruction that can go in any slot (e.g. nop).
 
-(define_attr "itanium_class" "unknown,ignore,stop_bit,br,fcmp,fcvtfx,fld,fmac,fmisc,frar_i,frar_m,frbr,frfr,frpr,ialu,icmp,ilog,ishf,ld,chk_s,long_i,mmmul,mmshf,mmshfi,rse_m,scall,sem,stf,st,syst_m0,syst_m,tbit,toar_i,toar_m,tobr,tofr,topr,xmpy,xtd,nop_b,nop_f,nop_i,nop_m,nop_x"
+(define_attr "itanium_class" "unknown,ignore,stop_bit,br,fcmp,fcvtfx,fld,fmac,fmisc,frar_i,frar_m,frbr,frfr,frpr,ialu,icmp,ilog,ishf,ld,chk_s,long_i,mmmul,mmshf,mmshfi,rse_m,scall,sem,stf,st,syst_m0,syst_m,tbit,toar_i,toar_m,tobr,tofr,topr,xmpy,xtd,nop_b,nop_f,nop_i,nop_m,nop_x,lfetch"
          (const_string "unknown"))
 
 ;; chk_s has an I and an M form; use type A for convenience.
@@ -107,6 +107,7 @@
   (cond [(eq_attr "itanium_class" "ld,st,fld,stf,sem,nop_m") (const_string "M")
 	 (eq_attr "itanium_class" "rse_m,syst_m,syst_m0") (const_string "M")
 	 (eq_attr "itanium_class" "frar_m,toar_m,frfr,tofr") (const_string "M")
+	 (eq_attr "itanium_class" "lfetch") (const_string "M")
 	 (eq_attr "itanium_class" "chk_s,ialu,icmp,ilog") (const_string "A")
 	 (eq_attr "itanium_class" "fmisc,fmac,fcmp,xmpy") (const_string "F")
 	 (eq_attr "itanium_class" "fcvtfx,nop_f") (const_string "F")
@@ -5048,6 +5049,34 @@
   ""
   "break.f 0"
   [(set_attr "itanium_class" "nop_f")])
+
+(define_insn "prefetch"
+  [(prefetch (match_operand:DI 0 "address_operand" "p")
+	     (match_operand:DI 1 "const_int_operand" "n")
+	     (match_operand:DI 2 "const_int_operand" "n"))]
+  ""
+  "*
+{
+  static const char * const alt[2][4] = {
+    \"lfetch.nta [%0]\",
+    \"lfetch.nt1 [%0]\",
+    \"lfetch.nt2 [%0]\",
+    \"lfetch [%0]\",
+    \"lfetch.excl.nta [%0]\",
+    \"lfetch.excl.nt1 [%0]\",
+    \"lfetch.excl.nt2 [%0]\",
+    \"lfetch.excl [%0]\"
+  };
+  int i = (INTVAL (operands[1]));
+  int j = (INTVAL (operands[2]));
+
+  if (i != 0 && i != 1)
+    abort ();
+  if (j < 0 || j > 3)
+    abort ();
+  return alt[i][j];
+}"
+  [(set_attr "itanium_class" "lfetch")])
 
 ;; Non-local goto support.
 
--- gcc/builtin-types.def.orig	Fri Nov 30 16:57:51 2001
+++ gcc/builtin-types.def	Fri Nov 30 16:59:54 2001
@@ -156,6 +156,7 @@ DEF_FUNCTION_TYPE_3 (BT_FN_TRAD_PTR_PTR_
 	             BT_TRAD_PTR, BT_PTR, BT_INT, BT_SIZE)
 DEF_FUNCTION_TYPE_3 (BT_FN_INT_TRAD_CONST_PTR_TRAD_CONST_PTR_LEN,
 		     BT_INT, BT_TRAD_CONST_PTR, BT_TRAD_CONST_PTR, BT_LEN)
+DEF_FUNCTION_TYPE_3 (BT_FN_VOID_PTR_INT_INT, BT_VOID, BT_PTR, BT_INT, BT_INT)
 
 DEF_FUNCTION_TYPE_4 (BT_FN_SIZE_CONST_PTR_SIZE_SIZE_PTR,
 		     BT_SIZE, BT_CONST_PTR, BT_SIZE, BT_SIZE, BT_PTR)
--- gcc/builtins.def.orig	Fri Nov 30 16:57:52 2001
+++ gcc/builtins.def	Fri Nov 30 16:59:54 2001
@@ -336,6 +336,9 @@ DEF_GCC_BUILTIN(BUILT_IN_LONGJMP,
 DEF_GCC_BUILTIN(BUILT_IN_TRAP,
 		"__builtin_trap",
 		BT_FN_VOID)
+DEF_GCC_BUILTIN(BUILT_IN_PREFETCH,
+		"__builtin_prefetch",
+		BT_FN_VOID_PTR_INT_INT)
 
 /* Stdio builtins.  */
 DEF_FALLBACK_BUILTIN(BUILT_IN_PUTCHAR,
--- gcc/builtins.c.orig	Fri Nov 30 16:57:52 2001
+++ gcc/builtins.c	Sat Dec  1 10:01:33 2001
@@ -87,6 +87,7 @@ static int apply_result_size		PARAMS ((v
 static rtx result_vector		PARAMS ((int, rtx));
 #endif
 static rtx expand_builtin_setjmp	PARAMS ((tree, rtx));
+static void expand_builtin_prefetch	PARAMS ((tree));
 static rtx expand_builtin_apply_args	PARAMS ((void));
 static rtx expand_builtin_apply_args_1	PARAMS ((void));
 static rtx expand_builtin_apply		PARAMS ((rtx, rtx, rtx));
@@ -715,6 +716,69 @@ expand_builtin_longjmp (buf_addr, value)
     }
 }
 
+/* Expand a call to __builtin_prefetch.  For a target that does not support
+   data prefetch, evaluate the memory address argument in case it has side
+   effects.  */
+
+static void
+expand_builtin_prefetch (arglist)
+     tree arglist;
+{
+  tree arg0, arg1, arg2;
+  rtx op0, op1, op2;
+
+  arg0 = TREE_VALUE (arglist);
+  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
+  arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
+
+  /* Argument 0 is an address.  */
+  op0 = expand_expr (arg0, NULL_RTX, Pmode, EXPAND_NORMAL);
+
+  /* Argument 1 (read/write flag) must be a compile-time constant int.  */
+  if (TREE_CODE (arg1) != INTEGER_CST)
+    {
+       error ("second arg to `__builtin_prefetch' must be a constant");
+       arg1 = integer_zero_node;
+    }
+  op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0); 
+  /* Argument 1 must be either zero or one.  */
+  if (INTVAL (op1) != 0 && INTVAL (op1) != 1)
+    {
+      warning ("invalid second arg to __builtin_prefetch; using zero");
+      op1 = const0_rtx;
+    }
+
+  /* Argument 2 (locality) must be a compile-time constant int.  */
+  if (TREE_CODE (arg2) != INTEGER_CST)
+    {
+      error ("third arg to `__builtin_prefetch' must be a constant");
+      arg2 = integer_zero_node;
+    }
+  op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0); 
+  /* Argument 2 must be 0, 1, 2, or 3.  */
+  if (INTVAL (op2) < 0 || INTVAL (op2) > 3)
+    {
+      warning ("invalid third arg to __builtin_prefetch; using zero");
+      op2 = const0_rtx;
+    }
+
+#ifdef HAVE_prefetch
+  if (HAVE_prefetch)
+    {
+      if (! (*insn_data[(int)CODE_FOR_prefetch].operand[0].predicate)
+	    (op0, Pmode))
+        op0 = force_reg (Pmode, op0);
+      emit_insn (gen_prefetch (op0, op1, op2));
+    }
+  else
+#endif
+    op0 = protect_from_queue (op0, 0);
+    /* Don't do anything with direct references to volatile memory, but
+       generate code to handle other side effects.  */
+    if (GET_CODE (op0) != MEM && side_effects_p (op0))
+      emit_insn (op0);
+}
+
 /* Get a MEM rtx for expression EXP which is the address of an operand
    to be used to be used in a string instruction (cmpstrsi, movstrsi, ..).  */
 
@@ -3809,6 +3873,10 @@ expand_builtin (exp, target, subtarget, 
       return expand_builtin_va_copy (arglist);
     case BUILT_IN_EXPECT:
       return expand_builtin_expect (arglist, target);
+    case BUILT_IN_PREFETCH:
+      expand_builtin_prefetch (arglist);
+      return const0_rtx;
+
 
     default:			/* just do library call, if unknown builtin */
       error ("built-in function `%s' not currently supported",
--- gcc/doc/extend.texi.orig	Fri Nov 30 16:58:09 2001
+++ gcc/doc/extend.texi	Fri Nov 30 17:18:45 2001
@@ -4474,6 +4474,45 @@ if (__builtin_expect (ptr != NULL, 1))
 when testing pointer or floating-point values.
 @end deftypefn
 
+@deftypefn {Built-in Function} void __builtin_prefetch (void *@var{addr}, int @var{rw}, int @var{locality})
+This function is used to minimize cache-miss latency by moving data into
+a cache before it is accessed.
+You can insert calls to @code{__builtin_prefetch} into code for which
+you know addresses of data in memory that is likely to be accessed soon.
+If the target supports them, data prefetch instructions will be generated.
+If the prefetch is done early enough before the access then the data will
+be in the cache by the time it is accessed.
+
+The value of @var{addr} is the address of the memory to prefetch.
+The value of @var{rw} is a compile-time constant one or zero; one
+means that the prefetch is preparing for a write to the memory address.
+The value @var{locality} must be a compile-time constant integer between
+zero and three.  A value of zero means that the data has no temporal
+locality, so it need not be left in the cache after the access.  A value
+of three means that the data has a high degree of temporal locality and
+should be left in all levels of cache possible.  Values of one and two
+mean, respectively, a low or moderate degree of temporal locality.
+
+@smallexample
+for (i = 0; i < n; i++)
+  @{
+    a[i] = a[i] + b[i];
+    __builtin_prefetch (&a[i+j], 1, 1);
+    __builtin_prefetch (&b[i+j], 0, 1);
+    /* ... */
+  @}
+@end smallexample
+
+Data prefetch does not generate faults if @var{addr} is invalid, but 
+the address expression itself must be valid.  For example, a prefetch
+of @code{p->next} will not fault if @code{p->next} is not a valid
+address, but evaluation will fault if @code{p} is not a valid address.
+
+If the target does not support data prefetch, the address expression
+is evaluated if it includes side effects but no other code is generated
+and GCC does not issue a warning.
+@end deftypefn
+
 @node Pragmas
 @section Pragmas Accepted by GCC
 @cindex pragmas
--- gcc/testsuite/gcc.c-torture/execute/builtin-prefetch-1.c.orig	Wed Nov 28 15:04:17 2001
+++ gcc/testsuite/gcc.c-torture/execute/builtin-prefetch-1.c	Fri Nov 30 16:59:54 2001
@@ -0,0 +1,59 @@
+/* Test that __builtin_prefetch does no harm.
+
+   Prefetch using all valid combinations of rw and locality values.
+   These must be compile-time constants.  */
+
+#define NO_TEMPORAL_LOCALITY 0
+#define LOW_TEMPORAL_LOCALITY 1
+#define MODERATE_TEMPORAL_LOCALITY 1
+#define HIGH_TEMPORAL_LOCALITY 3
+
+#define WRITE_ACCESS 1
+#define READ_ACCESS 0
+
+enum locality { none, low, moderate, high };
+enum rw { read, write };
+
+int arr[10];
+
+void
+good_const (int *p)
+{
+  __builtin_prefetch (p, 0, 0);
+  __builtin_prefetch (p, 0, 1);
+  __builtin_prefetch (p, 0, 2);
+  __builtin_prefetch (p, READ_ACCESS, 3);
+  __builtin_prefetch (p, 1, NO_TEMPORAL_LOCALITY);
+  __builtin_prefetch (p, 1, LOW_TEMPORAL_LOCALITY);
+  __builtin_prefetch (p, 1, MODERATE_TEMPORAL_LOCALITY);
+  __builtin_prefetch (p, WRITE_ACCESS, HIGH_TEMPORAL_LOCALITY);
+}
+
+void
+good_enum (int *p)
+{
+    __builtin_prefetch (p, read, none);
+    __builtin_prefetch (p, read, low);
+    __builtin_prefetch (p, read, moderate);
+    __builtin_prefetch (p, read, high);
+    __builtin_prefetch (p, write, none);
+    __builtin_prefetch (p, write, low);
+    __builtin_prefetch (p, write, moderate);
+    __builtin_prefetch (p, write, high);
+}
+
+void
+good_expr (int *p)
+{
+  __builtin_prefetch (p, 1 - 1, 6 - (2 * 3));
+  __builtin_prefetch (p, 1 + 0, 1 + 2);
+}
+
+int
+main ()
+{
+  good_const (arr);
+  good_enum (arr);
+  good_expr (arr);
+  exit (0);
+}
--- gcc/testsuite/gcc.c-torture/execute/builtin-prefetch-2.c.orig	Wed Nov 28 15:04:17 2001
+++ gcc/testsuite/gcc.c-torture/execute/builtin-prefetch-2.c	Fri Nov 30 16:59:54 2001
@@ -0,0 +1,152 @@
+/* Test that __builtin_prefetch does no harm.
+
+   Prefetch data using a variety of storage classes and address
+   expressions.  */
+
+int glob_int_arr[100];
+int *glob_ptr_int = glob_int_arr;
+int glob_int = 4;
+
+static stat_int_arr[100];
+static int *stat_ptr_int = stat_int_arr;
+static int stat_int;
+
+struct S {
+  int a;
+  short b, c;
+  char d[8];
+  struct S *next;
+};
+
+struct S str;
+struct S *ptr_str = &str;
+
+/* Prefetch global variables using the address of the variable.  */
+
+void
+simple_global ()
+{
+  __builtin_prefetch (glob_int_arr, 0, 0);
+  __builtin_prefetch (glob_ptr_int, 0, 0);
+  __builtin_prefetch (&glob_int, 0, 0);
+}
+
+/* Prefetch file-level static variables using the address of the variable.  */
+
+void
+simple_file ()
+{
+  __builtin_prefetch (stat_int_arr, 0, 0);
+  __builtin_prefetch (stat_ptr_int, 0, 0);
+  __builtin_prefetch (&stat_int, 0, 0);
+}
+
+/* Prefetch local static variables using the address of the variable.  */
+
+void
+simple_static_local ()
+{
+  static int gx[100];
+  static int *hx = gx;
+  static int ix;
+  __builtin_prefetch (gx, 0, 0);
+  __builtin_prefetch (hx, 0, 0);
+  __builtin_prefetch (&ix, 0, 0);
+}
+
+/* Prefetch local stack variables using the address of the variable.  */
+
+void
+simple_local ()
+{
+  int gx[100];
+  int *hx = gx;
+  int ix;
+  __builtin_prefetch (gx, 0, 0);
+  __builtin_prefetch (hx, 0, 0);
+  __builtin_prefetch (&ix, 0, 0);
+}
+
+/* Prefetch arguments using the address of the variable.  */
+
+void
+simple_arg (int g[100], int *h, int i)
+{
+  __builtin_prefetch (g, 0, 0);
+  __builtin_prefetch (h, 0, 0);
+  __builtin_prefetch (&i, 0, 0);
+}
+
+/* Prefetch using address expressions involving global variables.  */
+
+void
+expr_global (void)
+{
+  __builtin_prefetch (&str, 0, 0);
+  __builtin_prefetch (ptr_str, 0, 0);
+  __builtin_prefetch (&str.b, 0, 0);
+  __builtin_prefetch (&ptr_str->b, 0, 0);
+  __builtin_prefetch (&str.d, 0, 0);
+  __builtin_prefetch (&ptr_str->d, 0, 0);
+  __builtin_prefetch (str.next, 0, 0);
+  __builtin_prefetch (ptr_str->next, 0, 0);
+  __builtin_prefetch (str.next->d, 0, 0);
+  __builtin_prefetch (ptr_str->next->d, 0, 0);
+
+  __builtin_prefetch (&glob_int_arr, 0, 0);
+  __builtin_prefetch (glob_ptr_int, 0, 0);
+  __builtin_prefetch (&glob_int_arr[2], 0, 0);
+  __builtin_prefetch (&glob_ptr_int[3], 0, 0);
+  __builtin_prefetch (glob_int_arr+3, 0, 0);
+  __builtin_prefetch (glob_int_arr+glob_int, 0, 0);
+  __builtin_prefetch (glob_ptr_int+5, 0, 0);
+  __builtin_prefetch (glob_ptr_int+glob_int, 0, 0);
+}
+
+/* Prefetch using address expressions involving local variables.  */
+
+void
+expr_local (void)
+{
+  int b[10];
+  int *pb = b;
+  struct S t;
+  struct S *pt = &t;
+  int j = 4;
+
+  __builtin_prefetch (&t, 0, 0);
+  __builtin_prefetch (pt, 0, 0);
+  __builtin_prefetch (&t.b, 0, 0);
+  __builtin_prefetch (&pt->b, 0, 0);
+  __builtin_prefetch (&t.d, 0, 0);
+  __builtin_prefetch (&pt->d, 0, 0);
+  __builtin_prefetch (t.next, 0, 0);
+  __builtin_prefetch (pt->next, 0, 0);
+  __builtin_prefetch (t.next->d, 0, 0);
+  __builtin_prefetch (pt->next->d, 0, 0);
+
+  __builtin_prefetch (&b, 0, 0);
+  __builtin_prefetch (pb, 0, 0);
+  __builtin_prefetch (&b[2], 0, 0);
+  __builtin_prefetch (&pb[3], 0, 0);
+  __builtin_prefetch (b+3, 0, 0);
+  __builtin_prefetch (b+j, 0, 0);
+  __builtin_prefetch (pb+5, 0, 0);
+  __builtin_prefetch (pb+j, 0, 0);
+}
+
+int
+main ()
+{
+  simple_global ();
+  simple_file ();
+  simple_static_local ();
+  simple_local ();
+  simple_arg (glob_int_arr, glob_ptr_int, glob_int);
+
+  str.next = &str;
+  expr_global ();
+  expr_local ();
+
+  exit (0);
+}
--- gcc/testsuite/gcc.c-torture/execute/builtin-prefetch-3.c.orig	Wed Nov 28 15:04:17 2001
+++ gcc/testsuite/gcc.c-torture/execute/builtin-prefetch-3.c	Fri Nov 30 16:59:54 2001
@@ -0,0 +1,113 @@
+/* Test that __builtin_prefetch does no harm.
+
+   Prefetch data using a variety of storage classes and address
+   expressions with volatile variables and pointers.  */
+
+int glob_int_arr[100];
+int glob_int = 4;
+volatile int glob_vol_int_arr[100];
+int * volatile glob_vol_ptr_int = glob_int_arr;
+volatile int *glob_ptr_vol_int = glob_vol_int_arr;
+volatile int * volatile glob_vol_ptr_vol_int = glob_vol_int_arr;
+volatile int glob_vol_int;
+
+static stat_int_arr[100];
+static volatile int stat_vol_int_arr[100];
+static int * volatile stat_vol_ptr_int = stat_int_arr;
+static volatile int *stat_ptr_vol_int = stat_vol_int_arr;
+static volatile int * volatile stat_vol_ptr_vol_int = stat_vol_int_arr;
+static volatile int stat_vol_int;
+
+struct S {
+  int a;
+  short b, c;
+  char d[8];
+  struct S *next;
+};
+
+struct S str;
+volatile struct S vol_str;
+struct S * volatile vol_ptr_str = &str;
+volatile struct S *ptr_vol_str = &vol_str;
+volatile struct S * volatile vol_ptr_vol_str = &vol_str;
+
+/* Prefetch volatile global variables using the address of the variable.  */
+
+void
+simple_vol_global ()
+{
+  __builtin_prefetch (glob_vol_int_arr, 0, 0);
+  __builtin_prefetch (glob_vol_ptr_int, 0, 0);
+  __builtin_prefetch (glob_ptr_vol_int, 0, 0);
+  __builtin_prefetch (glob_vol_ptr_vol_int, 0, 0);
+  __builtin_prefetch (&glob_vol_int, 0, 0);
+}
+
+/* Prefetch volatile static variables using the address of the variable.  */
+
+void
+simple_vol_file ()
+{
+  __builtin_prefetch (stat_vol_int_arr, 0, 0);
+  __builtin_prefetch (stat_vol_ptr_int, 0, 0);
+  __builtin_prefetch (stat_ptr_vol_int, 0, 0);
+  __builtin_prefetch (stat_vol_ptr_vol_int, 0, 0);
+  __builtin_prefetch (&stat_vol_int, 0, 0);
+}
+
+/* Prefetch using address expressions involving volatile global variables.  */
+
+void
+expr_vol_global (void)
+{
+  __builtin_prefetch (&vol_str, 0, 0);
+  __builtin_prefetch (ptr_vol_str, 0, 0);
+  __builtin_prefetch (vol_ptr_str, 0, 0);
+  __builtin_prefetch (vol_ptr_vol_str, 0, 0);
+  __builtin_prefetch (&vol_str.b, 0, 0);
+  __builtin_prefetch (&ptr_vol_str->b, 0, 0);
+  __builtin_prefetch (&vol_ptr_str->b, 0, 0);
+  __builtin_prefetch (&vol_ptr_vol_str->b, 0, 0);
+  __builtin_prefetch (&vol_str.d, 0, 0);
+  __builtin_prefetch (&vol_ptr_str->d, 0, 0);
+  __builtin_prefetch (&ptr_vol_str->d, 0, 0);
+  __builtin_prefetch (&vol_ptr_vol_str->d, 0, 0);
+  __builtin_prefetch (vol_str.next, 0, 0);
+  __builtin_prefetch (vol_ptr_str->next, 0, 0);
+  __builtin_prefetch (ptr_vol_str->next, 0, 0);
+  __builtin_prefetch (vol_ptr_vol_str->next, 0, 0);
+  __builtin_prefetch (vol_str.next->d, 0, 0);
+  __builtin_prefetch (vol_ptr_str->next->d, 0, 0);
+  __builtin_prefetch (ptr_vol_str->next->d, 0, 0);
+  __builtin_prefetch (vol_ptr_vol_str->next->d, 0, 0);
+
+  __builtin_prefetch (&glob_vol_int_arr, 0, 0);
+  __builtin_prefetch (glob_vol_ptr_int, 0, 0);
+  __builtin_prefetch (glob_ptr_vol_int, 0, 0);
+  __builtin_prefetch (glob_vol_ptr_vol_int, 0, 0);
+  __builtin_prefetch (&glob_vol_int_arr[2], 0, 0);
+  __builtin_prefetch (&glob_vol_ptr_int[3], 0, 0);
+  __builtin_prefetch (&glob_ptr_vol_int[3], 0, 0);
+  __builtin_prefetch (&glob_vol_ptr_vol_int[3], 0, 0);
+  __builtin_prefetch (glob_vol_int_arr+3, 0, 0);
+  __builtin_prefetch (glob_vol_int_arr+glob_vol_int, 0, 0);
+  __builtin_prefetch (glob_vol_ptr_int+5, 0, 0);
+  __builtin_prefetch (glob_ptr_vol_int+5, 0, 0);
+  __builtin_prefetch (glob_vol_ptr_vol_int+5, 0, 0);
+  __builtin_prefetch (glob_vol_ptr_int+glob_vol_int, 0, 0);
+  __builtin_prefetch (glob_ptr_vol_int+glob_vol_int, 0, 0);
+  __builtin_prefetch (glob_vol_ptr_vol_int+glob_vol_int, 0, 0);
+}
+
+int
+main ()
+{
+  simple_vol_global ();
+  simple_vol_file ();
+
+  str.next = &str;
+  vol_str.next = &str;
+  expr_vol_global ();
+
+  exit (0);
+}
--- gcc/testsuite/gcc.c-torture/execute/builtin-prefetch-4.c.orig	Wed Nov 28 15:04:17 2001
+++ gcc/testsuite/gcc.c-torture/execute/builtin-prefetch-4.c	Fri Nov 30 16:59:54 2001
@@ -0,0 +1,271 @@
+/* Test that __builtin_prefetch does no harm.
+
+   Check that the expression containing the address to prefetch is
+   evaluated if it has side effects, even if the target does not support
+   data prefetch.  Check changes to pointers and to array indices that are
+   either global variables or arguments.  */
+
+#define ARRSIZE 100
+
+int arr[ARRSIZE];
+int *ptr = &arr[20]; 
+int arrindex = 4;
+
+/* Check that assignment within a prefetch argument is evaluated.  */
+
+int
+assign_arg_ptr (int *p)
+{
+  int *q;
+  __builtin_prefetch ((q = p), 0, 0);
+  return q == p;
+}
+
+int
+assign_glob_ptr (void)
+{
+  int *q;
+  __builtin_prefetch ((q = ptr), 0, 0);
+  return q == ptr;
+}
+
+int
+assign_arg_idx (int *p, int i)
+{
+  int j;
+  __builtin_prefetch (&p[j = i], 0, 0);
+  return j == i;
+}
+
+int
+assign_glob_idx (void)
+{
+  int j;
+  __builtin_prefetch (&ptr[j = arrindex], 0, 0);
+  return j == arrindex;
+}
+
+/* Check that pre/post increment/decrement within a prefetch argument are
+   evaluated.  */
+
+int
+preinc_arg_ptr (int *p)
+{
+  int *q;
+  q = p + 1;
+  __builtin_prefetch (++p, 0, 0);
+  return p == q;
+}
+
+int
+preinc_glob_ptr (void)
+{
+  int *q;
+  q = ptr + 1;
+  __builtin_prefetch (++ptr, 0, 0);
+  return ptr == q;
+}
+
+int
+postinc_arg_ptr (int *p)
+{
+  int *q;
+  q = p + 1;
+  __builtin_prefetch (p++, 0, 0);
+  return p == q;
+}
+
+int
+postinc_glob_ptr (void)
+{
+  int *q;
+  q = ptr + 1;
+  __builtin_prefetch (ptr++, 0, 0);
+  return ptr == q;
+}
+
+int
+predec_arg_ptr (int *p)
+{
+  int *q;
+  q = p - 1;
+  __builtin_prefetch (--p, 0, 0);
+  return p == q;
+}
+
+int
+predec_glob_ptr (void)
+{
+  int *q;
+  q = ptr - 1;
+  __builtin_prefetch (--ptr, 0, 0);
+  return ptr == q;
+}
+
+int
+postdec_arg_ptr (int *p)
+{
+  int *q;
+  q = p - 1;
+  __builtin_prefetch (p--, 0, 0);
+  return p == q;
+}
+
+int
+postdec_glob_ptr (void)
+{
+  int *q;
+  q = ptr - 1;
+  __builtin_prefetch (ptr--, 0, 0);
+  return ptr == q;
+}
+
+int
+preinc_arg_idx (int *p, int i)
+{
+  int j = i + 1;
+  __builtin_prefetch (&p[++i], 0, 0);
+  return i == j;
+}
+
+
+int
+preinc_glob_idx (void)
+{
+  int j = arrindex + 1;
+  __builtin_prefetch (&ptr[++arrindex], 0, 0);
+  return arrindex == j;
+}
+
+int
+postinc_arg_idx (int *p, int i)
+{
+  int j = i + 1;
+  __builtin_prefetch (&p[i++], 0, 0);
+  return i == j;
+}
+
+int
+postinc_glob_idx (void)
+{
+  int j = arrindex + 1;
+  __builtin_prefetch (&ptr[arrindex++], 0, 0);
+  return arrindex == j;
+}
+
+int
+predec_arg_idx (int *p, int i)
+{
+  int j = i - 1;
+  __builtin_prefetch (&p[--i], 0, 0);
+  return i == j;
+}
+
+int
+predec_glob_idx (void)
+{
+  int j = arrindex - 1;
+  __builtin_prefetch (&ptr[--arrindex], 0, 0);
+  return arrindex == j;
+}
+
+int
+postdec_arg_idx (int *p, int i)
+{
+  int j = i - 1;
+  __builtin_prefetch (&p[i--], 0, 0);
+  return i == j;
+}
+
+int
+postdec_glob_idx (void)
+{
+  int j = arrindex - 1;
+  __builtin_prefetch (&ptr[arrindex--], 0, 0);
+  return arrindex == j;
+}
+
+/* Check that function calls within the first prefetch argument are
+   evaluated.  */
+
+int getptrcnt = 0;
+
+int *
+getptr (int *p)
+{
+  getptrcnt++;
+  return p + 1;
+}
+
+int
+funccall_arg_ptr (int *p)
+{
+  __builtin_prefetch (getptr (p), 0, 0);
+  return getptrcnt == 1;
+}
+
+int getintcnt = 0;
+
+int
+getint (int i)
+{
+  getintcnt++;
+  return i + 1;
+}
+
+int
+funccall_arg_idx (int *p, int i)
+{
+  __builtin_prefetch (&p[getint (i)], 0, 0);
+  return getintcnt == 1;
+}
+
+int
+main ()
+{
+  if (!assign_arg_ptr (ptr))
+    abort ();
+  if (!assign_glob_ptr ())
+    abort ();
+  if (!assign_arg_idx (ptr, 4))
+    abort ();
+  if (!assign_glob_idx ())
+    abort ();
+  if (!preinc_arg_ptr (ptr))
+    abort ();
+  if (!preinc_glob_ptr ())
+    abort ();
+  if (!postinc_arg_ptr (ptr))
+    abort ();
+  if (!postinc_glob_ptr ())
+    abort ();
+  if (!predec_arg_ptr (ptr))
+    abort ();
+  if (!predec_glob_ptr ())
+    abort ();
+  if (!postdec_arg_ptr (ptr))
+    abort ();
+  if (!postdec_glob_ptr ())
+    abort ();
+  if (!preinc_arg_idx (ptr, 3))
+    abort ();
+  if (!preinc_glob_idx ())
+    abort ();
+  if (!postinc_arg_idx (ptr, 3))
+    abort ();
+  if (!postinc_glob_idx ())
+    abort ();
+  if (!predec_arg_idx (ptr, 3))
+    abort ();
+  if (!predec_glob_idx ())
+    abort ();
+  if (!postdec_arg_idx (ptr, 3))
+    abort ();
+  if (!postdec_glob_idx ())
+    abort ();
+  if (!funccall_arg_ptr (ptr))
+    abort ();
+  if (!funccall_arg_idx (ptr, 3))
+    abort ();
+  exit (0);
+}
--- gcc/testsuite/gcc.c-torture/execute/builtin-prefetch-5.c.orig	Wed Nov 28 15:04:17 2001
+++ gcc/testsuite/gcc.c-torture/execute/builtin-prefetch-5.c	Fri Nov 30 16:59:54 2001
@@ -0,0 +1,60 @@
+/* Test that __builtin_prefetch does no harm.
+
+   Use addresses that are unlikely to be word-aligned.  Some targets
+   have alignment requirements for prefetch addresses, so make sure the
+   compiler takes care of that.  This fails if it aborts, anything else
+   is OK.  */
+
+struct S {
+  short a;
+  short b;
+  char c[8];
+} s;
+
+char arr[100];
+char *ptr = arr;
+int idx = 3;
+
+void
+arg_ptr (char *p)
+{
+  __builtin_prefetch (p, 0, 0);
+}
+
+void
+arg_idx (char *p, int i)
+{
+  __builtin_prefetch (&p[i], 0, 0);
+}
+
+void
+glob_ptr (void)
+{
+  __builtin_prefetch (ptr, 0, 0);
+}
+
+void
+glob_idx (void)
+{
+  __builtin_prefetch (&ptr[idx], 0, 0);
+}
+
+int
+main ()
+{
+  __builtin_prefetch (&s.b, 0, 0);
+  __builtin_prefetch (&s.c[1], 0, 0);
+
+  arg_ptr (&s.c[1]);
+  arg_ptr (ptr+3);
+  arg_idx (ptr, 3);
+  arg_idx (ptr+1, 2);
+  idx = 3;
+  glob_ptr ();
+  glob_idx ();
+  ptr++;
+  idx = 2;
+  glob_ptr ();
+  glob_idx ();
+  exit (0);
+}
--- gcc/testsuite/gcc.c-torture/execute/builtin-prefetch-6.c.orig	Fri Nov 30 09:55:45 2001
+++ gcc/testsuite/gcc.c-torture/execute/builtin-prefetch-6.c	Fri Nov 30 16:59:54 2001
@@ -0,0 +1,46 @@
+/* Test that __builtin_prefetch does no harm.
+
+   Data prefetch should not fault if used with an invalid address.  */
+
+#include <limits.h>
+
+#define ARRSIZE 65
+int *bad_addr[ARRSIZE];
+int arr_used;
+
+/* Fill bad_addr with a range of values in the hopes that on any target
+   some will be invalid addresses.  */
+void
+init_addrs (void)
+{
+  int i;
+  int bits_per_ptr = sizeof (void *) * 8;
+  for (i = 0; i < bits_per_ptr; i++)
+    bad_addr[i] = (void *)(1UL << i);
+  arr_used = bits_per_ptr + 1;  /* The last element used is zero.  */
+}
+
+void
+prefetch_for_read (void)
+{
+  int i;
+  for (i = 0; i < ARRSIZE; i++)
+    __builtin_prefetch (bad_addr[i], 0, 0);
+}
+
+void
+prefetch_for_write (void)
+{
+  int i;
+  for (i = 0; i < ARRSIZE; i++)
+    __builtin_prefetch (bad_addr[i], 1, 0);
+}
+
+int
+main ()
+{
+  init_addrs ();
+  prefetch_for_read ();
+  prefetch_for_write ();
+  exit (0);
+}
--- gcc/testsuite/gcc.dg/builtin-prefetch-1.c.orig	Wed Nov 28 15:04:39 2001
+++ gcc/testsuite/gcc.dg/builtin-prefetch-1.c	Sat Dec  1 10:26:28 2001
@@ -0,0 +1,43 @@
+/* Test that __builtin_prefetch does no harm.
+
+   Prefetch using some invalid rw and locality values.  These must be
+   compile-time constants.  */
+
+/* { dg-do run } */
+
+enum locality { none, low, moderate, high, bogus };
+enum rw { read, write };
+
+int arr[10];
+
+void
+good (int *p)
+{
+  __builtin_prefetch (p, 0, 0);
+  __builtin_prefetch (p, 0, 1);
+  __builtin_prefetch (p, 0, 2);
+  __builtin_prefetch (p, 0, 3);
+  __builtin_prefetch (p, 1, 0);
+  __builtin_prefetch (p, 1, 1);
+  __builtin_prefetch (p, 1, 2);
+  __builtin_prefetch (p, 1, 3);
+}
+
+void
+bad (int *p)
+{
+  __builtin_prefetch (p, -1, 0);  /* { dg-warning "invalid second arg to __builtin_prefetch; using zero" } */
+  __builtin_prefetch (p, 2, 0);   /* { dg-warning "invalid second arg to __builtin_prefetch; using zero" } */
+  __builtin_prefetch (p, bogus, 0);   /* { dg-warning "invalid second arg to __builtin_prefetch; using zero" } */
+  __builtin_prefetch (p, 0, -1);  /* { dg-warning "invalid third arg to __builtin_prefetch; using zero" } */
+  __builtin_prefetch (p, 0, 4);   /* { dg-warning "invalid third arg to __builtin_prefetch; using zero" } */
+  __builtin_prefetch (p, 0, bogus);   /* { dg-warning "invalid third arg to __builtin_prefetch; using zero" } */
+}
+
+int
+main ()
+{
+  good (arr);
+  bad (arr);
+  exit (0);
+}
--- gcc/config/i386/i386.c.orig	Fri Nov 30 16:58:00 2001
+++ gcc/config/i386/i386.c	Fri Nov 30 17:38:07 2001
@@ -11889,7 +11889,7 @@ ix86_expand_builtin (exp, target, subtar
       return copy_to_mode_reg (SImode, target);
 
     case IX86_BUILTIN_PREFETCH:
-      icode = CODE_FOR_prefetch;
+      icode = CODE_FOR_prefetch_sse;
       arg0 = TREE_VALUE (arglist);
       arg1 = TREE_VALUE (TREE_CHAIN (arglist));
       op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
--- gcc/config/i386/i386.md.orig	Tue Nov 27 10:13:32 2001
+++ gcc/config/i386/i386.md	Tue Nov 27 10:14:33 2001
@@ -81,7 +81,7 @@
 ;; 32 This is a `maskmov' operation.
 ;; 33 This is a `movmsk' operation.
 ;; 34 This is a `non-temporal' move.
-;; 35 This is a `prefetch' operation.
+;; 35 This is a `prefetch' (SSE) operation.
 ;; 36 This is used to distinguish COMISS from UCOMISS.
 ;; 37 This is a `ldmxcsr' operation.
 ;; 38 This is a forced `movaps' instruction (rather than whatever movti does)
@@ -19333,7 +19333,7 @@
   [(set_attr "type" "sse")
    (set_attr "memory" "unknown")])
 
-(define_insn "prefetch"
+(define_insn "prefetch_sse"
   [(unspec [(match_operand:SI 0 "address_operand" "p")
 	    (match_operand:SI 1 "immediate_operand" "n")] 35)]
   "TARGET_SSE || TARGET_3DNOW_A"


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]