preliminary patch for data prefetch support

Janis Johnson janis187@us.ibm.com
Tue Nov 6 15:35:00 GMT 2001


This is a preliminary patch for a framework for data prefetch support
in GCC.  The prefetch RTL pattern used here is not necessarily what
I'll propose for real, and I suspect that there are numerous places in
the compiler that will need to recognize the new pattern.  This is just
to find out if I'm on the right track.

A compiler with this patch builds (simple build, C only) for
i686-pc-linux-gnu and as a cross compiler for ia64-linux.  When the
build also includes an update to Jan Hubicka's loop array prefetch
patch (to be posted very soon), the resulting compilers generate
prefetch instructions for a simple test case for i386 with -msse and
for ia64.

If this were a real patch, the ChangeLog entry would be:

2001-11-06  Janis Johnson  <janis187@us.ibm.com>

	* rtl.def (PREFETCH): New RTL pattern.
	* doc/rtl.texi (PREFETCH): Document.
	* config/ia64/ia64.h (SIMULTANEOUS_PREFETCHES, PREFETCH_BLOCK): New.
	* config/ia64/ia64.md (prefetch): New.
	* config/i386/i386.h (SIMULTANEOUS_PREFETCHES, PREFETCH_BLOCK): New.
	* config/i386/i386.c (ix86_expand_builtin): Change the name of the
	code from prefetch to prefetch_sse.
	* config/i386/i386.md: Same, plus add new prefetch for all variants.

--- gcc/rtl.def.orig	Mon Nov  5 15:43:46 2001
+++ gcc/rtl.def	Tue Nov  6 13:23:19 2001
@@ -552,6 +552,18 @@ DEF_RTL_EXPR(TRAP_IF, "trap_if", "ee", '
    which control is flowing.  */
 DEF_RTL_EXPR(RESX, "resx", "i", 'x')
 
+/* Memory prefetch, with attributes supported on some machines.
+   Operand 1 is the address of the memory to fetch.
+   Operand 2 is 1 for a write access, 0 otherwise.
+   Operand 3 is the level of temporal locality; 0 means there is no
+   temporal locality and 1, 2, and 3 are for increasing levels of temporal
+   locality.
+
+   The attributes specified by operands 2 and 3 are ignored for targets
+   whose prefetch instructions do not support them.  */
+
+DEF_RTL_EXPR(PREFETCH, "prefetch", "eee", 'x')
+
 /* ----------------------------------------------------------------------
    Primitive values for use in expressions.
    ---------------------------------------------------------------------- */
--- gcc/doc/rtl.texi.orig	Mon Nov  5 15:43:46 2001
+++ gcc/doc/rtl.texi	Mon Nov  5 16:12:55 2001
@@ -2326,6 +2326,19 @@ are set up by branch shortening and hold
 maximum address, respectively.  @var{flags} indicates the relative
 position of @var{base}, @var{min} and @var{max} to the containing insn
 and of @var{min} and @var{max} to @var{base}.  See rtl.def for details.
+
+@findex prefetch
+@item (prefetch:@var{m} @var{addr} @var{rw} @var{temploc})
+Represents prefetch of memory at address @var{addr}.
+Operand @var{rw} is 1 if the prefetch is for data to be written, 0 otherwise;
+targets that do not support write prefetches should treat this as a normal
+prefetch.
+Operand @var{temploc} specifies the amount of temporal locality; 0 if there
+is none or 1, 2, or 3 for increasing levels of temporal locality;
+targets that do not support locality hints should ignore this.
+
+This insn is used to minimize cache-miss latency by moving data into a
+cache before it is accessed.
 @end table
 
 @node Incdec
--- gcc/config/ia64/ia64.h.orig	Mon Nov  5 15:43:46 2001
+++ gcc/config/ia64/ia64.h	Tue Nov  6 10:26:50 2001
@@ -2735,6 +2735,22 @@ do {									\
 
 #define FUNCTION_MODE Pmode
 
+/* If this architecture supports prefetch, define this to be the number of
+   prefetch commands that can be executed in parallel.
+
+   ??? This is temporary and needs to be handled in a way so it will be
+   specific to an implementation.  */
+
+#define SIMULTANEOUS_PREFETCHES 6
+
+/* If this architecture supports prefetch, define this to be the size of
+   the cache line that is prefetched.
+
+   ??? This is temporary and needs to be handled in a way so it will be
+   specific to an implementation.  */
+
+#define PREFETCH_BLOCK 32
+
 /* Define this macro to handle System V style pragmas: #pragma pack and
    #pragma weak.  Note, #pragma weak will only be supported if SUPPORT_WEAK is
    defined.  */
--- gcc/config/ia64/ia64.md.orig	Mon Nov  5 15:43:46 2001
+++ gcc/config/ia64/ia64.md	Tue Nov  6 12:52:37 2001
@@ -99,7 +99,7 @@
 ;; multiple instructions, patterns which emit 0 instructions, and patterns
 ;; which emit instruction that can go in any slot (e.g. nop).
 
-(define_attr "itanium_class" "unknown,ignore,stop_bit,br,fcmp,fcvtfx,fld,fmac,fmisc,frar_i,frar_m,frbr,frfr,frpr,ialu,icmp,ilog,ishf,ld,chk_s,long_i,mmmul,mmshf,mmshfi,rse_m,scall,sem,stf,st,syst_m0,syst_m,tbit,toar_i,toar_m,tobr,tofr,topr,xmpy,xtd,nop_b,nop_f,nop_i,nop_m,nop_x"
+(define_attr "itanium_class" "unknown,ignore,stop_bit,br,fcmp,fcvtfx,fld,fmac,fmisc,frar_i,frar_m,frbr,frfr,frpr,ialu,icmp,ilog,ishf,ld,chk_s,long_i,mmmul,mmshf,mmshfi,rse_m,scall,sem,stf,st,syst_m0,syst_m,tbit,toar_i,toar_m,tobr,tofr,topr,xmpy,xtd,nop_b,nop_f,nop_i,nop_m,nop_x,lfetch"
          (const_string "unknown"))
 
 ;; chk_s has an I and an M form; use type A for convenience.
@@ -107,6 +107,7 @@
   (cond [(eq_attr "itanium_class" "ld,st,fld,stf,sem,nop_m") (const_string "M")
 	 (eq_attr "itanium_class" "rse_m,syst_m,syst_m0") (const_string "M")
 	 (eq_attr "itanium_class" "frar_m,toar_m,frfr,tofr") (const_string "M")
+	 (eq_attr "itanium_class" "lfetch") (const_string "M")
 	 (eq_attr "itanium_class" "chk_s,ialu,icmp,ilog") (const_string "A")
 	 (eq_attr "itanium_class" "fmisc,fmac,fcmp,xmpy") (const_string "F")
 	 (eq_attr "itanium_class" "fcvtfx,nop_f") (const_string "F")
@@ -5048,6 +5049,32 @@
   ""
   "break.f 0"
   [(set_attr "itanium_class" "nop_f")])
+
+(define_insn "prefetch"
+  [(prefetch (match_operand:DI 0 "address_operand" "p")
+	     (match_operand:DI 1 "const_int_operand" "n")
+	     (match_operand:DI 2 "const_int_operand" "n"))]
+  ""
+  "*
+{
+  static const char * const alt[2][4] = {
+    \"lfetch.nta [%0]\",
+    \"lfetch.nt1 [%0]\",
+    \"lfetch.nt2 [%0]\",
+    \"lfetch [%0]\",
+    \"lfetch.excl.nta [%0]\",
+    \"lfetch.excl.nt1 [%0]\",
+    \"lfetch.excl.nt2 [%0]\",
+    \"lfetch.excl [%0]\"
+  };
+  int i = (INTVAL (operands[1]) == 1);
+  int j = (INTVAL (operands[2]));
+
+  if (j < 0 || j > 3)
+    j = 0;   /* ??? What's the correct thing to do here?  */
+  return alt[i][j];
+}"
+  [(set_attr "itanium_class" "lfetch")])
 
 ;; Non-local goto support.
 
@@ -5270,3 +5297,4 @@
   "addp4 %0 = 0,%1"
   [(set_attr "itanium_class" "ialu")])
 
+
--- gcc/config/i386/i386.h.orig	Mon Nov  5 15:43:46 2001
+++ gcc/config/i386/i386.h	Tue Nov  6 10:27:25 2001
@@ -2411,6 +2411,22 @@ while (0)
    is a byte address (for indexing purposes)
    so give the MEM rtx a byte's mode.  */
 #define FUNCTION_MODE QImode
+
+/* If this architecture supports prefetch, define this to be the number of
+   prefetch commands that can be executed in parallel.
+
+   ??? This is temporary and needs to be handled in a way so it will be
+   specific to an implementation.  */
+
+#define SIMULTANEOUS_PREFETCHES 6
+
+/* If this architecture supports prefetch, define this to be the size of
+   the cache line that is prefetched.
+
+   ??? This is temporary and needs to be handled in a way so it will be
+   specific to an implementation.  */
+
+#define PREFETCH_BLOCK 32
 
 /* A part of a C `switch' statement that describes the relative costs
    of constant RTL expressions.  It must contain `case' labels for
--- gcc/config/i386/i386.c.orig	Mon Nov  5 15:43:46 2001
+++ gcc/config/i386/i386.c	Mon Nov  5 15:55:54 2001
@@ -11863,7 +11863,7 @@ ix86_expand_builtin (exp, target, subtar
       return copy_to_mode_reg (SImode, target);
 
     case IX86_BUILTIN_PREFETCH:
-      icode = CODE_FOR_prefetch;
+      icode = CODE_FOR_prefetch_sse;
       arg0 = TREE_VALUE (arglist);
       arg1 = TREE_VALUE (TREE_CHAIN (arglist));
       op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
--- gcc/config/i386/i386.md.orig	Mon Nov  5 15:43:46 2001
+++ gcc/config/i386/i386.md	Tue Nov  6 12:50:36 2001
@@ -81,7 +81,7 @@
 ;; 32 This is a `maskmov' operation.
 ;; 33 This is a `movmsk' operation.
 ;; 34 This is a `non-temporal' move.
-;; 35 This is a `prefetch' operation.
+;; 35 This is a `prefetch' (SSE) operation.
 ;; 36 This is used to distinguish COMISS from UCOMISS.
 ;; 37 This is a `ldmxcsr' operation.
 ;; 38 This is a forced `movaps' instruction (rather than whatever movti does)
@@ -19319,7 +19319,47 @@
   [(set_attr "type" "sse")
    (set_attr "memory" "unknown")])
 
-(define_insn "prefetch"
+(define_expand "prefetch"
+  [(prefetch (match_operand:SI 0 "address_operand" "p")
+	     (match_operand:SI 1 "const_int_operand" "n")
+	     (match_operand:SI 2 "const_int_operand" "n"))]
+  "TARGET_SSE || TARGET_3DNOW || TARGET_3DNOW_A"
+  "
+{
+  if (TARGET_3DNOW)
+    {
+      if (INTVAL (operands[1]) == 1)
+        emit_insn (gen_prefetchw (operands[0]));
+      else
+        emit_insn (gen_prefetch_3dnow (operands[0]));
+    }
+  else
+    {
+      int i;
+      switch (INTVAL (operands[2]))
+	{
+	  case 0:	/* Non temporal locality.  */
+	    i = 0;
+	    break;
+	  case 1:	/* Lowest level of temporal locality.  */
+	    i = 3;
+	    break;
+	  case 2:
+	    i = 2;
+	    break;
+	  case 3:	/* Highest level of temporal locality.  */
+	    i = 1;
+	    break;
+	  default:
+	    i = 0;	/* ??? should we abort here instead?  */
+	    break;
+	}
+      emit_insn (gen_prefetch_sse (operands[0], GEN_INT (i)));
+    }
+  DONE;
+}")
+
+(define_insn "prefetch_sse"
   [(unspec [(match_operand:SI 0 "address_operand" "p")
 	    (match_operand:SI 1 "immediate_operand" "n")] 35)]
   "TARGET_SSE || TARGET_3DNOW_A"



More information about the Gcc-patches mailing list