preliminary patch for data prefetch support
Janis Johnson
janis187@us.ibm.com
Tue Nov 6 15:35:00 GMT 2001
This is a preliminary patch for a framework for data prefetch support
in GCC. The prefetch RTL pattern used here is not necessarily what
I'll propose for real, and I suspect that there are numerous places in
the compiler that will need to recognize the new pattern. This is just
to find out if I'm on the right track.
A compiler with this patch builds (simple build, C only) for
i686-pc-linux-gnu and as a cross compiler for ia64-linux. When the
build also includes an update to Jan Hubicka's loop array prefetch
patch (to be posted very soon), the resulting compilers generate
prefetch instructions for a simple test case for i386 with -msse and
for ia64.
If this were a real patch, the ChangeLog entry would be:
2001-11-06 Janis Johnson <janis187@us.ibm.com>
* rtl.def (PREFETCH): New RTL pattern.
* doc/rtl.texi (PREFETCH): Document.
* config/ia64/ia64.h (SIMULTANEOUS_PREFETCHES, PREFETCH_BLOCK): New.
* config/ia64/ia64.md (prefetch): New.
* config/i386/i386.h (SIMULTANEOUS_PREFETCHES, PREFETCH_BLOCK): New.
* config/i386/i386.c (ix86_expand_builtin): Change the name of the
code from prefetch to prefetch_sse.
* config/i386/i386.md: Same, plus add new prefetch for all variants.
--- gcc/rtl.def.orig Mon Nov 5 15:43:46 2001
+++ gcc/rtl.def Tue Nov 6 13:23:19 2001
@@ -552,6 +552,18 @@ DEF_RTL_EXPR(TRAP_IF, "trap_if", "ee", '
which control is flowing. */
DEF_RTL_EXPR(RESX, "resx", "i", 'x')
+/* Memory prefetch, with attributes supported on some machines.
+ Operand 1 is the address of the memory to fetch.
+ Operand 2 is 1 for a write access, 0 otherwise.
+ Operand 3 is the level of temporal locality; 0 means there is no
+ temporal locality and 1, 2, and 3 are for increasing levels of temporal
+ locality.
+
+ The attributes specified by operands 2 and 3 are ignored for targets
+ whose prefetch instructions do not support them. */
+
+DEF_RTL_EXPR(PREFETCH, "prefetch", "eee", 'x')
+
/* ----------------------------------------------------------------------
Primitive values for use in expressions.
---------------------------------------------------------------------- */
--- gcc/doc/rtl.texi.orig Mon Nov 5 15:43:46 2001
+++ gcc/doc/rtl.texi Mon Nov 5 16:12:55 2001
@@ -2326,6 +2326,19 @@ are set up by branch shortening and hold
maximum address, respectively. @var{flags} indicates the relative
position of @var{base}, @var{min} and @var{max} to the containing insn
and of @var{min} and @var{max} to @var{base}. See rtl.def for details.
+
+@findex prefetch
+@item (prefetch:@var{m} @var{addr} @var{rw} @var{temploc})
+Represents prefetch of memory at address @var{addr}.
+Operand @var{rw} is 1 if the prefetch is for data to be written, 0 otherwise;
+targets that do not support write prefetches should treat this as a normal
+prefetch.
+Operand @var{temploc} specifies the amount of temporal locality; 0 if there
+is none or 1, 2, or 3 for increasing levels of temporal locality;
+targets that do not support locality hints should ignore this.
+
+This insn is used to minimize cache-miss latency by moving data into a
+cache before it is accessed.
@end table
@node Incdec
--- gcc/config/ia64/ia64.h.orig Mon Nov 5 15:43:46 2001
+++ gcc/config/ia64/ia64.h Tue Nov 6 10:26:50 2001
@@ -2735,6 +2735,22 @@ do { \
#define FUNCTION_MODE Pmode
+/* If this architecture supports prefetch, define this to be the number of
+ prefetch commands that can be executed in parallel.
+
+ ??? This is temporary and needs to be handled in a way so it will be
+ specific to an implementation. */
+
+#define SIMULTANEOUS_PREFETCHES 6
+
+/* If this architecture supports prefetch, define this to be the size of
+ the cache line that is prefetched.
+
+ ??? This is temporary and needs to be handled in a way so it will be
+ specific to an implementation. */
+
+#define PREFETCH_BLOCK 32
+
/* Define this macro to handle System V style pragmas: #pragma pack and
#pragma weak. Note, #pragma weak will only be supported if SUPPORT_WEAK is
defined. */
--- gcc/config/ia64/ia64.md.orig Mon Nov 5 15:43:46 2001
+++ gcc/config/ia64/ia64.md Tue Nov 6 12:52:37 2001
@@ -99,7 +99,7 @@
;; multiple instructions, patterns which emit 0 instructions, and patterns
;; which emit instruction that can go in any slot (e.g. nop).
-(define_attr "itanium_class" "unknown,ignore,stop_bit,br,fcmp,fcvtfx,fld,fmac,fmisc,frar_i,frar_m,frbr,frfr,frpr,ialu,icmp,ilog,ishf,ld,chk_s,long_i,mmmul,mmshf,mmshfi,rse_m,scall,sem,stf,st,syst_m0,syst_m,tbit,toar_i,toar_m,tobr,tofr,topr,xmpy,xtd,nop_b,nop_f,nop_i,nop_m,nop_x"
+(define_attr "itanium_class" "unknown,ignore,stop_bit,br,fcmp,fcvtfx,fld,fmac,fmisc,frar_i,frar_m,frbr,frfr,frpr,ialu,icmp,ilog,ishf,ld,chk_s,long_i,mmmul,mmshf,mmshfi,rse_m,scall,sem,stf,st,syst_m0,syst_m,tbit,toar_i,toar_m,tobr,tofr,topr,xmpy,xtd,nop_b,nop_f,nop_i,nop_m,nop_x,lfetch"
(const_string "unknown"))
;; chk_s has an I and an M form; use type A for convenience.
@@ -107,6 +107,7 @@
(cond [(eq_attr "itanium_class" "ld,st,fld,stf,sem,nop_m") (const_string "M")
(eq_attr "itanium_class" "rse_m,syst_m,syst_m0") (const_string "M")
(eq_attr "itanium_class" "frar_m,toar_m,frfr,tofr") (const_string "M")
+ (eq_attr "itanium_class" "lfetch") (const_string "M")
(eq_attr "itanium_class" "chk_s,ialu,icmp,ilog") (const_string "A")
(eq_attr "itanium_class" "fmisc,fmac,fcmp,xmpy") (const_string "F")
(eq_attr "itanium_class" "fcvtfx,nop_f") (const_string "F")
@@ -5048,6 +5049,32 @@
""
"break.f 0"
[(set_attr "itanium_class" "nop_f")])
+
+(define_insn "prefetch"
+ [(prefetch (match_operand:DI 0 "address_operand" "p")
+ (match_operand:DI 1 "const_int_operand" "n")
+ (match_operand:DI 2 "const_int_operand" "n"))]
+ ""
+ "*
+{
+ static const char * const alt[2][4] = {
+ \"lfetch.nta [%0]\",
+ \"lfetch.nt1 [%0]\",
+ \"lfetch.nt2 [%0]\",
+ \"lfetch [%0]\",
+ \"lfetch.excl.nta [%0]\",
+ \"lfetch.excl.nt1 [%0]\",
+ \"lfetch.excl.nt2 [%0]\",
+ \"lfetch.excl [%0]\"
+ };
+ int i = (INTVAL (operands[1]) == 1);
+ int j = (INTVAL (operands[2]));
+
+ if (j < 0 || j > 3)
+ j = 0; /* ??? What's the correct thing to do here? */
+ return alt[i][j];
+}"
+ [(set_attr "itanium_class" "lfetch")])
;; Non-local goto support.
@@ -5270,3 +5297,4 @@
"addp4 %0 = 0,%1"
[(set_attr "itanium_class" "ialu")])
+
--- gcc/config/i386/i386.h.orig Mon Nov 5 15:43:46 2001
+++ gcc/config/i386/i386.h Tue Nov 6 10:27:25 2001
@@ -2411,6 +2411,22 @@ while (0)
is a byte address (for indexing purposes)
so give the MEM rtx a byte's mode. */
#define FUNCTION_MODE QImode
+
+/* If this architecture supports prefetch, define this to be the number of
+ prefetch commands that can be executed in parallel.
+
+ ??? This is temporary and needs to be handled in a way so it will be
+ specific to an implementation. */
+
+#define SIMULTANEOUS_PREFETCHES 6
+
+/* If this architecture supports prefetch, define this to be the size of
+ the cache line that is prefetched.
+
+ ??? This is temporary and needs to be handled in a way so it will be
+ specific to an implementation. */
+
+#define PREFETCH_BLOCK 32
/* A part of a C `switch' statement that describes the relative costs
of constant RTL expressions. It must contain `case' labels for
--- gcc/config/i386/i386.c.orig Mon Nov 5 15:43:46 2001
+++ gcc/config/i386/i386.c Mon Nov 5 15:55:54 2001
@@ -11863,7 +11863,7 @@ ix86_expand_builtin (exp, target, subtar
return copy_to_mode_reg (SImode, target);
case IX86_BUILTIN_PREFETCH:
- icode = CODE_FOR_prefetch;
+ icode = CODE_FOR_prefetch_sse;
arg0 = TREE_VALUE (arglist);
arg1 = TREE_VALUE (TREE_CHAIN (arglist));
op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
--- gcc/config/i386/i386.md.orig Mon Nov 5 15:43:46 2001
+++ gcc/config/i386/i386.md Tue Nov 6 12:50:36 2001
@@ -81,7 +81,7 @@
;; 32 This is a `maskmov' operation.
;; 33 This is a `movmsk' operation.
;; 34 This is a `non-temporal' move.
-;; 35 This is a `prefetch' operation.
+;; 35 This is a `prefetch' (SSE) operation.
;; 36 This is used to distinguish COMISS from UCOMISS.
;; 37 This is a `ldmxcsr' operation.
;; 38 This is a forced `movaps' instruction (rather than whatever movti does)
@@ -19319,7 +19319,47 @@
[(set_attr "type" "sse")
(set_attr "memory" "unknown")])
-(define_insn "prefetch"
+(define_expand "prefetch"
+ [(prefetch (match_operand:SI 0 "address_operand" "p")
+ (match_operand:SI 1 "const_int_operand" "n")
+ (match_operand:SI 2 "const_int_operand" "n"))]
+ "TARGET_SSE || TARGET_3DNOW || TARGET_3DNOW_A"
+ "
+{
+ if (TARGET_3DNOW)
+ {
+ if (INTVAL (operands[1]) == 1)
+ emit_insn (gen_prefetchw (operands[0]));
+ else
+ emit_insn (gen_prefetch_3dnow (operands[0]));
+ }
+ else
+ {
+ int i;
+ switch (INTVAL (operands[2]))
+ {
+ case 0: /* Non temporal locality. */
+ i = 0;
+ break;
+ case 1: /* Lowest level of temporal locality. */
+ i = 3;
+ break;
+ case 2:
+ i = 2;
+ break;
+ case 3: /* Highest level of temporal locality. */
+ i = 1;
+ break;
+ default:
+ i = 0; /* ??? should we abort here instead? */
+ break;
+ }
+ emit_insn (gen_prefetch_sse (operands[0], GEN_INT (i)));
+ }
+ DONE;
+}")
+
+(define_insn "prefetch_sse"
[(unspec [(match_operand:SI 0 "address_operand" "p")
(match_operand:SI 1 "immediate_operand" "n")] 35)]
"TARGET_SSE || TARGET_3DNOW_A"
More information about the Gcc-patches
mailing list