ia64 vectorization improvements

Richard Henderson rth@redhat.com
Sun Jun 19 16:27:00 GMT 2005


The special constant alternatives in movv2sf weren't being used, because
none of the constants were legitimate.

Both vectors and integers are wholly contained within the fp register
significand, and thus getf.sig/setf.sig are the access method to that
data.  Thus we need not set CANNOT_CHANGE_MODE_CLASS for this data.
This improves code that manipulates vector components significantly.
Or did until...

I finished off the vector component set/extract patterns.  In the case
of vec_set, we do have a couple of specialized fpu instructions that
can merge data in interesting ways.  In the case of vec_extract, we
need not work so hard as generic code wants.  It is unfortunate that
Intel did not see fit to provide an funpack instruction, but the
minimum required is a move to and from the integer registers.  I also 
thought that the unspec would be useful if we ever wanted to look for
extracts followed by inserts, and turn them into permutes.  But perhaps
that ought to happen at a higher level generically...

Tested on ia64-linux.


r~


        * config/ia64/ia64-modes.def (V4SF): Add.
        * config/ia64/ia64.c (ia64_legitimate_constant_p): Handle CONST_VECTOR.
        * config/ia64/ia64.h (CANNOT_CHANGE_MODE_CLASS): Allow vector to
        integer mode changes in fp regs.
        * config/ia64/ia64.md (UNSPEC_VECT_EXTR): New.
        * config/ia64/vect.md (smaxv2sf3, sminv2sf3): Fix typos in names.
        (reduc_plus_v2sf, reduc_smax_v2sf, reduc_smin_v2sf): New.
        (vcondv2sf): Use gen_fpack.
        (fpack): Remove * from name.
        (fswap, fmix_l, fmix_r, fmix_lr): New.
        (vec_setv2sf, vec_extractv2sf_0_le, vec_extractv2sf_0_be): New.
        (vec_extractv2sf_1, vec_extractv2sf): New.

Index: config/ia64/ia64-modes.def
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/ia64/ia64-modes.def,v
retrieving revision 1.8
diff -u -p -d -r1.8 ia64-modes.def
--- config/ia64/ia64-modes.def	18 Jan 2005 12:01:30 -0000	1.8
+++ config/ia64/ia64-modes.def	19 Jun 2005 16:06:14 -0000
@@ -74,3 +74,5 @@ VECTOR_MODE (INT, QI, 16);
 VECTOR_MODE (INT, HI, 8);
 VECTOR_MODE (INT, SI, 4);
 VECTOR_MODE (FLOAT, SF, 2);
+VECTOR_MODE (FLOAT, SF, 4);
+
Index: config/ia64/ia64.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/ia64/ia64.c,v
retrieving revision 1.378
diff -u -p -d -r1.378 ia64.c
--- config/ia64/ia64.c	18 Jun 2005 11:56:26 -0000	1.378
+++ config/ia64/ia64.c	19 Jun 2005 16:06:15 -0000
@@ -738,6 +738,17 @@ ia64_legitimate_constant_p (rtx x)
     case SYMBOL_REF:
       return tls_symbolic_operand_type (x) == 0;
 
+    case CONST_VECTOR:
+      {
+	enum machine_mode mode = GET_MODE (x);
+
+	if (mode == V2SFmode)
+	  return ia64_extra_constraint (x, 'Y');
+
+	return (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
+		&& GET_MODE_SIZE (mode) <= 8);
+      }
+
     default:
       return false;
     }
Index: config/ia64/ia64.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/ia64/ia64.h,v
retrieving revision 1.203
diff -u -p -d -r1.203 ia64.h
--- config/ia64/ia64.h	8 Jun 2005 21:29:36 -0000	1.203
+++ config/ia64/ia64.h	19 Jun 2005 16:06:16 -0000
@@ -892,11 +892,11 @@ enum reg_class
    : ((CLASS) == FR_REGS && (MODE) == XCmode) ? 2		\
    : (GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
 
-/* In FP regs, we can't change FP values to integer values and vice
-   versa, but we can change e.g. DImode to SImode.  */
+/* In FP regs, we can't change FP values to integer values and vice versa,
+   but we can change e.g. DImode to SImode, and V2SFmode into DImode.  */
 
-#define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS) 	\
-  (GET_MODE_CLASS (FROM) != GET_MODE_CLASS (TO)		\
+#define CANNOT_CHANGE_MODE_CLASS(FROM, TO, CLASS) 		\
+  (SCALAR_FLOAT_MODE_P (FROM) != SCALAR_FLOAT_MODE_P (TO)	\
    ? reg_classes_intersect_p (CLASS, FR_REGS) : 0)
 
 /* A C expression that defines the machine-dependent operand constraint
Index: config/ia64/ia64.md
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/ia64/ia64.md,v
retrieving revision 1.154
diff -u -p -d -r1.154 ia64.md
--- config/ia64/ia64.md	17 Jun 2005 20:45:50 -0000	1.154
+++ config/ia64/ia64.md	19 Jun 2005 16:06:17 -0000
@@ -80,6 +80,7 @@
    (UNSPEC_FR_SQRT_RECIP_APPROX 28)
    (UNSPEC_SHRP			29)
    (UNSPEC_COPYSIGN		30)
+   (UNSPEC_VECT_EXTR		31)
   ])
 
 (define_constants
Index: config/ia64/vect.md
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/ia64/vect.md,v
retrieving revision 1.4
diff -u -p -d -r1.4 vect.md
--- config/ia64/vect.md	9 Jun 2005 17:38:06 -0000	1.4
+++ config/ia64/vect.md	19 Jun 2005 16:06:17 -0000
@@ -845,7 +845,7 @@
   "fpnma %0 = %1, %2, %3"
   [(set_attr "itanium_class" "fmac")])
 
-(define_insn "smaxv2sf2"
+(define_insn "smaxv2sf3"
   [(set (match_operand:V2SF 0 "fr_register_operand" "=f")
 	(smax:V2SF (match_operand:V2SF 1 "fr_register_operand" "f")
 		   (match_operand:V2SF 2 "fr_register_operand" "f")))]
@@ -853,7 +853,7 @@
   "fpmax %0 = %1, %2"
   [(set_attr "itanium_class" "fmisc")])
 
-(define_insn "sminv2sf2"
+(define_insn "sminv2sf3"
   [(set (match_operand:V2SF 0 "fr_register_operand" "=f")
 	(smin:V2SF (match_operand:V2SF 1 "fr_register_operand" "f")
 		   (match_operand:V2SF 2 "fr_register_operand" "f")))]
@@ -861,6 +861,39 @@
   "fpmin %0 = %1, %2"
   [(set_attr "itanium_class" "fmisc")])
 
+(define_expand "reduc_plus_v2sf"
+  [(match_operand:V2SF 0 "fr_register_operand" "")
+   (match_operand:V2SF 1 "fr_register_operand" "")]
+  ""
+{
+  rtx tmp = gen_reg_rtx (V2SFmode);
+  emit_insn (gen_fswap (tmp, operands[1], CONST0_RTX (V2SFmode)));
+  emit_insn (gen_addv2sf3 (operands[0], operands[1], tmp));
+  DONE;
+})
+
+(define_expand "reduc_smax_v2sf"
+  [(match_operand:V2SF 0 "fr_register_operand" "")
+   (match_operand:V2SF 1 "fr_register_operand" "")]
+  ""
+{
+  rtx tmp = gen_reg_rtx (V2SFmode);
+  emit_insn (gen_fswap (tmp, operands[1], CONST0_RTX (V2SFmode)));
+  emit_insn (gen_smaxv2sf3 (operands[0], operands[1], tmp));
+  DONE;
+})
+
+(define_expand "reduc_smin_v2sf"
+  [(match_operand:V2SF 0 "fr_register_operand" "")
+   (match_operand:V2SF 1 "fr_register_operand" "")]
+  ""
+{
+  rtx tmp = gen_reg_rtx (V2SFmode);
+  emit_insn (gen_fswap (tmp, operands[1], CONST0_RTX (V2SFmode)));
+  emit_insn (gen_sminv2sf3 (operands[0], operands[1], tmp));
+  DONE;
+})
+
 (define_expand "vcondv2sf"
   [(set (match_operand:V2SF 0 "fr_register_operand" "")
 	(if_then_else:V2SF
@@ -922,12 +955,11 @@
   if (!fr_reg_or_fp01_operand (op2, SFmode))
     op2 = force_reg (SFmode, op2);
 
-  x = gen_rtx_VEC_CONCAT (V2SFmode, op1, op2);
-  emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
+  emit_insn (gen_fpack (operands[0], op1, op2));
   DONE;
 })
 
-(define_insn "*fpack"
+(define_insn "fpack"
   [(set (match_operand:V2SF 0 "fr_register_operand" "=f")
 	(vec_concat:V2SF
 	  (match_operand:SF 1 "fr_reg_or_fp01_operand" "fG")
@@ -936,8 +968,139 @@
   "fpack %0 = %F2, %F1"
   [(set_attr "itanium_class" "fmisc")])
 
+(define_insn "fswap"
+  [(set (match_operand:V2SF 0 "fr_register_operand" "=f")
+	(vec_select:V2SF
+	  (vec_concat:V4SF
+	    (match_operand:V2SF 1 "fr_reg_or_0_operand" "fU")
+	    (match_operand:V2SF 2 "fr_reg_or_0_operand" "fU"))
+	  (parallel [(const_int 1) (const_int 2)])))]
+  ""
+  "fswap %0 = %F1, %F2"
+  [(set_attr "itanium_class" "fmisc")])
+
+(define_insn "*fmix_l"
+  [(set (match_operand:V2SF 0 "fr_register_operand" "=f")
+	(vec_select:V2SF
+	  (vec_concat:V4SF
+	    (match_operand:V2SF 1 "fr_reg_or_0_operand" "fU")
+	    (match_operand:V2SF 2 "fr_reg_or_0_operand" "fU"))
+	  (parallel [(const_int 1) (const_int 3)])))]
+  ""
+  "fmix.l %0 = %F2, %F1"
+  [(set_attr "itanium_class" "fmisc")])
+
+(define_insn "fmix_r"
+  [(set (match_operand:V2SF 0 "fr_register_operand" "=f")
+	(vec_select:V2SF
+	  (vec_concat:V4SF
+	    (match_operand:V2SF 1 "fr_reg_or_0_operand" "fU")
+	    (match_operand:V2SF 2 "fr_reg_or_0_operand" "fU"))
+	  (parallel [(const_int 0) (const_int 2)])))]
+  ""
+  "fmix.r %0 = %F2, %F1"
+  [(set_attr "itanium_class" "fmisc")])
+
+(define_insn "fmix_lr"
+  [(set (match_operand:V2SF 0 "fr_register_operand" "=f")
+	(vec_select:V2SF
+	  (vec_concat:V4SF
+	    (match_operand:V2SF 1 "fr_reg_or_0_operand" "fU")
+	    (match_operand:V2SF 2 "fr_reg_or_0_operand" "fU"))
+	  (parallel [(const_int 0) (const_int 3)])))]
+  ""
+  "fmix.lr %0 = %F2, %F1"
+  [(set_attr "itanium_class" "fmisc")])
+
+(define_expand "vec_setv2sf"
+  [(match_operand:V2SF 0 "fr_register_operand" "")
+   (match_operand:SF 1 "fr_register_operand" "")
+   (match_operand 2 "const_int_operand" "")]
+  ""
+{
+  rtx tmp = gen_reg_rtx (V2SFmode);
+  emit_insn (gen_fpack (tmp, operands[1], CONST0_RTX (SFmode)));
+
+  switch (INTVAL (operands[2]))
+    {
+    case 0:
+      emit_insn (gen_fmix_lr (operands[0], tmp, operands[0]));
+      break;
+    case 1:
+      emit_insn (gen_fmix_r (operands[0], operands[0], tmp));
+      break;
+    default:
+      gcc_unreachable ();
+    }
+  DONE;
+})
+
+(define_insn_and_split "*vec_extractv2sf_0_le"
+  [(set (match_operand:SF 0 "nonimmediate_operand" "=r,f,m")
+	(unspec:SF [(match_operand:V2SF 1 "nonimmediate_operand" "rfm,rm,r")
+		    (const_int 0)]
+		   UNSPEC_VECT_EXTR))]
+  "!TARGET_BIG_ENDIAN"
+  "#"
+  "reload_completed"
+  [(set (match_dup 0) (match_dup 1))]
+{
+  if (REG_P (operands[1]) && FR_REGNO_P (REGNO (operands[1])))
+    operands[0] = gen_rtx_REG (V2SFmode, REGNO (operands[0]));
+  else if (MEM_P (operands[1]))
+    operands[1] = adjust_address (operands[1], SFmode, 0);
+  else
+    operands[1] = gen_rtx_REG (SFmode, REGNO (operands[1]));
+})
+
+(define_insn_and_split "*vec_extractv2sf_0_be"
+  [(set (match_operand:SF 0 "register_operand" "=r,f")
+	(unspec:SF [(match_operand:V2SF 1 "register_operand" "rf,r")
+		    (const_int 0)]
+		   UNSPEC_VECT_EXTR))]
+  "TARGET_BIG_ENDIAN"
+  "#"
+  "reload_completed"
+  [(set (match_dup 0) (match_dup 1))]
+{
+  if (REG_P (operands[1]) && FR_REGNO_P (REGNO (operands[1])))
+    operands[0] = gen_rtx_REG (V2SFmode, REGNO (operands[0]));
+  else
+    operands[1] = gen_rtx_REG (SFmode, REGNO (operands[1]));
+})
+
+(define_insn_and_split "*vec_extractv2sf_1"
+  [(set (match_operand:SF 0 "register_operand" "=rf")
+	(unspec:SF [(match_operand:V2SF 1 "register_operand" "r")
+		    (const_int 1)]
+		   UNSPEC_VECT_EXTR))]
+  ""
+  "#"
+  "reload_completed"
+  [(const_int 0)]
+{
+  if (FR_REGNO_P (REGNO (operands[0])))
+    {
+      operands[1] = gen_rtx_REG (SFmode, REGNO (operands[1]));
+      emit_move_insn (operands[0], operands[1]);
+    }
+  else
+    {
+      operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
+      operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));
+      emit_insn (gen_lshrdi3 (operands[0], operands[1], GEN_INT (32)));
+    }
+  DONE;
+})
+
+(define_expand "vec_extractv2sf"
+  [(set (match_operand:SF 0 "register_operand" "")
+	(unspec:SF [(match_operand:V2SF 1 "register_operand" "")
+		    (match_operand:DI 2 "const_int_operand" "")]
+		   UNSPEC_VECT_EXTR))]
+  ""
+  "")
+
 ;; Missing operations
 ;; fprcpa
 ;; fpsqrta
-;; vec_setv2sf
-;; vec_extractv2sf



More information about the Gcc-patches mailing list