This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [lno] [patch] vectorizer update - support constants.


Dorit Naishlos <DORIT@il.ibm.com> writes:

> > Then vectorization worked, but the code was not quite optimal.
> 
> I'm not familiar with code generation problems on Alpha. The
> vectorizer does generate additional IVs, but it relies on subsequent
> optimization passes to make the necessary cleanups and worry about
> efficient addressing.  Did you use -O2/-O3?

Yes, I did. It looks like something inhibits later optimization as
soon as vectorization is applied. I have no clue what. I'm attaching
the patch I was using.

-- 
	Falk

Index: gcc/config/alpha/alpha.md
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/alpha/alpha.md,v
retrieving revision 1.194.2.16
diff -u -p -r1.194.2.16 alpha.md
--- gcc/config/alpha/alpha.md	29 Oct 2003 21:36:05 -0000	1.194.2.16
+++ gcc/config/alpha/alpha.md	24 Jan 2004 20:54:44 -0000
@@ -6370,6 +6370,32 @@
    stt %R1,%0"
   [(set_attr "type" "ilog,ild,ist,fcpys,fld,fst")])
 
+(define_expand "subv8qi3"
+  [(set (match_dup 4)
+        (unspec:DI [(match_operand:DI 1 "reg_or_0_operand" "")
+                    (match_operand:DI 2 "reg_or_8bit_operand" "")]
+                   UNSPEC_CMPBGE))
+   (set (match_dup 5)
+        (minus:DI (match_dup 1)
+                  (match_dup 2)))
+   (set (match_dup 6)
+	(and:DI (unspec:DI [(match_dup 7)] UNSPEC_ZAP)
+		(match_dup 3)))
+   (set (match_operand:DI 0 "register_operand" "")
+	(plus:DI (mult:DI (match_dup 6)
+			  (const_int 4))
+		 (match_dup 5)))]
+  "HOST_BITS_PER_WIDE_INT >= 64"
+{
+  operands[0] = gen_lowpart (DImode, operands[0]);
+  operands[3] = gen_int_mode (((unsigned HOST_WIDE_INT) 0x40404040) << 32
+                              | 0x40404040, DImode);
+  operands[4] = gen_reg_rtx (DImode);
+  operands[5] = gen_reg_rtx (DImode);
+  operands[6] = gen_reg_rtx (DImode);
+  operands[7] = gen_lowpart (QImode, operands[4]);
+})
+
 (define_insn "uminv8qi3"
   [(set (match_operand:V8QI 0 "register_operand" "=r")
 	(umin:V8QI (match_operand:V8QI 1 "reg_or_0_operand" "rW")
Index: gcc/config/alpha/alpha.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/alpha/alpha.h,v
retrieving revision 1.170.2.22.2.1
diff -u -p -r1.170.2.22.2.1 alpha.h
--- gcc/config/alpha/alpha.h	21 Jan 2004 01:11:29 -0000	1.170.2.22.2.1
+++ gcc/config/alpha/alpha.h	24 Jan 2004 20:54:50 -0000
@@ -482,6 +482,8 @@ extern const char *alpha_tls_size_string
 /* Width of a word, in units (bytes).  */
 #define UNITS_PER_WORD 8
 
+#define UNITS_PER_SIMD_WORD 8
+
 /* Width in bits of a pointer.
    See also the macro `Pmode' defined below.  */
 #define POINTER_SIZE 64
Index: gcc/tree-vectorizer.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/Attic/tree-vectorizer.c,v
retrieving revision 1.1.2.14
diff -u -p -r1.1.2.14 tree-vectorizer.c
--- gcc/tree-vectorizer.c	21 Jan 2004 12:59:05 -0000	1.1.2.14
+++ gcc/tree-vectorizer.c	24 Jan 2004 20:54:51 -0000
@@ -1432,6 +1432,9 @@ vect_is_supportable_binop (tree stmt)
     case MINUS_EXPR:
       binoptab = sub_optab;
       break;
+    case BIT_IOR_EXPR:
+      binoptab = ior_optab;
+      break;
     default:
       return false;
     }
@@ -1454,7 +1457,10 @@ vect_is_supportable_binop (tree stmt)
 
   vec_mode = TYPE_MODE (vectype);
 
-  if (binoptab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
+  if (binoptab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing
+      /* These can be efficiently open-coded.  */
+      && !((code == PLUS_EXPR || code == MINUS_EXPR)
+	   && GET_MODE_NUNITS (vec_mode) >= 4))
     {
       if (tree_dump_file && (tree_dump_flags & TDF_DETAILS))
 	fprintf (tree_dump_file, "op not supported by target\n");
Index: gcc/optabs.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/optabs.c,v
retrieving revision 1.137.2.27.2.1
diff -u -p -r1.137.2.27.2.1 optabs.c
--- gcc/optabs.c	21 Jan 2004 01:10:36 -0000	1.137.2.27.2.1
+++ gcc/optabs.c	24 Jan 2004 20:54:53 -0000
@@ -1931,6 +1931,43 @@ expand_vector_binop (enum machine_mode m
       if (!target)
 	target = gen_reg_rtx (mode);
 
+      if (binoptab->code == PLUS && elts >= 4
+	  && int_mode_for_mode (mode) != BLKmode)
+	{
+	  /* Do full word add and fix up spilling overflows.  */
+	  rtx signmask, inv_signmask, signs;
+	  HOST_WIDE_INT m = 0;
+
+	  tmode = int_mode_for_mode (mode);
+
+	  /* Build mask for all sign bits except highest one.  */
+	  for (i = 0; i < elts - 1; ++i)
+	    {
+	      m <<= subbitsize;
+	      m |= 1 << (subbitsize - 1);
+	    }
+	  signmask = GEN_INT(m);
+	  inv_signmask = GEN_INT(~m);
+
+	  t = simplify_gen_subreg (tmode, target, mode, 0);
+	  a = simplify_gen_subreg (tmode, op0, mode, 0);
+	  b = simplify_gen_subreg (tmode, op1, mode, 0);
+
+	  signs = expand_binop (tmode, xor_optab, a, b, NULL_RTX,
+				true, methods);
+	  signs = expand_binop (tmode, and_optab, signs, signmask, NULL_RTX,
+				true, methods);
+	  a = expand_binop (tmode, and_optab, a, inv_signmask, NULL_RTX,
+			    true, methods);
+	  b = expand_binop (tmode, and_optab, b, inv_signmask, NULL_RTX,
+			    true, methods);
+	  a = expand_binop (tmode, add_optab, a, b, NULL_RTX,
+			    true, methods);	  
+	  res = expand_binop (tmode, xor_optab, a, signs, t,
+			      true, methods);
+	  emit_move_insn (t, res);
+	}
+      else
       for (i = 0; i < elts; ++i)
 	{
 	  /* If this is part of a register, and not the first item in the

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]