This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [lno] [patch] vectorizer update - support constants.
- From: Falk Hueffner <falk dot hueffner at student dot uni-tuebingen dot de>
- To: Dorit Naishlos <DORIT at il dot ibm dot com>
- Cc: gcc-patches at gcc dot gnu dot org, pop at gauvain dot u-strasbg dot fr,Richard Henderson <rth at redhat dot com>
- Date: 24 Jan 2004 21:59:09 +0100
- Subject: Re: [lno] [patch] vectorizer update - support constants.
- References: <OF967A5E0D.553471F4-ONC2256E22.0078C240-C2256E23.004AE60D@il.ibm.com>
Dorit Naishlos <DORIT@il.ibm.com> writes:
> > Then vectorization worked, but the code was not quite optimal.
>
> I'm not familiar with code generation problems on Alpha. The
> vectorizer does generate additional IVs, but it relies on subsequent
> optimization passes to make the necessary cleanups and worry about
> efficient addressing. Did you use -O2/-O3?
Yes, I did. It looks like something inhibits the later optimization
passes as soon as vectorization has been applied; I have no clue what.
I'm attaching the patch I was using.
--
Falk
Index: gcc/config/alpha/alpha.md
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/alpha/alpha.md,v
retrieving revision 1.194.2.16
diff -u -p -r1.194.2.16 alpha.md
--- gcc/config/alpha/alpha.md 29 Oct 2003 21:36:05 -0000 1.194.2.16
+++ gcc/config/alpha/alpha.md 24 Jan 2004 20:54:44 -0000
@@ -6370,6 +6370,32 @@
stt %R1,%0"
[(set_attr "type" "ilog,ild,ist,fcpys,fld,fst")])
+;; Expander named "subv8qi3".  Every operation it emits is done in
+;; DImode.  The emitted sequence is:
+;;   operand 4 = UNSPEC_CMPBGE of operands 1 and 2
+;;   operand 5 = operand 1 - operand 2  (a single DImode subtract)
+;;   operand 6 = (UNSPEC_ZAP of operand 7) & operand 3
+;;   operand 0 = operand 6 * 4 + operand 5
+;; Operands 3 to 7 are not supplied by the caller; the preparation
+;; code below creates them.  The pattern is only enabled when
+;; HOST_WIDE_INT has at least 64 bits, which the 64-bit constant built
+;; for operand 3 requires.
+;; NOTE(review): presumably the compare/zap/scaled-add steps correct
+;; the full-width subtract for borrows that cross byte boundaries;
+;; this is not verified here -- confirm before relying on it.
+(define_expand "subv8qi3"
+ [(set (match_dup 4)
+ (unspec:DI [(match_operand:DI 1 "reg_or_0_operand" "")
+ (match_operand:DI 2 "reg_or_8bit_operand" "")]
+ UNSPEC_CMPBGE))
+ (set (match_dup 5)
+ (minus:DI (match_dup 1)
+ (match_dup 2)))
+ (set (match_dup 6)
+ (and:DI (unspec:DI [(match_dup 7)] UNSPEC_ZAP)
+ (match_dup 3)))
+ (set (match_operand:DI 0 "register_operand" "")
+ (plus:DI (mult:DI (match_dup 6)
+ (const_int 4))
+ (match_dup 5)))]
+ "HOST_BITS_PER_WIDE_INT >= 64"
+{
+ /* View the destination as a DImode value.  */
+ operands[0] = gen_lowpart (DImode, operands[0]);
+ /* DImode constant with 0x40 replicated into each of its eight bytes.  */
+ operands[3] = gen_int_mode (((unsigned HOST_WIDE_INT) 0x40404040) << 32
+ | 0x40404040, DImode);
+ /* Fresh DImode pseudos for the three intermediate results.  */
+ operands[4] = gen_reg_rtx (DImode);
+ operands[5] = gen_reg_rtx (DImode);
+ operands[6] = gen_reg_rtx (DImode);
+ /* UNSPEC_ZAP is fed the QImode low part of the operand 4 result.  */
+ operands[7] = gen_lowpart (QImode, operands[4]);
+})
+
(define_insn "uminv8qi3"
[(set (match_operand:V8QI 0 "register_operand" "=r")
(umin:V8QI (match_operand:V8QI 1 "reg_or_0_operand" "rW")
Index: gcc/config/alpha/alpha.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/alpha/alpha.h,v
retrieving revision 1.170.2.22.2.1
diff -u -p -r1.170.2.22.2.1 alpha.h
--- gcc/config/alpha/alpha.h 21 Jan 2004 01:11:29 -0000 1.170.2.22.2.1
+++ gcc/config/alpha/alpha.h 24 Jan 2004 20:54:50 -0000
@@ -482,6 +482,8 @@ extern const char *alpha_tls_size_string
/* Width of a word, in units (bytes). */
#define UNITS_PER_WORD 8
+/* NOTE(review): presumably the width of a SIMD (vector) word, in units
+   (bytes), by analogy with UNITS_PER_WORD above; it is given the same
+   value as UNITS_PER_WORD.  Confirm against the vectorizer's use.  */
+#define UNITS_PER_SIMD_WORD 8
+
/* Width in bits of a pointer.
See also the macro `Pmode' defined below. */
#define POINTER_SIZE 64
Index: gcc/tree-vectorizer.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/Attic/tree-vectorizer.c,v
retrieving revision 1.1.2.14
diff -u -p -r1.1.2.14 tree-vectorizer.c
--- gcc/tree-vectorizer.c 21 Jan 2004 12:59:05 -0000 1.1.2.14
+++ gcc/tree-vectorizer.c 24 Jan 2004 20:54:51 -0000
@@ -1432,6 +1432,9 @@ vect_is_supportable_binop (tree stmt)
case MINUS_EXPR:
binoptab = sub_optab;
break;
+ case BIT_IOR_EXPR:
+ binoptab = ior_optab;
+ break;
default:
return false;
}
@@ -1454,7 +1457,10 @@ vect_is_supportable_binop (tree stmt)
vec_mode = TYPE_MODE (vectype);
- if (binoptab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
+ if (binoptab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing
+ /* These can be efficiently open-coded. */
+ && !((code == PLUS_EXPR || code == MINUS_EXPR)
+ && GET_MODE_NUNITS (vec_mode) >= 4))
{
if (tree_dump_file && (tree_dump_flags & TDF_DETAILS))
fprintf (tree_dump_file, "op not supported by target\n");
Index: gcc/optabs.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/optabs.c,v
retrieving revision 1.137.2.27.2.1
diff -u -p -r1.137.2.27.2.1 optabs.c
--- gcc/optabs.c 21 Jan 2004 01:10:36 -0000 1.137.2.27.2.1
+++ gcc/optabs.c 24 Jan 2004 20:54:53 -0000
@@ -1931,6 +1931,43 @@ expand_vector_binop (enum machine_mode m
if (!target)
target = gen_reg_rtx (mode);
+  /* Open-code a vector PLUS as a single add in the integer mode that
+     covers the whole vector, instead of ELTS separate element adds.
+     The top bit of every element except the highest is cleared in both
+     inputs before the add, so no carry can spill into the neighbouring
+     element.  Those top bits are then restored with XOR, because the
+     top bit of a sum is a ^ b ^ carry-in and, with both input bits
+     cleared, the add leaves exactly the carry-in there.  The highest
+     element needs no masking: its carry-out simply falls off the end.
+
+     The masks are built in one HOST_WIDE_INT, so this path is only
+     taken when the covering integer mode fits in a HOST_WIDE_INT;
+     wider vectors fall through to the element-wise code.  */
+  if (binoptab->code == PLUS && elts >= 4
+      && int_mode_for_mode (mode) != BLKmode
+      && (GET_MODE_BITSIZE (int_mode_for_mode (mode))
+          <= HOST_BITS_PER_WIDE_INT))
+    {
+      /* Do full word add and fix up spilling overflows.  */
+      rtx signmask, inv_signmask, signs;
+      unsigned HOST_WIDE_INT m = 0;
+
+      tmode = int_mode_for_mode (mode);
+
+      /* Build mask for all sign bits except highest one.  The shifted 1
+         must have HOST_WIDE_INT width: a plain int 1 overflows for
+         32-bit elements and would sign-extend into the mask.  */
+      for (i = 0; i < elts - 1; ++i)
+        {
+          m <<= subbitsize;
+          m |= (unsigned HOST_WIDE_INT) 1 << (subbitsize - 1);
+        }
+      signmask = gen_int_mode (m, tmode);
+      inv_signmask = gen_int_mode (~m, tmode);
+
+      /* View the vector operands and the target as TMODE integers.  */
+      t = simplify_gen_subreg (tmode, target, mode, 0);
+      a = simplify_gen_subreg (tmode, op0, mode, 0);
+      b = simplify_gen_subreg (tmode, op1, mode, 0);
+
+      /* SIGNS = (a ^ b) restricted to the masked top bits.  */
+      signs = expand_binop (tmode, xor_optab, a, b, NULL_RTX,
+                            true, methods);
+      signs = expand_binop (tmode, and_optab, signs, signmask, NULL_RTX,
+                            true, methods);
+
+      /* Add with the masked top bits cleared in both inputs.  */
+      a = expand_binop (tmode, and_optab, a, inv_signmask, NULL_RTX,
+                        true, methods);
+      b = expand_binop (tmode, and_optab, b, inv_signmask, NULL_RTX,
+                        true, methods);
+      a = expand_binop (tmode, add_optab, a, b, NULL_RTX,
+                        true, methods);
+
+      /* Fold the saved top bits back in, asking for the result in T.  */
+      res = expand_binop (tmode, xor_optab, a, signs, t,
+                          true, methods);
+      /* expand_binop may already have placed the result in T.  */
+      if (res != t)
+        emit_move_insn (t, res);
+    }
+ else
for (i = 0; i < elts; ++i)
{
/* If this is part of a register, and not the first item in the