This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [patch] lno branch merge -- vectorizer patch #4
- From: Dorit Naishlos <DORIT at il dot ibm dot com>
- To: Richard Henderson <rth at redhat dot com>
- Cc: Ayal Zaks <ZAKS at il dot ibm dot com>, gcc-patches at gcc dot gnu dot org, mark at codesourcery dot com
- Date: Thu, 23 Sep 2004 10:59:53 +0200
- Subject: Re: [patch] lno branch merge -- vectorizer patch #4
> Cross-compile for alpha, with -O2 -mcpu=ev6 -ftree-vectorize, with
> the attached patch. You should see "ldq_u" instructions.
here they are:
foo:
.frame $30,0,$26,0
ldah $29,0($27) !gpdisp!1
lda $29,0($29) !gpdisp!1
$foo..ng:
.prologue 1
ldah $1,A($29) !gprelhigh
ldq $22,B($29) !literal
mov $31,$7
lda $8,A($1) !gprellow
.align 4
$L2:
zapnot $7,15,$5
addl $7,8,$7
addq $22,$5,$4
zapnot $7,15,$6
addq $8,$5,$5
>> ldq_u $3,0($4)
>> ldq_u $1,7($4)
lda $6,-1000($6)
extqh $1,$4,$1
extql $3,$4,$3
bis $3,$1,$2
stq $2,0($5)
bne $6,$L2
ret $31,($26),1
.end foo
.section .bss
.type A, @object
.size A, 1000
.align 3
I guess I'll commit the alpha patch along with my patch (?).
thanks!
dorit
From: Richard Henderson <rth@redhat.com>
Date: 22/09/2004 21:48
To: Dorit Naishlos/Haifa/IBM@IBMIL, gcc-patches@gcc.gnu.org, mark@codesourcery.com, Ayal Zaks/Haifa/IBM@IBMIL
cc:
Subject: Re: [patch] lno branch merge -- vectorizer patch #4
On Wed, Sep 22, 2004 at 12:39:32PM -0700, Richard Henderson wrote:
> This is wrong. If integer_zerop, then you know *no* alignment, correct?
> Which means that align = BITS_PER_UNIT. Otherwise we wind up with
> "align = GET_MODE_ALIGNMENT (mode)", which leads to the move expanders
> not doing the right thing.
Test case:
#define N 1000
static char A[N];
extern char B[N];
void foo()
{
long i;
for (i = 0; i < N; ++i)
A[i] = B[i];
}
Cross-compile for alpha, with -O2 -mcpu=ev6 -ftree-vectorize, with
the attached patch. You should see "ldq_u" instructions.
r~
Index: alpha.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/alpha/alpha.c,v
retrieving revision 1.395
diff -c -p -d -u -r1.395 alpha.c
--- alpha.c 20 Sep 2004 20:13:39 -0000 1.395
+++ alpha.c 22 Sep 2004 19:46:53 -0000
@@ -1980,17 +1980,51 @@ alpha_emit_set_long_const (rtx target, H
return target;
}
+/* Vectorization wants to know when we're willing to handle totally
+ unaligned loads and stores. We'll do so for any 8 byte vector. */
+
+static bool
+alpha_vectorize_misaligned_mem_ok (enum machine_mode mode)
+{
+ return VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8;
+}
+
/* Expand a move instruction; return true if all work is done.
We don't handle non-bwx subword loads here. */
bool
alpha_expand_mov (enum machine_mode mode, rtx *operands)
{
+ /* Honor misaligned loads, for those we promised to do so. */
+ if (GET_CODE (operands[1]) == MEM
+ && alpha_vectorize_misaligned_mem_ok (mode)
+ && MEM_ALIGN (operands[1]) < GET_MODE_ALIGNMENT (mode))
+ {
+ rtx tmp;
+ if (register_operand (operands[0], mode))
+ tmp = operands[0];
+ else
+ tmp = gen_reg_rtx (mode);
+ alpha_expand_unaligned_load (tmp, operands[1], 8, 0, 0);
+ if (tmp == operands[0])
+ return true;
+ operands[1] = tmp;
+ }
+
/* If the output is not a register, the input must be. */
if (GET_CODE (operands[0]) == MEM
&& ! reg_or_0_operand (operands[1], mode))
operands[1] = force_reg (mode, operands[1]);
+ /* Honor misaligned stores, for those we promised to do so. */
+ if (GET_CODE (operands[0]) == MEM
+ && alpha_vectorize_misaligned_mem_ok (mode)
+ && MEM_ALIGN (operands[0]) < GET_MODE_ALIGNMENT (mode))
+ {
+ alpha_expand_unaligned_store (operands[0], operands[1], 8, 0);
+ return true;
+ }
+
/* Allow legitimize_address to perform some simplifications. */
if (mode == Pmode && symbolic_operand (operands[1], mode))
{
@@ -3337,7 +3371,7 @@ alpha_expand_unaligned_store (rtx dst, r
{
addr = copy_addr_to_reg (plus_constant (dsta, ofs));
- if (src != const0_rtx)
+ if (src != CONST0_RTX (GET_MODE (src)))
{
emit_insn (gen_insxh (insh, gen_lowpart (DImode, src),
GEN_INT (size*8), addr));
@@ -3375,7 +3409,7 @@ alpha_expand_unaligned_store (rtx dst, r
}
}
- if (src != const0_rtx)
+ if (src != CONST0_RTX (GET_MODE (src)))
{
dsth = expand_binop (DImode, ior_optab, insh, dsth, dsth, 0,
OPTAB_WIDEN);
dstl = expand_binop (DImode, ior_optab, insl, dstl, dstl, 0,
OPTAB_WIDEN);
@@ -9427,6 +9461,9 @@ alpha_init_libfuncs (void)
#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST alpha_build_builtin_va_list
+#undef TARGET_VECTORIZE_MISALIGNED_MEM_OK
+#define TARGET_VECTORIZE_MISALIGNED_MEM_OK alpha_vectorize_misaligned_mem_ok
+
struct gcc_target targetm = TARGET_INITIALIZER;
Index: alpha.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/alpha/alpha.h,v
retrieving revision 1.231
diff -c -p -d -u -r1.231 alpha.h
--- alpha.h 10 Sep 2004 11:55:08 -0000 1.231
+++ alpha.h 22 Sep 2004 19:46:53 -0000
@@ -560,6 +560,9 @@ extern const char *alpha_tls_size_string
On the Alpha, they trap. */
#define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) 1
+
+/* Our SIMD is all done on single integer registers. */
+#define UNITS_PER_SIMD_WORD UNITS_PER_WORD
/* Standard register usage. */