[PATCH][4.6] [1/2] Handle multiple vector sizes with AVX
Richard Guenther
rguenther@suse.de
Wed Mar 3 13:15:00 GMT 2010
This is a preparatory patch to allow UNITS_PER_SIMD_WORD to hand out
different vector sizes based on a desired vectorization factor.
With this patch alone, the vectorizer will still unconditionally use
the largest available vector size; more thorough support will come
with [2/2], but this is the part that touches target code.
Queued for 4.6.
Richard.
2010-03-03 Richard Guenther <rguenther@suse.de>
* defaults.h (UNITS_PER_SIMD_WORD): Adjust.
* config/i386/i386.h (UNITS_PER_SIMD_WORD): Allow 32byte float
vectors.
* config/arm/arm.h (UNITS_PER_SIMD_WORD): Adjust.
* config/mips/mips.h (UNITS_PER_SIMD_WORD): Likewise.
* config/rs6000/rs6000.h (UNITS_PER_SIMD_WORD): Likewise.
* config/sparc/sparc.h (UNITS_PER_SIMD_WORD): Likewise.
* tree-vect-stmts.c (get_vectype_for_scalar_type): Adjust.
* tree-vectorizer.h (MAX_VECTORIZATION_FACTOR): New define.
Index: trunk/gcc/config/i386/i386.h
===================================================================
*** trunk.orig/gcc/config/i386/i386.h 2010-03-01 17:55:28.000000000 +0100
--- trunk/gcc/config/i386/i386.h 2010-03-01 18:11:23.000000000 +0100
*************** enum target_cpu_default
*** 1027,1038 ****
/* ??? No autovectorization into MMX or 3DNOW until we can reliably
place emms and femms instructions.
! FIXME: AVX has 32byte floating point vector operations and 16byte
! integer vector operations. But vectorizer doesn't support
! different sizes for integer and floating point vectors. We limit
! vector size to 16byte. */
! #define UNITS_PER_SIMD_WORD(MODE) \
! (TARGET_AVX ? (((MODE) == DFmode || (MODE) == SFmode) ? 16 : 16) \
: (TARGET_SSE ? 16 : UNITS_PER_WORD))
#define VALID_DFP_MODE_P(MODE) \
--- 1027,1039 ----
/* ??? No autovectorization into MMX or 3DNOW until we can reliably
place emms and femms instructions.
! AVX has 32byte floating point vector operations and 16byte
! integer vector operations but also can handle 16byte floating
! point vectors if required. */
! #define UNITS_PER_SIMD_WORD(MODE, VF) \
! (TARGET_AVX ? ((MODE) == DFmode ? ((VF) >= 4 ? 32 : 16) \
! : ((MODE) == SFmode ? ((VF) >= 8 ? 32 : 16) \
! : 16)) \
: (TARGET_SSE ? 16 : UNITS_PER_WORD))
#define VALID_DFP_MODE_P(MODE) \
Index: trunk/gcc/defaults.h
===================================================================
*** trunk.orig/gcc/defaults.h 2010-03-01 17:55:28.000000000 +0100
--- trunk/gcc/defaults.h 2010-03-01 18:11:23.000000000 +0100
*************** see the files COPYING3 and COPYING.RUNTI
*** 908,914 ****
/* By default, only attempt to parallelize bitwise operations, and
possibly adds/subtracts using bit-twiddling. */
#ifndef UNITS_PER_SIMD_WORD
! #define UNITS_PER_SIMD_WORD(MODE) UNITS_PER_WORD
#endif
/* Determine whether __cxa_atexit, rather than atexit, is used to
--- 908,914 ----
/* By default, only attempt to parallelize bitwise operations, and
possibly adds/subtracts using bit-twiddling. */
#ifndef UNITS_PER_SIMD_WORD
! #define UNITS_PER_SIMD_WORD(MODE,VF) UNITS_PER_WORD
#endif
/* Determine whether __cxa_atexit, rather than atexit, is used to
Index: trunk/gcc/config/arm/arm.h
===================================================================
*** trunk.orig/gcc/config/arm/arm.h 2010-03-01 17:55:28.000000000 +0100
--- trunk/gcc/config/arm/arm.h 2010-03-01 18:11:23.000000000 +0100
*************** extern int arm_arch_hwdiv;
*** 536,542 ****
/* Use the option -mvectorize-with-neon-quad to override the use of doubleword
registers when autovectorizing for Neon, at least until multiple vector
widths are supported properly by the middle-end. */
! #define UNITS_PER_SIMD_WORD(MODE) \
(TARGET_NEON ? (TARGET_NEON_VECTORIZE_QUAD ? 16 : 8) : UNITS_PER_WORD)
/* True if natural alignment is used for doubleword types. */
--- 536,542 ----
/* Use the option -mvectorize-with-neon-quad to override the use of doubleword
registers when autovectorizing for Neon, at least until multiple vector
widths are supported properly by the middle-end. */
! #define UNITS_PER_SIMD_WORD(MODE, VF) \
(TARGET_NEON ? (TARGET_NEON_VECTORIZE_QUAD ? 16 : 8) : UNITS_PER_WORD)
/* True if natural alignment is used for doubleword types. */
Index: trunk/gcc/config/mips/mips.h
===================================================================
*** trunk.orig/gcc/config/mips/mips.h 2010-03-01 17:55:28.000000000 +0100
--- trunk/gcc/config/mips/mips.h 2010-03-01 18:11:23.000000000 +0100
*************** enum mips_code_readable_setting {
*** 1391,1397 ****
/* The number of bytes in a double. */
#define UNITS_PER_DOUBLE (TYPE_PRECISION (double_type_node) / BITS_PER_UNIT)
! #define UNITS_PER_SIMD_WORD(MODE) \
(TARGET_PAIRED_SINGLE_FLOAT ? 8 : UNITS_PER_WORD)
/* Set the sizes of the core types. */
--- 1391,1397 ----
/* The number of bytes in a double. */
#define UNITS_PER_DOUBLE (TYPE_PRECISION (double_type_node) / BITS_PER_UNIT)
! #define UNITS_PER_SIMD_WORD(MODE, VF) \
(TARGET_PAIRED_SINGLE_FLOAT ? 8 : UNITS_PER_WORD)
/* Set the sizes of the core types. */
Index: trunk/gcc/config/rs6000/rs6000.h
===================================================================
*** trunk.orig/gcc/config/rs6000/rs6000.h 2010-03-01 17:55:28.000000000 +0100
--- trunk/gcc/config/rs6000/rs6000.h 2010-03-01 18:11:23.000000000 +0100
*************** extern unsigned rs6000_pointer_size;
*** 1074,1080 ****
#define PAIRED_VECTOR_MODE(MODE) \
((MODE) == V2SFmode)
! #define UNITS_PER_SIMD_WORD(MODE) \
(TARGET_VSX ? UNITS_PER_VSX_WORD \
: (TARGET_ALTIVEC ? UNITS_PER_ALTIVEC_WORD \
: (TARGET_SPE ? UNITS_PER_SPE_WORD \
--- 1074,1080 ----
#define PAIRED_VECTOR_MODE(MODE) \
((MODE) == V2SFmode)
! #define UNITS_PER_SIMD_WORD(MODE, VF) \
(TARGET_VSX ? UNITS_PER_VSX_WORD \
: (TARGET_ALTIVEC ? UNITS_PER_ALTIVEC_WORD \
: (TARGET_SPE ? UNITS_PER_SPE_WORD \
Index: trunk/gcc/config/sparc/sparc.h
===================================================================
*** trunk.orig/gcc/config/sparc/sparc.h 2010-03-01 17:55:28.000000000 +0100
--- trunk/gcc/config/sparc/sparc.h 2010-03-01 18:11:23.000000000 +0100
*************** extern struct sparc_cpu_select sparc_sel
*** 607,613 ****
#define MIN_UNITS_PER_WORD 4
#endif
! #define UNITS_PER_SIMD_WORD(MODE) (TARGET_VIS ? 8 : UNITS_PER_WORD)
/* Now define the sizes of the C data types. */
--- 607,613 ----
#define MIN_UNITS_PER_WORD 4
#endif
! #define UNITS_PER_SIMD_WORD(MODE, VF) (TARGET_VIS ? 8 : UNITS_PER_WORD)
/* Now define the sizes of the C data types. */
Index: trunk/gcc/tree-vect-stmts.c
===================================================================
*** trunk.orig/gcc/tree-vect-stmts.c 2010-03-01 18:11:22.000000000 +0100
--- trunk/gcc/tree-vect-stmts.c 2010-03-01 18:16:28.000000000 +0100
*************** get_vectype_for_scalar_type (tree scalar
*** 4413,4419 ****
int nunits;
tree vectype;
! if (nbytes == 0 || nbytes >= UNITS_PER_SIMD_WORD (inner_mode))
return NULL_TREE;
/* We can't build a vector type of elements with alignment bigger than
--- 4413,4420 ----
int nunits;
tree vectype;
! if (nbytes == 0 || nbytes >= UNITS_PER_SIMD_WORD (inner_mode,
! MAX_VECTORIZATION_FACTOR))
return NULL_TREE;
/* We can't build a vector type of elements with alignment bigger than
*************** get_vectype_for_scalar_type (tree scalar
*** 4423,4429 ****
/* FORNOW: Only a single vector size per mode (UNITS_PER_SIMD_WORD)
is expected. */
! nunits = UNITS_PER_SIMD_WORD (inner_mode) / nbytes;
vectype = build_vector_type (scalar_type, nunits);
if (vect_print_dump_info (REPORT_DETAILS))
--- 4424,4430 ----
/* FORNOW: Only a single vector size per mode (UNITS_PER_SIMD_WORD)
is expected. */
! nunits = UNITS_PER_SIMD_WORD (inner_mode, MAX_VECTORIZATION_FACTOR) / nbytes;
vectype = build_vector_type (scalar_type, nunits);
if (vect_print_dump_info (REPORT_DETAILS))
Index: trunk/gcc/tree-vectorizer.h
===================================================================
*** trunk.orig/gcc/tree-vectorizer.h 2010-03-01 18:11:19.000000000 +0100
--- trunk/gcc/tree-vectorizer.h 2010-03-01 18:16:01.000000000 +0100
*************** typedef struct _stmt_vec_info {
*** 601,606 ****
--- 601,609 ----
conversion. */
#define MAX_INTERM_CVT_STEPS 3
+ /* The maximum vectorization factor supported by any target (V32QI). */
+ #define MAX_VECTORIZATION_FACTOR 32
+
/* Avoid GTY(()) on stmt_vec_info. */
typedef void *vec_void_p;
DEF_VEC_P (vec_void_p);
More information about the Gcc-patches
mailing list