This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH] Vectorize lrint on x86_64/i686 -m32 -mavx using 32-byte vectors
- From: Jakub Jelinek <jakub at redhat dot com>
- To: Richard Henderson <rth at redhat dot com>, Uros Bizjak <ubizjak at gmail dot com>
- Cc: gcc-patches at gcc dot gnu dot org
- Date: Mon, 7 Nov 2011 19:38:03 +0100
- Subject: [PATCH] Vectorize lrint on x86_64/i686 -m32 -mavx using 32-byte vectors
- Reply-to: Jakub Jelinek <jakub at redhat dot com>
Hi!
BUILT_IN_LRINT has so far been vectorized using only 16-byte
vectors; the following patch cures that (of course, for -m64
it unfortunately can't be vectorized, because long is DImode there
rather than SImode).
Bootstrapped/regtested on x86_64-linux and i686-linux. Ok for trunk?
2011-11-07 Jakub Jelinek <jakub@redhat.com>
* config/i386/i386-builtin-types.def (V8SI_FTYPE_V4DF_V4DF): Add.
* config/i386/i386.c (enum ix86_builtins): Add
IX86_BUILTIN_VEC_PACK_SFIX256.
(bdesc_args): Add __builtin_ia32_vec_pack_sfix256.
(ix86_expand_args_builtin): Handle V8SI_FTYPE_V4DF_V4DF.
(ix86_builtin_vectorized_function): Also vectorize lrint using
256-bit vectors for -mavx.
--- gcc/config/i386/i386-builtin-types.def.jj 2011-11-07 12:40:36.000000000 +0100
+++ gcc/config/i386/i386-builtin-types.def 2011-11-07 15:34:27.000000000 +0100
@@ -332,6 +332,7 @@ DEF_FUNCTION_TYPE (V16HI, V16HI, INT)
DEF_FUNCTION_TYPE (V16HI, V16HI, SI)
DEF_FUNCTION_TYPE (V16HI, V16HI, V16HI, INT)
DEF_FUNCTION_TYPE (V32QI, V32QI, V32QI, INT)
+DEF_FUNCTION_TYPE (V8SI, V4DF, V4DF)
DEF_FUNCTION_TYPE (V8SI, V8SI, V4SI)
DEF_FUNCTION_TYPE (V8SI, V8SI, V8SI)
DEF_FUNCTION_TYPE (V8SI, V16HI, V16HI)
--- gcc/config/i386/i386.c.jj 2011-11-07 12:40:55.000000000 +0100
+++ gcc/config/i386/i386.c 2011-11-07 15:42:41.000000000 +0100
@@ -24830,6 +24830,7 @@ enum ix86_builtins
IX86_BUILTIN_VEC_SET_V16QI,
IX86_BUILTIN_VEC_PACK_SFIX,
+ IX86_BUILTIN_VEC_PACK_SFIX256,
/* SSE4.2. */
IX86_BUILTIN_CRC32QI,
@@ -26351,6 +26352,8 @@ static const struct builtin_description
{ OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3, "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3, "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
+ { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256 ", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
+
/* AVX2 */
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
@@ -28025,6 +28028,7 @@ ix86_expand_args_builtin (const struct b
case V32QI_FTYPE_V32QI_V32QI:
case V16HI_FTYPE_V32QI_V32QI:
case V16HI_FTYPE_V16HI_V16HI:
+ case V8SI_FTYPE_V4DF_V4DF:
case V8SI_FTYPE_V8SI_V8SI:
case V8SI_FTYPE_V16HI_V16HI:
case V4DI_FTYPE_V4DI_V4DI:
@@ -29121,9 +29125,13 @@ ix86_builtin_vectorized_function (tree f
break;
case BUILT_IN_LRINT:
- if (out_mode == SImode && out_n == 4
- && in_mode == DFmode && in_n == 2)
- return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
+ if (out_mode == SImode && in_mode == DFmode)
+ {
+ if (out_n == 4 && in_n == 2)
+ return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
+ else if (out_n == 8 && in_n == 4)
+ return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX256];
+ }
break;
case BUILT_IN_LRINTF:
Jakub