This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] Vectorize lrint on x86_64/i686 -m32 -mavx using 32-byte vectors


Hi!

BUILT_IN_LRINT has so far been vectorized using 16-byte vectors
only; the following patch cures that (of course, for -m64
it unfortunately can't be vectorized, as long is DImode there
rather than SImode).

Bootstrapped/regtested on x86_64-linux and i686-linux.  Ok for trunk?

2011-11-07  Jakub Jelinek  <jakub@redhat.com>

	* config/i386/i386-builtin-types.def (V8SI_FTYPE_V4DF_V4DF): Add.
	* config/i386/i386.c (enum ix86_builtins): Add
	IX86_BUILTIN_VEC_PACK_SFIX256.
	(bdesc_args): Add __builtin_ia32_vec_pack_sfix256.
	(ix86_expand_args_builtin): Handle V8SI_FTYPE_V4DF_V4DF.
	(ix86_builtin_vectorized_function): Also vectorize lrint using
	256-bit vectors for -mavx.

--- gcc/config/i386/i386-builtin-types.def.jj	2011-11-07 12:40:36.000000000 +0100
+++ gcc/config/i386/i386-builtin-types.def	2011-11-07 15:34:27.000000000 +0100
@@ -332,6 +332,7 @@ DEF_FUNCTION_TYPE (V16HI, V16HI, INT)
 DEF_FUNCTION_TYPE (V16HI, V16HI, SI)
 DEF_FUNCTION_TYPE (V16HI, V16HI, V16HI, INT)
 DEF_FUNCTION_TYPE (V32QI, V32QI, V32QI, INT)
+DEF_FUNCTION_TYPE (V8SI, V4DF, V4DF)
 DEF_FUNCTION_TYPE (V8SI, V8SI, V4SI)
 DEF_FUNCTION_TYPE (V8SI, V8SI, V8SI)
 DEF_FUNCTION_TYPE (V8SI, V16HI, V16HI)
--- gcc/config/i386/i386.c.jj	2011-11-07 12:40:55.000000000 +0100
+++ gcc/config/i386/i386.c	2011-11-07 15:42:41.000000000 +0100
@@ -24830,6 +24830,7 @@ enum ix86_builtins
   IX86_BUILTIN_VEC_SET_V16QI,
 
   IX86_BUILTIN_VEC_PACK_SFIX,
+  IX86_BUILTIN_VEC_PACK_SFIX256,
 
   /* SSE4.2.  */
   IX86_BUILTIN_CRC32QI,
@@ -26351,6 +26352,8 @@ static const struct builtin_description 
   { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv8sf3,  "__builtin_ia32_copysignps256", IX86_BUILTIN_CPYSGNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
   { OPTION_MASK_ISA_AVX, CODE_FOR_copysignv4df3,  "__builtin_ia32_copysignpd256", IX86_BUILTIN_CPYSGNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
 
+  { OPTION_MASK_ISA_AVX, CODE_FOR_vec_pack_sfix_v4df, "__builtin_ia32_vec_pack_sfix256 ", IX86_BUILTIN_VEC_PACK_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V4DF_V4DF },
+
   /* AVX2 */
   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mpsadbw, "__builtin_ia32_mpsadbw256", IX86_BUILTIN_MPSADBW256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_INT },
   { OPTION_MASK_ISA_AVX2, CODE_FOR_absv32qi2, "__builtin_ia32_pabsb256", IX86_BUILTIN_PABSB256, UNKNOWN, (int) V32QI_FTYPE_V32QI },
@@ -28025,6 +28028,7 @@ ix86_expand_args_builtin (const struct b
     case V32QI_FTYPE_V32QI_V32QI:
     case V16HI_FTYPE_V32QI_V32QI:
     case V16HI_FTYPE_V16HI_V16HI:
+    case V8SI_FTYPE_V4DF_V4DF:
     case V8SI_FTYPE_V8SI_V8SI:
     case V8SI_FTYPE_V16HI_V16HI:
     case V4DI_FTYPE_V4DI_V4DI:
@@ -29121,9 +29125,13 @@ ix86_builtin_vectorized_function (tree f
       break;
 
     case BUILT_IN_LRINT:
-      if (out_mode == SImode && out_n == 4
-	  && in_mode == DFmode && in_n == 2)
-	return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
+      if (out_mode == SImode && in_mode == DFmode)
+	{
+	  if (out_n == 4 && in_n == 2)
+	    return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
+	  else if (out_n == 8 && in_n == 4)
+	    return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX256];
+	}
       break;
 
     case BUILT_IN_LRINTF:

	Jakub


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]