This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.

Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]
Other format:	[Raw text]

Re: [RTL, i386] Use subreg instead of UNSPEC_CAST

From: Marc Glisse <marc dot glisse at inria dot fr>
To: Richard Henderson <rth at redhat dot com>
Cc: gcc-patches at gcc dot gnu dot org
Date: Tue, 10 Jun 2014 08:36:39 +0200 (CEST)
Subject: Re: [RTL, i386] Use subreg instead of UNSPEC_CAST
Authentication-results: sourceware.org; auth=none
References: <alpine dot DEB dot 2 dot 02 dot 1303191627200 dot 4515 at stedding dot saclay dot inria dot fr> <5148D1FC dot 7000708 at redhat dot com>

On Tue, 19 Mar 2013, Richard Henderson wrote:

I'm not fond of this, primarily because I believe the pattern should
not exist at all.

One year later, new try. Tweaking the pattern, I ended up with a copy ofthe mov pattern (the subreg is generated automatically when the modesdon't match), so I just removed it. I know the comment in emit-rtl.c sayssplitters are a better way forward than subregs, but I haven't managedwith splitters while the subreg patch is very simple :-) I added a -O0testcase because when I was experimenting I had many versions that workedfor -O2 but ICEd at -O0 (and vice versa), but it might be redundant withsome other tests.


Bootstrap+testsuite on x86_64-linux-gnu.

2014-06-10  Marc Glisse  <marc.glisse@inria.fr>

	PR target/50829
gcc/
	* config/i386/sse.md (enum unspec): Remove UNSPEC_CAST.
	(avx_<castmode><avxsizesuffix>_<castmode>): Remove.
	* config/i386/i386.c (builtin_description) [__builtin_ia32_si256_si,
	__builtin_ia32_ps256_ps, __builtin_ia32_pd256_pd]: Replace the
	removed insn with mov.
	* emit-rtl.c (validate_subreg): Allow vector-vector subregs.

gcc/testsuite/
	* gcc.target/i386/pr50829-1.c: New file.
	* gcc.target/i386/pr50829-2.c: New file.

--
Marc Glisse

Index: gcc/config/i386/i386.c
===================================================================
--- gcc/config/i386/i386.c	(revision 211397)
+++ gcc/config/i386/i386.c	(working copy)
@@ -29793,23 +29793,23 @@ static const struct builtin_description
   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps_sfix256, "__builtin_ia32_ceilps_sfix256", IX86_BUILTIN_CEILPS_SFIX256, (enum rtx_code) ROUND_CEIL, (int) V8SI_FTYPE_V8SF_ROUND },
 
   { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2, "__builtin_ia32_roundps_az256", IX86_BUILTIN_ROUNDPS_AZ256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
   { OPTION_MASK_ISA_AVX, CODE_FOR_roundv8sf2_sfix, "__builtin_ia32_roundps_az_sfix256", IX86_BUILTIN_ROUNDPS_AZ_SFIX256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
 
   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256,  "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256,  "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256,  "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256,  "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
 
-  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
-  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
-  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
+  { OPTION_MASK_ISA_AVX, CODE_FOR_movv8si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
+  { OPTION_MASK_ISA_AVX, CODE_FOR_movv8sf, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
+  { OPTION_MASK_ISA_AVX, CODE_FOR_movv4df, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
   { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8si, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
   { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v8sf, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
   { OPTION_MASK_ISA_AVX, CODE_FOR_vec_extract_lo_v4df, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
 
   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
Index: gcc/config/i386/sse.md
===================================================================
--- gcc/config/i386/sse.md	(revision 211397)
+++ gcc/config/i386/sse.md	(working copy)
@@ -66,21 +66,20 @@
   UNSPEC_AESKEYGENASSIST
 
   ;; For PCLMUL support
   UNSPEC_PCLMUL
 
   ;; For AVX support
   UNSPEC_PCMP
   UNSPEC_VPERMIL
   UNSPEC_VPERMIL2
   UNSPEC_VPERMIL2F128
-  UNSPEC_CAST
   UNSPEC_VTESTP
   UNSPEC_VCVTPH2PS
   UNSPEC_VCVTPS2PH
 
   ;; For AVX2 support
   UNSPEC_VPERMVAR
   UNSPEC_VPERMTI
   UNSPEC_GATHER
   UNSPEC_VSIBADDR
 
@@ -14816,40 +14815,20 @@
 
 (define_expand "maskstore<mode>"
   [(set (match_operand:V48_AVX2 0 "memory_operand")
 	(unspec:V48_AVX2
 	  [(match_operand:<sseintvecmode> 2 "register_operand")
 	   (match_operand:V48_AVX2 1 "register_operand")
 	   (match_dup 0)]
 	  UNSPEC_MASKMOV))]
   "TARGET_AVX")
 
-(define_insn_and_split "avx_<castmode><avxsizesuffix>_<castmode>"
-  [(set (match_operand:AVX256MODE2P 0 "nonimmediate_operand" "=x,m")
-	(unspec:AVX256MODE2P
-	  [(match_operand:<ssehalfvecmode> 1 "nonimmediate_operand" "xm,x")]
-	  UNSPEC_CAST))]
-  "TARGET_AVX"
-  "#"
-  "&& reload_completed"
-  [(const_int 0)]
-{
-  rtx op0 = operands[0];
-  rtx op1 = operands[1];
-  if (REG_P (op0))
-    op0 = gen_rtx_REG (<ssehalfvecmode>mode, REGNO (op0));
-  else
-    op1 = gen_rtx_REG (<MODE>mode, REGNO (op1));
-  emit_move_insn (op0, op1);
-  DONE;
-})
-
 (define_expand "vec_init<mode>"
   [(match_operand:V_256 0 "register_operand")
    (match_operand 1)]
   "TARGET_AVX"
 {
   ix86_expand_vector_init (false, operands[0], operands[1]);
   DONE;
 })
 
 (define_expand "vec_init<mode>"
Index: gcc/emit-rtl.c
===================================================================
--- gcc/emit-rtl.c	(revision 211397)
+++ gcc/emit-rtl.c	(working copy)
@@ -775,20 +775,23 @@ validate_subreg (enum machine_mode omode
   else if ((COMPLEX_MODE_P (imode) || VECTOR_MODE_P (imode))
 	   && GET_MODE_INNER (imode) == omode)
     ;
   /* ??? x86 sse code makes heavy use of *paradoxical* vector subregs,
      i.e. (subreg:V4SF (reg:SF) 0).  This surely isn't the cleanest way to
      represent this.  It's questionable if this ought to be represented at
      all -- why can't this all be hidden in post-reload splitters that make
      arbitrarily mode changes to the registers themselves.  */
   else if (VECTOR_MODE_P (omode) && GET_MODE_INNER (omode) == imode)
     ;
+  else if (VECTOR_MODE_P (omode) && VECTOR_MODE_P (imode)
+	   && GET_MODE_INNER (omode) == GET_MODE_INNER (imode))
+    ;
   /* Subregs involving floating point modes are not allowed to
      change size.  Therefore (subreg:DI (reg:DF) 0) is fine, but
      (subreg:SI (reg:DF) 0) isn't.  */
   else if (FLOAT_MODE_P (imode) || FLOAT_MODE_P (omode))
     {
       if (! (isize == osize
 	     /* LRA can use subreg to store a floating point value in
 		an integer mode.  Although the floating point and the
 		integer modes need the same number of hard registers,
 		the size of floating point mode can be less than the
Index: gcc/testsuite/gcc.target/i386/pr50829-1.c
===================================================================
--- gcc/testsuite/gcc.target/i386/pr50829-1.c	(revision 0)
+++ gcc/testsuite/gcc.target/i386/pr50829-1.c	(working copy)
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mavx" } */
+
+#include <x86intrin.h>
+
+__m256d
+concat (__m128d x)
+{
+  __m256d z = _mm256_castpd128_pd256 (x);
+  return _mm256_insertf128_pd (z, x, 1);
+}
+
+/* { dg-final { scan-assembler-not "vmov" } } */
Index: gcc/testsuite/gcc.target/i386/pr50829-2.c
===================================================================
--- gcc/testsuite/gcc.target/i386/pr50829-2.c	(revision 0)
+++ gcc/testsuite/gcc.target/i386/pr50829-2.c	(working copy)
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O0 -mavx" } */
+
+#include <x86intrin.h>
+
+__m256d
+concat (__m128d x)
+{
+  __m256d z = _mm256_castpd128_pd256 (x);
+  return _mm256_insertf128_pd (z, x, 1);
+}

Index Nav:	[Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav:	[Date Prev] [Date Next]	[Thread Prev] [Thread Next]