This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[power7-meissner] Fixup some issues with V2DI mode, add a minor optimization


I was writing some tests today, and noticed that the basic vector functions
were broken with V2DImode.  This patch fixes this.  I also decided to add a
small optimization for 64-bit extract from a memory operation.  I added a test
to make sure vec_insert/vec_extract/vec_splats works for every vector type, and
for constant and variable elements.

In doing the change, I noticed that a load of a double was using the 'f'
constraint, so I changed it to 'd'.

Finally, in submitting patch #4, I made some cleanups to rs6000.md, and I
forgot to submit them to the branch.

If possible, I would like these patches to be considered with patch #4 to the
mainline, but if you would prefer for me to submit them as a separate patch
after patch #4 is dealt with, I can do that as well.

[gcc]
2009-07-24  Michael Meissner  <meissner@linux.vnet.ibm.com>

	* config/rs6000/rs6000-c.c (altivec_resolve_overloaded_builtin):
	Delete test for long long type and key off of using VSX memory
	instructions instead of the VSX arithmetic unit to allow V2DI
	builtins.

	* config/rs6000/rs6000.c (rs6000_expand_vector_init): Don't move
	splat of V2DI/V2DF operands to be in a register.
	(rs6000_handle_altivec_attribute): Allow long long vectors in VSX
	mode.  Move all VSX tests under one if.

	* config/rs6000/vsx.md (vsx_extract_<mode>): Use 'd' constraint to
	load double, not 'f'.
	(vsx_extract_<mode>_zero): Add optimization for extracting
	constant elements from memory.
	(vsx_extract_<mode>_one): Ditto.

	* config/rs6000/rs6000.md (btruncdf2_fpr): Be consistent with
	other names in using _fpr suffix, not _fprs.
	(ceildf2_fpr): Ditto.
	(floordf2_fpr): Ditto.
	(movdf_hardfloat32): Eliminate extra spaces in set_attr.
	(movdf_hardfloat64_mfpgpr): Ditto.
	(movdf_hardfloat64): Ditto.

[gcc/testsuite]
2009-07-24  Michael Meissner  <meissner@linux.vnet.ibm.com>

	* gcc.target/powerpc/vsx-builtin-7.c: New file, to test various
	combinations of vector init, extract, and splat.

Index: gcc/config/rs6000/rs6000-c.c
===================================================================
--- gcc/config/rs6000/rs6000-c.c	(revision 149953)
+++ gcc/config/rs6000/rs6000-c.c	(working copy)
@@ -3259,9 +3259,6 @@ altivec_resolve_overloaded_builtin (loca
 	  && !INTEGRAL_TYPE_P (type))
 	goto bad;
       unsigned_p = TYPE_UNSIGNED (type);
-      if (type == long_long_unsigned_type_node
-          || type == long_long_integer_type_node)
-	goto bad;
       switch (TYPE_MODE (type))
 	{
 	  case DImode:
@@ -3328,7 +3325,7 @@ altivec_resolve_overloaded_builtin (loca
 
       /* If we can use the VSX xxpermdi instruction, use that for extract.  */
       mode = TYPE_MODE (arg1_type);
-      if ((mode == V2DFmode || mode == V2DImode) && VECTOR_UNIT_VSX_P (mode)
+      if ((mode == V2DFmode || mode == V2DImode) && VECTOR_MEM_VSX_P (mode)
 	  && TREE_CODE (arg2) == INTEGER_CST
 	  && TREE_INT_CST_HIGH (arg2) == 0
 	  && (TREE_INT_CST_LOW (arg2) == 0 || TREE_INT_CST_LOW (arg2) == 1))
Index: gcc/config/rs6000/rs6000.c
===================================================================
--- gcc/config/rs6000/rs6000.c	(revision 149960)
+++ gcc/config/rs6000/rs6000.c	(working copy)
@@ -4200,7 +4200,7 @@ rs6000_expand_vector_init (rtx target, r
     {
       if (all_same)
 	{
-	  rtx element = copy_to_reg (XVECEXP (vals, 0, 0));
+	  rtx element = XVECEXP (vals, 0, 0);
 	  if (mode == V2DFmode)
 	    emit_insn (gen_vsx_splat_v2df (target, element));
 	  else
@@ -22793,19 +22793,24 @@ rs6000_handle_altivec_attribute (tree *n
   mode = TYPE_MODE (type);
 
   /* Check for invalid AltiVec type qualifiers.  */
-  if ((type == long_unsigned_type_node || type == long_integer_type_node)
-      && !TARGET_VSX)
+  if (!TARGET_VSX)
     {
-    if (TARGET_64BIT)
-      error ("use of %<long%> in AltiVec types is invalid for 64-bit code");
-    else if (rs6000_warn_altivec_long)
-      warning (0, "use of %<long%> in AltiVec types is deprecated; use %<int%>");
-    }
-  else if (type == long_long_unsigned_type_node
-           || type == long_long_integer_type_node)
-    error ("use of %<long long%> in AltiVec types is invalid");
-  else if (type == double_type_node && !TARGET_VSX)
-    error ("use of %<double%> in AltiVec types is invalid without -mvsx");
+      if (type == long_unsigned_type_node || type == long_integer_type_node)
+	{
+	  if (TARGET_64BIT)
+	    error ("use of %<long%> in AltiVec types is invalid for "
+		   "64-bit code without -mvsx");
+	  else if (rs6000_warn_altivec_long)
+	    warning (0, "use of %<long%> in AltiVec types is deprecated; "
+		     "use %<int%>");
+	}
+      else if (type == long_long_unsigned_type_node
+	       || type == long_long_integer_type_node)
+	error ("use of %<long long%> in AltiVec types is invalid without "
+	       "-mvsx");
+      else if (type == double_type_node)
+	error ("use of %<double%> in AltiVec types is invalid without -mvsx");
+    }
   else if (type == long_double_type_node)
     error ("use of %<long double%> in AltiVec types is invalid");
   else if (type == boolean_type_node)
Index: gcc/config/rs6000/vsx.md
===================================================================
--- gcc/config/rs6000/vsx.md	(revision 149953)
+++ gcc/config/rs6000/vsx.md	(working copy)
@@ -1201,9 +1201,9 @@ (define_insn "vsx_set_<mode>"
 }
   [(set_attr "type" "vecperm")])
 
-;; Extract a DF element from V2DF
+;; Extract a DF/DI element from V2DF/V2DI
 (define_insn "vsx_extract_<mode>"
-  [(set (match_operand:<VS_scalar> 0 "vsx_register_operand" "=ws,f,?wa")
+  [(set (match_operand:<VS_scalar> 0 "vsx_register_operand" "=ws,d,?wa")
 	(vec_select:<VS_scalar> (match_operand:VSX_D 1 "vsx_register_operand" "wd,wd,wa")
 		       (parallel
 			[(match_operand:QI 2 "u5bit_cint_operand" "i,i,i")])))]
@@ -1215,6 +1215,29 @@ (define_insn "vsx_extract_<mode>"
 }
   [(set_attr "type" "vecperm")])
 
+;; Optimize extracting element 0 from memory
+(define_insn "*vsx_extract_<mode>_zero"
+  [(set (match_operand:<VS_scalar> 0 "vsx_register_operand" "=ws,d,?wa")
+	(vec_select:<VS_scalar>
+	 (match_operand:VSX_D 1 "indexed_or_indirect_operand" "Z,Z,Z")
+	 (parallel [(const_int 0)])))]
+  "VECTOR_MEM_VSX_P (<MODE>mode) && WORDS_BIG_ENDIAN"
+  "lxsd%U1x %x0,%y1"
+  [(set_attr "type" "fpload")
+   (set_attr "length" "4")])  
+
+;; Optimize element 1 for a single pointer reference using the traditional
+;; offsettable memory load (element 1 is 8 bytes past the vector's address)
+(define_insn "*vsx_extract_<mode>_one"
+  [(set (match_operand:<VS_scalar> 0 "vsx_register_operand" "=d")
+	(vec_select:<VS_scalar>
+	 (mem:VSX_D (match_operand:P 1 "gpc_reg_operand" "b"))
+	 (parallel [(const_int 1)])))]
+  "VECTOR_MEM_VSX_P (<MODE>mode) && WORDS_BIG_ENDIAN"
+  "lfd %0,8(%1)"
+  [(set_attr "type" "fpload")
+   (set_attr "length" "4")])
+
 ;; General double word oriented permute, allow the other vector types for
 ;; optimizing the permute instruction.
 (define_insn "vsx_xxpermdi_<mode>"
Index: gcc/config/rs6000/rs6000.md
===================================================================
--- gcc/config/rs6000/rs6000.md	(revision 149953)
+++ gcc/config/rs6000/rs6000.md	(working copy)
@@ -6588,7 +6588,7 @@ (define_expand "btruncdf2"
   "TARGET_FPRND && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT"
   "")
 
-(define_insn "*btruncdf2_fprs"
+(define_insn "*btruncdf2_fpr"
   [(set (match_operand:DF 0 "gpc_reg_operand" "=f")
 	(unspec:DF [(match_operand:DF 1 "gpc_reg_operand" "f")] UNSPEC_FRIZ))]
   "TARGET_FPRND && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
@@ -6609,7 +6609,7 @@ (define_expand "ceildf2"
   "TARGET_FPRND && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT"
   "")
 
-(define_insn "*ceildf2_fprs"
+(define_insn "*ceildf2_fpr"
   [(set (match_operand:DF 0 "gpc_reg_operand" "=d")
 	(unspec:DF [(match_operand:DF 1 "gpc_reg_operand" "d")] UNSPEC_FRIP))]
   "TARGET_FPRND && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
@@ -6630,7 +6630,7 @@ (define_expand "floordf2"
   "TARGET_FPRND && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT"
   "")
 
-(define_insn "*floordf2_fprs"
+(define_insn "*floordf2_fpr"
   [(set (match_operand:DF 0 "gpc_reg_operand" "=d")
 	(unspec:DF [(match_operand:DF 1 "gpc_reg_operand" "d")] UNSPEC_FRIM))]
   "TARGET_FPRND && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
@@ -9142,8 +9142,8 @@ (define_insn "*movdf_hardfloat32"
       return \"#\";
     }
 }"
-  [(set_attr "type" "two, load, store, fp, fp, fpload, fpload, fpstore, fpstore, fp, fpload, fpstore, vecsimple, *,  *,  *")
-   (set_attr "length" "8,   16,    16,  4,  4,      4,      4,       4,       4,  4,      4,       4,         4, 8, 12, 16")])
+  [(set_attr "type" "two,load,store,fp,fp,fpload,fpload,fpstore,fpstore,fp,fpload,fpstore,vecsimple,*,*,*")
+   (set_attr "length" "8,16,16,4,4,4,4,4,4,4,4,4,4,8,12,16")])
 
 (define_insn "*movdf_softfloat32"
   [(set (match_operand:DF 0 "nonimmediate_operand" "=r,r,m,r,r,r")
@@ -9219,8 +9219,8 @@ (define_insn "*movdf_hardfloat64_mfpgpr"
    #
    mftgpr %0,%1
    mffgpr %0,%1"
-  [(set_attr "type" "store, load, *, fp, fp, fpload, fpload, fpstore, fpstore, fp, fpload, fpstore, vecsimple, mtjmpr, mfjmpr, *, *,  *,  *, mftgpr, mffgpr")
-   (set_attr "length"   "4,    4, 4,  4,  4,      4,      4,       4,       4,  4,      4,       4,         4,      4,      4, 4, 8, 12, 16,      4,      4")])
+  [(set_attr "type" "store,load,*,fp,fp,fpload,fpload,fpstore,fpstore,fp,fpload,fpstore,vecsimple,mtjmpr,mfjmpr,*,*,*,*,mftgpr,mffgpr")
+   (set_attr "length" "4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,8,12,16,4,4")])
 
 ; ld/std require word-aligned displacements -> 'Y' constraint.
 ; List Y->r and r->Y before r->r for reload.
@@ -9251,8 +9251,8 @@ (define_insn "*movdf_hardfloat64"
    #
    #
    #"
-  [(set_attr "type" "store, load, *, fp, fp, fpload, fpload, fpstore, fpstore, fp, fpload, fpstore, vecsimple, mtjmpr, mfjmpr,  *,  *,  *,  *")
-   (set_attr "length" "  4,    4, 4,  4,  4,      4,      4,       4,       4,  4,      4,       4,         4,      4,      4,  4,  8, 12, 16")])
+  [(set_attr "type" "store,load,*,fp,fp,fpload,fpload,fpstore,fpstore,fp,fpload,fpstore,vecsimple,mtjmpr,mfjmpr,*,*,*,*")
+   (set_attr "length" "4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,8,12,16")])
 
 (define_insn "*movdf_softfloat64"
   [(set (match_operand:DF 0 "nonimmediate_operand" "=r,Y,r,cl,r,r,r,r,*h")
Index: gcc/testsuite/gcc.target/powerpc/vsx-builtin-7.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/vsx-builtin-7.c	(revision 0)
+++ gcc/testsuite/gcc.target/powerpc/vsx-builtin-7.c	(revision 0)
@@ -0,0 +1,150 @@
+/* { dg-do compile { target { powerpc*-*-* } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-O2 -mcpu=power7" } */
+
+/* Test simple extract/insert/splat operations.  Make sure all types are
+   supported with various options.  */
+
+#include <altivec.h>
+
+double extract_df_0_reg (vector double p) { return vec_extract (p, 0); }
+double extract_df_1_reg (vector double p) { return vec_extract (p, 1); }
+double extract_df_n_reg (vector double p, int n) { return vec_extract (p, n); }
+
+double extract_df_0_mem (vector double *p) { return vec_extract (*p, 0); }
+double extract_df_1_mem (vector double *p) { return vec_extract (*p, 1); }
+double extract_df_n_mem (vector double *p, int n) { return vec_extract (*p, n); }
+
+vector double insert_df_0 (vector double p, double x) { return vec_insert (x, p, 0); }
+vector double insert_df_1 (vector double p, double x) { return vec_insert (x, p, 1); }
+vector double insert_df_n (vector double p, double x, int n) { return vec_insert (x, p, n); }
+
+vector double splat_df_reg (double x) { return vec_splats (x); }
+vector double splat_df_mem (double *x) { return vec_splats (*x); }
+
+#ifdef _ARCH_PPC64
+#define ll long
+#else
+#define ll long long
+#endif
+
+ll extract_di_0_reg (vector ll p) { return vec_extract (p, 0); }
+ll extract_di_1_reg (vector ll p) { return vec_extract (p, 1); }
+ll extract_di_n_reg (vector ll p, int n) { return vec_extract (p, n); }
+
+ll extract_di_0_mem (vector ll *p) { return vec_extract (*p, 0); }
+ll extract_di_1_mem (vector ll *p) { return vec_extract (*p, 1); }
+ll extract_di_n_mem (vector ll *p, int n) { return vec_extract (*p, n); }
+
+vector ll insert_di_0 (vector ll p, ll x) { return vec_insert (x, p, 0); }
+vector ll insert_di_1 (vector ll p, ll x) { return vec_insert (x, p, 1); }
+vector ll insert_di_n (vector ll p, ll x, int n) { return vec_insert (x, p, n); }
+
+vector ll splat_di_reg (ll x) { return vec_splats (x); }
+vector ll splat_di_mem (ll *x) { return vec_splats (*x); }
+
+float extract_sf_0_reg (vector float p) { return vec_extract (p, 0); }
+float extract_sf_3_reg (vector float p) { return vec_extract (p, 3); }
+float extract_sf_n_reg (vector float p, int n) { return vec_extract (p, n); }
+
+float extract_sf_0_mem (vector float *p) { return vec_extract (*p, 0); }
+float extract_sf_3_mem (vector float *p) { return vec_extract (*p, 3); }
+float extract_sf_n_mem (vector float *p, int n) { return vec_extract (*p, n); }
+
+vector float insert_sf_0 (vector float p, float x) { return vec_insert (x, p, 0); }
+vector float insert_sf_3 (vector float p, float x) { return vec_insert (x, p, 3); }
+vector float insert_sf_n (vector float p, float x, int n) { return vec_insert (x, p, n); }
+
+vector float splat_sf_reg (float x) { return vec_splats (x); }
+vector float splat_sf_mem (float *x) { return vec_splats (*x); }
+
+int extract_si_0_reg (vector int p) { return vec_extract (p, 0); }
+int extract_si_3_reg (vector int p) { return vec_extract (p, 3); }
+int extract_si_n_reg (vector int p, int n) { return vec_extract (p, n); }
+
+int extract_si_0_mem (vector int *p) { return vec_extract (*p, 0); }
+int extract_si_3_mem (vector int *p) { return vec_extract (*p, 3); }
+int extract_si_n_mem (vector int *p, int n) { return vec_extract (*p, n); }
+
+vector int insert_si_0 (vector int p, int x) { return vec_insert (x, p, 0); }
+vector int insert_si_3 (vector int p, int x) { return vec_insert (x, p, 3); }
+vector int insert_si_n (vector int p, int x, int n) { return vec_insert (x, p, n); }
+
+vector int splat_si_reg (int x) { return vec_splats (x); }
+vector int splat_si_mem (int *x) { return vec_splats (*x); }
+
+unsigned int extract_usi_0_reg (vector unsigned int p) { return vec_extract (p, 0); }
+unsigned int extract_usi_3_reg (vector unsigned int p) { return vec_extract (p, 3); }
+unsigned int extract_usi_n_reg (vector unsigned int p, int n) { return vec_extract (p, n); }
+
+unsigned int extract_usi_0_mem (vector unsigned int *p) { return vec_extract (*p, 0); }
+unsigned int extract_usi_3_mem (vector unsigned int *p) { return vec_extract (*p, 3); }
+unsigned int extract_usi_n_mem (vector unsigned int *p, int n) { return vec_extract (*p, n); }
+
+vector unsigned int insert_usi_0 (vector unsigned int p, unsigned int x) { return vec_insert (x, p, 0); }
+vector unsigned int insert_usi_3 (vector unsigned int p, unsigned int x) { return vec_insert (x, p, 3); }
+vector unsigned int insert_usi_n (vector unsigned int p, unsigned int x, int n) { return vec_insert (x, p, n); }
+
+vector unsigned int splat_usi_reg (unsigned int x) { return vec_splats (x); }
+vector unsigned int splat_usi_mem (unsigned int *x) { return vec_splats (*x); }
+
+short extract_hi_0_reg (vector short p) { return vec_extract (p, 0); }
+short extract_hi_7_reg (vector short p) { return vec_extract (p, 7); }
+short extract_hi_n_reg (vector short p, int n) { return vec_extract (p, n); }
+
+short extract_hi_0_mem (vector short *p) { return vec_extract (*p, 0); }
+short extract_hi_7_mem (vector short *p) { return vec_extract (*p, 7); }
+short extract_hi_n_mem (vector short *p, int n) { return vec_extract (*p, n); }
+
+vector short insert_hi_0 (vector short p, short x) { return vec_insert (x, p, 0); }
+vector short insert_hi_7 (vector short p, short x) { return vec_insert (x, p, 7); }
+vector short insert_hi_n (vector short p, short x, int n) { return vec_insert (x, p, n); }
+
+vector short splat_hi_reg (short x) { return vec_splats (x); }
+vector short splat_hi_mem (short *x) { return vec_splats (*x); }
+
+unsigned short extract_uhi_0_reg (vector unsigned short p) { return vec_extract (p, 0); }
+unsigned short extract_uhi_7_reg (vector unsigned short p) { return vec_extract (p, 7); }
+unsigned short extract_uhi_n_reg (vector unsigned short p, int n) { return vec_extract (p, n); }
+
+unsigned short extract_uhi_0_mem (vector unsigned short *p) { return vec_extract (*p, 0); }
+unsigned short extract_uhi_7_mem (vector unsigned short *p) { return vec_extract (*p, 7); }
+unsigned short extract_uhi_n_mem (vector unsigned short *p, int n) { return vec_extract (*p, n); }
+
+vector unsigned short insert_uhi_0 (vector unsigned short p, unsigned short x) { return vec_insert (x, p, 0); }
+vector unsigned short insert_uhi_7 (vector unsigned short p, unsigned short x) { return vec_insert (x, p, 7); }
+vector unsigned short insert_uhi_n (vector unsigned short p, unsigned short x, int n) { return vec_insert (x, p, n); }
+
+vector unsigned short splat_uhi_reg (unsigned short x) { return vec_splats (x); }
+vector unsigned short splat_uhi_mem (unsigned short *x) { return vec_splats (*x); }
+
+signed char extract_qi_0_reg (vector signed char p) { return vec_extract (p, 0); }
+signed char extract_qi_1_reg5 (vector signed char p) { return vec_extract (p, 15); }
+signed char extract_qi_n_reg (vector signed char p, int n) { return vec_extract (p, n); }
+
+signed char extract_qi_0_mem (vector signed char *p) { return vec_extract (*p, 0); }
+signed char extract_qi_1_mem5 (vector signed char *p) { return vec_extract (*p, 15); }
+signed char extract_qi_n_mem (vector signed char *p, int n) { return vec_extract (*p, n); }
+
+vector signed char insert_qi_0 (vector signed char p, signed char x) { return vec_insert (x, p, 0); }
+vector signed char insert_qi_15 (vector signed char p, signed char x) { return vec_insert (x, p, 15); }
+vector signed char insert_qi_n (vector signed char p, signed char x, int n) { return vec_insert (x, p, n); }
+
+vector signed char splat_qi_reg (signed char x) { return vec_splats (x); }
+vector signed char splat_qi_mem (signed char *x) { return vec_splats (*x); }
+
+unsigned char extract_uqi_0_reg (vector unsigned char p) { return vec_extract (p, 0); }
+unsigned char extract_uqi_1_reg5 (vector unsigned char p) { return vec_extract (p, 15); }
+unsigned char extract_uqi_n_reg (vector unsigned char p, int n) { return vec_extract (p, n); }
+
+unsigned char extract_uqi_0_mem (vector unsigned char *p) { return vec_extract (*p, 0); }
+unsigned char extract_uqi_1_mem5 (vector unsigned char *p) { return vec_extract (*p, 15); }
+unsigned char extract_uqi_n_mem (vector unsigned char *p, int n) { return vec_extract (*p, n); }
+
+vector unsigned char insert_uqi_0 (vector unsigned char p, unsigned char x) { return vec_insert (x, p, 0); }
+vector unsigned char insert_uqi_15 (vector unsigned char p, unsigned char x) { return vec_insert (x, p, 15); }
+vector unsigned char insert_uqi_n (vector unsigned char p, unsigned char x, int n) { return vec_insert (x, p, n); }
+
+vector unsigned char splat_uqi_reg (unsigned char x) { return vec_splats (x); }
+vector unsigned char splat_uqi_mem (unsigned char *x) { return vec_splats (*x); }

-- 
Michael Meissner, IBM
4 Technology Place Drive, MS 2203A, Westford, MA, 01886, USA
meissner@linux.vnet.ibm.com


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]