[power7-meissner] Fixup some issues with V2DI mode, add a minor optimization
Michael Meissner
meissner@linux.vnet.ibm.com
Fri Jul 24 23:24:00 GMT 2009
I was writing some tests today, and noticed that the basic vector functions
were broken with V2DImode. This patch fixes this. I also decided to add a
small optimization for 64-bit extract from a memory operation. I added a test
to make sure vec_insert/vec_extract/vec_spalts works for every vector type, and
for constant and variable elements.
In doing the change, I noticed that a load of a double was using the 'f'
constraint, so I changed it to 'd'.
Finally, in submitting patch #4, I made some cleanups to rs6000.md, and I
forgot to submit them to the branch.
If possible, I would like these patches to be considered with patch #4 to the
mainline, but if you would prefer for me to submit them as a separate patch
after patch #4 is dealt with, I can do that as well.
[gcc]
2009-07-24 Michael Meissner <meissner@linux.vnet.ibm.com>
* config/rs6000/rs6000-c.c (altivec_resolve_overloaded_builtin):
Delete test for long long type and key off of using VSX memory
instructions instead of the VSX arithmetic unit to allow V2DI
builtins.
* config/rs6000/rs6000.c (rs6000_expand_vector_init): Don't move
splat of V2DI/V2DF operands to be in a register.
(rs6000_handle_altivec_attribute): Allow long long vectors in VSX
mode. Move all VSX tests under one if.
* config/rs6000/vsx.md (vsx_extract_<mode>): Use 'd' constraint to
load double, not 'f'.
(vsx_extract_<mode>_zero): Add optimization for extracting
constant elements memory.
(vsx_extract_<mode>_one): Ditto.
* config/rs6000/rs6000.md (btruncdf2_fpr): Be consistant with
other names in using _fpr suffix, not _fprs.
(ceildf2_fpr): Ditto.
(floordf2_fpr): Ditto.
(movdf_hardfloat32): Eliminate extra spaces in set_attr.
(movdf_hardfloat64_mfpgpr): Ditto.
(movdf_hardfloat64): Ditto.
[gcc/testsuite]
2009-07-24 Michael Meissner <meissner@linux.vnet.ibm.com>
* gcc.target/powerpc/vsx-builtin-7.c: New file, to test various
combinations of vector init, extract, and splat.
Index: gcc/config/rs6000/rs6000-c.c
===================================================================
--- gcc/config/rs6000/rs6000-c.c (revision 149953)
+++ gcc/config/rs6000/rs6000-c.c (working copy)
@@ -3259,9 +3259,6 @@ altivec_resolve_overloaded_builtin (loca
&& !INTEGRAL_TYPE_P (type))
goto bad;
unsigned_p = TYPE_UNSIGNED (type);
- if (type == long_long_unsigned_type_node
- || type == long_long_integer_type_node)
- goto bad;
switch (TYPE_MODE (type))
{
case DImode:
@@ -3328,7 +3325,7 @@ altivec_resolve_overloaded_builtin (loca
/* If we can use the VSX xxpermdi instruction, use that for extract. */
mode = TYPE_MODE (arg1_type);
- if ((mode == V2DFmode || mode == V2DImode) && VECTOR_UNIT_VSX_P (mode)
+ if ((mode == V2DFmode || mode == V2DImode) && VECTOR_MEM_VSX_P (mode)
&& TREE_CODE (arg2) == INTEGER_CST
&& TREE_INT_CST_HIGH (arg2) == 0
&& (TREE_INT_CST_LOW (arg2) == 0 || TREE_INT_CST_LOW (arg2) == 1))
Index: gcc/config/rs6000/rs6000.c
===================================================================
--- gcc/config/rs6000/rs6000.c (revision 149960)
+++ gcc/config/rs6000/rs6000.c (working copy)
@@ -4200,7 +4200,7 @@ rs6000_expand_vector_init (rtx target, r
{
if (all_same)
{
- rtx element = copy_to_reg (XVECEXP (vals, 0, 0));
+ rtx element = XVECEXP (vals, 0, 0);
if (mode == V2DFmode)
emit_insn (gen_vsx_splat_v2df (target, element));
else
@@ -22793,19 +22793,24 @@ rs6000_handle_altivec_attribute (tree *n
mode = TYPE_MODE (type);
/* Check for invalid AltiVec type qualifiers. */
- if ((type == long_unsigned_type_node || type == long_integer_type_node)
- && !TARGET_VSX)
+ if (!TARGET_VSX)
{
- if (TARGET_64BIT)
- error ("use of %<long%> in AltiVec types is invalid for 64-bit code");
- else if (rs6000_warn_altivec_long)
- warning (0, "use of %<long%> in AltiVec types is deprecated; use %<int%>");
- }
- else if (type == long_long_unsigned_type_node
- || type == long_long_integer_type_node)
- error ("use of %<long long%> in AltiVec types is invalid");
- else if (type == double_type_node && !TARGET_VSX)
- error ("use of %<double%> in AltiVec types is invalid without -mvsx");
+ if (type == long_unsigned_type_node || type == long_integer_type_node)
+ {
+ if (TARGET_64BIT)
+ error ("use of %<long%> in AltiVec types is invalid for "
+ "64-bit code without -mvsx");
+ else if (rs6000_warn_altivec_long)
+ warning (0, "use of %<long%> in AltiVec types is deprecated; "
+ "use %<int%>");
+ }
+ else if (type == long_long_unsigned_type_node
+ || type == long_long_integer_type_node)
+ error ("use of %<long long%> in AltiVec types is invalid without "
+ "-mvsx");
+ else if (type == double_type_node)
+ error ("use of %<double%> in AltiVec types is invalid without -mvsx");
+ }
else if (type == long_double_type_node)
error ("use of %<long double%> in AltiVec types is invalid");
else if (type == boolean_type_node)
Index: gcc/config/rs6000/vsx.md
===================================================================
--- gcc/config/rs6000/vsx.md (revision 149953)
+++ gcc/config/rs6000/vsx.md (working copy)
@@ -1201,9 +1201,9 @@ (define_insn "vsx_set_<mode>"
}
[(set_attr "type" "vecperm")])
-;; Extract a DF element from V2DF
+;; Extract a DF/DI element from V2DF/V2DI
(define_insn "vsx_extract_<mode>"
- [(set (match_operand:<VS_scalar> 0 "vsx_register_operand" "=ws,f,?wa")
+ [(set (match_operand:<VS_scalar> 0 "vsx_register_operand" "=ws,d,?wa")
(vec_select:<VS_scalar> (match_operand:VSX_D 1 "vsx_register_operand" "wd,wd,wa")
(parallel
[(match_operand:QI 2 "u5bit_cint_operand" "i,i,i")])))]
@@ -1215,6 +1215,29 @@ (define_insn "vsx_extract_<mode>"
}
[(set_attr "type" "vecperm")])
+;; Optimize extracting element 0 from memory
+(define_insn "*vsx_extract_<mode>_zero"
+ [(set (match_operand:<VS_scalar> 0 "vsx_register_operand" "=ws,d,?wa")
+ (vec_select:<VS_scalar>
+ (match_operand:VSX_D 1 "indexed_or_indirect_operand" "Z,Z,Z")
+ (parallel [(const_int 0)])))]
+ "VECTOR_MEM_VSX_P (<MODE>mode) && WORDS_BIG_ENDIAN"
+ "lxsd%U1x %x0,%y1"
+ [(set_attr "type" "fpload")
+ (set_attr "length" "4")])
+
+;; Optimize element 1 for a single pointer reference using the traditional
+;; offsetable memory load
+(define_insn "*vsx_extract_<mode>_one"
+ [(set (match_operand:<VS_scalar> 0 "vsx_register_operand" "=d")
+ (vec_select:<VS_scalar>
+ (mem:VSX_D (match_operand:P 1 "gpc_reg_operand" "b"))
+ (parallel [(const_int 1)])))]
+ "VECTOR_MEM_VSX_P (<MODE>mode) && WORDS_BIG_ENDIAN"
+ "lfd %0,4(%1)"
+ [(set_attr "type" "fpload")
+ (set_attr "length" "4")])
+
;; General double word oriented permute, allow the other vector types for
;; optimizing the permute instruction.
(define_insn "vsx_xxpermdi_<mode>"
Index: gcc/config/rs6000/rs6000.md
===================================================================
--- gcc/config/rs6000/rs6000.md (revision 149953)
+++ gcc/config/rs6000/rs6000.md (working copy)
@@ -6588,7 +6588,7 @@ (define_expand "btruncdf2"
"TARGET_FPRND && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT"
"")
-(define_insn "*btruncdf2_fprs"
+(define_insn "*btruncdf2_fpr"
[(set (match_operand:DF 0 "gpc_reg_operand" "=f")
(unspec:DF [(match_operand:DF 1 "gpc_reg_operand" "f")] UNSPEC_FRIZ))]
"TARGET_FPRND && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
@@ -6609,7 +6609,7 @@ (define_expand "ceildf2"
"TARGET_FPRND && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT"
"")
-(define_insn "*ceildf2_fprs"
+(define_insn "*ceildf2_fpr"
[(set (match_operand:DF 0 "gpc_reg_operand" "=d")
(unspec:DF [(match_operand:DF 1 "gpc_reg_operand" "d")] UNSPEC_FRIP))]
"TARGET_FPRND && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
@@ -6630,7 +6630,7 @@ (define_expand "floordf2"
"TARGET_FPRND && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT"
"")
-(define_insn "*floordf2_fprs"
+(define_insn "*floordf2_fpr"
[(set (match_operand:DF 0 "gpc_reg_operand" "=d")
(unspec:DF [(match_operand:DF 1 "gpc_reg_operand" "d")] UNSPEC_FRIM))]
"TARGET_FPRND && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
@@ -9142,8 +9142,8 @@ (define_insn "*movdf_hardfloat32"
return \"#\";
}
}"
- [(set_attr "type" "two, load, store, fp, fp, fpload, fpload, fpstore, fpstore, fp, fpload, fpstore, vecsimple, *, *, *")
- (set_attr "length" "8, 16, 16, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 12, 16")])
+ [(set_attr "type" "two,load,store,fp,fp,fpload,fpload,fpstore,fpstore,fp,fpload,fpstore,vecsimple,*,*,*")
+ (set_attr "length" "8,16,16,4,4,4,4,4,4,4,4,4,4,8,12,16")])
(define_insn "*movdf_softfloat32"
[(set (match_operand:DF 0 "nonimmediate_operand" "=r,r,m,r,r,r")
@@ -9219,8 +9219,8 @@ (define_insn "*movdf_hardfloat64_mfpgpr"
#
mftgpr %0,%1
mffgpr %0,%1"
- [(set_attr "type" "store, load, *, fp, fp, fpload, fpload, fpstore, fpstore, fp, fpload, fpstore, vecsimple, mtjmpr, mfjmpr, *, *, *, *, mftgpr, mffgpr")
- (set_attr "length" "4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 12, 16, 4, 4")])
+ [(set_attr "type" "store,load,*,fp,fp,fpload,fpload,fpstore,fpstore,fp,fpload,fpstore,vecsimple,mtjmpr,mfjmpr,*,*,*,*,mftgpr,mffgpr")
+ (set_attr "length" "4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,8,12,16,4,4")])
; ld/std require word-aligned displacements -> 'Y' constraint.
; List Y->r and r->Y before r->r for reload.
@@ -9251,8 +9251,8 @@ (define_insn "*movdf_hardfloat64"
#
#
#"
- [(set_attr "type" "store, load, *, fp, fp, fpload, fpload, fpstore, fpstore, fp, fpload, fpstore, vecsimple, mtjmpr, mfjmpr, *, *, *, *")
- (set_attr "length" " 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 12, 16")])
+ [(set_attr "type" "store,load,*,fp,fp,fpload,fpload,fpstore,fpstore,fp,fpload,fpstore,vecsimple,mtjmpr,mfjmpr,*,*,*,*")
+ (set_attr "length" "4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,8,12,16")])
(define_insn "*movdf_softfloat64"
[(set (match_operand:DF 0 "nonimmediate_operand" "=r,Y,r,cl,r,r,r,r,*h")
Index: gcc/testsuite/gcc.target/powerpc/vsx-builtin-7.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/vsx-builtin-7.c (revision 0)
+++ gcc/testsuite/gcc.target/powerpc/vsx-builtin-7.c (revision 0)
@@ -0,0 +1,150 @@
+/* { dg-do compile { target { powerpc*-*-* } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-O2 -mcpu=power7" } */
+
+/* Test simple extract/insert/slat operations. Make sure all types are
+ supported with various options. */
+
+#include <altivec.h>
+
+double extract_df_0_reg (vector double p) { return vec_extract (p, 0); }
+double extract_df_1_reg (vector double p) { return vec_extract (p, 1); }
+double extract_df_n_reg (vector double p, int n) { return vec_extract (p, n); }
+
+double extract_df_0_mem (vector double *p) { return vec_extract (*p, 0); }
+double extract_df_1_mem (vector double *p) { return vec_extract (*p, 1); }
+double extract_df_n_mem (vector double *p, int n) { return vec_extract (*p, n); }
+
+vector double insert_df_0 (vector double p, double x) { return vec_insert (x, p, 0); }
+vector double insert_df_1 (vector double p, double x) { return vec_insert (x, p, 1); }
+vector double insert_df_n (vector double p, double x, int n) { return vec_insert (x, p, n); }
+
+vector double splat_df_reg (double x) { return vec_splats (x); }
+vector double splat_df_mem (double *x) { return vec_splats (*x); }
+
+#ifdef _ARCH_PPC64
+#define ll long
+#else
+#define ll long long
+#endif
+
+ll extract_di_0_reg (vector ll p) { return vec_extract (p, 0); }
+ll extract_di_1_reg (vector ll p) { return vec_extract (p, 1); }
+ll extract_di_n_reg (vector ll p, int n) { return vec_extract (p, n); }
+
+ll extract_di_0_mem (vector ll *p) { return vec_extract (*p, 0); }
+ll extract_di_1_mem (vector ll *p) { return vec_extract (*p, 1); }
+ll extract_di_n_mem (vector ll *p, int n) { return vec_extract (*p, n); }
+
+vector ll insert_di_0 (vector ll p, ll x) { return vec_insert (x, p, 0); }
+vector ll insert_di_1 (vector ll p, ll x) { return vec_insert (x, p, 1); }
+vector ll insert_di_n (vector ll p, ll x, int n) { return vec_insert (x, p, n); }
+
+vector ll splat_di_reg (ll x) { return vec_splats (x); }
+vector ll splat_di_mem (ll *x) { return vec_splats (*x); }
+
+float extract_sf_0_reg (vector float p) { return vec_extract (p, 0); }
+float extract_sf_3_reg (vector float p) { return vec_extract (p, 3); }
+float extract_sf_n_reg (vector float p, int n) { return vec_extract (p, n); }
+
+float extract_sf_0_mem (vector float *p) { return vec_extract (*p, 0); }
+float extract_sf_3_mem (vector float *p) { return vec_extract (*p, 3); }
+float extract_sf_n_mem (vector float *p, int n) { return vec_extract (*p, n); }
+
+vector float insert_sf_0 (vector float p, float x) { return vec_insert (x, p, 0); }
+vector float insert_sf_3 (vector float p, float x) { return vec_insert (x, p, 3); }
+vector float insert_sf_n (vector float p, float x, int n) { return vec_insert (x, p, n); }
+
+vector float splat_sf_reg (float x) { return vec_splats (x); }
+vector float splat_sf_mem (float *x) { return vec_splats (*x); }
+
+int extract_si_0_reg (vector int p) { return vec_extract (p, 0); }
+int extract_si_3_reg (vector int p) { return vec_extract (p, 3); }
+int extract_si_n_reg (vector int p, int n) { return vec_extract (p, n); }
+
+int extract_si_0_mem (vector int *p) { return vec_extract (*p, 0); }
+int extract_si_3_mem (vector int *p) { return vec_extract (*p, 3); }
+int extract_si_n_mem (vector int *p, int n) { return vec_extract (*p, n); }
+
+vector int insert_si_0 (vector int p, int x) { return vec_insert (x, p, 0); }
+vector int insert_si_3 (vector int p, int x) { return vec_insert (x, p, 3); }
+vector int insert_si_n (vector int p, int x, int n) { return vec_insert (x, p, n); }
+
+vector int splat_si_reg (int x) { return vec_splats (x); }
+vector int splat_si_mem (int *x) { return vec_splats (*x); }
+
+unsigned int extract_usi_0_reg (vector unsigned int p) { return vec_extract (p, 0); }
+unsigned int extract_usi_3_reg (vector unsigned int p) { return vec_extract (p, 3); }
+unsigned int extract_usi_n_reg (vector unsigned int p, int n) { return vec_extract (p, n); }
+
+unsigned int extract_usi_0_mem (vector unsigned int *p) { return vec_extract (*p, 0); }
+unsigned int extract_usi_3_mem (vector unsigned int *p) { return vec_extract (*p, 3); }
+unsigned int extract_usi_n_mem (vector unsigned int *p, int n) { return vec_extract (*p, n); }
+
+vector unsigned int insert_usi_0 (vector unsigned int p, unsigned int x) { return vec_insert (x, p, 0); }
+vector unsigned int insert_usi_3 (vector unsigned int p, unsigned int x) { return vec_insert (x, p, 3); }
+vector unsigned int insert_usi_n (vector unsigned int p, unsigned int x, int n) { return vec_insert (x, p, n); }
+
+vector unsigned int splat_usi_reg (unsigned int x) { return vec_splats (x); }
+vector unsigned int splat_usi_mem (unsigned int *x) { return vec_splats (*x); }
+
+short extract_hi_0_reg (vector short p) { return vec_extract (p, 0); }
+short extract_hi_7_reg (vector short p) { return vec_extract (p, 7); }
+short extract_hi_n_reg (vector short p, int n) { return vec_extract (p, n); }
+
+short extract_hi_0_mem (vector short *p) { return vec_extract (*p, 0); }
+short extract_hi_7_mem (vector short *p) { return vec_extract (*p, 7); }
+short extract_hi_n_mem (vector short *p, int n) { return vec_extract (*p, n); }
+
+vector short insert_hi_0 (vector short p, short x) { return vec_insert (x, p, 0); }
+vector short insert_hi_7 (vector short p, short x) { return vec_insert (x, p, 7); }
+vector short insert_hi_n (vector short p, short x, int n) { return vec_insert (x, p, n); }
+
+vector short splat_hi_reg (short x) { return vec_splats (x); }
+vector short splat_hi_mem (short *x) { return vec_splats (*x); }
+
+unsigned short extract_uhi_0_reg (vector unsigned short p) { return vec_extract (p, 0); }
+unsigned short extract_uhi_7_reg (vector unsigned short p) { return vec_extract (p, 7); }
+unsigned short extract_uhi_n_reg (vector unsigned short p, int n) { return vec_extract (p, n); }
+
+unsigned short extract_uhi_0_mem (vector unsigned short *p) { return vec_extract (*p, 0); }
+unsigned short extract_uhi_7_mem (vector unsigned short *p) { return vec_extract (*p, 7); }
+unsigned short extract_uhi_n_mem (vector unsigned short *p, int n) { return vec_extract (*p, n); }
+
+vector unsigned short insert_uhi_0 (vector unsigned short p, unsigned short x) { return vec_insert (x, p, 0); }
+vector unsigned short insert_uhi_7 (vector unsigned short p, unsigned short x) { return vec_insert (x, p, 7); }
+vector unsigned short insert_uhi_n (vector unsigned short p, unsigned short x, int n) { return vec_insert (x, p, n); }
+
+vector unsigned short splat_uhi_reg (unsigned short x) { return vec_splats (x); }
+vector unsigned short splat_uhi_mem (unsigned short *x) { return vec_splats (*x); }
+
+signed char extract_qi_0_reg (vector signed char p) { return vec_extract (p, 0); }
+signed char extract_qi_1_reg5 (vector signed char p) { return vec_extract (p, 15); }
+signed char extract_qi_n_reg (vector signed char p, int n) { return vec_extract (p, n); }
+
+signed char extract_qi_0_mem (vector signed char *p) { return vec_extract (*p, 0); }
+signed char extract_qi_1_mem5 (vector signed char *p) { return vec_extract (*p, 15); }
+signed char extract_qi_n_mem (vector signed char *p, int n) { return vec_extract (*p, n); }
+
+vector signed char insert_qi_0 (vector signed char p, signed char x) { return vec_insert (x, p, 0); }
+vector signed char insert_qi_15 (vector signed char p, signed char x) { return vec_insert (x, p, 15); }
+vector signed char insert_qi_n (vector signed char p, signed char x, int n) { return vec_insert (x, p, n); }
+
+vector signed char splat_qi_reg (signed char x) { return vec_splats (x); }
+vector signed char splat_qi_mem (signed char *x) { return vec_splats (*x); }
+
+unsigned char extract_uqi_0_reg (vector unsigned char p) { return vec_extract (p, 0); }
+unsigned char extract_uqi_1_reg5 (vector unsigned char p) { return vec_extract (p, 15); }
+unsigned char extract_uqi_n_reg (vector unsigned char p, int n) { return vec_extract (p, n); }
+
+unsigned char extract_uqi_0_mem (vector unsigned char *p) { return vec_extract (*p, 0); }
+unsigned char extract_uqi_1_mem5 (vector unsigned char *p) { return vec_extract (*p, 15); }
+unsigned char extract_uqi_n_mem (vector unsigned char *p, int n) { return vec_extract (*p, n); }
+
+vector unsigned char insert_uqi_0 (vector unsigned char p, unsigned char x) { return vec_insert (x, p, 0); }
+vector unsigned char insert_uqi_15 (vector unsigned char p, unsigned char x) { return vec_insert (x, p, 15); }
+vector unsigned char insert_uqi_n (vector unsigned char p, unsigned char x, int n) { return vec_insert (x, p, n); }
+
+vector unsigned char splat_uqi_reg (unsigned char x) { return vec_splats (x); }
+vector unsigned char splat_uqi_mem (unsigned char *x) { return vec_splats (*x); }
--
Michael Meissner, IBM
4 Technology Place Drive, MS 2203A, Westford, MA, 01886, USA
meissner@linux.vnet.ibm.com
More information about the Gcc-patches
mailing list