This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH, rs6000] Fix PR79261 (vec_xxpermdi is not endian-sensitive)
- From: Bill Schmidt <wschmidt at linux dot vnet dot ibm dot com>
- To: GCC Patches <gcc-patches at gcc dot gnu dot org>
- Cc: Segher Boessenkool <segher at kernel dot crashing dot org>, David Edelsohn <dje dot gcc at gmail dot com>
- Date: Thu, 16 Feb 2017 14:16:02 -0600
- Subject: [PATCH, rs6000] Fix PR79261 (vec_xxpermdi is not endian-sensitive)
- Authentication-results: sourceware.org; auth=none
Hi,
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=79261 records that the interface
vec_xxpermdi isn't implemented in a bi-endian fashion; instead, it produces
results appropriate for big-endian vector element numbering even when run on
a little endian machine. This is not part of the "official vector API" from
the ELFv2 ABI document, but should still have appropriate bi-endian behavior.
The problem appears to have arisen because some of GCC's internal usages of
the xxpermdi instruction wanted to use them in big-endian form, which is
natural to the original hardware instruction. The define_expand used for this
(vsx_xxpermdi_<mode>) is also the one that is used for the built-in forms.
Since these two uses require different semantics, I've created a new
vsx_xxpermdi_<mode>_be define_expand to be used for the internal uses, and
re-directed those usages there. That version retains the original semantics,
while the semantics for the built-ins are changed to be endian-sensitive.
I've added a runtime test to verify that the vec_xxpermdi intrinsics produce
correct results for the various vector modes in both big- and little-endian
environments.
Bootstrapped and tested on powerpc64[le]-unknown-linux-gnu with no regressions
in either environment. Is this ok for trunk, and after a suitable period, for
backport to GCC 5 and 6?
Thanks,
Bill
[gcc]
2017-02-16 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
PR target/79261
* config/rs6000/rs6000.c (rs6000_expand_ternop_builtin): Add
support for CODE_FOR_vsx_xxpermdi_v2d[fi]_be.
* config/rs6000/rs6000.md (reload_gpr_from_vsx<mode>): Call
generator for vsx_xxpermdi_<mode>_be.
* config/rs6000/vsx.md (vsx_xxpermdi_<mode>): Remove logic to
force big-endian semantics.
(vsx_xxpermdi_<mode>_be): New define_expand with same
implementation as previous version of vsx_xxpermdi_<mode>.
[gcc/testsuite]
2017-02-16 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
PR target/79261
* gcc.target/powerpc/vec-xxpermdi.c: New file.
Index: gcc/config/rs6000/rs6000.c
===================================================================
--- gcc/config/rs6000/rs6000.c (revision 245490)
+++ gcc/config/rs6000/rs6000.c (working copy)
@@ -15436,6 +15436,8 @@ rs6000_expand_ternop_builtin (enum insn_code icode
}
else if (icode == CODE_FOR_vsx_xxpermdi_v2df
|| icode == CODE_FOR_vsx_xxpermdi_v2di
+ || icode == CODE_FOR_vsx_xxpermdi_v2df_be
+ || icode == CODE_FOR_vsx_xxpermdi_v2di_be
|| icode == CODE_FOR_vsx_xxsldwi_v16qi
|| icode == CODE_FOR_vsx_xxsldwi_v8hi
|| icode == CODE_FOR_vsx_xxsldwi_v4si
Index: gcc/config/rs6000/rs6000.md
===================================================================
--- gcc/config/rs6000/rs6000.md (revision 245490)
+++ gcc/config/rs6000/rs6000.md (working copy)
@@ -8419,7 +8419,7 @@
rtx gpr_lo_reg = gen_lowpart (DFmode, dest);
emit_insn (gen_p8_mfvsrd_3_<mode> (gpr_hi_reg, src));
- emit_insn (gen_vsx_xxpermdi_<mode> (tmp, src, src, GEN_INT (3)));
+ emit_insn (gen_vsx_xxpermdi_<mode>_be (tmp, src, src, GEN_INT (3)));
emit_insn (gen_p8_mfvsrd_3_<mode> (gpr_lo_reg, tmp));
DONE;
}
Index: gcc/config/rs6000/vsx.md
===================================================================
--- gcc/config/rs6000/vsx.md (revision 245490)
+++ gcc/config/rs6000/vsx.md (working copy)
@@ -2461,6 +2461,38 @@
op1 = gen_lowpart (V2DImode, op1);
}
}
+ emit_insn (gen (target, op0, op1, perm0, perm1));
+ DONE;
+})
+
+;; Special version of xxpermdi that retains big-endian semantics.
+(define_expand "vsx_xxpermdi_<mode>_be"
+ [(match_operand:VSX_L 0 "vsx_register_operand" "")
+ (match_operand:VSX_L 1 "vsx_register_operand" "")
+ (match_operand:VSX_L 2 "vsx_register_operand" "")
+ (match_operand:QI 3 "u5bit_cint_operand" "")]
+ "VECTOR_MEM_VSX_P (<MODE>mode)"
+{
+ rtx target = operands[0];
+ rtx op0 = operands[1];
+ rtx op1 = operands[2];
+ int mask = INTVAL (operands[3]);
+ rtx perm0 = GEN_INT ((mask >> 1) & 1);
+ rtx perm1 = GEN_INT ((mask & 1) + 2);
+ rtx (*gen) (rtx, rtx, rtx, rtx, rtx);
+
+ if (<MODE>mode == V2DFmode)
+ gen = gen_vsx_xxpermdi2_v2df_1;
+ else
+ {
+ gen = gen_vsx_xxpermdi2_v2di_1;
+ if (<MODE>mode != V2DImode)
+ {
+ target = gen_lowpart (V2DImode, target);
+ op0 = gen_lowpart (V2DImode, op0);
+ op1 = gen_lowpart (V2DImode, op1);
+ }
+ }
/* In little endian mode, vsx_xxpermdi2_<mode>_1 will perform a
transformation we don't want; it is necessary for
rs6000_expand_vec_perm_const_1 but not for this use. So we
@@ -3870,7 +3902,7 @@
{
rtx op1 = operands[1];
rtx v4si_tmp = gen_reg_rtx (V4SImode);
- emit_insn (gen_vsx_xxpermdi_v4si (v4si_tmp, op1, op1, const1_rtx));
+ emit_insn (gen_vsx_xxpermdi_v4si_be (v4si_tmp, op1, op1, const1_rtx));
operands[1] = v4si_tmp;
operands[3] = GEN_INT (12 - INTVAL (operands[3]));
}
Index: gcc/testsuite/gcc.target/powerpc/vec-xxpermdi.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/vec-xxpermdi.c (nonexistent)
+++ gcc/testsuite/gcc.target/powerpc/vec-xxpermdi.c (working copy)
@@ -0,0 +1,68 @@
+/* { dg-do run { target { powerpc64*-*-* && vsx_hw } } } */
+/* { dg-options "-O2 -mvsx" } */
+
+/* Added for PR79261 to test that vec_xxpermdi works correctly for
+ both BE and LE targets. */
+
+#include <altivec.h>
+void abort (void);
+
+vector double vdx = { 0.0, 1.0 };
+vector double vdy = { 2.0, 3.0 };
+vector double vdz;
+
+vector signed long long vsllx = { 0, 1 };
+vector signed long long vslly = { 2, 3 };
+vector signed long long vsllz;
+
+vector float vfx = { 0.0, 1.0, 2.0, 3.0 };
+vector float vfy = { 4.0, 5.0, 6.0, 7.0 };
+vector float vfz;
+
+vector signed int vsix = { 0, 1, 2, 3 };
+vector signed int vsiy = { 4, 5, 6, 7 };
+vector signed int vsiz;
+
+vector signed short vssx = { 0, 1, 2, 3, 4, 5, 6, 7 };
+vector signed short vssy = { 8, 9, 10, 11, 12, 13, 14, 15 };
+vector signed short vssz;
+
+vector signed char vscx = { 0, 1, 2, 3, 4, 5, 6, 7,
+ 8, 9, 10, 11, 12, 13, 14, 15 };
+vector signed char vscy = { 16, 17, 18, 19, 20, 21, 22, 23,
+ 24, 25, 26, 27, 28, 29, 30, 31 };
+vector signed char vscz;
+
+int
+main ()
+{
+ vdz = vec_xxpermdi (vdx, vdy, 0b01);
+ if (vdz[0] != 0.0 || vdz[1] != 3.0)
+ abort ();
+
+ vsllz = vec_xxpermdi (vsllx, vslly, 0b10);
+ if (vsllz[0] != 1 || vsllz[1] != 2)
+ abort ();
+
+ vfz = vec_xxpermdi (vfx, vfy, 0b01);
+ if (vfz[0] != 0.0 || vfz[1] != 1.0 || vfz[2] != 6.0 || vfz[3] != 7.0)
+ abort ();
+
+ vsiz = vec_xxpermdi (vsix, vsiy, 0b10);
+ if (vsiz[0] != 2 || vsiz[1] != 3 || vsiz[2] != 4 || vsiz[3] != 5)
+ abort ();
+
+ vssz = vec_xxpermdi (vssx, vssy, 0b00);
+ if (vssz[0] != 0 || vssz[1] != 1 || vssz[2] != 2 || vssz[3] != 3
+ || vssz[4] != 8 || vssz[5] != 9 || vssz[6] != 10 || vssz[7] != 11)
+ abort ();
+
+ vscz = vec_xxpermdi (vscx, vscy, 0b11);
+ if (vscz[0] != 8 || vscz[1] != 9 || vscz[2] != 10 || vscz[3] != 11
+ || vscz[4] != 12 || vscz[5] != 13 || vscz[6] != 14 || vscz[7] != 15
+ || vscz[8] != 24 || vscz[9] != 25 || vscz[10] != 26 || vscz[11] != 27
+ || vscz[12] != 28 || vscz[13] != 29 || vscz[14] != 30 || vscz[15] != 31)
+ abort ();
+
+ return 0;
+}