This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [PATCH 2/2] [ARM] PR68532 Fix up vzip recognition for big endian
- From: Charles Baylis <charles dot baylis at linaro dot org>
- To: Kyrill Tkachov <kyrylo dot tkachov at foss dot arm dot com>
- Cc: Ramana Radhakrishnan <Ramana dot Radhakrishnan at arm dot com>, Richard Earnshaw <richard dot earnshaw at arm dot com>, Richard Earnshaw <rearnsha at arm dot com>, GCC Patches <gcc-patches at gcc dot gnu dot org>, Michael Collison <michael dot collison at linaro dot org>
- Date: Tue, 9 Feb 2016 17:07:18 +0000
- Subject: Re: [PATCH 2/2] [ARM] PR68532 Fix up vzip recognition for big endian
- Authentication-results: sourceware.org; auth=none
- References: <1454525947-14690-1-git-send-email-charles dot baylis at linaro dot org> <1454525947-14690-3-git-send-email-charles dot baylis at linaro dot org> <56B87F23 dot 4030906 at foss dot arm dot com>
On 8 February 2016 at 11:42, Kyrill Tkachov <kyrylo.tkachov@foss.arm.com> wrote:
> On 03/02/16 18:59, charles.baylis@linaro.org wrote:
>> --- a/gcc/config/arm/arm.c
>> +++ b/gcc/config/arm/arm.c
>> @@ -28318,15 +28318,21 @@ arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
>> unsigned int i, high, mask, nelt = d->nelt;
>> rtx out0, out1, in0, in1;
>> rtx (*gen)(rtx, rtx, rtx, rtx);
>> + int first_elem;
>> + bool is_swapped;
>> if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
>> return false;
>> + is_swapped = BYTES_BIG_ENDIAN ? true : false;
>
>
> This is just "is_swapped = BYTES_BIG_ENDIAN;"
Done.
>> +
>> /* Note that these are little-endian tests. Adjust for big-endian
>> later. */
>
>
> I think you can remove this comment now, like in patch 1/2
Done.
>> + first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];
>> +
>> high = nelt / 2;
>> - if (d->perm[0] == high)
>> + if (first_elem == neon_endian_lane_map (d->vmode, high))
>> ;
>> - else if (d->perm[0] == 0)
>> + else if (first_elem == neon_endian_lane_map (d->vmode, 0))
>> high = 0;
>> else
>> return false;
>> @@ -28334,11 +28340,16 @@ arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
>> for (i = 0; i < nelt / 2; i++)
>> {
>> - unsigned elt = (i + high) & mask;
>> - if (d->perm[i * 2] != elt)
>> + unsigned elt =
>> + neon_pair_endian_lane_map (d->vmode, i + high) & mask;
>> + if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i +
>> is_swapped)]
>> + != elt)
>> return false;
>> - elt = (elt + nelt) & mask;
>> - if (d->perm[i * 2 + 1] != elt)
>> + elt =
>> + neon_pair_endian_lane_map (d->vmode, i + nelt + high)
>> + & mask;
>
>
> The "& mask" can go on the previous line.
Done
>> + if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i +
>> !is_swapped)]
>> + != elt)
>> return false;
>> }
>> @@ -28362,10 +28373,9 @@ arm_evpc_neon_vzip (struct expand_vec_perm_d
>> *d)
>> in0 = d->op0;
>> in1 = d->op1;
>> - if (BYTES_BIG_ENDIAN)
>> + if (is_swapped)
>> {
>> std::swap (in0, in1);
>> - high = !high;
>> }
>
>
> remove the braces around the std::swap.
Done.
> Ok with these changes.
> I've tried out both patch and they do fix execution failures on big-endian
> and don't break any NEON intrinsics tests that I threw at them.
Attached for completeness, will commit once the VUZP patch is OKd.
From 469f82610a4e70284bf23c373b8a73685cad0ec1 Mon Sep 17 00:00:00 2001
From: Charles Baylis <charles.baylis@linaro.org>
Date: Tue, 9 Feb 2016 15:18:44 +0000
Subject: [PATCH 2/2] [ARM] PR68532 Fix up vzip recognition for big endian
gcc/ChangeLog:
2016-02-09 Charles Baylis <charles.baylis@linaro.org>
PR target/68532
* config/arm/arm.c (arm_evpc_neon_vzip): Allow for big endian lane
order.
* config/arm/arm_neon.h (vzipq_s8): Adjust shuffle patterns for big
endian.
(vzipq_s16): Likewise.
(vzipq_s32): Likewise.
(vzipq_f32): Likewise.
(vzipq_u8): Likewise.
(vzipq_u16): Likewise.
(vzipq_u32): Likewise.
(vzipq_p8): Likewise.
(vzipq_p16): Likewise.
Change-Id: I327678f5e73c1de2f413c1d22769ab42ce1d6c16
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 95ee9a5..5562baa 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -28318,15 +28318,20 @@ arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
unsigned int i, high, mask, nelt = d->nelt;
rtx out0, out1, in0, in1;
rtx (*gen)(rtx, rtx, rtx, rtx);
+ int first_elem;
+ bool is_swapped;
if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
return false;
- /* Note that these are little-endian tests. Adjust for big-endian later. */
+ is_swapped = BYTES_BIG_ENDIAN;
+
+ first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];
+
high = nelt / 2;
- if (d->perm[0] == high)
+ if (first_elem == neon_endian_lane_map (d->vmode, high))
;
- else if (d->perm[0] == 0)
+ else if (first_elem == neon_endian_lane_map (d->vmode, 0))
high = 0;
else
return false;
@@ -28334,11 +28339,15 @@ arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
for (i = 0; i < nelt / 2; i++)
{
- unsigned elt = (i + high) & mask;
- if (d->perm[i * 2] != elt)
+ unsigned elt =
+ neon_pair_endian_lane_map (d->vmode, i + high) & mask;
+ if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
+ != elt)
return false;
- elt = (elt + nelt) & mask;
- if (d->perm[i * 2 + 1] != elt)
+ elt =
+ neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
+ if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
+ != elt)
return false;
}
@@ -28362,11 +28371,8 @@ arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
in0 = d->op0;
in1 = d->op1;
- if (BYTES_BIG_ENDIAN)
- {
- std::swap (in0, in1);
- high = !high;
- }
+ if (is_swapped)
+ std::swap (in0, in1);
out0 = d->target;
out1 = gen_reg_rtx (d->vmode);
diff --git a/gcc/config/arm/arm_neon.h b/gcc/config/arm/arm_neon.h
index 2e014b6..aa17f49 100644
--- a/gcc/config/arm/arm_neon.h
+++ b/gcc/config/arm/arm_neon.h
@@ -8453,9 +8453,9 @@ vzipq_s8 (int8x16_t __a, int8x16_t __b)
int8x16x2_t __rv;
#ifdef __ARM_BIG_ENDIAN
__rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t)
- { 24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15 });
+ { 20, 4, 21, 5, 22, 6, 23, 7, 16, 0, 17, 1, 18, 2, 19, 3 });
__rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t)
- { 16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7 });
+ { 28, 12, 29, 13, 30, 14, 31, 15, 24, 8, 25, 9, 26, 10, 27, 11 });
#else
__rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t)
{ 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 });
@@ -8471,9 +8471,9 @@ vzipq_s16 (int16x8_t __a, int16x8_t __b)
int16x8x2_t __rv;
#ifdef __ARM_BIG_ENDIAN
__rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
- { 12, 4, 13, 5, 14, 6, 15, 7 });
+ { 10, 2, 11, 3, 8, 0, 9, 1 });
__rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t)
- { 8, 0, 9, 1, 10, 2, 11, 3 });
+ { 14, 6, 15, 7, 12, 4, 13, 5 });
#else
__rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
{ 0, 8, 1, 9, 2, 10, 3, 11 });
@@ -8488,8 +8488,8 @@ vzipq_s32 (int32x4_t __a, int32x4_t __b)
{
int32x4x2_t __rv;
#ifdef __ARM_BIG_ENDIAN
- __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 6, 2, 7, 3 });
- __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 4, 0, 5, 1 });
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 5, 1, 4, 0 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 7, 3, 6, 2 });
#else
__rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 1, 5 });
__rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 2, 6, 3, 7 });
@@ -8502,8 +8502,8 @@ vzipq_f32 (float32x4_t __a, float32x4_t __b)
{
float32x4x2_t __rv;
#ifdef __ARM_BIG_ENDIAN
- __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 6, 2, 7, 3 });
- __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 4, 0, 5, 1 });
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 5, 1, 4, 0 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 7, 3, 6, 2 });
#else
__rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 1, 5 });
__rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 2, 6, 3, 7 });
@@ -8517,9 +8517,9 @@ vzipq_u8 (uint8x16_t __a, uint8x16_t __b)
uint8x16x2_t __rv;
#ifdef __ARM_BIG_ENDIAN
__rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t)
- { 24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15 });
+ { 20, 4, 21, 5, 22, 6, 23, 7, 16, 0, 17, 1, 18, 2, 19, 3 });
__rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t)
- { 16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7 });
+ { 28, 12, 29, 13, 30, 14, 31, 15, 24, 8, 25, 9, 26, 10, 27, 11 });
#else
__rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t)
{ 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 });
@@ -8535,9 +8535,9 @@ vzipq_u16 (uint16x8_t __a, uint16x8_t __b)
uint16x8x2_t __rv;
#ifdef __ARM_BIG_ENDIAN
__rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
- { 12, 4, 13, 5, 14, 6, 15, 7 });
+ { 10, 2, 11, 3, 8, 0, 9, 1 });
__rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t)
- { 8, 0, 9, 1, 10, 2, 11, 3 });
+ { 14, 6, 15, 7, 12, 4, 13, 5 });
#else
__rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
{ 0, 8, 1, 9, 2, 10, 3, 11 });
@@ -8552,8 +8552,8 @@ vzipq_u32 (uint32x4_t __a, uint32x4_t __b)
{
uint32x4x2_t __rv;
#ifdef __ARM_BIG_ENDIAN
- __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 6, 2, 7, 3 });
- __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 4, 0, 5, 1 });
+ __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 5, 1, 4, 0 });
+ __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 7, 3, 6, 2 });
#else
__rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 4, 1, 5 });
__rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 2, 6, 3, 7 });
@@ -8567,9 +8567,9 @@ vzipq_p8 (poly8x16_t __a, poly8x16_t __b)
poly8x16x2_t __rv;
#ifdef __ARM_BIG_ENDIAN
__rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t)
- { 24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15 });
+ { 20, 4, 21, 5, 22, 6, 23, 7, 16, 0, 17, 1, 18, 2, 19, 3 });
__rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t)
- { 16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7 });
+ { 28, 12, 29, 13, 30, 14, 31, 15, 24, 8, 25, 9, 26, 10, 27, 11 });
#else
__rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t)
{ 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 });
@@ -8585,9 +8585,9 @@ vzipq_p16 (poly16x8_t __a, poly16x8_t __b)
poly16x8x2_t __rv;
#ifdef __ARM_BIG_ENDIAN
__rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
- { 12, 4, 13, 5, 14, 6, 15, 7 });
+ { 10, 2, 11, 3, 8, 0, 9, 1 });
__rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t)
- { 8, 0, 9, 1, 10, 2, 11, 3 });
+ { 14, 6, 15, 7, 12, 4, 13, 5 });
#else
__rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
{ 0, 8, 1, 9, 2, 10, 3, 11 });
--
1.9.1