This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH, ARM] Constant vector permute for the Neon vext insn


On 28 August 2012 16:20, Christophe Lyon <christophe.lyon@linaro.org> wrote:
> This makes writing exhaustive, portable (big and little endian),
> executable tests a painful task.
>
For instance, considering the attached sample code, I obtain a
different result in big-endian vs little-endian, while the input
values are the same.
Indeed, in both cases, the program prints:
__a[0] = 0 ... __a[7] = 7
__b[0] = 8 ... __b[7] = 15
__mask1[0] = 2 ... __mask1[7] = 9
but in little-endian, the result of __builtin_shuffle(__a, __b, __mask1) is:
mem[0] = 2, mem[1] = 3, ... mem[7] = 9
while in big-endian it is:
mem[0] = 5, mem[1] = 4, ... mem[5] = 0, mem[6] = 15, mem[7] = 14

What am I missing?

Thanks,

Christophe.
/* { dg-do run } */
/* { dg-require-effective-target arm_neon_ok } */
/* { dg-options "-O2" } */
/* { dg-add-options arm_neon } */

#include <arm_neon.h>
#include <stdlib.h>
#include <stdio.h>

/* Dump both input vectors and the shuffle mask to stderr, one lane per
   line in the order vst1_u8 stores them to memory, then return
   __builtin_shuffle (__a, __b, __mask1).  */
uint8x8_t
tst_vext_u8 (uint8x8_t __a, uint8x8_t __b)
{
  /* The mask initializer is written in the opposite order when __ARMEL__
     is not defined.  */
#ifndef __ARMEL__
  uint8x8_t __mask1 = {9, 8, 7, 6, 5, 4, 3, 2};
#else
  uint8x8_t __mask1 = {2, 3, 4, 5, 6, 7, 8, 9};
#endif

  /* Scratch storage that vst1_u8 fills before each dump; only the byte
     array member is accessed here.  */
  union
  {
    uint8x8_t v;
    uint8_t buf[8];
  } lanes;
  int lane;

  /* First operand.  */
  vst1_u8 (lanes.buf, __a);
  lane = 0;
  while (lane < 8)
    {
      fprintf (stderr, "__a[%d]=%d\n", lane, lanes.buf[lane]);
      lane++;
    }

  /* Second operand.  */
  vst1_u8 (lanes.buf, __b);
  lane = 0;
  while (lane < 8)
    {
      fprintf (stderr, "__b[%d]=%d\n", lane, lanes.buf[lane]);
      lane++;
    }

  /* Selector mask, as it appears once stored to memory.  */
  vst1_u8 (lanes.buf, __mask1);
  lane = 0;
  while (lane < 8)
    {
      fprintf (stderr, "__mask1[%d]=%d\n", lane, lanes.buf[lane]);
      lane++;
    }

  return __builtin_shuffle (__a, __b, __mask1);
}

/* Load the bytes 0..7 and 8..15 into two vectors, pass them through
   tst_vext_u8, and print every lane of the returned vector to stdout in
   the order vst1_u8 stores them.  */
int
main (void)
{
  uint8_t low_half[] = {0, 1, 2, 3, 4, 5, 6, 7};
  uint8_t high_half[] = {8, 9, 10, 11, 12, 13, 14, 15};

  /* Scratch storage for reading the result back lane by lane; only the
     byte array member is accessed here.  */
  union
  {
    uint8x8_t v;
    uint8_t buf[8];
  } out;
  int lane;

  uint8x8_t shuffled = tst_vext_u8 (vld1_u8 (low_half), vld1_u8 (high_half));

  vst1_u8 (out.buf, shuffled);

  lane = 0;
  while (lane < 8)
    {
      printf ("mem_u8x8[%d]=%d\n", lane, out.buf[lane]);
      lane++;
    }

  return 0;
}

Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]