This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[ARM] [1/3] Refactor NEON builtin code
- From: Jie Zhang <jie at codesourcery dot com>
- To: gcc-patches at gcc dot gnu dot org
- Date: Mon, 11 Oct 2010 15:43:56 +0800
- Subject: [ARM] [1/3] Refactor NEON builtin code
This patch series implements TARGET_BUILTIN_DECL for ARM. This first
patch refactors the NEON builtin code to make it simpler and to make it
possible to count the number of NEON builtins at compile time.
The current code uses two nested loops to deal with NEON builtins. The
data structure used to store NEON builtins makes it impossible to count
the number of NEON builtins at compile time. Without changing it,
defining arm_builtin_decls[] is not as simple as it is for other targets.
This patch changes the code to use a single-level loop. The benefits are:
* possible to count the number of NEON builtins at compile time, thus
easy to define arm_builtin_decls[].
* neon_builtin_compare and locate_neon_builtin_icode can be removed,
since fcode (after subtracting a base) is simply the array index used to
locate the NEON builtin data.
The downside is that neon_builtin_data[] will be a little larger.
From:
text data bss dec hex filename
12073946 51016 1379840 13504802 ce1122 cc1
to:
text data bss dec hex filename
12073410 58824 1379840 13512074 ce2d8a cc1
But the overall change is relatively small.
After this patch, arm_init_neon_builtins needs to be re-indented. To ease
patch review, I do that in the second patch, so that it is easy to see
what is changed in this patch.
Any comments? Is it OK?
Regards,
--
Jie Zhang
CodeSourcery
* config/arm/arm.c (neon_builtin_type_bits): Remove.
(typedef enum neon_builtin_mode): New.
(T_MAX): Don't define.
(typedef struct neon_builtin_datum): Remove bits, codes[],
num_vars and base_fcode. Add mode, code and fcode.
(VAR1, VAR2, VAR3, VAR4, VAR5, VAR6, VAR7, VAR8, VAR9,
VAR10): Change accordingly.
(neon_builtin_data[]): Change accordingly.
(arm_init_neon_builtins): Change accordingly.
(neon_builtin_compare): Remove.
(locate_neon_builtin_icode): Remove.
(arm_expand_neon_builtin): Change accordingly.
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 8ad312e..763aea7 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -18005,21 +18005,22 @@ arm_init_tls_builtins (void)
TREE_READONLY (decl) = 1;
}
-enum neon_builtin_type_bits {
- T_V8QI = 0x0001,
- T_V4HI = 0x0002,
- T_V2SI = 0x0004,
- T_V2SF = 0x0008,
- T_DI = 0x0010,
- T_V16QI = 0x0020,
- T_V8HI = 0x0040,
- T_V4SI = 0x0080,
- T_V4SF = 0x0100,
- T_V2DI = 0x0200,
- T_TI = 0x0400,
- T_EI = 0x0800,
- T_OI = 0x1000
-};
+typedef enum {
+ T_V8QI,
+ T_V4HI,
+ T_V2SI,
+ T_V2SF,
+ T_DI,
+ T_V16QI,
+ T_V8HI,
+ T_V4SI,
+ T_V4SF,
+ T_V2DI,
+ T_TI,
+ T_EI,
+ T_OI,
+ T_MAX
+} neon_builtin_mode;
#define v8qi_UP T_V8QI
#define v4hi_UP T_V4HI
@@ -18037,8 +18038,6 @@ enum neon_builtin_type_bits {
#define UP(X) X##_UP
-#define T_MAX 13
-
typedef enum {
NEON_BINOP,
NEON_TERNOP,
@@ -18082,49 +18081,42 @@ typedef enum {
typedef struct {
const char *name;
const neon_itype itype;
- const int bits;
- const enum insn_code codes[T_MAX];
- const unsigned int num_vars;
- unsigned int base_fcode;
+ const neon_builtin_mode mode;
+ const enum insn_code code;
+ unsigned int fcode;
} neon_builtin_datum;
#define CF(N,X) CODE_FOR_neon_##N##X
#define VAR1(T, N, A) \
- #N, NEON_##T, UP (A), { CF (N, A) }, 1, 0
+ {#N, NEON_##T, UP (A), CF (N, A), 0}
#define VAR2(T, N, A, B) \
- #N, NEON_##T, UP (A) | UP (B), { CF (N, A), CF (N, B) }, 2, 0
+ VAR1 (T, N, A), \
+ {#N, NEON_##T, UP (B), CF (N, B), 0}
#define VAR3(T, N, A, B, C) \
- #N, NEON_##T, UP (A) | UP (B) | UP (C), \
- { CF (N, A), CF (N, B), CF (N, C) }, 3, 0
+ VAR2 (T, N, A, B), \
+ {#N, NEON_##T, UP (C), CF (N, C), 0}
#define VAR4(T, N, A, B, C, D) \
- #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D), \
- { CF (N, A), CF (N, B), CF (N, C), CF (N, D) }, 4, 0
+ VAR3 (T, N, A, B, C), \
+ {#N, NEON_##T, UP (D), CF (N, D), 0}
#define VAR5(T, N, A, B, C, D, E) \
- #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E), \
- { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E) }, 5, 0
+ VAR4 (T, N, A, B, C, D), \
+ {#N, NEON_##T, UP (E), CF (N, E), 0}
#define VAR6(T, N, A, B, C, D, E, F) \
- #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F), \
- { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F) }, 6, 0
+ VAR5 (T, N, A, B, C, D, E), \
+ {#N, NEON_##T, UP (F), CF (N, F), 0}
#define VAR7(T, N, A, B, C, D, E, F, G) \
- #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G), \
- { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
- CF (N, G) }, 7, 0
+ VAR6 (T, N, A, B, C, D, E, F), \
+ {#N, NEON_##T, UP (G), CF (N, G), 0}
#define VAR8(T, N, A, B, C, D, E, F, G, H) \
- #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
- | UP (H), \
- { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
- CF (N, G), CF (N, H) }, 8, 0
+ VAR7 (T, N, A, B, C, D, E, F, G), \
+ {#N, NEON_##T, UP (H), CF (N, H), 0}
#define VAR9(T, N, A, B, C, D, E, F, G, H, I) \
- #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
- | UP (H) | UP (I), \
- { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
- CF (N, G), CF (N, H), CF (N, I) }, 9, 0
+ VAR8 (T, N, A, B, C, D, E, F, G, H), \
+ {#N, NEON_##T, UP (I), CF (N, I), 0}
#define VAR10(T, N, A, B, C, D, E, F, G, H, I, J) \
- #N, NEON_##T, UP (A) | UP (B) | UP (C) | UP (D) | UP (E) | UP (F) | UP (G) \
- | UP (H) | UP (I) | UP (J), \
- { CF (N, A), CF (N, B), CF (N, C), CF (N, D), CF (N, E), CF (N, F), \
- CF (N, G), CF (N, H), CF (N, I), CF (N, J) }, 10, 0
+ VAR9 (T, N, A, B, C, D, E, F, G, H, I), \
+ {#N, NEON_##T, UP (J), CF (N, J), 0}
/* The mode entries in the following table correspond to the "key" type of the
instruction variant, i.e. equivalent to that which would be specified after
@@ -18132,192 +18124,190 @@ typedef struct {
(Signed/unsigned/polynomial types are not differentiated between though, and
are all mapped onto the same mode for a given element size.) The modes
listed per instruction should be the same as those defined for that
- instruction's pattern in neon.md.
- WARNING: Variants should be listed in the same increasing order as
- neon_builtin_type_bits. */
+ instruction's pattern in neon.md. */
static neon_builtin_datum neon_builtin_data[] =
{
- { VAR10 (BINOP, vadd,
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
- { VAR3 (BINOP, vaddl, v8qi, v4hi, v2si) },
- { VAR3 (BINOP, vaddw, v8qi, v4hi, v2si) },
- { VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
- { VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
- { VAR3 (BINOP, vaddhn, v8hi, v4si, v2di) },
- { VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
- { VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
- { VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si) },
- { VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
- { VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si) },
- { VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si) },
- { VAR2 (TERNOP, vqdmlal, v4hi, v2si) },
- { VAR2 (TERNOP, vqdmlsl, v4hi, v2si) },
- { VAR3 (BINOP, vmull, v8qi, v4hi, v2si) },
- { VAR2 (SCALARMULL, vmull_n, v4hi, v2si) },
- { VAR2 (LANEMULL, vmull_lane, v4hi, v2si) },
- { VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si) },
- { VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si) },
- { VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si) },
- { VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si) },
- { VAR2 (BINOP, vqdmull, v4hi, v2si) },
- { VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
- { VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
- { VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
- { VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di) },
- { VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di) },
- { VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di) },
- { VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
- { VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
- { VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
- { VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si) },
- { VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
- { VAR10 (BINOP, vsub,
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
- { VAR3 (BINOP, vsubl, v8qi, v4hi, v2si) },
- { VAR3 (BINOP, vsubw, v8qi, v4hi, v2si) },
- { VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
- { VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
- { VAR3 (BINOP, vsubhn, v8hi, v4si, v2di) },
- { VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
- { VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
- { VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
- { VAR2 (BINOP, vcage, v2sf, v4sf) },
- { VAR2 (BINOP, vcagt, v2sf, v4sf) },
- { VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
- { VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
- { VAR3 (BINOP, vabdl, v8qi, v4hi, v2si) },
- { VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
- { VAR3 (TERNOP, vabal, v8qi, v4hi, v2si) },
- { VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
- { VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
- { VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf) },
- { VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
- { VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
- { VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf) },
- { VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf) },
- { VAR2 (BINOP, vrecps, v2sf, v4sf) },
- { VAR2 (BINOP, vrsqrts, v2sf, v4sf) },
- { VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
- { VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di) },
- { VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
- { VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
- { VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
- { VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
- { VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
- { VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
- { VAR2 (UNOP, vcnt, v8qi, v16qi) },
- { VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf) },
- { VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf) },
- { VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si) },
+ VAR10 (BINOP, vadd,
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
+ VAR3 (BINOP, vaddl, v8qi, v4hi, v2si),
+ VAR3 (BINOP, vaddw, v8qi, v4hi, v2si),
+ VAR6 (BINOP, vhadd, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
+ VAR8 (BINOP, vqadd, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
+ VAR3 (BINOP, vaddhn, v8hi, v4si, v2di),
+ VAR8 (BINOP, vmul, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
+ VAR8 (TERNOP, vmla, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
+ VAR3 (TERNOP, vmlal, v8qi, v4hi, v2si),
+ VAR8 (TERNOP, vmls, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
+ VAR3 (TERNOP, vmlsl, v8qi, v4hi, v2si),
+ VAR4 (BINOP, vqdmulh, v4hi, v2si, v8hi, v4si),
+ VAR2 (TERNOP, vqdmlal, v4hi, v2si),
+ VAR2 (TERNOP, vqdmlsl, v4hi, v2si),
+ VAR3 (BINOP, vmull, v8qi, v4hi, v2si),
+ VAR2 (SCALARMULL, vmull_n, v4hi, v2si),
+ VAR2 (LANEMULL, vmull_lane, v4hi, v2si),
+ VAR2 (SCALARMULL, vqdmull_n, v4hi, v2si),
+ VAR2 (LANEMULL, vqdmull_lane, v4hi, v2si),
+ VAR4 (SCALARMULH, vqdmulh_n, v4hi, v2si, v8hi, v4si),
+ VAR4 (LANEMULH, vqdmulh_lane, v4hi, v2si, v8hi, v4si),
+ VAR2 (BINOP, vqdmull, v4hi, v2si),
+ VAR8 (BINOP, vshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
+ VAR8 (BINOP, vqshl, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
+ VAR8 (SHIFTIMM, vshr_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
+ VAR3 (SHIFTIMM, vshrn_n, v8hi, v4si, v2di),
+ VAR3 (SHIFTIMM, vqshrn_n, v8hi, v4si, v2di),
+ VAR3 (SHIFTIMM, vqshrun_n, v8hi, v4si, v2di),
+ VAR8 (SHIFTIMM, vshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
+ VAR8 (SHIFTIMM, vqshl_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
+ VAR8 (SHIFTIMM, vqshlu_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
+ VAR3 (SHIFTIMM, vshll_n, v8qi, v4hi, v2si),
+ VAR8 (SHIFTACC, vsra_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
+ VAR10 (BINOP, vsub,
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
+ VAR3 (BINOP, vsubl, v8qi, v4hi, v2si),
+ VAR3 (BINOP, vsubw, v8qi, v4hi, v2si),
+ VAR8 (BINOP, vqsub, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
+ VAR6 (BINOP, vhsub, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
+ VAR3 (BINOP, vsubhn, v8hi, v4si, v2di),
+ VAR8 (BINOP, vceq, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
+ VAR8 (BINOP, vcge, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
+ VAR8 (BINOP, vcgt, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
+ VAR2 (BINOP, vcage, v2sf, v4sf),
+ VAR2 (BINOP, vcagt, v2sf, v4sf),
+ VAR6 (BINOP, vtst, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
+ VAR8 (BINOP, vabd, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
+ VAR3 (BINOP, vabdl, v8qi, v4hi, v2si),
+ VAR6 (TERNOP, vaba, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
+ VAR3 (TERNOP, vabal, v8qi, v4hi, v2si),
+ VAR8 (BINOP, vmax, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
+ VAR8 (BINOP, vmin, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
+ VAR4 (BINOP, vpadd, v8qi, v4hi, v2si, v2sf),
+ VAR6 (UNOP, vpaddl, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
+ VAR6 (BINOP, vpadal, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
+ VAR4 (BINOP, vpmax, v8qi, v4hi, v2si, v2sf),
+ VAR4 (BINOP, vpmin, v8qi, v4hi, v2si, v2sf),
+ VAR2 (BINOP, vrecps, v2sf, v4sf),
+ VAR2 (BINOP, vrsqrts, v2sf, v4sf),
+ VAR8 (SHIFTINSERT, vsri_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
+ VAR8 (SHIFTINSERT, vsli_n, v8qi, v4hi, v2si, di, v16qi, v8hi, v4si, v2di),
+ VAR8 (UNOP, vabs, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
+ VAR6 (UNOP, vqabs, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
+ VAR8 (UNOP, vneg, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
+ VAR6 (UNOP, vqneg, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
+ VAR6 (UNOP, vcls, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
+ VAR6 (UNOP, vclz, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
+ VAR2 (UNOP, vcnt, v8qi, v16qi),
+ VAR4 (UNOP, vrecpe, v2si, v2sf, v4si, v4sf),
+ VAR4 (UNOP, vrsqrte, v2si, v2sf, v4si, v4sf),
+ VAR6 (UNOP, vmvn, v8qi, v4hi, v2si, v16qi, v8hi, v4si),
/* FIXME: vget_lane supports more variants than this! */
- { VAR10 (GETLANE, vget_lane,
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
- { VAR10 (SETLANE, vset_lane,
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
- { VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di) },
- { VAR10 (DUP, vdup_n,
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
- { VAR10 (DUPLANE, vdup_lane,
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
- { VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di) },
- { VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di) },
- { VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di) },
- { VAR3 (UNOP, vmovn, v8hi, v4si, v2di) },
- { VAR3 (UNOP, vqmovn, v8hi, v4si, v2di) },
- { VAR3 (UNOP, vqmovun, v8hi, v4si, v2di) },
- { VAR3 (UNOP, vmovl, v8qi, v4hi, v2si) },
- { VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
- { VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
- { VAR2 (LANEMAC, vmlal_lane, v4hi, v2si) },
- { VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si) },
- { VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
- { VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si) },
- { VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si) },
- { VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
- { VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
- { VAR2 (SCALARMAC, vmlal_n, v4hi, v2si) },
- { VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si) },
- { VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
- { VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si) },
- { VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si) },
- { VAR10 (BINOP, vext,
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
- { VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
- { VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi) },
- { VAR2 (UNOP, vrev16, v8qi, v16qi) },
- { VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf) },
- { VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf) },
- { VAR10 (SELECT, vbsl,
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
- { VAR1 (VTBL, vtbl1, v8qi) },
- { VAR1 (VTBL, vtbl2, v8qi) },
- { VAR1 (VTBL, vtbl3, v8qi) },
- { VAR1 (VTBL, vtbl4, v8qi) },
- { VAR1 (VTBX, vtbx1, v8qi) },
- { VAR1 (VTBX, vtbx2, v8qi) },
- { VAR1 (VTBX, vtbx3, v8qi) },
- { VAR1 (VTBX, vtbx4, v8qi) },
- { VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
- { VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
- { VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf) },
- { VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di) },
- { VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di) },
- { VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di) },
- { VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di) },
- { VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di) },
- { VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di) },
- { VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di) },
- { VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di) },
- { VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di) },
- { VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di) },
- { VAR10 (LOAD1, vld1,
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
- { VAR10 (LOAD1LANE, vld1_lane,
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
- { VAR10 (LOAD1, vld1_dup,
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
- { VAR10 (STORE1, vst1,
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
- { VAR10 (STORE1LANE, vst1_lane,
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
- { VAR9 (LOADSTRUCT,
- vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
- { VAR7 (LOADSTRUCTLANE, vld2_lane,
- v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
- { VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di) },
- { VAR9 (STORESTRUCT, vst2,
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
- { VAR7 (STORESTRUCTLANE, vst2_lane,
- v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
- { VAR9 (LOADSTRUCT,
- vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
- { VAR7 (LOADSTRUCTLANE, vld3_lane,
- v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
- { VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di) },
- { VAR9 (STORESTRUCT, vst3,
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
- { VAR7 (STORESTRUCTLANE, vst3_lane,
- v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
- { VAR9 (LOADSTRUCT, vld4,
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
- { VAR7 (LOADSTRUCTLANE, vld4_lane,
- v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
- { VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di) },
- { VAR9 (STORESTRUCT, vst4,
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf) },
- { VAR7 (STORESTRUCTLANE, vst4_lane,
- v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf) },
- { VAR10 (LOGICBINOP, vand,
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
- { VAR10 (LOGICBINOP, vorr,
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
- { VAR10 (BINOP, veor,
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
- { VAR10 (LOGICBINOP, vbic,
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) },
- { VAR10 (LOGICBINOP, vorn,
- v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di) }
+ VAR10 (GETLANE, vget_lane,
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
+ VAR10 (SETLANE, vset_lane,
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
+ VAR5 (CREATE, vcreate, v8qi, v4hi, v2si, v2sf, di),
+ VAR10 (DUP, vdup_n,
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
+ VAR10 (DUPLANE, vdup_lane,
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
+ VAR5 (COMBINE, vcombine, v8qi, v4hi, v2si, v2sf, di),
+ VAR5 (SPLIT, vget_high, v16qi, v8hi, v4si, v4sf, v2di),
+ VAR5 (SPLIT, vget_low, v16qi, v8hi, v4si, v4sf, v2di),
+ VAR3 (UNOP, vmovn, v8hi, v4si, v2di),
+ VAR3 (UNOP, vqmovn, v8hi, v4si, v2di),
+ VAR3 (UNOP, vqmovun, v8hi, v4si, v2di),
+ VAR3 (UNOP, vmovl, v8qi, v4hi, v2si),
+ VAR6 (LANEMUL, vmul_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
+ VAR6 (LANEMAC, vmla_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
+ VAR2 (LANEMAC, vmlal_lane, v4hi, v2si),
+ VAR2 (LANEMAC, vqdmlal_lane, v4hi, v2si),
+ VAR6 (LANEMAC, vmls_lane, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
+ VAR2 (LANEMAC, vmlsl_lane, v4hi, v2si),
+ VAR2 (LANEMAC, vqdmlsl_lane, v4hi, v2si),
+ VAR6 (SCALARMUL, vmul_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
+ VAR6 (SCALARMAC, vmla_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
+ VAR2 (SCALARMAC, vmlal_n, v4hi, v2si),
+ VAR2 (SCALARMAC, vqdmlal_n, v4hi, v2si),
+ VAR6 (SCALARMAC, vmls_n, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
+ VAR2 (SCALARMAC, vmlsl_n, v4hi, v2si),
+ VAR2 (SCALARMAC, vqdmlsl_n, v4hi, v2si),
+ VAR10 (BINOP, vext,
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
+ VAR8 (UNOP, vrev64, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
+ VAR4 (UNOP, vrev32, v8qi, v4hi, v16qi, v8hi),
+ VAR2 (UNOP, vrev16, v8qi, v16qi),
+ VAR4 (CONVERT, vcvt, v2si, v2sf, v4si, v4sf),
+ VAR4 (FIXCONV, vcvt_n, v2si, v2sf, v4si, v4sf),
+ VAR10 (SELECT, vbsl,
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
+ VAR1 (VTBL, vtbl1, v8qi),
+ VAR1 (VTBL, vtbl2, v8qi),
+ VAR1 (VTBL, vtbl3, v8qi),
+ VAR1 (VTBL, vtbl4, v8qi),
+ VAR1 (VTBX, vtbx1, v8qi),
+ VAR1 (VTBX, vtbx2, v8qi),
+ VAR1 (VTBX, vtbx3, v8qi),
+ VAR1 (VTBX, vtbx4, v8qi),
+ VAR8 (RESULTPAIR, vtrn, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
+ VAR8 (RESULTPAIR, vzip, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
+ VAR8 (RESULTPAIR, vuzp, v8qi, v4hi, v2si, v2sf, v16qi, v8hi, v4si, v4sf),
+ VAR5 (REINTERP, vreinterpretv8qi, v8qi, v4hi, v2si, v2sf, di),
+ VAR5 (REINTERP, vreinterpretv4hi, v8qi, v4hi, v2si, v2sf, di),
+ VAR5 (REINTERP, vreinterpretv2si, v8qi, v4hi, v2si, v2sf, di),
+ VAR5 (REINTERP, vreinterpretv2sf, v8qi, v4hi, v2si, v2sf, di),
+ VAR5 (REINTERP, vreinterpretdi, v8qi, v4hi, v2si, v2sf, di),
+ VAR5 (REINTERP, vreinterpretv16qi, v16qi, v8hi, v4si, v4sf, v2di),
+ VAR5 (REINTERP, vreinterpretv8hi, v16qi, v8hi, v4si, v4sf, v2di),
+ VAR5 (REINTERP, vreinterpretv4si, v16qi, v8hi, v4si, v4sf, v2di),
+ VAR5 (REINTERP, vreinterpretv4sf, v16qi, v8hi, v4si, v4sf, v2di),
+ VAR5 (REINTERP, vreinterpretv2di, v16qi, v8hi, v4si, v4sf, v2di),
+ VAR10 (LOAD1, vld1,
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
+ VAR10 (LOAD1LANE, vld1_lane,
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
+ VAR10 (LOAD1, vld1_dup,
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
+ VAR10 (STORE1, vst1,
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
+ VAR10 (STORE1LANE, vst1_lane,
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
+ VAR9 (LOADSTRUCT,
+ vld2, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
+ VAR7 (LOADSTRUCTLANE, vld2_lane,
+ v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
+ VAR5 (LOADSTRUCT, vld2_dup, v8qi, v4hi, v2si, v2sf, di),
+ VAR9 (STORESTRUCT, vst2,
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
+ VAR7 (STORESTRUCTLANE, vst2_lane,
+ v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
+ VAR9 (LOADSTRUCT,
+ vld3, v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
+ VAR7 (LOADSTRUCTLANE, vld3_lane,
+ v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
+ VAR5 (LOADSTRUCT, vld3_dup, v8qi, v4hi, v2si, v2sf, di),
+ VAR9 (STORESTRUCT, vst3,
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
+ VAR7 (STORESTRUCTLANE, vst3_lane,
+ v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
+ VAR9 (LOADSTRUCT, vld4,
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
+ VAR7 (LOADSTRUCTLANE, vld4_lane,
+ v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
+ VAR5 (LOADSTRUCT, vld4_dup, v8qi, v4hi, v2si, v2sf, di),
+ VAR9 (STORESTRUCT, vst4,
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf),
+ VAR7 (STORESTRUCTLANE, vst4_lane,
+ v8qi, v4hi, v2si, v2sf, v8hi, v4si, v4sf),
+ VAR10 (LOGICBINOP, vand,
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
+ VAR10 (LOGICBINOP, vorr,
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
+ VAR10 (BINOP, veor,
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
+ VAR10 (LOGICBINOP, vbic,
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di),
+ VAR10 (LOGICBINOP, vorn,
+ v8qi, v4hi, v2si, v2sf, di, v16qi, v8hi, v4si, v4sf, v2di)
};
#undef CF
@@ -18586,25 +18576,16 @@ arm_init_neon_builtins (void)
for (i = 0; i < ARRAY_SIZE (neon_builtin_data); i++)
{
neon_builtin_datum *d = &neon_builtin_data[i];
- unsigned int j, codeidx = 0;
- d->base_fcode = fcode;
-
- for (j = 0; j < T_MAX; j++)
- {
const char* const modenames[] = {
"v8qi", "v4hi", "v2si", "v2sf", "di",
"v16qi", "v8hi", "v4si", "v4sf", "v2di"
};
char namebuf[60];
tree ftype = NULL;
- enum insn_code icode;
int is_load = 0, is_store = 0;
- if ((d->bits & (1 << j)) == 0)
- continue;
-
- icode = d->codes[codeidx++];
+ d->fcode = fcode;
switch (d->itype)
{
@@ -18655,7 +18636,7 @@ arm_init_neon_builtins (void)
/* Build a function type directly from the insn_data for this
builtin. The build_function_type() function takes care of
removing duplicates for us. */
- for (k = insn_data[icode].n_operands - 1; k >= 0; k--)
+ for (k = insn_data[d->code].n_operands - 1; k >= 0; k--)
{
tree eltype;
@@ -18665,9 +18646,9 @@ arm_init_neon_builtins (void)
(a SImode pointer) in the operand 1 position. We
want a const pointer to the element type in that
position. */
- gcc_assert (insn_data[icode].operand[k].mode == SImode);
+ gcc_assert (insn_data[d->code].operand[k].mode == SImode);
- switch (1 << j)
+ switch (d->mode)
{
case T_V8QI:
case T_V16QI:
@@ -18703,9 +18684,9 @@ arm_init_neon_builtins (void)
the memory location to store to (a SImode pointer).
Use a pointer to the element type of the store in
that position. */
- gcc_assert (insn_data[icode].operand[k].mode == SImode);
+ gcc_assert (insn_data[d->code].operand[k].mode == SImode);
- switch (1 << j)
+ switch (d->mode)
{
case T_V8QI:
case T_V16QI:
@@ -18737,7 +18718,7 @@ arm_init_neon_builtins (void)
}
else
{
- switch (insn_data[icode].operand[k].mode)
+ switch (insn_data[d->code].operand[k].mode)
{
case VOIDmode: eltype = void_type_node; break;
/* Scalars. */
@@ -18778,7 +18759,7 @@ arm_init_neon_builtins (void)
case NEON_RESULTPAIR:
{
- switch (insn_data[icode].operand[1].mode)
+ switch (insn_data[d->code].operand[1].mode)
{
case V8QImode: ftype = void_ftype_pv8qi_v8qi_v8qi; break;
case V4HImode: ftype = void_ftype_pv4hi_v4hi_v4hi; break;
@@ -18799,8 +18780,8 @@ arm_init_neon_builtins (void)
{
/* We iterate over 5 doubleword types, then 5 quadword
types. */
- int rhs = j % 5;
- switch (insn_data[icode].operand[0].mode)
+ int rhs = d->mode % 5;
+ switch (insn_data[d->code].operand[0].mode)
{
case V8QImode: ftype = reinterp_ftype_dreg[0][rhs]; break;
case V4HImode: ftype = reinterp_ftype_dreg[1][rhs]; break;
@@ -18823,11 +18804,10 @@ arm_init_neon_builtins (void)
gcc_assert (ftype != NULL);
- sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[j]);
+ sprintf (namebuf, "__builtin_neon_%s%s", d->name, modenames[d->mode]);
add_builtin_function (namebuf, ftype, fcode++, BUILT_IN_MD, NULL,
NULL_TREE);
- }
}
}
@@ -19006,42 +18986,6 @@ arm_expand_unop_builtin (enum insn_code icode,
return target;
}
-static int
-neon_builtin_compare (const void *a, const void *b)
-{
- const neon_builtin_datum *const key = (const neon_builtin_datum *) a;
- const neon_builtin_datum *const memb = (const neon_builtin_datum *) b;
- unsigned int soughtcode = key->base_fcode;
-
- if (soughtcode >= memb->base_fcode
- && soughtcode < memb->base_fcode + memb->num_vars)
- return 0;
- else if (soughtcode < memb->base_fcode)
- return -1;
- else
- return 1;
-}
-
-static enum insn_code
-locate_neon_builtin_icode (int fcode, neon_itype *itype)
-{
- neon_builtin_datum key, *found;
- int idx;
-
- key.base_fcode = fcode;
- found = (neon_builtin_datum *)
- bsearch (&key, &neon_builtin_data[0], ARRAY_SIZE (neon_builtin_data),
- sizeof (neon_builtin_data[0]), neon_builtin_compare);
- gcc_assert (found);
- idx = fcode - (int) found->base_fcode;
- gcc_assert (idx >= 0 && idx < T_MAX && idx < (int)found->num_vars);
-
- if (itype)
- *itype = found->itype;
-
- return found->codes[idx];
-}
-
typedef enum {
NEON_ARG_COPY_TO_REG,
NEON_ARG_CONSTANT,
@@ -19176,8 +19120,9 @@ arm_expand_neon_args (rtx target, int icode, int have_retval,
static rtx
arm_expand_neon_builtin (int fcode, tree exp, rtx target)
{
- neon_itype itype;
- enum insn_code icode = locate_neon_builtin_icode (fcode, &itype);
+ neon_builtin_datum *d = &neon_builtin_data[fcode - ARM_BUILTIN_NEON_BASE];
+ neon_itype itype = d->itype;
+ enum insn_code icode = d->code;
switch (itype)
{