[AArch64] - feedback on the approach followed for vulcan scheduler
Virendra Pathak
virendra.pathak@broadcom.com
Wed Jun 29 17:03:00 GMT 2016
Hi gcc-patches group,
I am working on adding vulcan.md (machine description) for vulcan cpu
in the aarch64 port. However, before proposing the final patch, I would like
the basic approach to be reviewed by you all (as changes are there in
aarch64.md)
In vulcan, a (load/store) instruction could be scheduled to cpu units in
different way based on the addressing mode(e.g. load, or load+integer).
So the requirement is to identify the addressing mode of (load/store)
instruction's operand while scheduling.
For this purpose, a new attribute "addr_type" has been added in the
aarch64.md file. This helps in identifying which operands of (load/store)
instruction should be considered for finding the addressing mode.
vulcan.md, while scheduling, calls a new function aarch64_mem_type_p
in the aarch64.c (via match_test) to decide the scheduling option based
on the addressing mode.
I have copied the code snippet below (complete patch is attached with
this mail).
Kindly review and give your feedback/comment.
Also if you think there could be an better alternative way, kindly suggest.
Thanks in advance for your time.
<Code Snippet>
FILE - gcc/config/aarch64/aarch64-protos.h
/* Mask bits to use for for aarch64_mem_type_p. Unshifted/shifted index
register variants are separated for scheduling purposes because the
distinction matters on some cores. */
#define AARCH64_ADDR_REG_IMM 0x01
#define AARCH64_ADDR_REG_WB 0x02
#define AARCH64_ADDR_REG_REG 0x04
#define AARCH64_ADDR_REG_SHIFT 0x08
#define AARCH64_ADDR_REG_EXT 0x10
#define AARCH64_ADDR_REG_SHIFT_EXT 0x20
#define AARCH64_ADDR_LO_SUM 0x40
#define AARCH64_ADDR_SYMBOLIC 0x80
FILE - gcc/config/aarch64/aarch64.md
(define_attr "addr_type" "none,op0,op1,op0addr,op1addr,lo_sum,wb"
(const_string "none"))
(define_insn "*mov<mode>_aarch64"
[(set (match_operand:SHORT 0 "nonimmediate_operand" "=r,r,
*w,r,*w, m, m, r,*w,*w")
(match_operand:SHORT 1 "general_operand" " r,M,D<hq>,m,
m,rZ,*w,*w, r,*w"))]
"(register_operand (operands[0], <MODE>mode)
|| aarch64_reg_or_zero (operands[1], <MODE>mode))"
{
switch (which_alternative)
{
case 0:
return "mov\t%w0, %w1";
case 1:
return "mov\t%w0, %1";
case 2:
return aarch64_output_scalar_simd_mov_immediate (operands[1],
<MODE>mode);
case 3:
return "ldr<size>\t%w0, %1";
case 4:
return "ldr\t%<size>0, %1";
case 5:
return "str<size>\t%w1, %0";
case 6:
return "str\t%<size>1, %0";
case 7:
return "umov\t%w0, %1.<v>[0]";
case 8:
return "dup\t%0.<Vallxd>, %w1";
case 9:
return "dup\t%<Vetype>0, %1.<v>[0]";
default:
gcc_unreachable ();
}
}
[(set_attr "type" "mov_reg,mov_imm,neon_move,load1,load1,store1,store1,\
neon_to_gp<q>,neon_from_gp<q>,neon_dup")
(set_attr "simd" "*,*,yes,*,*,*,*,yes,yes,yes")
(set_attr "addr_type" "*,*,*,op1,op1,op0,op0,*,*,*")]
)
FILE - gcc/config/aarch64/vulcan.md
;; Integer loads and stores.
(define_insn_reservation "vulcan_load_basic" 4
(and (eq_attr "tune" "vulcan")
(eq_attr "type" "load1")
(match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_SYMBOLIC
| AARCH64_ADDR_REG_IMM
| AARCH64_ADDR_LO_SUM)"))
"vulcan_ls01")
(define_insn_reservation "vulcan_load_automod" 4
(and (eq_attr "tune" "vulcan")
(eq_attr "type" "load1")
(match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_REG_WB)"))
"vulcan_ls01,vulcan_i012")
FILE - gcc/config/aarch64/aarch64.c
/* Return TRUE if INSN uses an address that satisfies any of the (non-strict)
addressing modes specified by MASK. This is intended for use in scheduling
models that are sensitive to the form of address used by some particular
instruction. */
bool
aarch64_mem_type_p (rtx_insn *insn, unsigned HOST_WIDE_INT mask)
{
aarch64_address_info info;
bool valid;
attr_addr_type addr_type;
rtx mem, addr;
machine_mode mode;
addr_type = get_attr_addr_type (insn);
switch (addr_type)
{
case ADDR_TYPE_WB:
info.type = ADDRESS_REG_WB;
break;
case ADDR_TYPE_LO_SUM:
info.type = ADDRESS_LO_SUM;
break;
case ADDR_TYPE_OP0:
case ADDR_TYPE_OP1:
extract_insn_cached (insn);
mem = recog_data.operand[(addr_type == ADDR_TYPE_OP0) ? 0 : 1];
gcc_assert (MEM_P (mem));
addr = XEXP (mem, 0);
mode = GET_MODE (mem);
classify:
valid = aarch64_classify_address (&info, addr, mode, MEM, false);
if (!valid)
return false;
break;
case ADDR_TYPE_OP0ADDR:
case ADDR_TYPE_OP1ADDR:
extract_insn_cached (insn);
addr = recog_data.operand[(addr_type == ADDR_TYPE_OP0ADDR) ? 0 : 1];
mode = DImode;
goto classify;
case ADDR_TYPE_NONE:
return false;
}
switch (info.type)
{
case ADDRESS_REG_IMM:
return (mask & AARCH64_ADDR_REG_IMM) != 0;
case ADDRESS_REG_WB:
return (mask & AARCH64_ADDR_REG_WB) != 0;
case ADDRESS_REG_REG:
if (info.shift == 0)
return (mask & AARCH64_ADDR_REG_REG) != 0;
else
return (mask & AARCH64_ADDR_REG_SHIFT) != 0;
case ADDRESS_REG_UXTW:
case ADDRESS_REG_SXTW:
if (info.shift == 0)
return (mask & AARCH64_ADDR_REG_EXT) != 0;
else
return (mask & AARCH64_ADDR_REG_SHIFT_EXT) != 0;
case ADDRESS_LO_SUM:
return (mask & AARCH64_ADDR_LO_SUM) != 0;
case ADDRESS_SYMBOLIC:
return (mask & AARCH64_ADDR_SYMBOLIC) != 0;
default:
return false;
}
}
<END>
with regards,
Virendra Pathak
-------------- next part --------------
From 4687f79a01334c15b20f0191811d58a93e5dfbae Mon Sep 17 00:00:00 2001
From: Virendra Pathak <virendra.pathak@broadcom.com>
Date: Wed, 29 Jun 2016 05:04:38 -0700
Subject: [PATCH] AArch64: add scheduler for vulcan cpu
---
gcc/config/aarch64/aarch64-cores.def | 2 +-
gcc/config/aarch64/aarch64-protos.h | 14 +
gcc/config/aarch64/aarch64.c | 82 +++++
gcc/config/aarch64/aarch64.md | 91 +++--
gcc/config/aarch64/vulcan.md | 619 +++++++++++++++++++++++++++++++++++
5 files changed, 778 insertions(+), 30 deletions(-)
create mode 100644 gcc/config/aarch64/vulcan.md
diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
index f29d25a..55d2514 100644
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -51,7 +51,7 @@ AARCH64_CORE("xgene1", xgene1, xgene1, 8A, AARCH64_FL_FOR_ARCH8, xge
/* V8.1 Architecture Processors. */
-AARCH64_CORE("vulcan", vulcan, cortexa57, 8_1A, AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_CRYPTO, vulcan, "0x42", "0x516")
+AARCH64_CORE("vulcan", vulcan, vulcan, 8_1A, AARCH64_FL_FOR_ARCH8_1 | AARCH64_FL_CRYPTO, vulcan, "0x42", "0x516")
/* V8 big.LITTLE implementations. */
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index e8c2ac8..43e21e2 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -240,6 +240,19 @@ struct tune_params
unsigned int extra_tuning_flags;
};
+/* Mask bits to use for for aarch64_mem_type_p. Unshifted/shifted index
+ register variants are separated for scheduling purposes because the
+ distinction matters on some cores. */
+
+#define AARCH64_ADDR_REG_IMM 0x01
+#define AARCH64_ADDR_REG_WB 0x02
+#define AARCH64_ADDR_REG_REG 0x04
+#define AARCH64_ADDR_REG_SHIFT 0x08
+#define AARCH64_ADDR_REG_EXT 0x10
+#define AARCH64_ADDR_REG_SHIFT_EXT 0x20
+#define AARCH64_ADDR_LO_SUM 0x40
+#define AARCH64_ADDR_SYMBOLIC 0x80
+
#define AARCH64_FUSION_PAIR(x, name) \
AARCH64_FUSE_##name##_index,
/* Supported fusion operations. */
@@ -341,6 +354,7 @@ bool aarch64_simd_scalar_immediate_valid_for_move (rtx, machine_mode);
bool aarch64_simd_shift_imm_p (rtx, machine_mode, bool);
bool aarch64_simd_valid_immediate (rtx, machine_mode, bool,
struct simd_immediate_info *);
+bool aarch64_mem_type_p (rtx_insn *, unsigned HOST_WIDE_INT);
bool aarch64_symbolic_address_p (rtx);
bool aarch64_uimm12_shift (HOST_WIDE_INT);
bool aarch64_use_return_insn_p (void);
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index d7eb754..806d028 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -4192,6 +4192,88 @@ aarch64_classify_address (struct aarch64_address_info *info,
}
}
+/* Return TRUE if INSN uses an address that satisfies any of the (non-strict)
+ addressing modes specified by MASK. This is intended for use in scheduling
+ models that are sensitive to the form of address used by some particular
+ instruction. */
+
+bool
+aarch64_mem_type_p (rtx_insn *insn, unsigned HOST_WIDE_INT mask)
+{
+ aarch64_address_info info;
+ bool valid;
+ attr_addr_type addr_type;
+ rtx mem, addr;
+ machine_mode mode;
+
+ addr_type = get_attr_addr_type (insn);
+
+ switch (addr_type)
+ {
+ case ADDR_TYPE_WB:
+ info.type = ADDRESS_REG_WB;
+ break;
+
+ case ADDR_TYPE_LO_SUM:
+ info.type = ADDRESS_LO_SUM;
+ break;
+
+ case ADDR_TYPE_OP0:
+ case ADDR_TYPE_OP1:
+ extract_insn_cached (insn);
+
+ mem = recog_data.operand[(addr_type == ADDR_TYPE_OP0) ? 0 : 1];
+
+ gcc_assert (MEM_P (mem));
+
+ addr = XEXP (mem, 0);
+ mode = GET_MODE (mem);
+
+ classify:
+ valid = aarch64_classify_address (&info, addr, mode, MEM, false);
+ if (!valid)
+ return false;
+
+ break;
+
+ case ADDR_TYPE_OP0ADDR:
+ case ADDR_TYPE_OP1ADDR:
+ extract_insn_cached (insn);
+
+ addr = recog_data.operand[(addr_type == ADDR_TYPE_OP0ADDR) ? 0 : 1];
+ mode = DImode;
+ goto classify;
+
+ case ADDR_TYPE_NONE:
+ return false;
+ }
+
+ switch (info.type)
+ {
+ case ADDRESS_REG_IMM:
+ return (mask & AARCH64_ADDR_REG_IMM) != 0;
+ case ADDRESS_REG_WB:
+ return (mask & AARCH64_ADDR_REG_WB) != 0;
+ case ADDRESS_REG_REG:
+ if (info.shift == 0)
+ return (mask & AARCH64_ADDR_REG_REG) != 0;
+ else
+ return (mask & AARCH64_ADDR_REG_SHIFT) != 0;
+ case ADDRESS_REG_UXTW:
+ case ADDRESS_REG_SXTW:
+ if (info.shift == 0)
+ return (mask & AARCH64_ADDR_REG_EXT) != 0;
+ else
+ return (mask & AARCH64_ADDR_REG_SHIFT_EXT) != 0;
+ case ADDRESS_LO_SUM:
+ return (mask & AARCH64_ADDR_LO_SUM) != 0;
+ case ADDRESS_SYMBOLIC:
+ return (mask & AARCH64_ADDR_SYMBOLIC) != 0;
+ default:
+ return false;
+ }
+}
+
bool
aarch64_symbolic_address_p (rtx x)
{
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index bcb7db0..2fa2a89 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -206,6 +206,9 @@
;; no predicated insns.
(define_attr "predicated" "yes,no" (const_string "no"))
+(define_attr "addr_type" "none,op0,op1,op0addr,op1addr,lo_sum,wb"
+ (const_string "none"))
+
;; -------------------------------------------------------------------
;; Pipeline descriptions and scheduling
;; -------------------------------------------------------------------
@@ -219,6 +222,7 @@
(include "../arm/exynos-m1.md")
(include "thunderx.md")
(include "../arm/xgene1.md")
+(include "vulcan.md")
;; -------------------------------------------------------------------
;; Jumps and other miscellaneous insns
@@ -535,7 +539,8 @@
return pftype[INTVAL(operands[1])][locality];
}
- [(set_attr "type" "load1")]
+ [(set_attr "type" "load1")
+ (set_attr "addr_type" "op0addr")]
)
(define_insn "trap"
@@ -1017,7 +1022,8 @@
}
[(set_attr "type" "mov_reg,mov_imm,neon_move,load1,load1,store1,store1,\
neon_to_gp<q>,neon_from_gp<q>,neon_dup")
- (set_attr "simd" "*,*,yes,*,*,*,*,yes,yes,yes")]
+ (set_attr "simd" "*,*,yes,*,*,*,*,yes,yes,yes")
+ (set_attr "addr_type" "*,*,*,op1,op1,op0,op0,*,*,*")]
)
(define_expand "mov<mode>"
@@ -1068,7 +1074,8 @@
}"
[(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,load1,load1,store1,store1,\
adr,adr,f_mcr,f_mrc,fmov")
- (set_attr "fp" "*,*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes")]
+ (set_attr "fp" "*,*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes")
+ (set_attr "addr_type" "*,*,*,*,*,op1,op1,op0,op0,*,*,*,*,*")]
)
(define_insn_and_split "*movdi_aarch64"
@@ -1102,7 +1109,8 @@
[(set_attr "type" "mov_reg,mov_reg,mov_reg,mov_imm,mov_imm,load1,load1,store1,store1,\
adr,adr,f_mcr,f_mrc,fmov,neon_move")
(set_attr "fp" "*,*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes,*")
- (set_attr "simd" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,yes")]
+ (set_attr "simd" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,yes")
+ (set_attr "addr_type" "*,*,*,*,*,op1,op1,op0,op0,*,*,*,*,*,*")]
)
(define_insn "insv_imm<mode>"
@@ -1147,7 +1155,8 @@
load2,store2,store2,f_loadd,f_stored")
(set_attr "length" "8,8,8,4,4,4,4,4,4")
(set_attr "simd" "*,*,*,yes,*,*,*,*,*")
- (set_attr "fp" "*,*,*,*,*,*,*,yes,yes")]
+ (set_attr "fp" "*,*,*,*,*,*,*,yes,yes")
+ (set_attr "addr_type" "*,*,*,*,op1,op0,op0,op1,op0")]
)
;; Split a TImode register-register or register-immediate move into
@@ -1219,7 +1228,8 @@
mov\\t%w0, %w1"
[(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconsts,\
f_loads,f_stores,load1,store1,mov_reg")
- (set_attr "simd" "yes,*,*,*,*,*,*,*,*,*")]
+ (set_attr "simd" "yes,*,*,*,*,*,*,*,*,*")
+ (set_attr "addr_type" "*,*,*,*,*,op1,op0,op1,op0,*")]
)
(define_insn "*movdf_aarch64"
@@ -1240,7 +1250,8 @@
mov\\t%x0, %x1"
[(set_attr "type" "neon_move,f_mcr,f_mrc,fmov,fconstd,\
f_loadd,f_stored,load1,store1,mov_reg")
- (set_attr "simd" "yes,*,*,*,*,*,*,*,*,*")]
+ (set_attr "simd" "yes,*,*,*,*,*,*,*,*,*")
+ (set_attr "addr_type" "*,*,*,*,*,op1,op0,op1,op0,*")]
)
(define_insn "*movtf_aarch64"
@@ -1265,7 +1276,8 @@
[(set_attr "type" "logic_reg,multiple,f_mcr,f_mrc,neon_move_q,f_mcr,\
f_loadd,f_stored,load2,store2,store2")
(set_attr "length" "4,8,8,8,4,4,4,4,4,4,4")
- (set_attr "simd" "yes,*,*,*,yes,*,*,*,*,*,*")]
+ (set_attr "simd" "yes,*,*,*,yes,*,*,*,*,*,*")
+ (set_attr "addr_type" "*,*,*,*,*,*,op1,op0,op1,op0,op0")]
)
(define_split
@@ -1312,7 +1324,8 @@
ldp\\t%w0, %w2, %1
ldp\\t%s0, %s2, %1"
[(set_attr "type" "load2,neon_load1_2reg")
- (set_attr "fp" "*,yes")]
+ (set_attr "fp" "*,yes")
+ (set_attr "addr_type" "op1")]
)
(define_insn "load_pairdi"
@@ -1328,7 +1341,8 @@
ldp\\t%x0, %x2, %1
ldp\\t%d0, %d2, %1"
[(set_attr "type" "load2,neon_load1_2reg")
- (set_attr "fp" "*,yes")]
+ (set_attr "fp" "*,yes")
+ (set_attr "addr_type" "op1")]
)
@@ -1347,7 +1361,8 @@
stp\\t%w1, %w3, %0
stp\\t%s1, %s3, %0"
[(set_attr "type" "store2,neon_store1_2reg")
- (set_attr "fp" "*,yes")]
+ (set_attr "fp" "*,yes")
+ (set_attr "addr_type" "op0")]
)
(define_insn "store_pairdi"
@@ -1363,7 +1378,8 @@
stp\\t%x1, %x3, %0
stp\\t%d1, %d3, %0"
[(set_attr "type" "store2,neon_store1_2reg")
- (set_attr "fp" "*,yes")]
+ (set_attr "fp" "*,yes")
+ (set_attr "addr_type" "op0")]
)
;; Operands 1 and 3 are tied together by the final condition; so we allow
@@ -1381,7 +1397,8 @@
ldp\\t%s0, %s2, %1
ldp\\t%w0, %w2, %1"
[(set_attr "type" "neon_load1_2reg,load2")
- (set_attr "fp" "yes,*")]
+ (set_attr "fp" "yes,*")
+ (set_attr "addr_type" "op1")]
)
(define_insn "load_pairdf"
@@ -1397,7 +1414,8 @@
ldp\\t%d0, %d2, %1
ldp\\t%x0, %x2, %1"
[(set_attr "type" "neon_load1_2reg,load2")
- (set_attr "fp" "yes,*")]
+ (set_attr "fp" "yes,*")
+ (set_attr "addr_type" "op1")]
)
;; Operands 0 and 2 are tied together by the final condition; so we allow
@@ -1415,7 +1433,8 @@
stp\\t%s1, %s3, %0
stp\\t%w1, %w3, %0"
[(set_attr "type" "neon_store1_2reg,store2")
- (set_attr "fp" "yes,*")]
+ (set_attr "fp" "yes,*")
+ (set_attr "addr_type" "op0")]
)
(define_insn "store_pairdf"
@@ -1431,7 +1450,8 @@
stp\\t%d1, %d3, %0
stp\\t%x1, %x3, %0"
[(set_attr "type" "neon_store1_2reg,store2")
- (set_attr "fp" "yes,*")]
+ (set_attr "fp" "yes,*")
+ (set_attr "addr_type" "op0")]
)
;; Load pair with post-index writeback. This is primarily used in function
@@ -1448,7 +1468,8 @@
(match_operand:P 5 "const_int_operand" "n"))))])]
"INTVAL (operands[5]) == GET_MODE_SIZE (<GPI:MODE>mode)"
"ldp\\t%<w>2, %<w>3, [%1], %4"
- [(set_attr "type" "load2")]
+ [(set_attr "type" "load2")
+ (set_attr "addr_type" "wb")]
)
(define_insn "loadwb_pair<GPF:mode>_<P:mode>"
@@ -1481,7 +1502,8 @@
(match_operand:GPI 3 "register_operand" "r"))])]
"INTVAL (operands[5]) == INTVAL (operands[4]) + GET_MODE_SIZE (<GPI:MODE>mode)"
"stp\\t%<w>2, %<w>3, [%0, %4]!"
- [(set_attr "type" "store2")]
+ [(set_attr "type" "store2")
+ (set_attr "addr_type" "wb")]
)
(define_insn "storewb_pair<GPF:mode>_<P:mode>"
@@ -1517,7 +1539,8 @@
"@
sxtw\t%0, %w1
ldrsw\t%0, %1"
- [(set_attr "type" "extend,load1")]
+ [(set_attr "type" "extend,load1")
+ (set_attr "addr_type" "*,op1")]
)
(define_insn "*load_pair_extendsidi2_aarch64"
@@ -1530,7 +1553,8 @@
XEXP (operands[1], 0),
GET_MODE_SIZE (SImode)))"
"ldpsw\\t%0, %2, %1"
- [(set_attr "type" "load2")]
+ [(set_attr "type" "load2")
+ (set_attr "addr_type" "op1")]
)
(define_insn "*zero_extendsidi2_aarch64"
@@ -1540,7 +1564,8 @@
"@
uxtw\t%0, %w1
ldr\t%w0, %1"
- [(set_attr "type" "extend,load1")]
+ [(set_attr "type" "extend,load1")
+ (set_attr "addr_type" "*,op1")]
)
(define_insn "*load_pair_zero_extendsidi2_aarch64"
@@ -1553,7 +1578,8 @@
XEXP (operands[1], 0),
GET_MODE_SIZE (SImode)))"
"ldp\\t%w0, %w2, %1"
- [(set_attr "type" "load2")]
+ [(set_attr "type" "load2")
+ (set_attr "addr_type" "op1")]
)
(define_expand "<ANY_EXTEND:optab><SHORT:mode><GPI:mode>2"
@@ -1569,7 +1595,8 @@
"@
sxt<SHORT:size>\t%<GPI:w>0, %w1
ldrs<SHORT:size>\t%<GPI:w>0, %1"
- [(set_attr "type" "extend,load1")]
+ [(set_attr "type" "extend,load1")
+ (set_attr "addr_type" "*,op1")]
)
(define_insn "*zero_extend<SHORT:mode><GPI:mode>2_aarch64"
@@ -1580,7 +1607,8 @@
uxt<SHORT:size>\t%<GPI:w>0, %w1
ldr<SHORT:size>\t%w0, %1
ldr\t%<SHORT:size>0, %1"
- [(set_attr "type" "extend,load1,load1")]
+ [(set_attr "type" "extend,load1,load1")
+ (set_attr "addr_type" "*,op1,op1")]
)
(define_expand "<optab>qihi2"
@@ -4983,7 +5011,8 @@
UNSPEC_GOTSMALLPIC))]
""
"ldr\\t%<w>0, [%1, #:got_lo12:%a2]"
- [(set_attr "type" "load1")]
+ [(set_attr "type" "load1")
+ (set_attr "addr_type" "lo_sum")]
)
(define_insn "ldr_got_small_sidi"
@@ -4995,7 +5024,8 @@
UNSPEC_GOTSMALLPIC)))]
"TARGET_ILP32"
"ldr\\t%w0, [%1, #:got_lo12:%a2]"
- [(set_attr "type" "load1")]
+ [(set_attr "type" "load1")
+ (set_attr "addr_type" "lo_sum")]
)
(define_insn "ldr_got_small_28k_<mode>"
@@ -5027,7 +5057,8 @@
UNSPEC_GOTTINYPIC))]
""
"ldr\\t%0, %L1"
- [(set_attr "type" "load1")]
+ [(set_attr "type" "load1")
+ (set_attr "addr_type" "op1addr")]
)
(define_insn "aarch64_load_tp_hard"
@@ -5069,7 +5100,8 @@
""
"adrp\\t%0, %A1\;ldr\\t%<w>0, [%0, #%L1]"
[(set_attr "type" "load1")
- (set_attr "length" "8")]
+ (set_attr "length" "8")
+ (set_attr "addr_type" "op1addr")]
)
(define_insn "tlsie_small_sidi"
@@ -5080,7 +5112,8 @@
""
"adrp\\t%0, %A1\;ldr\\t%w0, [%0, #%L1]"
[(set_attr "type" "load1")
- (set_attr "length" "8")]
+ (set_attr "length" "8")
+ (set_attr "addr_type" "op1addr")]
)
(define_insn "tlsie_tiny_<mode>"
diff --git a/gcc/config/aarch64/vulcan.md b/gcc/config/aarch64/vulcan.md
new file mode 100644
index 0000000..db015d6
--- /dev/null
+++ b/gcc/config/aarch64/vulcan.md
@@ -0,0 +1,619 @@
+;; Broadcom Vulcan pipeline description
+;; Copyright (C) 2015 Free Software Foundation, Inc.
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_automaton "vulcan")
+
+(define_cpu_unit "vulcan_i0" "vulcan")
+(define_cpu_unit "vulcan_i1" "vulcan")
+(define_cpu_unit "vulcan_i2" "vulcan")
+(define_cpu_unit "vulcan_ls0" "vulcan")
+(define_cpu_unit "vulcan_ls1" "vulcan")
+(define_cpu_unit "vulcan_sd" "vulcan")
+
+; Pseudo-units for multiply pipeline.
+
+(define_cpu_unit "vulcan_i1m1" "vulcan")
+(define_cpu_unit "vulcan_i1m2" "vulcan")
+(define_cpu_unit "vulcan_i1m3" "vulcan")
+
+; Pseudo-units for load delay (assuming dcache hit).
+
+(define_cpu_unit "vulcan_ls0d1" "vulcan")
+(define_cpu_unit "vulcan_ls0d2" "vulcan")
+(define_cpu_unit "vulcan_ls0d3" "vulcan")
+
+(define_cpu_unit "vulcan_ls1d1" "vulcan")
+(define_cpu_unit "vulcan_ls1d2" "vulcan")
+(define_cpu_unit "vulcan_ls1d3" "vulcan")
+
+; Make some aliases for f0/f1.
+(define_reservation "vulcan_f0" "vulcan_i0")
+(define_reservation "vulcan_f1" "vulcan_i1")
+
+(define_reservation "vulcan_i012" "vulcan_i0|vulcan_i1|vulcan_i2")
+(define_reservation "vulcan_ls01" "vulcan_ls0|vulcan_ls1")
+(define_reservation "vulcan_f01" "vulcan_f0|vulcan_f1")
+
+(define_reservation "vulcan_ls_both" "vulcan_ls0+vulcan_ls1")
+
+; A load with delay in the ls0/ls1 pipes.
+(define_reservation "vulcan_l0delay" "vulcan_ls0,vulcan_ls0d1,vulcan_ls0d2,\
+ vulcan_ls0d3")
+(define_reservation "vulcan_l1delay" "vulcan_ls1,vulcan_ls1d1,vulcan_ls1d2,\
+ vulcan_ls1d3")
+(define_reservation "vulcan_l01delay" "vulcan_l0delay|vulcan_l1delay")
+
+;; Branch and call instructions.
+
+(define_insn_reservation "vulcan_branch" 1
+ (and (eq_attr "tune" "vulcan")
+ (eq_attr "type" "call,branch"))
+ "vulcan_i2")
+
+;; Integer arithmetic/logic instructions.
+
+; Plain register moves are handled by renaming, and don't create any uops.
+
+(define_insn_reservation "vulcan_regmove" 0
+ (and (eq_attr "tune" "vulcan")
+ (eq_attr "type" "mov_reg"))
+ "nothing")
+
+(define_insn_reservation "vulcan_alu_basic" 1
+ (and (eq_attr "tune" "vulcan")
+ (eq_attr "type" "alu_imm,alu_sreg,alus_imm,alus_sreg,\
+ adc_reg,adc_imm,adcs_reg,adcs_imm,\
+ logic_reg,logic_imm,logics_reg,logics_imm,\
+ csel,adr,mov_imm,shift_reg,shift_imm,bfm,\
+ rbit,rev,extend"))
+ "vulcan_i012")
+
+(define_insn_reservation "vulcan_alu_shift" 2
+ (and (eq_attr "tune" "vulcan")
+ (eq_attr "type" "alu_shift_imm,alu_ext,alu_shift_reg,\
+ alus_shift_imm,alus_ext,alus_shift_reg,\
+ logic_shift_imm,logics_shift_reg"))
+ "vulcan_i012,vulcan_i012")
+
+; NOTE: 13 is the minimum latency given. Use average or max instead?
+(define_insn_reservation "vulcan_div" 13
+ (and (eq_attr "tune" "vulcan")
+ (eq_attr "type" "sdiv,udiv"))
+ "vulcan_i1*13")
+
+(define_insn_reservation "vulcan_madd" 5
+ (and (eq_attr "tune" "vulcan")
+ (eq_attr "type" "mla,smlal,umlal"))
+ "vulcan_i1,vulcan_i1m1,vulcan_i1m2,vulcan_i1m3,vulcan_i012")
+
+; NOTE: smull, umull are used for "high part" multiplies too.
+(define_insn_reservation "vulcan_mul" 4
+ (and (eq_attr "tune" "vulcan")
+ (eq_attr "type" "mul,smull,umull"))
+ "vulcan_i1,vulcan_i1m1,vulcan_i1m2,vulcan_i1m3")
+
+(define_insn_reservation "vulcan_countbits" 3
+ (and (eq_attr "tune" "vulcan")
+ (eq_attr "type" "clz"))
+ "vulcan_i1")
+
+;; Integer loads and stores.
+
+(define_insn_reservation "vulcan_load_basic" 4
+ (and (eq_attr "tune" "vulcan")
+ (eq_attr "type" "load1")
+ (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_SYMBOLIC
+ | AARCH64_ADDR_REG_IMM
+ | AARCH64_ADDR_LO_SUM)"))
+ "vulcan_ls01")
+
+(define_insn_reservation "vulcan_load_automod" 4
+ (and (eq_attr "tune" "vulcan")
+ (eq_attr "type" "load1")
+ (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_REG_WB)"))
+ "vulcan_ls01,vulcan_i012")
+
+(define_insn_reservation "vulcan_load_regoffset" 5
+ (and (eq_attr "tune" "vulcan")
+ (eq_attr "type" "load1")
+ (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_REG_REG)"))
+ "vulcan_i012,vulcan_ls01")
+
+(define_insn_reservation "vulcan_load_scale_ext" 6
+ (and (eq_attr "tune" "vulcan")
+ (eq_attr "type" "load1")
+ (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_REG_SHIFT
+ | AARCH64_ADDR_REG_EXT
+ | AARCH64_ADDR_REG_SHIFT_EXT)"))
+ "vulcan_i012,vulcan_i012,vulcan_ls01")
+
+(define_insn_reservation "vulcan_loadpair" 5
+ (and (eq_attr "tune" "vulcan")
+ (eq_attr "type" "load2"))
+ "vulcan_i012,vulcan_ls01")
+
+(define_insn_reservation "vulcan_store_basic" 0
+ (and (eq_attr "tune" "vulcan")
+ (eq_attr "type" "store1")
+ (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_SYMBOLIC
+ | AARCH64_ADDR_REG_IMM
+ | AARCH64_ADDR_LO_SUM)"))
+ "vulcan_ls01,vulcan_sd")
+
+(define_insn_reservation "vulcan_store_automod" 0
+ (and (eq_attr "tune" "vulcan")
+ (eq_attr "type" "store1")
+ (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_REG_WB)"))
+ "vulcan_ls01,(vulcan_sd+vulcan_i012)")
+
+(define_insn_reservation "vulcan_store_regoffset_scale_ext" 0
+ (and (eq_attr "tune" "vulcan")
+ (eq_attr "type" "store1")
+ (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_REG_REG
+ | AARCH64_ADDR_REG_SHIFT
+ | AARCH64_ADDR_REG_EXT
+ | AARCH64_ADDR_REG_SHIFT_EXT)"))
+ "vulcan_i012,vulcan_ls01,vulcan_sd")
+
+(define_insn_reservation "vulcan_storepair_basic" 0
+ (and (eq_attr "tune" "vulcan")
+ (eq_attr "type" "store2")
+ (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_REG_IMM
+ | AARCH64_ADDR_LO_SUM)"))
+ "vulcan_ls01,vulcan_sd")
+
+(define_insn_reservation "vulcan_storepair_automod" 0
+ (and (eq_attr "tune" "vulcan")
+ (eq_attr "type" "store2")
+ (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_REG_WB)"))
+ "vulcan_ls01,(vulcan_sd+vulcan_i012)")
+
+;; FP data processing instructions.
+
+(define_insn_reservation "vulcan_fp_simple" 5
+ (and (eq_attr "tune" "vulcan")
+ (eq_attr "type" "ffariths,ffarithd,f_minmaxs,f_minmaxd"))
+ "vulcan_f01")
+
+(define_insn_reservation "vulcan_fp_addsub" 6
+ (and (eq_attr "tune" "vulcan")
+ (eq_attr "type" "fadds,faddd"))
+ "vulcan_f01")
+
+(define_insn_reservation "vulcan_fp_cmp" 5
+ (and (eq_attr "tune" "vulcan")
+ (eq_attr "type" "fcmps,fcmpd"))
+ "vulcan_f01")
+
+(define_insn_reservation "vulcan_fp_divsqrt_s" 16
+ (and (eq_attr "tune" "vulcan")
+ (eq_attr "type" "fdivs,fsqrts"))
+ "vulcan_f0*8|vulcan_f1*8")
+
+(define_insn_reservation "vulcan_fp_divsqrt_d" 23
+ (and (eq_attr "tune" "vulcan")
+ (eq_attr "type" "fdivd,fsqrtd"))
+ "vulcan_f0*12|vulcan_f1*12")
+
+(define_insn_reservation "vulcan_fp_mul_mac" 6
+ (and (eq_attr "tune" "vulcan")
+ (eq_attr "type" "fmuls,fmuld,fmacs,fmacd"))
+ "vulcan_f01")
+
+(define_insn_reservation "vulcan_frint" 7
+ (and (eq_attr "tune" "vulcan")
+ (eq_attr "type" "f_rints,f_rintd"))
+ "vulcan_f01")
+
+(define_insn_reservation "vulcan_fcsel" 4
+ (and (eq_attr "tune" "vulcan")
+ (eq_attr "type" "fcsel"))
+ "vulcan_f01")
+
+;; FP miscellaneous instructions.
+
+(define_insn_reservation "vulcan_fp_cvt" 7
+ (and (eq_attr "tune" "vulcan")
+ (eq_attr "type" "f_cvtf2i,f_cvt,f_cvti2f"))
+ "vulcan_f01")
+
+(define_insn_reservation "vulcan_fp_mov" 4
+ (and (eq_attr "tune" "vulcan")
+ (eq_attr "type" "fconsts,fconstd,fmov,f_mrc"))
+ "vulcan_f01")
+
+(define_insn_reservation "vulcan_fp_mov_to_gen" 5
+ (and (eq_attr "tune" "vulcan")
+ (eq_attr "type" "f_mcr"))
+ "vulcan_f01")
+
+;; FP loads and stores.
+
+(define_insn_reservation "vulcan_fp_load_basic" 4
+ (and (eq_attr "tune" "vulcan")
+ (eq_attr "type" "f_loads,f_loadd")
+ (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_SYMBOLIC
+ | AARCH64_ADDR_REG_IMM
+ | AARCH64_ADDR_LO_SUM)"))
+ "vulcan_ls01")
+
+(define_insn_reservation "vulcan_fp_load_automod" 4
+ (and (eq_attr "tune" "vulcan")
+ (eq_attr "type" "f_loads,f_loadd")
+ (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_REG_WB)"))
+ "vulcan_ls01,vulcan_i012")
+
+(define_insn_reservation "vulcan_fp_load_regoffset" 5
+ (and (eq_attr "tune" "vulcan")
+ (eq_attr "type" "f_loads,f_loadd")
+ (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_REG_REG)"))
+ "vulcan_ls01,vulcan_i012")
+
+(define_insn_reservation "vulcan_fp_load_scale_ext" 6
+ (and (eq_attr "tune" "vulcan")
+ (eq_attr "type" "f_loads,f_loadd")
+ (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_REG_SHIFT
+ | AARCH64_ADDR_REG_EXT
+ | AARCH64_ADDR_REG_SHIFT_EXT)"))
+ "vulcan_ls01,vulcan_i012")
+
+(define_insn_reservation "vulcan_fp_loadpair_basic" 4
+ (and (eq_attr "tune" "vulcan")
+ (eq_attr "type" "neon_load1_2reg")
+ (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_REG_IMM
+ | AARCH64_ADDR_LO_SUM)"))
+ "vulcan_ls01*2")
+
+(define_insn_reservation "vulcan_fp_loadpair_automod" 4
+ (and (eq_attr "tune" "vulcan")
+ (eq_attr "type" "neon_load1_2reg")
+ (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_REG_WB)"))
+ "vulcan_ls01,(vulcan_ls01+vulcan_i012)")
+
+(define_insn_reservation "vulcan_fp_store_basic" 0
+ (and (eq_attr "tune" "vulcan")
+ (eq_attr "type" "f_stores,f_stored")
+ (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_SYMBOLIC
+ | AARCH64_ADDR_REG_IMM
+ | AARCH64_ADDR_LO_SUM)"))
+ "vulcan_ls01,vulcan_sd")
+
+(define_insn_reservation "vulcan_fp_store_automod" 0
+ (and (eq_attr "tune" "vulcan")
+ (eq_attr "type" "f_stores,f_stored")
+ (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_REG_WB)"))
+ "vulcan_ls01,(vulcan_sd+vulcan_i012)")
+
+(define_insn_reservation "vulcan_fp_store_regoffset_scale_ext" 0
+ (and (eq_attr "tune" "vulcan")
+ (eq_attr "type" "f_stores,f_stored")
+ (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_REG_REG
+ | AARCH64_ADDR_REG_SHIFT
+ | AARCH64_ADDR_REG_EXT
+ | AARCH64_ADDR_REG_SHIFT_EXT)"))
+ "vulcan_i012,vulcan_ls01,vulcan_sd")
+
+(define_insn_reservation "vulcan_fp_storepair_basic" 0
+ (and (eq_attr "tune" "vulcan")
+ (eq_attr "type" "neon_store1_2reg")
+ (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_REG_IMM
+ | AARCH64_ADDR_LO_SUM)"))
+ "vulcan_ls01,(vulcan_ls01+vulcan_sd),vulcan_sd")
+
+(define_insn_reservation "vulcan_fp_storepair_automod" 0
+ (and (eq_attr "tune" "vulcan")
+ (eq_attr "type" "neon_store1_2reg")
+ (match_test "aarch64_mem_type_p (insn, AARCH64_ADDR_REG_WB)"))
+ "vulcan_ls01,(vulcan_ls01+vulcan_sd+vulcan_i012),vulcan_sd")
+
+;; ASIMD integer instructions.
+
+(define_insn_reservation "vulcan_asimd_int" 7
+ (and (eq_attr "tune" "vulcan")
+ (eq_attr "type" "neon_abd,neon_abd_q,\
+ neon_arith_acc,neon_arith_acc_q,\
+ neon_abs,neon_abs_q,\
+ neon_add,neon_add_q,\
+ neon_neg,neon_neg_q,\
+ neon_add_long,neon_add_widen,\
+ neon_add_halve,neon_add_halve_q,\
+ neon_sub_long,neon_sub_widen,\
+ neon_sub_halve,neon_sub_halve_q,\
+ neon_add_halve_narrow_q,neon_sub_halve_narrow_q,\
+ neon_qabs,neon_qabs_q,\
+ neon_qadd,neon_qadd_q,\
+ neon_qneg,neon_qneg_q,\
+ neon_qsub,neon_qsub_q,\
+ neon_minmax,neon_minmax_q,\
+ neon_reduc_minmax,neon_reduc_minmax_q,\
+ neon_mul_b,neon_mul_h,neon_mul_s,\
+ neon_mul_b_q,neon_mul_h_q,neon_mul_s_q,\
+ neon_sat_mul_b,neon_sat_mul_h,neon_sat_mul_s,\
+ neon_sat_mul_b_q,neon_sat_mul_h_q,neon_sat_mul_s_q,\
+ neon_mla_b,neon_mla_h,neon_mla_s,\
+ neon_mla_b_q,neon_mla_h_q,neon_mla_s_q,\
+ neon_mul_b_long,neon_mul_h_long,\
+ neon_mul_s_long,neon_mul_d_long,\
+ neon_sat_mul_b_long,neon_sat_mul_h_long,\
+ neon_sat_mul_s_long,\
+ neon_mla_b_long,neon_mla_h_long,neon_mla_s_long,\
+ neon_sat_mla_b_long,neon_sat_mla_h_long,\
+ neon_sat_mla_s_long,\
+ neon_shift_acc,neon_shift_acc_q,\
+ neon_shift_imm,neon_shift_imm_q,\
+ neon_shift_reg,neon_shift_reg_q,\
+ neon_shift_imm_long,neon_shift_imm_narrow_q,\
+ neon_sat_shift_imm,neon_sat_shift_imm_q,\
+ neon_sat_shift_reg,neon_sat_shift_reg_q,\
+ neon_sat_shift_imm_narrow_q"))
+ "vulcan_f01")
+
+(define_insn_reservation "vulcan_asimd_reduc_add" 5
+ (and (eq_attr "tune" "vulcan")
+ (eq_attr "type" "neon_reduc_add,neon_reduc_add_q"))
+ "vulcan_f01")
+
+(define_insn_reservation "vulcan_asimd_cmp" 7
+ (and (eq_attr "tune" "vulcan")
+ (eq_attr "type" "neon_compare,neon_compare_q,neon_compare_zero,\
+ neon_tst,neon_tst_q"))
+ "vulcan_f01")
+
+; Note: logical AND should have a latency of 7, not 5.
+
+(define_insn_reservation "vulcan_asimd_logic" 5
+ (and (eq_attr "tune" "vulcan")
+ (eq_attr "type" "neon_logic,neon_logic_q"))
+ "vulcan_f01")
+
+;; ASIMD floating-point instructions.
+
+(define_insn_reservation "vulcan_asimd_fp_simple" 5
+ (and (eq_attr "tune" "vulcan")
+ (eq_attr "type" "neon_fp_abs_s,neon_fp_abs_d,\
+ neon_fp_abs_s_q,neon_fp_abs_d_q,\
+ neon_fp_compare_s,neon_fp_compare_d,\
+ neon_fp_compare_s_q,neon_fp_compare_d_q,\
+ neon_fp_minmax_s,neon_fp_minmax_d,\
+ neon_fp_minmax_s_q,neon_fp_minmax_d_q,\
+ neon_fp_reduc_minmax_s,neon_fp_reduc_minmax_d,\
+ neon_fp_reduc_minmax_s_q,neon_fp_reduc_minmax_d_q,\
+ neon_fp_neg_s,neon_fp_neg_d,\
+ neon_fp_neg_s_q,neon_fp_neg_d_q"))
+ "vulcan_f01")
+
+(define_insn_reservation "vulcan_asimd_fp_arith" 6
+ (and (eq_attr "tune" "vulcan")
+ (eq_attr "type" "neon_fp_abd_s,neon_fp_abd_d,\
+ neon_fp_abd_s_q,neon_fp_abd_d_q,\
+ neon_fp_addsub_s,neon_fp_addsub_d,\
+ neon_fp_addsub_s_q,neon_fp_addsub_d_q,\
+ neon_fp_reduc_add_s,neon_fp_reduc_add_d,\
+ neon_fp_reduc_add_s_q,neon_fp_reduc_add_d_q,\
+ neon_fp_mul_s,neon_fp_mul_d,\
+ neon_fp_mul_s_q,neon_fp_mul_d_q,\
+ neon_fp_mla_s,neon_fp_mla_d,\
+ neon_fp_mla_s_q,neon_fp_mla_d_q"))
+ "vulcan_f01")
+
+(define_insn_reservation "vulcan_asimd_fp_conv" 7
+ (and (eq_attr "tune" "vulcan")
+ (eq_attr "type" "neon_fp_cvt_widen_s,neon_fp_cvt_narrow_d_q,\
+ neon_fp_to_int_s,neon_fp_to_int_d,\
+ neon_fp_to_int_s_q,neon_fp_to_int_d_q,\
+ neon_fp_round_s,neon_fp_round_d,\
+ neon_fp_round_s_q,neon_fp_round_d_q"))
+ "vulcan_f01")
+
+(define_insn_reservation "vulcan_asimd_fp_div_s" 16
+ (and (eq_attr "tune" "vulcan")
+ (eq_attr "type" "neon_fp_div_s,neon_fp_div_s_q"))
+ "vulcan_f01")
+
+(define_insn_reservation "vulcan_asimd_fp_div_d" 23
+ (and (eq_attr "tune" "vulcan")
+ (eq_attr "type" "neon_fp_div_d,neon_fp_div_d_q"))
+ "vulcan_f01")
+
+;; ASIMD miscellaneous instructions.
+
+(define_insn_reservation "vulcan_asimd_misc" 5
+ (and (eq_attr "tune" "vulcan")
+ (eq_attr "type" "neon_rbit,\
+ neon_bsl,neon_bsl_q,\
+ neon_cls,neon_cls_q,\
+ neon_cnt,neon_cnt_q,\
+ neon_from_gp,neon_from_gp_q,\
+ neon_dup,neon_dup_q,\
+ neon_ext,neon_ext_q,\
+ neon_ins,neon_ins_q,\
+ neon_move,neon_move_q,\
+ neon_fp_recpe_s,neon_fp_recpe_d,\
+ neon_fp_recpe_s_q,neon_fp_recpe_d_q,\
+ neon_fp_recpx_s,neon_fp_recpx_d,\
+ neon_fp_recpx_s_q,neon_fp_recpx_d_q,\
+ neon_rev,neon_rev_q,\
+ neon_dup,neon_dup_q,\
+ neon_permute,neon_permute_q"))
+ "vulcan_f01")
+
+(define_insn_reservation "vulcan_asimd_recip_step" 6
+ (and (eq_attr "tune" "vulcan")
+ (eq_attr "type" "neon_fp_recps_s,neon_fp_recps_s_q,\
+ neon_fp_recps_d,neon_fp_recps_d_q"))
+ "vulcan_f01")
+
+(define_insn_reservation "vulcan_asimd_lut" 8
+ (and (eq_attr "tune" "vulcan")
+ (eq_attr "type" "neon_tbl1,neon_tbl1_q,neon_tbl2_q"))
+ "vulcan_f01")
+
+(define_insn_reservation "vulcan_asimd_elt_to_gr" 6
+ (and (eq_attr "tune" "vulcan")
+ (eq_attr "type" "neon_to_gp,neon_to_gp_q"))
+ "vulcan_f01")
+
+;; ASIMD load instructions.
+
+; NOTE: These reservations attempt to model latency and throughput correctly,
+; but the cycle timing of unit allocation is not necessarily accurate (because
+; insns are split into uops, and those may be issued out-of-order).
+
+(define_insn_reservation "vulcan_asimd_load1_1_mult" 4
+ (and (eq_attr "tune" "vulcan")
+ (eq_attr "type" "neon_load1_1reg,neon_load1_1reg_q"))
+ "vulcan_ls01")
+
+(define_insn_reservation "vulcan_asimd_load1_2_mult" 4
+ (and (eq_attr "tune" "vulcan")
+ (eq_attr "type" "neon_load1_2reg,neon_load1_2reg_q"))
+ "vulcan_ls_both")
+
+(define_insn_reservation "vulcan_asimd_load1_3_mult" 5
+ (and (eq_attr "tune" "vulcan")
+ (eq_attr "type" "neon_load1_3reg,neon_load1_3reg_q"))
+ "(vulcan_ls_both,vulcan_ls01)|(vulcan_ls01,vulcan_ls_both)")
+
+(define_insn_reservation "vulcan_asimd_load1_4_mult" 6
+ (and (eq_attr "tune" "vulcan")
+ (eq_attr "type" "neon_load1_4reg,neon_load1_4reg_q"))
+ "vulcan_ls_both*2")
+
+(define_insn_reservation "vulcan_asimd_load1_onelane" 5
+ (and (eq_attr "tune" "vulcan")
+ (eq_attr "type" "neon_load1_one_lane,neon_load1_one_lane_q"))
+ "vulcan_l01delay,vulcan_f01")
+
+(define_insn_reservation "vulcan_asimd_load1_all" 5
+ (and (eq_attr "tune" "vulcan")
+ (eq_attr "type" "neon_load1_all_lanes,neon_load1_all_lanes_q"))
+ "vulcan_l01delay,vulcan_f01")
+
+(define_insn_reservation "vulcan_asimd_load2" 5
+ (and (eq_attr "tune" "vulcan")
+ (eq_attr "type" "neon_load2_2reg,neon_load2_2reg_q,\
+ neon_load2_one_lane,neon_load2_one_lane_q,\
+ neon_load2_all_lanes,neon_load2_all_lanes_q"))
+ "(vulcan_l0delay,vulcan_f01)|(vulcan_l1delay,vulcan_f01)")
+
+(define_insn_reservation "vulcan_asimd_load3_mult" 8
+ (and (eq_attr "tune" "vulcan")
+ (eq_attr "type" "neon_load3_3reg,neon_load3_3reg_q"))
+ "vulcan_ls_both*3,(vulcan_ls0d1+vulcan_ls1d1),(vulcan_ls0d2+vulcan_ls1d2),\
+ (vulcan_ls0d3+vulcan_ls1d3),vulcan_f01")
+
+(define_insn_reservation "vulcan_asimd_load3_elts" 7
+ (and (eq_attr "tune" "vulcan")
+ (eq_attr "type" "neon_load3_one_lane,neon_load3_one_lane_q,\
+ neon_load3_all_lanes,neon_load3_all_lanes_q"))
+ "vulcan_ls_both,vulcan_l01delay,vulcan_f01")
+
+(define_insn_reservation "vulcan_asimd_load4_mult" 8
+ (and (eq_attr "tune" "vulcan")
+ (eq_attr "type" "neon_load4_4reg,neon_load4_4reg_q"))
+ "vulcan_ls_both*4,(vulcan_ls0d1+vulcan_ls1d1),(vulcan_ls0d2+vulcan_ls1d2),\
+ (vulcan_ls0d3+vulcan_ls1d3),vulcan_f01")
+
+(define_insn_reservation "vulcan_asimd_load4_elts" 6
+ (and (eq_attr "tune" "vulcan")
+ (eq_attr "type" "neon_load4_one_lane,neon_load4_one_lane_q,\
+ neon_load4_all_lanes,neon_load4_all_lanes_q"))
+ "vulcan_ls_both*2,(vulcan_ls0d1+vulcan_ls1d1),(vulcan_ls0d2+vulcan_ls1d2),\
+ (vulcan_ls0d3+vulcan_ls1d3),vulcan_f01")
+
+;; ASIMD store instructions.
+
+; Same note applies as for ASIMD load instructions.
+
+(define_insn_reservation "vulcan_asimd_store1_1_mult" 0
+ (and (eq_attr "tune" "vulcan")
+ (eq_attr "type" "neon_store1_1reg,neon_store1_1reg_q"))
+ "vulcan_ls01")
+
+(define_insn_reservation "vulcan_asimd_store1_2_mult" 0
+ (and (eq_attr "tune" "vulcan")
+ (eq_attr "type" "neon_store1_2reg,neon_store1_2reg_q"))
+ "vulcan_ls_both")
+
+(define_insn_reservation "vulcan_asimd_store1_3_mult" 0
+ (and (eq_attr "tune" "vulcan")
+ (eq_attr "type" "neon_store1_3reg,neon_store1_3reg_q"))
+ "(vulcan_ls_both,vulcan_ls01)|(vulcan_ls01,vulcan_ls_both)")
+
+(define_insn_reservation "vulcan_asimd_store1_4_mult" 0
+ (and (eq_attr "tune" "vulcan")
+ (eq_attr "type" "neon_store1_4reg,neon_store1_4reg_q"))
+ "vulcan_ls_both*2")
+
+(define_insn_reservation "vulcan_asimd_store1_onelane" 0
+ (and (eq_attr "tune" "vulcan")
+ (eq_attr "type" "neon_store1_one_lane,neon_store1_one_lane_q"))
+ "vulcan_ls01,vulcan_f01")
+
+(define_insn_reservation "vulcan_asimd_store2_mult" 0
+ (and (eq_attr "tune" "vulcan")
+ (eq_attr "type" "neon_store2_2reg,neon_store2_2reg_q"))
+ "vulcan_ls_both,vulcan_f01")
+
+(define_insn_reservation "vulcan_asimd_store2_onelane" 0
+ (and (eq_attr "tune" "vulcan")
+ (eq_attr "type" "neon_store2_one_lane,neon_store2_one_lane_q"))
+ "vulcan_ls01,vulcan_f01")
+
+(define_insn_reservation "vulcan_asimd_store3_mult" 0
+ (and (eq_attr "tune" "vulcan")
+ (eq_attr "type" "neon_store3_3reg,neon_store3_3reg_q"))
+ "vulcan_ls_both*3,vulcan_f01")
+
+(define_insn_reservation "vulcan_asimd_store3_onelane" 0
+ (and (eq_attr "tune" "vulcan")
+ (eq_attr "type" "neon_store3_one_lane,neon_store3_one_lane_q"))
+ "vulcan_ls_both,vulcan_f01")
+
+(define_insn_reservation "vulcan_asimd_store4_mult" 0
+ (and (eq_attr "tune" "vulcan")
+ (eq_attr "type" "neon_store4_4reg,neon_store4_4reg_q"))
+ "vulcan_ls_both*4,vulcan_f01")
+
+(define_insn_reservation "vulcan_asimd_store4_onelane" 0
+ (and (eq_attr "tune" "vulcan")
+ (eq_attr "type" "neon_store4_one_lane,neon_store4_one_lane_q"))
+ "vulcan_ls_both,vulcan_f01")
+
+;; Crypto extensions.
+
+; FIXME: Forwarding path for aese/aesmc or aesd/aesimc pairs?
+
+(define_insn_reservation "vulcan_aes" 5
+ (and (eq_attr "tune" "vulcan")
+ (eq_attr "type" "crypto_aese,crypto_aesmc"))
+ "vulcan_f1")
+
+(define_insn_reservation "vulcan_sha" 7
+ (and (eq_attr "tune" "vulcan")
+ (eq_attr "type" "crypto_sha1_fast,crypto_sha1_xor,crypto_sha1_slow,\
+ crypto_sha256_fast,crypto_sha256_slow"))
+ "vulcan_f1")
+
+;; CRC extension.
+
+(define_insn_reservation "vulcan_crc" 4
+ (and (eq_attr "tune" "vulcan")
+ (eq_attr "type" "crc"))
+ "vulcan_i1")
--
2.4.11
More information about the Gcc-patches
mailing list