[gcc.git] / gcc / config / aarch64 / thunderx.md

;; Cavium ThunderX pipeline description
;; Copyright (C) 2014-2015 Free Software Foundation, Inc.
;;
;; Written by Andrew Pinski  <apinski@cavium.com>

;; This file is part of GCC.

;; GCC is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.

;; GCC is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.


;; Thunder is a dual-issue processor that can issue all instructions on
;; pipe0 and a subset on pipe1.


;; Four automata are declared: the two issue pipes share "thunderx_main",
;; while the multiplier, the divider and the SIMD unit each live in an
;; automaton of their own.
(define_automaton "thunderx_main, thunderx_mult, thunderx_divide, thunderx_simd")

;; The two issue pipes.
(define_cpu_unit "thunderx_pipe0" "thunderx_main")
(define_cpu_unit "thunderx_pipe1" "thunderx_main")
;; Functional units; every reservation in this file that uses one of them
;; reserves it together with thunderx_pipe1.
(define_cpu_unit "thunderx_mult" "thunderx_mult")
(define_cpu_unit "thunderx_divide" "thunderx_divide")
(define_cpu_unit "thunderx_simd" "thunderx_simd")

;; Basic integer ALU operations (add/adc, adr, extend, logical operations and
;; register/immediate moves): one-cycle latency, issued on either pipe.
(define_insn_reservation "thunderx_add" 1
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "adc_imm,adc_reg,adr,alu_imm,alu_sreg,alus_imm,alus_sreg,extend,logic_imm,logic_reg,logics_imm,logics_reg,mov_imm,mov_reg"))
  "thunderx_pipe0 | thunderx_pipe1")

;; Bitfield moves and plain shifts: one-cycle latency, issued on either pipe.
;; The "extend" type is deliberately not repeated here: thunderx_add already
;; lists it with the same latency and the same unit reservation, so a second
;; entry would be redundant.
(define_insn_reservation "thunderx_shift" 1
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "bfm,shift_imm,shift_reg"))
  "thunderx_pipe0 | thunderx_pipe1")


;; Arithmetic instructions with an extra shift or extend take two cycles.
;; FIXME: This needs more attributes on aarch64 than what is currently there;
;;    this is conservative for now.
;; Except this is not correct as this is only for !(LSL && shift by 0/1/2/3)
;; Except this is not correct as this is only for !(zero extend)
;;
;; Every shifted ALU/logic type is listed in both its _imm and its _reg form
;; (including alus_shift_reg) so that no flag-setting variant is left without
;; a reservation.

(define_insn_reservation "thunderx_arith_shift" 2
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "alu_ext,alu_shift_imm,alu_shift_reg,alus_ext,alus_shift_imm,alus_shift_reg,logic_shift_imm,logic_shift_reg,logics_shift_imm,logics_shift_reg"))
  "thunderx_pipe0 | thunderx_pipe1")

;; Conditional select: two-cycle latency, issued on either pipe.
(define_insn_reservation "thunderx_csel" 2
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "csel"))
  "thunderx_pipe0 | thunderx_pipe1")

;; Multiply, multiply-accumulate and count-leading-zeros can only issue on
;; pipe 1.  They additionally reserve the multiply unit during the issue
;; cycle; the result latency is four cycles.

(define_insn_reservation "thunderx_mul" 4
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "mul,muls,mla,mlas,clz,smull,umull,smlal,umlal"))
  "thunderx_pipe1 + thunderx_mult")

;; Multiply high instructions take an extra cycle and cause the multiply unit
;; to be busy for an extra cycle.

;(define_insn_reservation "thunderx_mul_high" 5
;  (and (eq_attr "tune" "thunderx")
;       (eq_attr "type" "smull,umull"))
;  "thunderx_pipe1 + thunderx_mult")

;; Integer divide: 22-cycle latency.  The insn issues on pipe 1 and holds the
;; divide unit for the issue cycle plus 21 further cycles (22 in total).
;; Despite the "div32" name the condition matches every udiv/sdiv insn; the
;; mode-specific 64-bit variant below is commented out.
(define_insn_reservation "thunderx_div32" 22
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "udiv,sdiv"))
  "thunderx_pipe1 + thunderx_divide, thunderx_divide * 21")

;(define_insn_reservation "thunderx_div64" 38
;  (and (eq_attr "tune" "thunderx")
;       (eq_attr "type" "udiv,sdiv")
;       (eq_attr "mode" "DI"))
;  "thunderx_pipe1 + thunderx_divide, thunderx_divide * 34")

;; Single-register stores (type store1) take one cycle and can only issue
;; on pipe 0.
(define_insn_reservation "thunderx_store" 1
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "store1"))
  "thunderx_pipe0")

;; Store pairs (type store2) are single issued: both pipes are reserved
;; for the one cycle the store takes, so nothing can issue alongside it.
(define_insn_reservation "thunderx_storepair" 1
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "store2"))
  "thunderx_pipe0 + thunderx_pipe1")


;; Loads and load pairs (types load1 and load2) that hit in L1 have a
;; three-cycle latency and issue on pipe 0 only.  Unlike store pairs, a load
;; pair does not reserve pipe 1.
(define_insn_reservation "thunderx_load" 3
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "load1, load2"))
  "thunderx_pipe0")

;; Branches, traps and calls: one-cycle latency, pipe 1 only.
(define_insn_reservation "thunderx_brj" 1
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "branch,trap,call"))
  "thunderx_pipe1")

;; FPU

;; Insns typed fadds/faddd (single/double FP add): four-cycle latency,
;; pipe 1 only.
(define_insn_reservation "thunderx_fadd" 4
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "faddd,fadds"))
  "thunderx_pipe1")

;; Insns typed fconsts/fconstd: one-cycle latency, pipe 1 only.
(define_insn_reservation "thunderx_fconst" 1
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "fconsts,fconstd"))
  "thunderx_pipe1")

;; Moves between FP registers take two cycles; the same timing is used for
;; min/max, select and abs/neg.  All of them issue on pipe 1 only.
(define_insn_reservation "thunderx_fmov" 2
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "fmov,f_minmaxs,f_minmaxd,fcsel,ffarithd,ffariths"))
  "thunderx_pipe1")

;; Insns typed f_mrc/f_mcr: two-cycle latency, pipe 1 only.
;; NOTE(review): going by the reservation name these are presumably the
;; moves between FP and general-purpose registers -- confirm against the
;; type attribute definitions.
(define_insn_reservation "thunderx_fmovgpr" 2
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "f_mrc, f_mcr"))
  "thunderx_pipe1")

;; FP multiply and multiply-accumulate (single and double): six-cycle
;; latency.  Only the pipe 1 issue slot is reserved; no separate functional
;; unit is modelled for these.
(define_insn_reservation "thunderx_fmul" 6
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "fmacs,fmacd,fmuls,fmuld"))
  "thunderx_pipe1")

;; Single-precision FP divide: 12-cycle latency.  Issues on pipe 1 and holds
;; the divide unit (the same unit integer divides use) for the issue cycle
;; plus 8 further cycles.
(define_insn_reservation "thunderx_fdivs" 12
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "fdivs"))
  "thunderx_pipe1 + thunderx_divide, thunderx_divide*8")

;; Double-precision FP divide: 22-cycle latency.  Issues on pipe 1 and holds
;; the divide unit for the issue cycle plus 18 further cycles.
(define_insn_reservation "thunderx_fdivd" 22
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "fdivd"))
  "thunderx_pipe1 + thunderx_divide, thunderx_divide*18")

;; Single-precision FP square root: 17-cycle latency.  Issues on pipe 1 and
;; holds the divide unit for the issue cycle plus 13 further cycles.
(define_insn_reservation "thunderx_fsqrts" 17
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "fsqrts"))
  "thunderx_pipe1 + thunderx_divide, thunderx_divide*13")

;; Double-precision FP square root: 28-cycle latency.  Issues on pipe 1 and
;; holds the divide unit for the issue cycle plus 31 further cycles.
;; NOTE(review): that is 32 cycles of divider occupancy for a 28-cycle
;; latency, whereas fdivs/fdivd/fsqrts all hold the divider for fewer cycles
;; than their latency (latency - 3 in total).  Either the "*31" or the
;; latency looks like a typo -- confirm against the hardware documentation.
(define_insn_reservation "thunderx_fsqrtd" 28
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "fsqrtd"))
  "thunderx_pipe1 + thunderx_divide, thunderx_divide*31")

;; Rounding conversions that stay inside the FP register file (types
;; f_rints/f_rintd) take four cycles and issue on pipe 1 only.
(define_insn_reservation "thunderx_frint" 4
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "f_rints,f_rintd"))
  "thunderx_pipe1")

;; FP conversions, including float<->integer conversions that also move the
;; value between the integer and FP register files, take six cycles and
;; issue on pipe 1 only.
(define_insn_reservation "thunderx_f_cvt" 6
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "f_cvt,f_cvtf2i,f_cvti2f"))
  "thunderx_pipe1")

;; FP/SIMD loads and stores happen in pipe 0.
;; 64-bit register and register-pair loads have a four-cycle latency from L1.
;; NOTE(review): neon_load1_1reg_q is listed here as well even though the
;; heading says 64bit; presumably a single 128-bit register load is timed
;; like a 64-bit pair -- confirm.
(define_insn_reservation "thunderx_64simd_fp_load" 4
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "f_loadd,f_loads,neon_load1_1reg,\
			neon_load1_1reg_q,neon_load1_2reg"))
  "thunderx_pipe0")

;; A 128-bit load pair is single issued (both pipes are reserved for the
;; issue cycle) and has a four-cycle latency from L1.
(define_insn_reservation "thunderx_128simd_pair_load" 4
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "neon_load1_2reg_q"))
  "thunderx_pipe0+thunderx_pipe1")

;; FP/SIMD single-register stores (64-bit and 128-bit) take one cycle in
;; pipe 0.
(define_insn_reservation "thunderx_simd_fp_store" 1
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "f_stored,f_stores,neon_store1_1reg,neon_store1_1reg_q"))
  "thunderx_pipe0")

;; 64-bit NEON store pairs are single issued: both pipes are reserved for
;; one cycle.
(define_insn_reservation "thunderx_64neon_storepair" 1
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "neon_store1_2reg"))
  "thunderx_pipe0 + thunderx_pipe1")

;; 128-bit NEON store pairs are single issued for two cycles: both pipes
;; are reserved in each of two consecutive cycles.
(define_insn_reservation "thunderx_128neon_storepair" 2
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "neon_store1_2reg_q"))
  "(thunderx_pipe0 + thunderx_pipe1)*2")


;; SIMD/NEON (q forms take an extra cycle)

;; Thunder SIMD move-class instruction types (logic, bit-select, FP compare,
;; move): 2 cycles for the 64-bit forms here, 3 cycles for the q forms.
;; The 64-bit forms issue on pipe 1 and reserve the SIMD unit for one cycle.
(define_insn_reservation "thunderx_neon_move" 2
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "neon_logic, neon_bsl, neon_fp_compare_s, \
			neon_fp_compare_d, neon_move"))
  "thunderx_pipe1 + thunderx_simd")

;; 128-bit (q) forms of the SIMD move-class types: three-cycle latency.
;; They issue on pipe 1 and keep the SIMD unit busy for a second cycle after
;; the issue cycle.
(define_insn_reservation "thunderx_neon_move_q" 3
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "neon_logic_q, neon_bsl_q, neon_fp_compare_s_q, \
			neon_fp_compare_d_q, neon_move_q"))
  "thunderx_pipe1 + thunderx_simd, thunderx_simd")


;; Thunder simd simple/add instruction types - 4/5 cycles

;; 64-bit forms of the simple/add-class SIMD types: four-cycle latency.
;; They issue on pipe 1 and reserve the SIMD unit for one cycle.
;; Each type is listed exactly once.
(define_insn_reservation "thunderx_neon_add" 4
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "neon_reduc_add, neon_reduc_minmax, neon_fp_reduc_add_s, \
			neon_fp_reduc_add_d, neon_fp_to_int_s, neon_fp_to_int_d, \
			neon_add_halve, neon_sub_halve, neon_qadd, neon_compare, \
			neon_compare_zero, neon_minmax, neon_abd, neon_add, neon_sub, \
			neon_fp_minmax_s, neon_fp_minmax_d, neon_cls, \
			neon_qabs, neon_qneg, neon_fp_addsub_s, neon_fp_addsub_d"))
  "thunderx_pipe1 + thunderx_simd")

;; BIG NOTE: neon_add_long/neon_sub_long don't have a q form which is incorrect

;; 128-bit (q) forms of the simple/add-class SIMD types: five-cycle latency.
;; They issue on pipe 1 and keep the SIMD unit busy for a second cycle after
;; the issue cycle.  neon_add_long and neon_sub_long are grouped here because
;; those types have no separate q form.  Each type is listed exactly once.
(define_insn_reservation "thunderx_neon_add_q" 5
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "neon_reduc_add_q, neon_reduc_minmax_q, neon_fp_reduc_add_s_q, \
			neon_fp_reduc_add_d_q, neon_fp_to_int_s_q, neon_fp_to_int_d_q, \
			neon_add_halve_q, neon_sub_halve_q, neon_qadd_q, neon_compare_q, \
			neon_compare_zero_q, neon_minmax_q, neon_abd_q, neon_add_q, neon_sub_q, \
			neon_fp_minmax_s_q, neon_fp_minmax_d_q, neon_cls_q, \
			neon_qabs_q, neon_qneg_q, neon_fp_addsub_s_q, neon_fp_addsub_d_q, \
			neon_add_long, neon_sub_long"))
  "thunderx_pipe1 + thunderx_simd, thunderx_simd")


;; Thunder 128-bit SIMD reads the upper half in cycle 2 and writes it in the
;; last cycle.  Hence when a q-form producer feeds a q-form consumer the
;; latency is one cycle shorter than the producer's default: 2 instead of 3
;; for thunderx_neon_move_q and 4 instead of 5 for thunderx_neon_add_q.
(define_bypass 2 "thunderx_neon_move_q" "thunderx_neon_move_q, thunderx_neon_add_q")
(define_bypass 4 "thunderx_neon_add_q" "thunderx_neon_move_q, thunderx_neon_add_q")

;; Assume both pipes are needed for unknown (untyped) and
;; multiple-instruction patterns: they reserve pipe 0 and pipe 1 together
;; for one cycle, so nothing is dual issued with them.

(define_insn_reservation "thunderx_unknown" 1
  (and (eq_attr "tune" "thunderx")
       (eq_attr "type" "untyped,multiple"))
  "thunderx_pipe0 + thunderx_pipe1")
Commit	Line	Data
2d41ed58	1	;; Cavium ThunderX pipeline description
5624e564	2	;; Copyright (C) 2014-2015 Free Software Foundation, Inc.
2d41ed58 AP	3	;;
	4	;; Written by Andrew Pinski <apinski@cavium.com>
	5
	6	;; This file is part of GCC.
	7
	8	;; GCC is free software; you can redistribute it and/or modify
	9	;; it under the terms of the GNU General Public License as published by
	10	;; the Free Software Foundation; either version 3, or (at your option)
	11	;; any later version.
	12
	13	;; GCC is distributed in the hope that it will be useful,
	14	;; but WITHOUT ANY WARRANTY; without even the implied warranty of
	15	;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	16	;; GNU General Public License for more details.
	17
	18	;; You should have received a copy of the GNU General Public License
	19	;; along with GCC; see the file COPYING3. If not see
	20	;; <http://www.gnu.org/licenses/>.
2d41ed58 AP	21
	22
	23	;; Thunder is a dual-issue processor that can issue all instructions on
	24	;; pipe0 and a subset on pipe1.
	25
	26
	27	(define_automaton "thunderx_main, thunderx_mult, thunderx_divide, thunderx_simd")
	28
	29	(define_cpu_unit "thunderx_pipe0" "thunderx_main")
	30	(define_cpu_unit "thunderx_pipe1" "thunderx_main")
	31	(define_cpu_unit "thunderx_mult" "thunderx_mult")
	32	(define_cpu_unit "thunderx_divide" "thunderx_divide")
	33	(define_cpu_unit "thunderx_simd" "thunderx_simd")
	34
	35	(define_insn_reservation "thunderx_add" 1
	36	(and (eq_attr "tune" "thunderx")
	37	(eq_attr "type" "adc_imm,adc_reg,adr,alu_imm,alu_sreg,alus_imm,alus_sreg,extend,logic_imm,logic_reg,logics_imm,logics_reg,mov_imm,mov_reg"))
	38	"thunderx_pipe0 \| thunderx_pipe1")
	39
	40	(define_insn_reservation "thunderx_shift" 1
	41	(and (eq_attr "tune" "thunderx")
	42	(eq_attr "type" "bfm,extend,shift_imm,shift_reg"))
	43	"thunderx_pipe0 \| thunderx_pipe1")
	44
	45
	46	;; Arthimentic instructions with an extra shift or extend is two cycles.
	47	;; FIXME: This needs more attributes on aarch64 than what is currently there;
	48	;; this is conserative for now.
	49	;; Except this is not correct as this is only for !(LSL && shift by 0/1/2/3)
	50	;; Except this is not correct as this is only for !(zero extend)
	51
	52	(define_insn_reservation "thunderx_arith_shift" 2
	53	(and (eq_attr "tune" "thunderx")
	54	(eq_attr "type" "alu_ext,alu_shift_imm,alu_shift_reg,alus_ext,logic_shift_imm,logic_shift_reg,logics_shift_imm,logics_shift_reg,alus_shift_imm"))
	55	"thunderx_pipe0 \| thunderx_pipe1")
	56
	57	(define_insn_reservation "thunderx_csel" 2
	58	(and (eq_attr "tune" "thunderx")
	59	(eq_attr "type" "csel"))
	60	"thunderx_pipe0 \| thunderx_pipe1")
	61
	62	;; Multiply and mulitply accumulate and count leading zeros can only happen on pipe 1
	63
	64	(define_insn_reservation "thunderx_mul" 4
	65	(and (eq_attr "tune" "thunderx")
	66	(eq_attr "type" "mul,muls,mla,mlas,clz,smull,umull,smlal,umlal"))
	67	"thunderx_pipe1 + thunderx_mult")
	68
	69	;; Multiply high instructions take an extra cycle and cause the muliply unit to
	70	;; be busy for an extra cycle.
	71
	72	;(define_insn_reservation "thunderx_mul_high" 5
	73	; (and (eq_attr "tune" "thunderx")
	74	; (eq_attr "type" "smull,umull"))
	75	; "thunderx_pipe1 + thunderx_mult")
	76
	77	(define_insn_reservation "thunderx_div32" 22
	78	(and (eq_attr "tune" "thunderx")
	79	(eq_attr "type" "udiv,sdiv"))
	80	"thunderx_pipe1 + thunderx_divide, thunderx_divide * 21")
	81
	82	;(define_insn_reservation "thunderx_div64" 38
	83	; (and (eq_attr "tune" "thunderx")
	84	; (eq_attr "type" "udiv,sdiv")
85	; (eq_attr "mode" "DI"))
86	; "thunderx_pipe1 + thunderx_divide, thunderx_divide * 34")
87
88	;; Stores take one cycle in pipe 0
89	(define_insn_reservation "thunderx_store" 1
90	(and (eq_attr "tune" "thunderx")
91	(eq_attr "type" "store1"))
92	"thunderx_pipe0")
93
94	;; Store pair are single issued
95	(define_insn_reservation "thunderx_storepair" 1
96	(and (eq_attr "tune" "thunderx")
97	(eq_attr "type" "store2"))
98	"thunderx_pipe0 + thunderx_pipe1")
99
100
101	;; loads (and load pairs) from L1 take 3 cycles in pipe 0
102	(define_insn_reservation "thunderx_load" 3
103	(and (eq_attr "tune" "thunderx")
104	(eq_attr "type" "load1, load2"))
105	"thunderx_pipe0")
106
107	(define_insn_reservation "thunderx_brj" 1
108	(and (eq_attr "tune" "thunderx")
109	(eq_attr "type" "branch,trap,call"))
110	"thunderx_pipe1")
111
112	;; FPU
113
114	(define_insn_reservation "thunderx_fadd" 4
115	(and (eq_attr "tune" "thunderx")
116	(eq_attr "type" "faddd,fadds"))
117	"thunderx_pipe1")
118
119	(define_insn_reservation "thunderx_fconst" 1
120	(and (eq_attr "tune" "thunderx")
121	(eq_attr "type" "fconsts,fconstd"))
122	"thunderx_pipe1")
123
124	;; Moves between fp are 2 cycles including min/max/select/abs/neg
125	(define_insn_reservation "thunderx_fmov" 2
126	(and (eq_attr "tune" "thunderx")
127	(eq_attr "type" "fmov,f_minmaxs,f_minmaxd,fcsel,ffarithd,ffariths"))
128	"thunderx_pipe1")
129
130	(define_insn_reservation "thunderx_fmovgpr" 2
131	(and (eq_attr "tune" "thunderx")
132	(eq_attr "type" "f_mrc, f_mcr"))
133	"thunderx_pipe1")
134
135	(define_insn_reservation "thunderx_fmul" 6
136	(and (eq_attr "tune" "thunderx")
137	(eq_attr "type" "fmacs,fmacd,fmuls,fmuld"))
138	"thunderx_pipe1")
139
140	(define_insn_reservation "thunderx_fdivs" 12
141	(and (eq_attr "tune" "thunderx")
142	(eq_attr "type" "fdivs"))
143	"thunderx_pipe1 + thunderx_divide, thunderx_divide*8")
144
145	(define_insn_reservation "thunderx_fdivd" 22
146	(and (eq_attr "tune" "thunderx")
147	(eq_attr "type" "fdivd"))
148	"thunderx_pipe1 + thunderx_divide, thunderx_divide*18")
149
150	(define_insn_reservation "thunderx_fsqrts" 17
151	(and (eq_attr "tune" "thunderx")
152	(eq_attr "type" "fsqrts"))
153	"thunderx_pipe1 + thunderx_divide, thunderx_divide*13")
154
155	(define_insn_reservation "thunderx_fsqrtd" 28
156	(and (eq_attr "tune" "thunderx")
157	(eq_attr "type" "fsqrtd"))
158	"thunderx_pipe1 + thunderx_divide, thunderx_divide*31")
159
160	;; The rounding conversion inside fp is 4 cycles
161	(define_insn_reservation "thunderx_frint" 4
162	(and (eq_attr "tune" "thunderx")
163	(eq_attr "type" "f_rints,f_rintd"))
164	"thunderx_pipe1")
165
166	;; Float to integer with a move from int to/from float is 6 cycles
167	(define_insn_reservation "thunderx_f_cvt" 6
168	(and (eq_attr "tune" "thunderx")
169	(eq_attr "type" "f_cvt,f_cvtf2i,f_cvti2f"))
170	"thunderx_pipe1")
171
172	;; FP/SIMD load/stores happen in pipe 0
173	;; 64bit Loads register/pairs are 4 cycles from L1
174	(define_insn_reservation "thunderx_64simd_fp_load" 4
175	(and (eq_attr "tune" "thunderx")
176	(eq_attr "type" "f_loadd,f_loads,neon_load1_1reg,\
177	neon_load1_1reg_q,neon_load1_2reg"))
178	"thunderx_pipe0")
179
180	;; 128bit load pair is singled issue and 4 cycles from L1
181	(define_insn_reservation "thunderx_128simd_pair_load" 4
182	(and (eq_attr "tune" "thunderx")
183	(eq_attr "type" "neon_load1_2reg_q"))
184	"thunderx_pipe0+thunderx_pipe1")
185
186	;; FP/SIMD Stores takes one cycle in pipe 0
187	(define_insn_reservation "thunderx_simd_fp_store" 1
188	(and (eq_attr "tune" "thunderx")
189	(eq_attr "type" "f_stored,f_stores,neon_store1_1reg,neon_store1_1reg_q"))
190	"thunderx_pipe0")
191
192	;; 64bit neon store pairs are single issue for one cycle
193	(define_insn_reservation "thunderx_64neon_storepair" 1
194	(and (eq_attr "tune" "thunderx")
195	(eq_attr "type" "neon_store1_2reg"))
196	"thunderx_pipe0 + thunderx_pipe1")
197
198	;; 128bit neon store pair are single issued for two cycles
199	(define_insn_reservation "thunderx_128neon_storepair" 2
200	(and (eq_attr "tune" "thunderx")
201	(eq_attr "type" "neon_store1_2reg_q"))
202	"(thunderx_pipe0 + thunderx_pipe1)*2")
203
204
205	;; SIMD/NEON (q forms take an extra cycle)
206
207	;; Thunder simd move instruction types - 2/3 cycles
208	(define_insn_reservation "thunderx_neon_move" 2
209	(and (eq_attr "tune" "thunderx")
210	(eq_attr "type" "neon_logic, neon_bsl, neon_fp_compare_s, \
211	neon_fp_compare_d, neon_move"))
212	"thunderx_pipe1 + thunderx_simd")
213
214	(define_insn_reservation "thunderx_neon_move_q" 3
215	(and (eq_attr "tune" "thunderx")
216	(eq_attr "type" "neon_logic_q, neon_bsl_q, neon_fp_compare_s_q, \
217	neon_fp_compare_d_q, neon_move_q"))
218	"thunderx_pipe1 + thunderx_simd, thunderx_simd")
219
220
221	;; Thunder simd simple/add instruction types - 4/5 cycles
222
223	(define_insn_reservation "thunderx_neon_add" 4
224	(and (eq_attr "tune" "thunderx")
225	(eq_attr "type" "neon_reduc_add, neon_reduc_minmax, neon_fp_reduc_add_s, \
226	neon_fp_reduc_add_d, neon_fp_to_int_s, neon_fp_to_int_d, \
227	neon_add_halve, neon_sub_halve, neon_qadd, neon_compare, \
228	neon_compare_zero, neon_minmax, neon_abd, neon_add, neon_sub, \
229	neon_fp_minmax_s, neon_fp_minmax_d, neon_reduc_add, neon_cls, \
230	neon_qabs, neon_qneg, neon_fp_addsub_s, neon_fp_addsub_d"))
231	"thunderx_pipe1 + thunderx_simd")
232
233	;; BIG NOTE: neon_add_long/neon_sub_long don't have a q form which is incorrect
234
235	(define_insn_reservation "thunderx_neon_add_q" 5
236	(and (eq_attr "tune" "thunderx")
237	(eq_attr "type" "neon_reduc_add_q, neon_reduc_minmax_q, neon_fp_reduc_add_s_q, \
238	neon_fp_reduc_add_d_q, neon_fp_to_int_s_q, neon_fp_to_int_d_q, \
239	neon_add_halve_q, neon_sub_halve_q, neon_qadd_q, neon_compare_q, \
240	neon_compare_zero_q, neon_minmax_q, neon_abd_q, neon_add_q, neon_sub_q, \
241	neon_fp_minmax_s_q, neon_fp_minmax_d_q, neon_reduc_add_q, neon_cls_q, \
242	neon_qabs_q, neon_qneg_q, neon_fp_addsub_s_q, neon_fp_addsub_d_q, \
243	neon_add_long, neon_sub_long"))
244	"thunderx_pipe1 + thunderx_simd, thunderx_simd")
245
246
247	;; Thunder 128bit SIMD reads the upper halve in cycle 2 and writes in the last cycle
248	(define_bypass 2 "thunderx_neon_move_q" "thunderx_neon_move_q, thunderx_neon_add_q")
249	(define_bypass 4 "thunderx_neon_add_q" "thunderx_neon_move_q, thunderx_neon_add_q")
250
251	;; Assume both pipes are needed for unknown and multiple-instruction
252	;; patterns.
253
254	(define_insn_reservation "thunderx_unknown" 1
255	(and (eq_attr "tune" "thunderx")
256	(eq_attr "type" "untyped,multiple"))
257	"thunderx_pipe0 + thunderx_pipe1")
258
259