]> gcc.gnu.org Git - gcc.git/blob - gcc/config/rs6000/power10.md
Add -mcpu=future tuning support.
[gcc.git] / gcc / config / rs6000 / power10.md
1 ;; Scheduling description for the IBM POWER10 and POWER11 processors as well as
2 ;; potential future processors.
3 ;; Copyright (C) 2020-2024 Free Software Foundation, Inc.
4 ;;
5 ;; Contributed by Pat Haugen (pthaugen@us.ibm.com).
6
7 ;; This file is part of GCC.
8 ;;
9 ;; GCC is free software; you can redistribute it and/or modify it
10 ;; under the terms of the GNU General Public License as published
11 ;; by the Free Software Foundation; either version 3, or (at your
12 ;; option) any later version.
13 ;;
14 ;; GCC is distributed in the hope that it will be useful, but WITHOUT
15 ;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 ;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 ;; License for more details.
18 ;;
19 ;; You should have received a copy of the GNU General Public License
20 ;; along with GCC; see the file COPYING3. If not see
21 ;; <http://www.gnu.org/licenses/>.
22
23 ; For Power10 we model (and try to pack) the in-order decode/dispatch groups
24 ; which consist of 8 instructions max. We do not try to model the details of
25 ; the out-of-order issue queues and how insns flow to the various execution
26 ; units except for the simple representation of the issue limitation of at
27 ; most 4 insns to the execution units/2 insns to the load units/2 insns to
28 ; the store units.
; Two automata are used: "power10dispatch" tracks the decode/dispatch
; slots and "power10issue" tracks the execution, load and store units.
(define_automaton "power10dispatch,power10issue")

; The eight decode/dispatch slots.
(define_cpu_unit
  "du0_power10,du1_power10,du2_power10,du3_power10,
   du4_power10,du5_power10,du6_power10,du7_power10"
  "power10dispatch")

; Issue side: four execution units ...
(define_cpu_unit
  "exu0_power10,exu1_power10,exu2_power10,exu3_power10"
  "power10issue")
; ... two load units ...
(define_cpu_unit "lu0_power10,lu1_power10" "power10issue")
; ... and two store units.
(define_cpu_unit "stu0_power10,stu1_power10" "power10issue")
41
42
; Dispatch slots are allocated in program order.  Each absence_set pairs
; a slot with the list of all higher-numbered slots, so that a slot is
; not taken after a later one.
(absence_set "du0_power10"
             "du1_power10,du2_power10,du3_power10,\
              du4_power10,du5_power10,du6_power10,du7_power10")
(absence_set "du1_power10"
             "du2_power10,du3_power10,du4_power10,\
              du5_power10,du6_power10,du7_power10")
(absence_set "du2_power10"
             "du3_power10,du4_power10,du5_power10,\
              du6_power10,du7_power10")
(absence_set "du3_power10"
             "du4_power10,du5_power10,du6_power10,du7_power10")
(absence_set "du4_power10"
             "du5_power10,du6_power10,du7_power10")
(absence_set "du5_power10"
             "du6_power10,du7_power10")
(absence_set "du6_power10"
             "du7_power10")
54
55
; Dispatch port reservations
;
; Power10 can dispatch a maximum of 8 iops per cycle, with a maximum of
; 4 VSU/2 Load/2 Store per cycle.

; Exactly one dispatch slot; any of the eight will do.
(define_reservation "DU_any_power10"
  "du0_power10|du1_power10|du2_power10|du3_power10|
   du4_power10|du5_power10|du6_power10|du7_power10")

; An even slot together with the odd slot that follows it.
(define_reservation "DU_even_power10"
  "du0_power10+du1_power10|du2_power10+du3_power10|
   du4_power10+du5_power10|du6_power10+du7_power10")

; All eight slots at once: 4-way cracked insns consume the whole
; decode/dispatch cycle.
(define_reservation "DU_all_power10"
  "du0_power10+du1_power10+du2_power10+du3_power10+
   du4_power10+du5_power10+du6_power10+du7_power10")
75
76
; Execution unit reservations

; Either of the two load units.
(define_reservation "LU_power10"
  "lu0_power10|lu1_power10")

; Either of the two store units.
(define_reservation "STU_power10"
  "stu0_power10|stu1_power10")

; Certain simple fixed-point insns can execute in the Store-agen pipe,
; so this reservation names the same two units as STU_power10.
(define_reservation "SXU_power10"
  "stu0_power10|stu1_power10")

; Any one of the four execution units.
(define_reservation "EXU_power10"
  "exu0_power10|exu1_power10|exu2_power10|exu3_power10")

; Two execution units together: either exu0+exu1 or exu2+exu3.
(define_reservation "EXU_super_power10"
  "exu0_power10+exu1_power10|exu2_power10+exu3_power10")
93
94
; Load Unit

; Non-update, non-prefixed loads whose size is not 128: one dispatch
; slot and one load unit.
(define_insn_reservation "power10-load" 4
  (and (eq_attr "cpu" "power10,power11,future")
       (eq_attr "type" "load")
       (eq_attr "update" "no")
       (eq_attr "prefixed" "no")
       (eq_attr "size" "!128"))
  "DU_any_power10,LU_power10")

; Fused load forms take an even/odd dispatch slot pair.
(define_insn_reservation "power10-fused-load" 4
  (and (eq_attr "cpu" "power10,power11,future")
       (eq_attr "type" "fused_load_cmpi,fused_addis_load,fused_load_load"))
  "DU_even_power10,LU_power10")

; Prefixed counterpart of power10-load; also takes a slot pair.
(define_insn_reservation "power10-prefixed-load" 4
  (and (eq_attr "cpu" "power10,power11,future")
       (eq_attr "type" "load")
       (eq_attr "update" "yes" "no")
       (eq_attr "prefixed" "yes")
       (eq_attr "size" "!128"))
  "DU_even_power10,LU_power10")

; Update-form loads reserve a store-agen pipe alongside the load unit.
(define_insn_reservation "power10-load-update" 4
  (and (eq_attr "cpu" "power10,power11,future")
       (eq_attr "type" "load")
       (eq_attr "update" "yes"))
  "DU_even_power10,LU_power10+SXU_power10")
122
; 64-bit FP loads, non-update and non-prefixed.
(define_insn_reservation "power10-fpload-double" 4
  (and (eq_attr "cpu" "power10,power11,future")
       (eq_attr "type" "fpload")
       (eq_attr "size" "64")
       (eq_attr "update" "no")
       (eq_attr "prefixed" "no"))
  "DU_any_power10,LU_power10")

; Prefixed 64-bit FP loads take an even/odd dispatch slot pair.
(define_insn_reservation "power10-prefixed-fpload-double" 4
  (and (eq_attr "cpu" "power10,power11,future")
       (eq_attr "type" "fpload")
       (eq_attr "size" "64")
       (eq_attr "update" "no")
       (eq_attr "prefixed" "yes"))
  "DU_even_power10,LU_power10")

; Update-form 64-bit FP loads also reserve a store-agen pipe.
(define_insn_reservation "power10-fpload-update-double" 4
  (and (eq_attr "cpu" "power10,power11,future")
       (eq_attr "type" "fpload")
       (eq_attr "size" "64")
       (eq_attr "update" "yes"))
  "DU_even_power10,LU_power10+SXU_power10")

; SFmode loads are cracked and cost 3 cycles more than DFmode loads.
; Prefixed forms behave the same, so "prefixed" is not tested here.
(define_insn_reservation "power10-fpload-single" 7
  (and (eq_attr "cpu" "power10,power11,future")
       (eq_attr "type" "fpload")
       (eq_attr "size" "32")
       (eq_attr "update" "no"))
  "DU_even_power10,LU_power10")

; Update-form SFmode loads: same latency, plus a store-agen pipe.
(define_insn_reservation "power10-fpload-update-single" 7
  (and (eq_attr "cpu" "power10,power11,future")
       (eq_attr "type" "fpload")
       (eq_attr "size" "32")
       (eq_attr "update" "yes"))
  "DU_even_power10,LU_power10+SXU_power10")
161
; Vector loads other than the 256-bit paired form.
(define_insn_reservation "power10-vecload" 4
  (and (eq_attr "cpu" "power10,power11,future")
       (eq_attr "type" "vecload")
       (eq_attr "size" "!256"))
  "DU_any_power10,LU_power10")

; lxvp: 256-bit vector pair load; takes a dispatch slot pair and
; reserves a store-agen pipe along with the load unit.
(define_insn_reservation "power10-vecload-pair" 4
  (and (eq_attr "cpu" "power10,power11,future")
       (eq_attr "type" "vecload")
       (eq_attr "size" "256"))
  "DU_even_power10,LU_power10+SXU_power10")
174
; Store Unit

; Non-update, non-prefixed integer/FP/vector stores whose size is
; neither 128 nor 256.
(define_insn_reservation "power10-store" 0
  (and (eq_attr "cpu" "power10,power11,future")
       (eq_attr "type" "store,fpstore,vecstore")
       (eq_attr "update" "no")
       (eq_attr "prefixed" "no")
       (eq_attr "size" "!128")
       (eq_attr "size" "!256"))
  "DU_any_power10,STU_power10")

; Fused store pairs take an even/odd dispatch slot pair.
(define_insn_reservation "power10-fused-store" 0
  (and (eq_attr "cpu" "power10,power11,future")
       (eq_attr "type" "fused_store_store"))
  "DU_even_power10,STU_power10")

; Prefixed stores with the same size restriction as power10-store.
(define_insn_reservation "power10-prefixed-store" 0
  (and (eq_attr "cpu" "power10,power11,future")
       (eq_attr "type" "store,fpstore,vecstore")
       (eq_attr "prefixed" "yes")
       (eq_attr "size" "!128")
       (eq_attr "size" "!256"))
  "DU_even_power10,STU_power10")

; Update forms have a 2 cycle latency for the updated address register.
(define_insn_reservation "power10-store-update" 2
  (and (eq_attr "cpu" "power10,power11,future")
       (eq_attr "type" "store,fpstore")
       (eq_attr "update" "yes"))
  "DU_any_power10,STU_power10")

; stxvp: 256-bit vector pair store; reserves both store units.
(define_insn_reservation "power10-vecstore-pair" 0
  (and (eq_attr "cpu" "power10,power11,future")
       (eq_attr "type" "vecstore")
       (eq_attr "size" "256"))
  "DU_even_power10,stu0_power10+stu1_power10")
211
; Load-and-reserve insns whose size is not 128.
(define_insn_reservation "power10-larx" 4
  (and (eq_attr "cpu" "power10,power11,future")
       (eq_attr "type" "load_l")
       (eq_attr "size" "!128"))
  "DU_any_power10,LU_power10")

; All load quad forms, plain or load-and-reserve.
(define_insn_reservation "power10-lq" 4
  (and (eq_attr "cpu" "power10,power11,future")
       (eq_attr "type" "load,load_l")
       (eq_attr "size" "128"))
  "DU_even_power10,LU_power10+SXU_power10")

; Store-conditional insns whose size is not 128.
(define_insn_reservation "power10-stcx" 0
  (and (eq_attr "cpu" "power10,power11,future")
       (eq_attr "type" "store_c")
       (eq_attr "size" "!128"))
  "DU_any_power10,STU_power10")

; All store quad forms, plain or conditional; both store units are used.
(define_insn_reservation "power10-stq" 0
  (and (eq_attr "cpu" "power10,power11,future")
       (eq_attr "type" "store,store_c")
       (eq_attr "size" "128"))
  "DU_even_power10,stu0_power10+stu1_power10")

; sync and isync are modeled on a store unit with a dispatch slot pair.
(define_insn_reservation "power10-sync" 1
  (and (eq_attr "cpu" "power10,power11,future")
       (eq_attr "type" "sync,isync"))
  "DU_even_power10,STU_power10")
242
243
; VSU Execution Unit

; Fixed point ops

; Most ALU insns are simple 2 cycle, including record form.
(define_insn_reservation "power10-alu" 2
  (and (eq_attr "cpu" "power10,power11,future")
       (eq_attr "type" "add,exts,integer,logical,isel")
       (eq_attr "prefixed" "no"))
  "DU_any_power10,EXU_power10")
; A CR result feeding a CR-logical or mfcr/mfcrf insn takes 4 cycles.
(define_bypass 4 "power10-alu"
  "power10-crlogical,power10-mfcr,power10-mfcrf")

; Fused ALU forms take an even/odd dispatch slot pair.
(define_insn_reservation "power10-fused_alu" 2
  (and (eq_attr "cpu" "power10,power11,future")
       (eq_attr "type" "fused_arith_logical,fused_cmp_isel,fused_carry"))
  "DU_even_power10,EXU_power10")

; paddi: the prefixed add also takes a dispatch slot pair.
(define_insn_reservation "power10-paddi" 2
  (and (eq_attr "cpu" "power10,power11,future")
       (eq_attr "type" "add")
       (eq_attr "prefixed" "yes"))
  "DU_even_power10,EXU_power10")

; Rotate/shift/insert, non-record form.
(define_insn_reservation "power10-rot" 2
  (and (eq_attr "cpu" "power10,power11,future")
       (eq_attr "type" "insert,shift")
       (eq_attr "dot" "no"))
  "DU_any_power10,EXU_power10")

; Rotate/shift/insert, record form: one cycle longer.
(define_insn_reservation "power10-rot-compare" 3
  (and (eq_attr "cpu" "power10,power11,future")
       (eq_attr "type" "insert,shift")
       (eq_attr "dot" "yes"))
  "DU_any_power10,EXU_power10")
; 5 cycle latency to a CR consumer.
(define_bypass 5 "power10-rot-compare"
  "power10-crlogical,power10-mfcr,power10-mfcrf")

; Count-leading-zeros, popcount and trap insns.
(define_insn_reservation "power10-alu2" 3
  (and (eq_attr "cpu" "power10,power11,future")
       (eq_attr "type" "cntlz,popcnt,trap"))
  "DU_any_power10,EXU_power10")
; 5 cycle latency to a CR consumer.
(define_bypass 5 "power10-alu2"
  "power10-crlogical,power10-mfcr,power10-mfcrf")

; Compares.
(define_insn_reservation "power10-cmp" 2
  (and (eq_attr "cpu" "power10,power11,future")
       (eq_attr "type" "cmp"))
  "DU_any_power10,EXU_power10")
299
; The 'two' and 'three' types are treated as 2-way and 3-way cracked:
; 'two' takes a dispatch slot pair, 'three' takes the whole dispatch group.
(define_insn_reservation "power10-two" 4
  (and (eq_attr "cpu" "power10,power11,future")
       (eq_attr "type" "two"))
  "DU_even_power10,EXU_power10")

(define_insn_reservation "power10-three" 6
  (and (eq_attr "cpu" "power10,power11,future")
       (eq_attr "type" "three"))
  "DU_all_power10,EXU_power10")
310
; Integer multiply, non-record form.
(define_insn_reservation "power10-mul" 5
  (and (eq_attr "cpu" "power10,power11,future")
       (eq_attr "type" "mul")
       (eq_attr "dot" "no"))
  "DU_any_power10,EXU_power10")
; A multiply feeding another multiply sees only 4 cycles.
(define_bypass 4 "power10-mul"
  "power10-mul,power10-mul-compare")

; Integer multiply, record form; takes a dispatch slot pair.
(define_insn_reservation "power10-mul-compare" 5
  (and (eq_attr "cpu" "power10,power11,future")
       (eq_attr "type" "mul")
       (eq_attr "dot" "yes"))
  "DU_even_power10,EXU_power10")
; A multiply feeding another multiply sees only 4 cycles.
(define_bypass 4 "power10-mul-compare"
  "power10-mul,power10-mul-compare")
; 7 cycle latency to a CR consumer.
(define_bypass 7 "power10-mul-compare"
  "power10-crlogical,power10-mfcr,power10-mfcrf")

; Integer divide, non-record form.
(define_insn_reservation "power10-div" 12
  (and (eq_attr "cpu" "power10,power11,future")
       (eq_attr "type" "div")
       (eq_attr "dot" "no"))
  "DU_any_power10,EXU_power10")

; Integer divide, record form; takes a dispatch slot pair.
(define_insn_reservation "power10-div-compare" 12
  (and (eq_attr "cpu" "power10,power11,future")
       (eq_attr "type" "div")
       (eq_attr "dot" "yes"))
  "DU_even_power10,EXU_power10")
; 14 cycle latency to a CR consumer.
(define_bypass 14 "power10-div-compare"
  "power10-crlogical,power10-mfcr,power10-mfcrf")
346
; CR logical operations.
(define_insn_reservation "power10-crlogical" 2
  (and (eq_attr "cpu" "power10,power11,future")
       (eq_attr "type" "cr_logical"))
  "DU_any_power10,EXU_power10")

; mfcrf type.
(define_insn_reservation "power10-mfcrf" 2
  (and (eq_attr "cpu" "power10,power11,future")
       (eq_attr "type" "mfcrf"))
  "DU_any_power10,EXU_power10")

; mfcr type; takes a dispatch slot pair and one cycle more than mfcrf.
(define_insn_reservation "power10-mfcr" 3
  (and (eq_attr "cpu" "power10,power11,future")
       (eq_attr "type" "mfcr"))
  "DU_even_power10,EXU_power10")

; Should differentiate between 1 cr field and > 1 since target of > 1 cr
; is cracked; currently every mtcr is modeled the same way.
(define_insn_reservation "power10-mtcr" 3
  (and (eq_attr "cpu" "power10,power11,future")
       (eq_attr "type" "mtcr"))
  "DU_any_power10,EXU_power10")

; mtjmpr type.
(define_insn_reservation "power10-mtjmpr" 3
  (and (eq_attr "cpu" "power10,power11,future")
       (eq_attr "type" "mtjmpr"))
  "DU_any_power10,EXU_power10")

; mfjmpr type.
(define_insn_reservation "power10-mfjmpr" 2
  (and (eq_attr "cpu" "power10,power11,future")
       (eq_attr "type" "mfjmpr"))
  "DU_any_power10,EXU_power10")
378
379
; Floating point/Vector ops

; Every reservation in this group uses one dispatch slot and one
; execution unit; only the type and the latency differ.
(define_insn_reservation "power10-fpsimple" 3
  (and (eq_attr "cpu" "power10,power11,future")
       (eq_attr "type" "fpsimple"))
  "DU_any_power10,EXU_power10")

(define_insn_reservation "power10-fp" 5
  (and (eq_attr "cpu" "power10,power11,future")
       (eq_attr "type" "fp,dmul"))
  "DU_any_power10,EXU_power10")

(define_insn_reservation "power10-fpcompare" 3
  (and (eq_attr "cpu" "power10,power11,future")
       (eq_attr "type" "fpcompare"))
  "DU_any_power10,EXU_power10")

; Single and double precision divide.
(define_insn_reservation "power10-sdiv" 22
  (and (eq_attr "cpu" "power10,power11,future")
       (eq_attr "type" "sdiv"))
  "DU_any_power10,EXU_power10")

(define_insn_reservation "power10-ddiv" 27
  (and (eq_attr "cpu" "power10,power11,future")
       (eq_attr "type" "ddiv"))
  "DU_any_power10,EXU_power10")

; Single and double precision square root.
(define_insn_reservation "power10-sqrt" 26
  (and (eq_attr "cpu" "power10,power11,future")
       (eq_attr "type" "ssqrt"))
  "DU_any_power10,EXU_power10")

(define_insn_reservation "power10-dsqrt" 36
  (and (eq_attr "cpu" "power10,power11,future")
       (eq_attr "type" "dsqrt"))
  "DU_any_power10,EXU_power10")
416
; Two-cycle vector moves, logicals, sign extends and fixed-point compares.
(define_insn_reservation "power10-vec-2cyc" 2
  (and (eq_attr "cpu" "power10,power11,future")
       (eq_attr "type" "vecmove,veclogical,vecexts,veccmpfx"))
  "DU_any_power10,EXU_power10")

; Fused vector forms take an even/odd dispatch slot pair.
(define_insn_reservation "power10-fused-vec" 2
  (and (eq_attr "cpu" "power10,power11,future")
       (eq_attr "type" "fused_vector"))
  "DU_even_power10,EXU_power10")

(define_insn_reservation "power10-veccmp" 3
  (and (eq_attr "cpu" "power10,power11,future")
       (eq_attr "type" "veccmp"))
  "DU_any_power10,EXU_power10")

(define_insn_reservation "power10-vecsimple" 2
  (and (eq_attr "cpu" "power10,power11,future")
       (eq_attr "type" "vecsimple"))
  "DU_any_power10,EXU_power10")

; vecfloat/vecdouble types whose size is not 128.
(define_insn_reservation "power10-vecnormal" 5
  (and (eq_attr "cpu" "power10,power11,future")
       (eq_attr "type" "vecfloat,vecdouble")
       (eq_attr "size" "!128"))
  "DU_any_power10,EXU_power10")

; The same types at size 128 have a 12 cycle latency.
(define_insn_reservation "power10-qp" 12
  (and (eq_attr "cpu" "power10,power11,future")
       (eq_attr "type" "vecfloat,vecdouble")
       (eq_attr "size" "128"))
  "DU_any_power10,EXU_power10")

; Permutes: the plain form uses any slot, while record and prefixed
; forms take a dispatch slot pair.
(define_insn_reservation "power10-vecperm" 3
  (and (eq_attr "cpu" "power10,power11,future")
       (eq_attr "type" "vecperm")
       (eq_attr "prefixed" "no")
       (eq_attr "dot" "no"))
  "DU_any_power10,EXU_power10")

(define_insn_reservation "power10-vecperm-compare" 3
  (and (eq_attr "cpu" "power10,power11,future")
       (eq_attr "type" "vecperm")
       (eq_attr "dot" "yes"))
  "DU_even_power10,EXU_power10")

(define_insn_reservation "power10-prefixed-vecperm" 3
  (and (eq_attr "cpu" "power10,power11,future")
       (eq_attr "type" "vecperm")
       (eq_attr "prefixed" "yes"))
  "DU_even_power10,EXU_power10")

(define_insn_reservation "power10-veccomplex" 6
  (and (eq_attr "cpu" "power10,power11,future")
       (eq_attr "type" "veccomplex"))
  "DU_any_power10,EXU_power10")
472
; Long-latency vector ops and VSR moves; each uses one dispatch slot
; and one execution unit.
(define_insn_reservation "power10-vecfdiv" 24
  (and (eq_attr "cpu" "power10,power11,future")
       (eq_attr "type" "vecfdiv"))
  "DU_any_power10,EXU_power10")

; vecdiv type whose size is not 128.
(define_insn_reservation "power10-vecdiv" 27
  (and (eq_attr "cpu" "power10,power11,future")
       (eq_attr "type" "vecdiv")
       (eq_attr "size" "!128"))
  "DU_any_power10,EXU_power10")

; vecdiv type at size 128.
(define_insn_reservation "power10-qpdiv" 56
  (and (eq_attr "cpu" "power10,power11,future")
       (eq_attr "type" "vecdiv")
       (eq_attr "size" "128"))
  "DU_any_power10,EXU_power10")

; qmul type at size 128.
(define_insn_reservation "power10-qpmul" 24
  (and (eq_attr "cpu" "power10,power11,future")
       (eq_attr "type" "qmul")
       (eq_attr "size" "128"))
  "DU_any_power10,EXU_power10")

(define_insn_reservation "power10-mtvsr" 2
  (and (eq_attr "cpu" "power10,power11,future")
       (eq_attr "type" "mtvsr"))
  "DU_any_power10,EXU_power10")

(define_insn_reservation "power10-mfvsr" 2
  (and (eq_attr "cpu" "power10,power11,future")
       (eq_attr "type" "mfvsr"))
  "DU_any_power10,EXU_power10")
505
506
; Branch
; Branch is 2 cycles and is grouped with the store units for issue.
(define_insn_reservation "power10-branch" 2
  (and (eq_attr "cpu" "power10,power11,future")
       (eq_attr "type" "jmpreg,branch"))
  "DU_any_power10,STU_power10")

; fused_mtbc type: 3 cycles and a dispatch slot pair.
(define_insn_reservation "power10-fused-branch" 3
  (and (eq_attr "cpu" "power10,power11,future")
       (eq_attr "type" "fused_mtbc"))
  "DU_even_power10,STU_power10")
518
519
; Crypto
(define_insn_reservation "power10-crypto" 4
  (and (eq_attr "cpu" "power10,power11,future")
       (eq_attr "type" "crypto"))
  "DU_any_power10,EXU_power10")


; HTM
(define_insn_reservation "power10-htm" 2
  (and (eq_attr "cpu" "power10,power11,future")
       (eq_attr "type" "htmsimple,htm"))
  "DU_any_power10,EXU_power10")
532
533
; DFP
; All DFP insns are modeled with the minimum 12 cycle latency.

; DFP insns whose size is not 128.
(define_insn_reservation "power10-dfp" 12
  (and (eq_attr "cpu" "power10,power11,future")
       (eq_attr "type" "dfp")
       (eq_attr "size" "!128"))
  "DU_any_power10,EXU_power10")

; 128-bit DFP insns take an even/odd dispatch slot pair.
(define_insn_reservation "power10-dfpq" 12
  (and (eq_attr "cpu" "power10,power11,future")
       (eq_attr "type" "dfp")
       (eq_attr "size" "128"))
  "DU_even_power10,EXU_power10")
547
; MMA
; MMA insns reserve a pair of execution units (EXU_super_power10).
(define_insn_reservation "power10-mma" 9
  (and (eq_attr "cpu" "power10,power11,future")
       (eq_attr "type" "mma")
       (eq_attr "prefixed" "no"))
  "DU_any_power10,EXU_super_power10")

; Prefixed MMA insns additionally take a dispatch slot pair.
(define_insn_reservation "power10-prefixed-mma" 9
  (and (eq_attr "cpu" "power10,power11,future")
       (eq_attr "type" "mma")
       (eq_attr "prefixed" "yes"))
  "DU_even_power10,EXU_super_power10")
; An MMA insn feeding another MMA insn sees only 4 cycles.
(define_bypass 4 "power10-mma,power10-prefixed-mma"
  "power10-mma,power10-prefixed-mma")
563
564
This page took 0.063074 seconds and 5 git commands to generate.