;; Scheduling description for the IBM POWER10 and POWER11 processors.
;; Copyright (C) 2020-2024 Free Software Foundation, Inc.
;;
;; Contributed by Pat Haugen (pthaugen@us.ibm.com).
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published
;; by the Free Software Foundation; either version 3, or (at your
;; option) any later version.
;;
;; GCC is distributed in the hope that it will be useful, but WITHOUT
;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
;; License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.
; For Power10 we model (and try to pack) the in-order decode/dispatch groups,
; which consist of 8 instructions max.  We do not try to model the details of
; the out-of-order issue queues and how insns flow to the various execution
; units, except for a simple representation of the issue limitation of at
; most 4 insns to the execution units/2 insns to the load units/2 insns to
; the store units.
; Two automata: "power10dispatch" holds the decode/dispatch slots and
; "power10issue" holds the execution, load and store units.
(define_automaton "power10dispatch,power10issue")
; Decode/dispatch slots: eight units, du0 through du7, all belonging to the
; dispatch automaton.
(define_cpu_unit "du0_power10,du1_power10,du2_power10,du3_power10,
                  du4_power10,du5_power10,du6_power10,du7_power10"
                 "power10dispatch")
; Four execution units.  They belong to the issue automaton, like the load
; and store units, so the automaton name must be given explicitly and the
; form must be closed.
(define_cpu_unit "exu0_power10,exu1_power10,exu2_power10,exu3_power10"
                 "power10issue")
; Two load units and two store units, both pairs in the issue automaton.
(define_cpu_unit "lu0_power10,lu1_power10"
                 "power10issue")
(define_cpu_unit "stu0_power10,stu1_power10"
                 "power10issue")
; Dispatch slots are allocated in order conforming to program order.
; Each slot duN may only be reserved while no higher-numbered slot is
; reserved, so every set below lists all slots above the one it constrains.
(absence_set "du0_power10" "du1_power10,du2_power10,du3_power10,du4_power10,\
              du5_power10,du6_power10,du7_power10")
(absence_set "du1_power10" "du2_power10,du3_power10,du4_power10,du5_power10,\
              du6_power10,du7_power10")
(absence_set "du2_power10" "du3_power10,du4_power10,du5_power10,du6_power10,\
              du7_power10")
(absence_set "du3_power10" "du4_power10,du5_power10,du6_power10,du7_power10")
(absence_set "du4_power10" "du5_power10,du6_power10,du7_power10")
(absence_set "du5_power10" "du6_power10,du7_power10")
(absence_set "du6_power10" "du7_power10")
; Dispatch port reservations
;
; Power10 can dispatch a maximum of 8 iops per cycle, with a maximum of
; 4 VSU/2 Load/2 Store per cycle.

; Any single dispatch slot.
(define_reservation "DU_any_power10"
                    "du0_power10|du1_power10|du2_power10|du3_power10|
                     du4_power10|du5_power10|du6_power10|du7_power10")
; Even slot: actually occupies an even slot together with the odd slot that
; follows it (0+1, 2+3, 4+5 or 6+7).
(define_reservation "DU_even_power10"
                    "du0_power10+du1_power10|du2_power10+du3_power10|
                     du4_power10+du5_power10|du6_power10+du7_power10")
; 4-way cracked (consumes whole decode/dispatch cycle): reserves all eight
; dispatch slots at once.
(define_reservation "DU_all_power10"
                    "du0_power10+du1_power10+du2_power10+du3_power10+
                     du4_power10+du5_power10+du6_power10+du7_power10")
; Execution unit reservations

; Either of the two load units.
(define_reservation "LU_power10" "lu0_power10|lu1_power10")
; Either of the two store units.
(define_reservation "STU_power10" "stu0_power10|stu1_power10")
; Certain simple fixed-point insns can execute in the Store-agen pipe, so
; this reservation names the same two units as STU_power10.
(define_reservation "SXU_power10" "stu0_power10|stu1_power10")
; Any one of the four execution units.
(define_reservation "EXU_power10"
                    "exu0_power10|exu1_power10|exu2_power10|exu3_power10")
; A pair of execution units taken together: exu0+exu1 or exu2+exu3.
(define_reservation "EXU_super_power10"
                    "exu0_power10+exu1_power10|exu2_power10+exu3_power10")
; Plain loads: no update, not prefixed, size other than 128.
; Latency 4; any dispatch slot and one load unit.
(define_insn_reservation "power10-load" 4
  (and (eq_attr "type" "load")
       (eq_attr "update" "no")
       (eq_attr "size" "!128")
       (eq_attr "prefixed" "no")
       (eq_attr "cpu" "power10,power11"))
  "DU_any_power10,LU_power10")
; Fused load sequences.  Latency 4; even/odd dispatch slot pair and one
; load unit.
(define_insn_reservation "power10-fused-load" 4
  (and (eq_attr "type" "fused_load_cmpi,fused_addis_load,fused_load_load")
       (eq_attr "cpu" "power10,power11"))
  "DU_even_power10,LU_power10")
; Prefixed form of the plain load: same latency, but dispatched in an
; even/odd slot pair.
(define_insn_reservation "power10-prefixed-load" 4
  (and (eq_attr "type" "load")
       (eq_attr "update" "no")
       (eq_attr "size" "!128")
       (eq_attr "prefixed" "yes")
       (eq_attr "cpu" "power10,power11"))
  "DU_even_power10,LU_power10")
; Update-form loads.  Latency 4; even/odd slot pair, and a load unit plus a
; store-agen (SXU) unit in the same cycle.
(define_insn_reservation "power10-load-update" 4
  (and (eq_attr "type" "load")
       (eq_attr "update" "yes")
       (eq_attr "cpu" "power10,power11"))
  "DU_even_power10,LU_power10+SXU_power10")
; Size-64 FP loads, no update, not prefixed.  Latency 4; any dispatch slot
; and one load unit.
(define_insn_reservation "power10-fpload-double" 4
  (and (eq_attr "type" "fpload")
       (eq_attr "update" "no")
       (eq_attr "size" "64")
       (eq_attr "prefixed" "no")
       (eq_attr "cpu" "power10,power11"))
  "DU_any_power10,LU_power10")
; Prefixed size-64 FP loads, no update.  Latency 4; even/odd slot pair and
; one load unit.
(define_insn_reservation "power10-prefixed-fpload-double" 4
  (and (eq_attr "type" "fpload")
       (eq_attr "update" "no")
       (eq_attr "size" "64")
       (eq_attr "prefixed" "yes")
       (eq_attr "cpu" "power10,power11"))
  "DU_even_power10,LU_power10")
; Update-form size-64 FP loads.  Latency 4; even/odd slot pair, and a load
; unit plus a store-agen (SXU) unit.
(define_insn_reservation "power10-fpload-update-double" 4
  (and (eq_attr "type" "fpload")
       (eq_attr "update" "yes")
       (eq_attr "size" "64")
       (eq_attr "cpu" "power10,power11"))
  "DU_even_power10,LU_power10+SXU_power10")
; SFmode loads are cracked and have an additional 3 cycles over DFmode
; (latency 7 rather than 4).  Prefixed forms behave the same, so the
; "prefixed" attribute is not tested here.
(define_insn_reservation "power10-fpload-single" 7
  (and (eq_attr "type" "fpload")
       (eq_attr "update" "no")
       (eq_attr "size" "32")
       (eq_attr "cpu" "power10,power11"))
  "DU_even_power10,LU_power10")
; Update-form size-32 FP loads.  Latency 7; even/odd slot pair, and a load
; unit plus a store-agen (SXU) unit.
(define_insn_reservation "power10-fpload-update-single" 7
  (and (eq_attr "type" "fpload")
       (eq_attr "update" "yes")
       (eq_attr "size" "32")
       (eq_attr "cpu" "power10,power11"))
  "DU_even_power10,LU_power10+SXU_power10")
; Vector loads of any size other than 256.  Latency 4; any dispatch slot and
; one load unit.
(define_insn_reservation "power10-vecload" 4
  (and (eq_attr "type" "vecload")
       (eq_attr "size" "!256")
       (eq_attr "cpu" "power10,power11"))
  "DU_any_power10,LU_power10")
; Size-256 (paired) vector loads.  Latency 4; even/odd slot pair, and a load
; unit plus a store-agen (SXU) unit.
(define_insn_reservation "power10-vecload-pair" 4
  (and (eq_attr "type" "vecload")
       (eq_attr "size" "256")
       (eq_attr "cpu" "power10,power11"))
  "DU_even_power10,LU_power10+SXU_power10")
; Integer/FP/vector stores: no update, not prefixed, size neither 128 nor
; 256.  Latency 0; any dispatch slot and one store unit.
(define_insn_reservation "power10-store" 0
  (and (eq_attr "type" "store,fpstore,vecstore")
       (eq_attr "update" "no")
       (eq_attr "prefixed" "no")
       (eq_attr "size" "!128")
       (eq_attr "size" "!256")
       (eq_attr "cpu" "power10,power11"))
  "DU_any_power10,STU_power10")
; Fused store-store pairs.  Latency 0; even/odd slot pair and one store unit.
(define_insn_reservation "power10-fused-store" 0
  (and (eq_attr "type" "fused_store_store")
       (eq_attr "cpu" "power10,power11"))
  "DU_even_power10,STU_power10")
; Prefixed stores of size neither 128 nor 256.  Latency 0; even/odd slot
; pair and one store unit.
(define_insn_reservation "power10-prefixed-store" 0
  (and (eq_attr "type" "store,fpstore,vecstore")
       (eq_attr "prefixed" "yes")
       (eq_attr "size" "!128")
       (eq_attr "size" "!256")
       (eq_attr "cpu" "power10,power11"))
  "DU_even_power10,STU_power10")
; Update forms have a 2 cycle latency for the updated address register.
(define_insn_reservation "power10-store-update" 2
  (and (eq_attr "type" "store,fpstore")
       (eq_attr "update" "yes")
       (eq_attr "cpu" "power10,power11"))
  "DU_any_power10,STU_power10")
; Size-256 (paired) vector stores.  Latency 0; even/odd slot pair and both
; store units at once.
(define_insn_reservation "power10-vecstore-pair" 0
  (and (eq_attr "type" "vecstore")
       (eq_attr "size" "256")
       (eq_attr "cpu" "power10,power11"))
  "DU_even_power10,stu0_power10+stu1_power10")
; load_l insns of size other than 128.  Latency 4; any dispatch slot and one
; load unit.
(define_insn_reservation "power10-larx" 4
  (and (eq_attr "type" "load_l")
       (eq_attr "size" "!128")
       (eq_attr "cpu" "power10,power11"))
  "DU_any_power10,LU_power10")
; All load quad forms (size 128, both load and load_l types).  Latency 4;
; even/odd slot pair, and a load unit plus a store-agen (SXU) unit.
(define_insn_reservation "power10-lq" 4
  (and (eq_attr "type" "load,load_l")
       (eq_attr "size" "128")
       (eq_attr "cpu" "power10,power11"))
  "DU_even_power10,LU_power10+SXU_power10")
; store_c insns of size other than 128.  Latency 0; any dispatch slot and
; one store unit.
(define_insn_reservation "power10-stcx" 0
  (and (eq_attr "type" "store_c")
       (eq_attr "size" "!128")
       (eq_attr "cpu" "power10,power11"))
  "DU_any_power10,STU_power10")
; All store quad forms (size 128, both store and store_c types).  Latency 0;
; even/odd slot pair and both store units at once.
(define_insn_reservation "power10-stq" 0
  (and (eq_attr "type" "store,store_c")
       (eq_attr "size" "128")
       (eq_attr "cpu" "power10,power11"))
  "DU_even_power10,stu0_power10+stu1_power10")
; sync and isync.  Latency 1; even/odd slot pair and one store unit.
(define_insn_reservation "power10-sync" 1
  (and (eq_attr "type" "sync,isync")
       (eq_attr "cpu" "power10,power11"))
  "DU_even_power10,STU_power10")
; Most ALU insns are simple 2 cycle, including record form.
(define_insn_reservation "power10-alu" 2
  (and (eq_attr "type" "add,exts,integer,logical,isel")
       (eq_attr "prefixed" "no")
       (eq_attr "cpu" "power10,power11"))
  "DU_any_power10,EXU_power10")

; Latency is 4 instead of 2 when the consumer is a crlogical, mfcr or mfcrf
; insn.
(define_bypass 4 "power10-alu"
                 "power10-crlogical,power10-mfcr,power10-mfcrf")
; Fused ALU sequences.  Latency 2; even/odd slot pair and one execution unit.
(define_insn_reservation "power10-fused_alu" 2
  (and (eq_attr "type" "fused_arith_logical,fused_cmp_isel,fused_carry")
       (eq_attr "cpu" "power10,power11"))
  "DU_even_power10,EXU_power10")
; Prefixed add.  Latency 2; even/odd slot pair and one execution unit.
(define_insn_reservation "power10-paddi" 2
  (and (eq_attr "type" "add")
       (eq_attr "prefixed" "yes")
       (eq_attr "cpu" "power10,power11"))
  "DU_even_power10,EXU_power10")
; Rotate/shift (non-record form).  The "dot" test keeps this reservation
; from also matching the record forms, which have their own reservation
; (power10-rot-compare) with a longer latency.
(define_insn_reservation "power10-rot" 2
  (and (eq_attr "type" "insert,shift")
       (eq_attr "dot" "no")
       (eq_attr "cpu" "power10,power11"))
  "DU_any_power10,EXU_power10")
; Record form rotate/shift: one cycle longer than the 2-cycle simple ALU ops.
(define_insn_reservation "power10-rot-compare" 3
  (and (eq_attr "type" "insert,shift")
       (eq_attr "dot" "yes")
       (eq_attr "cpu" "power10,power11"))
  "DU_any_power10,EXU_power10")

; Latency is 5 instead of 3 when the consumer is a crlogical, mfcr or mfcrf
; insn.
(define_bypass 5 "power10-rot-compare"
                 "power10-crlogical,power10-mfcr,power10-mfcrf")
; Count-leading-zeros, popcount and trap insns.  Latency 3; any dispatch
; slot and one execution unit.
(define_insn_reservation "power10-alu2" 3
  (and (eq_attr "type" "cntlz,popcnt,trap")
       (eq_attr "cpu" "power10,power11"))
  "DU_any_power10,EXU_power10")

; Latency is 5 instead of 3 when the consumer is a crlogical, mfcr or mfcrf
; insn.
(define_bypass 5 "power10-alu2"
                 "power10-crlogical,power10-mfcr,power10-mfcrf")
; Compares.  Latency 2; any dispatch slot and one execution unit.
(define_insn_reservation "power10-cmp" 2
  (and (eq_attr "type" "cmp")
       (eq_attr "cpu" "power10,power11"))
  "DU_any_power10,EXU_power10")
; Treat 'two' and 'three' types as 2 or 3 way cracked.
; 'two': latency 4; even/odd slot pair and one execution unit.
(define_insn_reservation "power10-two" 4
  (and (eq_attr "type" "two")
       (eq_attr "cpu" "power10,power11"))
  "DU_even_power10,EXU_power10")
; 'three' type insns: latency 6; all eight dispatch slots and one execution
; unit.
(define_insn_reservation "power10-three" 6
  (and (eq_attr "type" "three")
       (eq_attr "cpu" "power10,power11"))
  "DU_all_power10,EXU_power10")
; Non-record-form multiply.  The "dot" test keeps this reservation from
; also matching record-form multiplies, which have their own reservation
; (power10-mul-compare) with a different dispatch requirement.
(define_insn_reservation "power10-mul" 5
  (and (eq_attr "type" "mul")
       (eq_attr "dot" "no")
       (eq_attr "cpu" "power10,power11"))
  "DU_any_power10,EXU_power10")

; 4 cycle MUL->MUL latency
(define_bypass 4 "power10-mul"
                 "power10-mul,power10-mul-compare")
; Record-form multiply.  Latency 5; even/odd slot pair and one execution
; unit.
(define_insn_reservation "power10-mul-compare" 5
  (and (eq_attr "type" "mul")
       (eq_attr "dot" "yes")
       (eq_attr "cpu" "power10,power11"))
  "DU_even_power10,EXU_power10")

; 4 cycle MUL->MUL latency
(define_bypass 4 "power10-mul-compare"
                 "power10-mul,power10-mul-compare")

; Latency is 7 when the consumer is a crlogical, mfcr or mfcrf insn.
(define_bypass 7 "power10-mul-compare"
                 "power10-crlogical,power10-mfcr,power10-mfcrf")
; Non-record-form divide.  The "dot" test keeps this reservation from also
; matching record-form divides, which have their own reservation
; (power10-div-compare) with a different dispatch requirement.
(define_insn_reservation "power10-div" 12
  (and (eq_attr "type" "div")
       (eq_attr "dot" "no")
       (eq_attr "cpu" "power10,power11"))
  "DU_any_power10,EXU_power10")
; Record-form divide.  Latency 12; even/odd slot pair and one execution
; unit.
(define_insn_reservation "power10-div-compare" 12
  (and (eq_attr "type" "div")
       (eq_attr "dot" "yes")
       (eq_attr "cpu" "power10,power11"))
  "DU_even_power10,EXU_power10")

; 14 cycle CR latency
(define_bypass 14 "power10-div-compare"
                  "power10-crlogical,power10-mfcr,power10-mfcrf")
; cr_logical insns.  Latency 2; any dispatch slot and one execution unit.
(define_insn_reservation "power10-crlogical" 2
  (and (eq_attr "type" "cr_logical")
       (eq_attr "cpu" "power10,power11"))
  "DU_any_power10,EXU_power10")
; mfcrf insns.  Latency 2; any dispatch slot and one execution unit.
(define_insn_reservation "power10-mfcrf" 2
  (and (eq_attr "type" "mfcrf")
       (eq_attr "cpu" "power10,power11"))
  "DU_any_power10,EXU_power10")
; mfcr insns.  Latency 3; even/odd slot pair and one execution unit.
(define_insn_reservation "power10-mfcr" 3
  (and (eq_attr "type" "mfcr")
       (eq_attr "cpu" "power10,power11"))
  "DU_even_power10,EXU_power10")
; Should differentiate between 1 cr field and > 1, since a target of > 1 cr
; field behaves differently.
; NOTE(review): the rest of this comment was cut off in the file as seen;
; presumably the multi-field form is cracked -- confirm.
(define_insn_reservation "power10-mtcr" 3
  (and (eq_attr "type" "mtcr")
       (eq_attr "cpu" "power10,power11"))
  "DU_any_power10,EXU_power10")
; mtjmpr insns.  Latency 3; any dispatch slot and one execution unit.
(define_insn_reservation "power10-mtjmpr" 3
  (and (eq_attr "type" "mtjmpr")
       (eq_attr "cpu" "power10,power11"))
  "DU_any_power10,EXU_power10")
; mfjmpr insns.  Latency 2; any dispatch slot and one execution unit.
(define_insn_reservation "power10-mfjmpr" 2
  (and (eq_attr "type" "mfjmpr")
       (eq_attr "cpu" "power10,power11"))
  "DU_any_power10,EXU_power10")
; Floating point/Vector ops

; fpsimple insns.  Latency 3; any dispatch slot and one execution unit.
(define_insn_reservation "power10-fpsimple" 3
  (and (eq_attr "type" "fpsimple")
       (eq_attr "cpu" "power10,power11"))
  "DU_any_power10,EXU_power10")
; fp and dmul insns.  Latency 5; any dispatch slot and one execution unit.
(define_insn_reservation "power10-fp" 5
  (and (eq_attr "type" "fp,dmul")
       (eq_attr "cpu" "power10,power11"))
  "DU_any_power10,EXU_power10")
; FP compares.  Latency 3; any dispatch slot and one execution unit.
(define_insn_reservation "power10-fpcompare" 3
  (and (eq_attr "type" "fpcompare")
       (eq_attr "cpu" "power10,power11"))
  "DU_any_power10,EXU_power10")
; sdiv insns.  Latency 22; any dispatch slot and one execution unit.
(define_insn_reservation "power10-sdiv" 22
  (and (eq_attr "type" "sdiv")
       (eq_attr "cpu" "power10,power11"))
  "DU_any_power10,EXU_power10")
; ddiv insns.  Latency 27; any dispatch slot and one execution unit.
(define_insn_reservation "power10-ddiv" 27
  (and (eq_attr "type" "ddiv")
       (eq_attr "cpu" "power10,power11"))
  "DU_any_power10,EXU_power10")
; ssqrt insns.  Latency 26; any dispatch slot and one execution unit.
(define_insn_reservation "power10-sqrt" 26
  (and (eq_attr "type" "ssqrt")
       (eq_attr "cpu" "power10,power11"))
  "DU_any_power10,EXU_power10")
; dsqrt insns.  Latency 36; any dispatch slot and one execution unit.
(define_insn_reservation "power10-dsqrt" 36
  (and (eq_attr "type" "dsqrt")
       (eq_attr "cpu" "power10,power11"))
  "DU_any_power10,EXU_power10")
; Two-cycle vector ops: moves, logicals, sign extends and fixed-point
; compares.  Any dispatch slot and one execution unit.
(define_insn_reservation "power10-vec-2cyc" 2
  (and (eq_attr "type" "vecmove,veclogical,vecexts,veccmpfx")
       (eq_attr "cpu" "power10,power11"))
  "DU_any_power10,EXU_power10")
; Fused vector sequences.  Latency 2; even/odd slot pair and one execution
; unit.
(define_insn_reservation "power10-fused-vec" 2
  (and (eq_attr "type" "fused_vector")
       (eq_attr "cpu" "power10,power11"))
  "DU_even_power10,EXU_power10")
; veccmp insns.  Latency 3; any dispatch slot and one execution unit.
(define_insn_reservation "power10-veccmp" 3
  (and (eq_attr "type" "veccmp")
       (eq_attr "cpu" "power10,power11"))
  "DU_any_power10,EXU_power10")
; vecsimple insns.  Latency 2; any dispatch slot and one execution unit.
(define_insn_reservation "power10-vecsimple" 2
  (and (eq_attr "type" "vecsimple")
       (eq_attr "cpu" "power10,power11"))
  "DU_any_power10,EXU_power10")
; vecfloat/vecdouble insns of size other than 128.  Latency 5; any dispatch
; slot and one execution unit.
(define_insn_reservation "power10-vecnormal" 5
  (and (eq_attr "type" "vecfloat,vecdouble")
       (eq_attr "size" "!128")
       (eq_attr "cpu" "power10,power11"))
  "DU_any_power10,EXU_power10")
; Size-128 vecfloat/vecdouble insns.  Latency 12; any dispatch slot and one
; execution unit.
(define_insn_reservation "power10-qp" 12
  (and (eq_attr "type" "vecfloat,vecdouble")
       (eq_attr "size" "128")
       (eq_attr "cpu" "power10,power11"))
  "DU_any_power10,EXU_power10")
; Non-prefixed, non-record-form vector permutes.  The "dot" test keeps this
; reservation from also matching record-form permutes, which have their own
; reservation (power10-vecperm-compare) with a different dispatch
; requirement.
(define_insn_reservation "power10-vecperm" 3
  (and (eq_attr "type" "vecperm")
       (eq_attr "prefixed" "no")
       (eq_attr "dot" "no")
       (eq_attr "cpu" "power10,power11"))
  "DU_any_power10,EXU_power10")
; Record-form vector permutes.  Latency 3; even/odd slot pair and one
; execution unit.
(define_insn_reservation "power10-vecperm-compare" 3
  (and (eq_attr "type" "vecperm")
       (eq_attr "dot" "yes")
       (eq_attr "cpu" "power10,power11"))
  "DU_even_power10,EXU_power10")
; Prefixed vector permutes.  Latency 3; even/odd slot pair and one execution
; unit.
(define_insn_reservation "power10-prefixed-vecperm" 3
  (and (eq_attr "type" "vecperm")
       (eq_attr "prefixed" "yes")
       (eq_attr "cpu" "power10,power11"))
  "DU_even_power10,EXU_power10")
; veccomplex insns.  Latency 6; any dispatch slot and one execution unit.
(define_insn_reservation "power10-veccomplex" 6
  (and (eq_attr "type" "veccomplex")
       (eq_attr "cpu" "power10,power11"))
  "DU_any_power10,EXU_power10")
; vecfdiv insns.  Latency 24; any dispatch slot and one execution unit.
(define_insn_reservation "power10-vecfdiv" 24
  (and (eq_attr "type" "vecfdiv")
       (eq_attr "cpu" "power10,power11"))
  "DU_any_power10,EXU_power10")
; vecdiv insns of size other than 128.  Latency 27; any dispatch slot and
; one execution unit.
(define_insn_reservation "power10-vecdiv" 27
  (and (eq_attr "type" "vecdiv")
       (eq_attr "size" "!128")
       (eq_attr "cpu" "power10,power11"))
  "DU_any_power10,EXU_power10")
; Size-128 vecdiv insns.  Latency 56; any dispatch slot and one execution
; unit.
(define_insn_reservation "power10-qpdiv" 56
  (and (eq_attr "type" "vecdiv")
       (eq_attr "size" "128")
       (eq_attr "cpu" "power10,power11"))
  "DU_any_power10,EXU_power10")
; Size-128 qmul insns.  Latency 24; any dispatch slot and one execution
; unit.
(define_insn_reservation "power10-qpmul" 24
  (and (eq_attr "type" "qmul")
       (eq_attr "size" "128")
       (eq_attr "cpu" "power10,power11"))
  "DU_any_power10,EXU_power10")
; mtvsr insns.  Latency 2; any dispatch slot and one execution unit.
(define_insn_reservation "power10-mtvsr" 2
  (and (eq_attr "type" "mtvsr")
       (eq_attr "cpu" "power10,power11"))
  "DU_any_power10,EXU_power10")
; mfvsr insns.  Latency 2; any dispatch slot and one execution unit.
(define_insn_reservation "power10-mfvsr" 2
  (and (eq_attr "type" "mfvsr")
       (eq_attr "cpu" "power10,power11"))
  "DU_any_power10,EXU_power10")
; Branch is 2 cycles, grouped with STU for issue (it reserves one of the
; store units rather than an execution unit).
(define_insn_reservation "power10-branch" 2
  (and (eq_attr "type" "jmpreg,branch")
       (eq_attr "cpu" "power10,power11"))
  "DU_any_power10,STU_power10")
; fused_mtbc sequences.  Latency 3; even/odd slot pair and one store unit.
(define_insn_reservation "power10-fused-branch" 3
  (and (eq_attr "type" "fused_mtbc")
       (eq_attr "cpu" "power10,power11"))
  "DU_even_power10,STU_power10")
; Crypto insns.  Latency 4; any dispatch slot and one execution unit.
(define_insn_reservation "power10-crypto" 4
  (and (eq_attr "type" "crypto")
       (eq_attr "cpu" "power10,power11"))
  "DU_any_power10,EXU_power10")
; htmsimple and htm insns.  Latency 2; any dispatch slot and one execution
; unit.
(define_insn_reservation "power10-htm" 2
  (and (eq_attr "type" "htmsimple,htm")
       (eq_attr "cpu" "power10,power11"))
  "DU_any_power10,EXU_power10")
; Use the minimum 12 cycle latency for all DFP insns.
; DFP of size other than 128: any dispatch slot and one execution unit.
(define_insn_reservation "power10-dfp" 12
  (and (eq_attr "type" "dfp")
       (eq_attr "size" "!128")
       (eq_attr "cpu" "power10,power11"))
  "DU_any_power10,EXU_power10")
; Size-128 DFP insns.  Latency 12; even/odd slot pair and one execution
; unit.
(define_insn_reservation "power10-dfpq" 12
  (and (eq_attr "type" "dfp")
       (eq_attr "size" "128")
       (eq_attr "cpu" "power10,power11"))
  "DU_even_power10,EXU_power10")
; Non-prefixed MMA insns.  Latency 9; any dispatch slot and a pair of
; execution units (EXU_super).
(define_insn_reservation "power10-mma" 9
  (and (eq_attr "type" "mma")
       (eq_attr "prefixed" "no")
       (eq_attr "cpu" "power10,power11"))
  "DU_any_power10,EXU_super_power10")
; Prefixed MMA insns.  Latency 9; even/odd slot pair and a pair of execution
; units (EXU_super).
(define_insn_reservation "power10-prefixed-mma" 9
  (and (eq_attr "type" "mma")
       (eq_attr "prefixed" "yes")
       (eq_attr "cpu" "power10,power11"))
  "DU_even_power10,EXU_super_power10")
; 4 cycle MMA->MMA latency, for both the prefixed and non-prefixed forms as
; producer and as consumer.
(define_bypass 4 "power10-mma,power10-prefixed-mma"
                 "power10-mma,power10-prefixed-mma")