1 ;; Scheduling description for the IBM POWER10 and POWER11 processors as well as
2 ;; potential future processors.
;; Copyright (C) 2020-2024 Free Software Foundation, Inc.
5 ;; Contributed by Pat Haugen (pthaugen@us.ibm.com).
7 ;; This file is part of GCC.
9 ;; GCC is free software; you can redistribute it and/or modify it
10 ;; under the terms of the GNU General Public License as published
;; by the Free Software Foundation; either version 3, or (at your
;; option) any later version.
14 ;; GCC is distributed in the hope that it will be useful, but WITHOUT
15 ;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 ;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 ;; License for more details.
19 ;; You should have received a copy of the GNU General Public License
20 ;; along with GCC; see the file COPYING3. If not see
21 ;; <http://www.gnu.org/licenses/>.
; For Power10 we model (and try to pack) the in-order decode/dispatch groups
; which consist of 8 instructions max.  We do not try to model the details of
; the out-of-order issue queues and how insns flow to the various execution
; units except for the simple representation of the issue limitation of at
; most 4 insns to the execution units/2 insns to the load units/2 insns to
; the store units per cycle.
; Two automata: "power10dispatch" tracks the decode/dispatch slots and
; "power10issue" tracks the per-cycle issue limits of the execution, load
; and store units.
(define_automaton "power10dispatch,power10issue")

; Decode/dispatch slots
(define_cpu_unit "du0_power10,du1_power10,du2_power10,du3_power10,
                  du4_power10,du5_power10,du6_power10,du7_power10"
                 "power10dispatch")

; Four execution units.  Like the load/store units below they belong to the
; issue automaton; every unit must name its automaton once automata have
; been declared.
(define_cpu_unit "exu0_power10,exu1_power10,exu2_power10,exu3_power10"
                 "power10issue")

; Two load units and two store units
(define_cpu_unit "lu0_power10,lu1_power10" "power10issue")
(define_cpu_unit "stu0_power10,stu1_power10" "power10issue")
; Dispatch slots are allocated in order conforming to program order.
; Each absence_set says: slot duN may only be reserved while none of the
; higher-numbered slots is reserved, so slots are filled from du0 upwards.
(absence_set "du0_power10" "du1_power10,du2_power10,du3_power10,du4_power10,\
              du5_power10,du6_power10,du7_power10")
(absence_set "du1_power10" "du2_power10,du3_power10,du4_power10,du5_power10,\
              du6_power10,du7_power10")
(absence_set "du2_power10" "du3_power10,du4_power10,du5_power10,du6_power10,\
              du7_power10")
(absence_set "du3_power10" "du4_power10,du5_power10,du6_power10,du7_power10")
(absence_set "du4_power10" "du5_power10,du6_power10,du7_power10")
(absence_set "du5_power10" "du6_power10,du7_power10")
(absence_set "du6_power10" "du7_power10")
; Dispatch port reservations
;
; Power10 can dispatch a maximum of 8 iops per cycle.  With a maximum of
; 4 VSU/2 Load/2 Store per cycle.

; Any one of the eight dispatch slots.
(define_reservation "DU_any_power10"
                    "du0_power10|du1_power10|du2_power10|du3_power10|
                     du4_power10|du5_power10|du6_power10|du7_power10")

; Even slot, actually takes even/odd slots
; (one of the four adjacent even/odd slot pairs, reserved together).
(define_reservation "DU_even_power10"
                    "du0_power10+du1_power10|du2_power10+du3_power10|
                     du4_power10+du5_power10|du6_power10+du7_power10")

; 4-way cracked (consumes whole decode/dispatch cycle)
; All eight dispatch slots are reserved simultaneously.
(define_reservation "DU_all_power10"
                    "du0_power10+du1_power10+du2_power10+du3_power10+
                     du4_power10+du5_power10+du6_power10+du7_power10")
; Execution unit reservations

; Either load unit.
(define_reservation "LU_power10"
                    "lu0_power10|lu1_power10")

; Either store unit.
(define_reservation "STU_power10"
                    "stu0_power10|stu1_power10")

; Certain simple fixed-point insns can execute in the Store-agen pipe
; (modelled with the same two units as STU_power10).
(define_reservation "SXU_power10"
                    "stu0_power10|stu1_power10")

; Any one of the four execution units.
(define_reservation "EXU_power10"
                    "exu0_power10|exu1_power10|exu2_power10|exu3_power10")

; A pair of execution units (0+1 or 2+3) reserved together.
(define_reservation "EXU_super_power10"
                    "exu0_power10+exu1_power10|exu2_power10+exu3_power10")
; Fixed-point loads, 4 cycle latency.
; Non-prefixed, non-update forms that are not quad-word (size 128) take a
; single dispatch slot and one load unit.
(define_insn_reservation "power10-load" 4
  (and (eq_attr "type" "load")
       (eq_attr "update" "no")
       (eq_attr "size" "!128")
       (eq_attr "prefixed" "no")
       (eq_attr "cpu" "power10,power11,future"))
  "DU_any_power10,LU_power10")

; Fused load forms take an even/odd dispatch slot pair.
(define_insn_reservation "power10-fused-load" 4
  (and (eq_attr "type" "fused_load_cmpi,fused_addis_load,fused_load_load")
       (eq_attr "cpu" "power10,power11,future"))
  "DU_even_power10,LU_power10")

; Prefixed loads take an even/odd dispatch slot pair.
(define_insn_reservation "power10-prefixed-load" 4
  (and (eq_attr "type" "load")
       (eq_attr "update" "no")
       (eq_attr "size" "!128")
       (eq_attr "prefixed" "yes")
       (eq_attr "cpu" "power10,power11,future"))
  "DU_even_power10,LU_power10")

; Update forms reserve a load unit and a store-agen pipe in the same cycle.
(define_insn_reservation "power10-load-update" 4
  (and (eq_attr "type" "load")
       (eq_attr "update" "yes")
       (eq_attr "cpu" "power10,power11,future"))
  "DU_even_power10,LU_power10+SXU_power10")
; Floating-point loads.  Double (size 64) forms have 4 cycle latency.
(define_insn_reservation "power10-fpload-double" 4
  (and (eq_attr "type" "fpload")
       (eq_attr "update" "no")
       (eq_attr "size" "64")
       (eq_attr "prefixed" "no")
       (eq_attr "cpu" "power10,power11,future"))
  "DU_any_power10,LU_power10")

; Prefixed double loads take an even/odd dispatch slot pair.
(define_insn_reservation "power10-prefixed-fpload-double" 4
  (and (eq_attr "type" "fpload")
       (eq_attr "update" "no")
       (eq_attr "size" "64")
       (eq_attr "prefixed" "yes")
       (eq_attr "cpu" "power10,power11,future"))
  "DU_even_power10,LU_power10")

; Update forms reserve a load unit and a store-agen pipe in the same cycle.
(define_insn_reservation "power10-fpload-update-double" 4
  (and (eq_attr "type" "fpload")
       (eq_attr "update" "yes")
       (eq_attr "size" "64")
       (eq_attr "cpu" "power10,power11,future"))
  "DU_even_power10,LU_power10+SXU_power10")

; SFmode loads are cracked and have additional 3 cycles over DFmode
; Prefixed forms behave the same
(define_insn_reservation "power10-fpload-single" 7
  (and (eq_attr "type" "fpload")
       (eq_attr "update" "no")
       (eq_attr "size" "32")
       (eq_attr "cpu" "power10,power11,future"))
  "DU_even_power10,LU_power10")

(define_insn_reservation "power10-fpload-update-single" 7
  (and (eq_attr "type" "fpload")
       (eq_attr "update" "yes")
       (eq_attr "size" "32")
       (eq_attr "cpu" "power10,power11,future"))
  "DU_even_power10,LU_power10+SXU_power10")
; Vector loads, 4 cycle latency.  Everything except the 256-bit (pair) form
; takes a single dispatch slot and one load unit.
(define_insn_reservation "power10-vecload" 4
  (and (eq_attr "type" "vecload")
       (eq_attr "size" "!256")
       (eq_attr "cpu" "power10,power11,future"))
  "DU_any_power10,LU_power10")

; The 256-bit (pair) form takes an even/odd dispatch slot pair and reserves
; a load unit and a store-agen pipe in the same cycle.
(define_insn_reservation "power10-vecload-pair" 4
  (and (eq_attr "type" "vecload")
       (eq_attr "size" "256")
       (eq_attr "cpu" "power10,power11,future"))
  "DU_even_power10,LU_power10+SXU_power10")
; Stores, modelled with 0 latency.  Non-update, non-prefixed forms other than
; the 128-bit and 256-bit sizes take one dispatch slot and one store unit.
(define_insn_reservation "power10-store" 0
  (and (eq_attr "type" "store,fpstore,vecstore")
       (eq_attr "update" "no")
       (eq_attr "prefixed" "no")
       (eq_attr "size" "!128")
       (eq_attr "size" "!256")
       (eq_attr "cpu" "power10,power11,future"))
  "DU_any_power10,STU_power10")

; Fused store pairs take an even/odd dispatch slot pair.
(define_insn_reservation "power10-fused-store" 0
  (and (eq_attr "type" "fused_store_store")
       (eq_attr "cpu" "power10,power11,future"))
  "DU_even_power10,STU_power10")

; Prefixed stores take an even/odd dispatch slot pair.
(define_insn_reservation "power10-prefixed-store" 0
  (and (eq_attr "type" "store,fpstore,vecstore")
       (eq_attr "prefixed" "yes")
       (eq_attr "size" "!128")
       (eq_attr "size" "!256")
       (eq_attr "cpu" "power10,power11,future"))
  "DU_even_power10,STU_power10")

; Update forms have 2 cycle latency for updated addr reg
(define_insn_reservation "power10-store-update" 2
  (and (eq_attr "type" "store,fpstore")
       (eq_attr "update" "yes")
       (eq_attr "cpu" "power10,power11,future"))
  "DU_any_power10,STU_power10")

; The 256-bit (pair) vector store reserves both store units in the same cycle.
(define_insn_reservation "power10-vecstore-pair" 0
  (and (eq_attr "type" "vecstore")
       (eq_attr "size" "256")
       (eq_attr "cpu" "power10,power11,future"))
  "DU_even_power10,stu0_power10+stu1_power10")
; Load-and-reserve (load_l), excluding the quad-word form.
(define_insn_reservation "power10-larx" 4
  (and (eq_attr "type" "load_l")
       (eq_attr "size" "!128")
       (eq_attr "cpu" "power10,power11,future"))
  "DU_any_power10,LU_power10")

; All load quad forms
(define_insn_reservation "power10-lq" 4
  (and (eq_attr "type" "load,load_l")
       (eq_attr "size" "128")
       (eq_attr "cpu" "power10,power11,future"))
  "DU_even_power10,LU_power10+SXU_power10")

; Store-conditional (store_c), excluding the quad-word form.
(define_insn_reservation "power10-stcx" 0
  (and (eq_attr "type" "store_c")
       (eq_attr "size" "!128")
       (eq_attr "cpu" "power10,power11,future"))
  "DU_any_power10,STU_power10")

; All store quad forms
; Both store units are reserved in the same cycle.
(define_insn_reservation "power10-stq" 0
  (and (eq_attr "type" "store,store_c")
       (eq_attr "size" "128")
       (eq_attr "cpu" "power10,power11,future"))
  "DU_even_power10,stu0_power10+stu1_power10")

; sync/isync are issued through a store unit.
(define_insn_reservation "power10-sync" 1
  (and (eq_attr "type" "sync,isync")
       (eq_attr "cpu" "power10,power11,future"))
  "DU_even_power10,STU_power10")
; Most ALU insns are simple 2 cycle, including record form
(define_insn_reservation "power10-alu" 2
  (and (eq_attr "type" "add,exts,integer,logical,isel")
       (eq_attr "prefixed" "no")
       (eq_attr "cpu" "power10,power11,future"))
  "DU_any_power10,EXU_power10")

; 4 cycle latency from an ALU insn to the CR consumers listed here.
(define_bypass 4 "power10-alu"
                 "power10-crlogical,power10-mfcr,power10-mfcrf")

; Fused ALU forms take an even/odd dispatch slot pair.
(define_insn_reservation "power10-fused_alu" 2
  (and (eq_attr "type" "fused_arith_logical,fused_cmp_isel,fused_carry")
       (eq_attr "cpu" "power10,power11,future"))
  "DU_even_power10,EXU_power10")

; Prefixed add (paddi) takes an even/odd dispatch slot pair.
(define_insn_reservation "power10-paddi" 2
  (and (eq_attr "type" "add")
       (eq_attr "prefixed" "yes")
       (eq_attr "cpu" "power10,power11,future"))
  "DU_even_power10,EXU_power10")
; Rotate/shift (non-record form)
; Restricted to dot=no so that record forms are left to the
; power10-rot-compare reservation instead of also matching this one.
(define_insn_reservation "power10-rot" 2
  (and (eq_attr "type" "insert,shift")
       (eq_attr "dot" "no")
       (eq_attr "cpu" "power10,power11,future"))
  "DU_any_power10,EXU_power10")
; Record form rotate/shift
(define_insn_reservation "power10-rot-compare" 3
  (and (eq_attr "type" "insert,shift")
       (eq_attr "dot" "yes")
       (eq_attr "cpu" "power10,power11,future"))
  "DU_any_power10,EXU_power10")

; 5 cycle latency from a record-form rotate/shift to the CR consumers.
(define_bypass 5 "power10-rot-compare"
                 "power10-crlogical,power10-mfcr,power10-mfcrf")
; 3 cycle fixed-point ops: count-leading-zeros, popcount and trap.
(define_insn_reservation "power10-alu2" 3
  (and (eq_attr "type" "cntlz,popcnt,trap")
       (eq_attr "cpu" "power10,power11,future"))
  "DU_any_power10,EXU_power10")

; 5 cycle latency from these ops to the CR consumers.
(define_bypass 5 "power10-alu2"
                 "power10-crlogical,power10-mfcr,power10-mfcrf")

; Compares, 2 cycle latency.
(define_insn_reservation "power10-cmp" 2
  (and (eq_attr "type" "cmp")
       (eq_attr "cpu" "power10,power11,future"))
  "DU_any_power10,EXU_power10")
; Treat 'two' and 'three' types as 2 or 3 way cracked
; 'two' takes an even/odd dispatch slot pair.
(define_insn_reservation "power10-two" 4
  (and (eq_attr "type" "two")
       (eq_attr "cpu" "power10,power11,future"))
  "DU_even_power10,EXU_power10")

; 'three' reserves all eight dispatch slots.
(define_insn_reservation "power10-three" 6
  (and (eq_attr "type" "three")
       (eq_attr "cpu" "power10,power11,future"))
  "DU_all_power10,EXU_power10")
; Multiply (non-record form), 5 cycle latency.
; Restricted to dot=no so that record forms are left to the
; power10-mul-compare reservation instead of also matching this one.
(define_insn_reservation "power10-mul" 5
  (and (eq_attr "type" "mul")
       (eq_attr "dot" "no")
       (eq_attr "cpu" "power10,power11,future"))
  "DU_any_power10,EXU_power10")
; 4 cycle MUL->MUL latency
(define_bypass 4 "power10-mul"
                 "power10-mul,power10-mul-compare")
; Record form multiply; takes an even/odd dispatch slot pair.
(define_insn_reservation "power10-mul-compare" 5
  (and (eq_attr "type" "mul")
       (eq_attr "dot" "yes")
       (eq_attr "cpu" "power10,power11,future"))
  "DU_even_power10,EXU_power10")
; 4 cycle MUL->MUL latency
(define_bypass 4 "power10-mul-compare"
                 "power10-mul,power10-mul-compare")

; 7 cycle latency from a record-form multiply to the CR consumers.
(define_bypass 7 "power10-mul-compare"
                 "power10-crlogical,power10-mfcr,power10-mfcrf")
; Divide (non-record form), 12 cycle latency.
; Restricted to dot=no so that record forms are left to the
; power10-div-compare reservation instead of also matching this one.
(define_insn_reservation "power10-div" 12
  (and (eq_attr "type" "div")
       (eq_attr "dot" "no")
       (eq_attr "cpu" "power10,power11,future"))
  "DU_any_power10,EXU_power10")
; Record form divide; takes an even/odd dispatch slot pair.
(define_insn_reservation "power10-div-compare" 12
  (and (eq_attr "type" "div")
       (eq_attr "dot" "yes")
       (eq_attr "cpu" "power10,power11,future"))
  "DU_even_power10,EXU_power10")
; 14 cycle CR latency
(define_bypass 14 "power10-div-compare"
                  "power10-crlogical,power10-mfcr,power10-mfcrf")
; Condition-register logical ops, 2 cycle latency.
(define_insn_reservation "power10-crlogical" 2
  (and (eq_attr "type" "cr_logical")
       (eq_attr "cpu" "power10,power11,future"))
  "DU_any_power10,EXU_power10")

; Move from a CR field, 2 cycle latency.
(define_insn_reservation "power10-mfcrf" 2
  (and (eq_attr "type" "mfcrf")
       (eq_attr "cpu" "power10,power11,future"))
  "DU_any_power10,EXU_power10")

; Move from CR; takes an even/odd dispatch slot pair.
(define_insn_reservation "power10-mfcr" 3
  (and (eq_attr "type" "mfcr")
       (eq_attr "cpu" "power10,power11,future"))
  "DU_even_power10,EXU_power10")

; Should differentiate between 1 cr field and > 1 since target of > 1 cr
; fields presumably behaves differently (the remainder of this note is
; missing here -- TODO confirm against the upstream file).
(define_insn_reservation "power10-mtcr" 3
  (and (eq_attr "type" "mtcr")
       (eq_attr "cpu" "power10,power11,future"))
  "DU_any_power10,EXU_power10")

; Moves to the jump registers (type mtjmpr), 3 cycle latency.
(define_insn_reservation "power10-mtjmpr" 3
  (and (eq_attr "type" "mtjmpr")
       (eq_attr "cpu" "power10,power11,future"))
  "DU_any_power10,EXU_power10")

; Moves from the jump registers (type mfjmpr), 2 cycle latency.
(define_insn_reservation "power10-mfjmpr" 2
  (and (eq_attr "type" "mfjmpr")
       (eq_attr "cpu" "power10,power11,future"))
  "DU_any_power10,EXU_power10")
; Floating point/Vector ops
; Every scalar FP reservation below takes one dispatch slot and one
; execution unit; only the latency differs.

(define_insn_reservation "power10-fpsimple" 3
  (and (eq_attr "type" "fpsimple")
       (eq_attr "cpu" "power10,power11,future"))
  "DU_any_power10,EXU_power10")

(define_insn_reservation "power10-fp" 5
  (and (eq_attr "type" "fp,dmul")
       (eq_attr "cpu" "power10,power11,future"))
  "DU_any_power10,EXU_power10")

(define_insn_reservation "power10-fpcompare" 3
  (and (eq_attr "type" "fpcompare")
       (eq_attr "cpu" "power10,power11,future"))
  "DU_any_power10,EXU_power10")

; Divides: 22 cycles for sdiv, 27 cycles for ddiv.
(define_insn_reservation "power10-sdiv" 22
  (and (eq_attr "type" "sdiv")
       (eq_attr "cpu" "power10,power11,future"))
  "DU_any_power10,EXU_power10")

(define_insn_reservation "power10-ddiv" 27
  (and (eq_attr "type" "ddiv")
       (eq_attr "cpu" "power10,power11,future"))
  "DU_any_power10,EXU_power10")

; Square roots: 26 cycles for ssqrt, 36 cycles for dsqrt.
(define_insn_reservation "power10-sqrt" 26
  (and (eq_attr "type" "ssqrt")
       (eq_attr "cpu" "power10,power11,future"))
  "DU_any_power10,EXU_power10")

(define_insn_reservation "power10-dsqrt" 36
  (and (eq_attr "type" "dsqrt")
       (eq_attr "cpu" "power10,power11,future"))
  "DU_any_power10,EXU_power10")
; 2 cycle vector ops: moves, logicals, sign extensions, fixed-point compares.
(define_insn_reservation "power10-vec-2cyc" 2
  (and (eq_attr "type" "vecmove,veclogical,vecexts,veccmpfx")
       (eq_attr "cpu" "power10,power11,future"))
  "DU_any_power10,EXU_power10")

; Fused vector ops take an even/odd dispatch slot pair.
(define_insn_reservation "power10-fused-vec" 2
  (and (eq_attr "type" "fused_vector")
       (eq_attr "cpu" "power10,power11,future"))
  "DU_even_power10,EXU_power10")

(define_insn_reservation "power10-veccmp" 3
  (and (eq_attr "type" "veccmp")
       (eq_attr "cpu" "power10,power11,future"))
  "DU_any_power10,EXU_power10")

(define_insn_reservation "power10-vecsimple" 2
  (and (eq_attr "type" "vecsimple")
       (eq_attr "cpu" "power10,power11,future"))
  "DU_any_power10,EXU_power10")

; vecfloat/vecdouble ops other than the 128-bit size, 5 cycle latency.
(define_insn_reservation "power10-vecnormal" 5
  (and (eq_attr "type" "vecfloat,vecdouble")
       (eq_attr "size" "!128")
       (eq_attr "cpu" "power10,power11,future"))
  "DU_any_power10,EXU_power10")

; 128-bit vecfloat/vecdouble ops, 12 cycle latency.
(define_insn_reservation "power10-qp" 12
  (and (eq_attr "type" "vecfloat,vecdouble")
       (eq_attr "size" "128")
       (eq_attr "cpu" "power10,power11,future"))
  "DU_any_power10,EXU_power10")
; Vector permute, non-prefixed and non-record form.
; Restricted to dot=no so that record forms are left to the
; power10-vecperm-compare reservation instead of also matching this one.
(define_insn_reservation "power10-vecperm" 3
  (and (eq_attr "type" "vecperm")
       (eq_attr "prefixed" "no")
       (eq_attr "dot" "no")
       (eq_attr "cpu" "power10,power11,future"))
  "DU_any_power10,EXU_power10")
; Record form vector permute; takes an even/odd dispatch slot pair.
(define_insn_reservation "power10-vecperm-compare" 3
  (and (eq_attr "type" "vecperm")
       (eq_attr "dot" "yes")
       (eq_attr "cpu" "power10,power11,future"))
  "DU_even_power10,EXU_power10")

; Prefixed vector permute; takes an even/odd dispatch slot pair.
(define_insn_reservation "power10-prefixed-vecperm" 3
  (and (eq_attr "type" "vecperm")
       (eq_attr "prefixed" "yes")
       (eq_attr "cpu" "power10,power11,future"))
  "DU_even_power10,EXU_power10")
; Longer-latency vector ops.  Each takes one dispatch slot and one execution
; unit; only the latency differs.
(define_insn_reservation "power10-veccomplex" 6
  (and (eq_attr "type" "veccomplex")
       (eq_attr "cpu" "power10,power11,future"))
  "DU_any_power10,EXU_power10")

(define_insn_reservation "power10-vecfdiv" 24
  (and (eq_attr "type" "vecfdiv")
       (eq_attr "cpu" "power10,power11,future"))
  "DU_any_power10,EXU_power10")

; vecdiv other than the 128-bit size.
(define_insn_reservation "power10-vecdiv" 27
  (and (eq_attr "type" "vecdiv")
       (eq_attr "size" "!128")
       (eq_attr "cpu" "power10,power11,future"))
  "DU_any_power10,EXU_power10")

; 128-bit vecdiv.
(define_insn_reservation "power10-qpdiv" 56
  (and (eq_attr "type" "vecdiv")
       (eq_attr "size" "128")
       (eq_attr "cpu" "power10,power11,future"))
  "DU_any_power10,EXU_power10")

; 128-bit qmul.
(define_insn_reservation "power10-qpmul" 24
  (and (eq_attr "type" "qmul")
       (eq_attr "size" "128")
       (eq_attr "cpu" "power10,power11,future"))
  "DU_any_power10,EXU_power10")
; Moves to and from the vector-scalar registers (types mtvsr/mfvsr),
; 2 cycle latency each.
(define_insn_reservation "power10-mtvsr" 2
  (and (eq_attr "type" "mtvsr")
       (eq_attr "cpu" "power10,power11,future"))
  "DU_any_power10,EXU_power10")

(define_insn_reservation "power10-mfvsr" 2
  (and (eq_attr "type" "mfvsr")
       (eq_attr "cpu" "power10,power11,future"))
  "DU_any_power10,EXU_power10")
; Branch is 2 cycles, grouped with STU for issue
(define_insn_reservation "power10-branch" 2
  (and (eq_attr "type" "jmpreg,branch")
       (eq_attr "cpu" "power10,power11,future"))
  "DU_any_power10,STU_power10")

; Fused branch (type fused_mtbc) takes an even/odd dispatch slot pair and
; has 3 cycle latency.
(define_insn_reservation "power10-fused-branch" 3
  (and (eq_attr "type" "fused_mtbc")
       (eq_attr "cpu" "power10,power11,future"))
  "DU_even_power10,STU_power10")
; Crypto ops, 4 cycle latency.
(define_insn_reservation "power10-crypto" 4
  (and (eq_attr "type" "crypto")
       (eq_attr "cpu" "power10,power11,future"))
  "DU_any_power10,EXU_power10")

; HTM ops (types htmsimple and htm), 2 cycle latency.
(define_insn_reservation "power10-htm" 2
  (and (eq_attr "type" "htmsimple,htm")
       (eq_attr "cpu" "power10,power11,future"))
  "DU_any_power10,EXU_power10")
; Use the minimum 12 cycle latency for all DFP insns
(define_insn_reservation "power10-dfp" 12
  (and (eq_attr "type" "dfp")
       (eq_attr "size" "!128")
       (eq_attr "cpu" "power10,power11,future"))
  "DU_any_power10,EXU_power10")

; 128-bit DFP takes an even/odd dispatch slot pair.
(define_insn_reservation "power10-dfpq" 12
  (and (eq_attr "type" "dfp")
       (eq_attr "size" "128")
       (eq_attr "cpu" "power10,power11,future"))
  "DU_even_power10,EXU_power10")
; MMA ops reserve a pair of execution units (EXU_super_power10) and have
; 9 cycle latency.
(define_insn_reservation "power10-mma" 9
  (and (eq_attr "type" "mma")
       (eq_attr "prefixed" "no")
       (eq_attr "cpu" "power10,power11,future"))
  "DU_any_power10,EXU_super_power10")

; Prefixed MMA ops additionally take an even/odd dispatch slot pair.
(define_insn_reservation "power10-prefixed-mma" 9
  (and (eq_attr "type" "mma")
       (eq_attr "prefixed" "yes")
       (eq_attr "cpu" "power10,power11,future"))
  "DU_even_power10,EXU_super_power10")
; 4 cycle MMA->MMA latency
(define_bypass 4 "power10-mma,power10-prefixed-mma"
                 "power10-mma,power10-prefixed-mma")