]> gcc.gnu.org Git - gcc.git/blob - gcc/config/rs6000/power10.md
90312643858e58fedbd57542c41969e6dfe27350
[gcc.git] / gcc / config / rs6000 / power10.md
1 ;; Scheduling description for the IBM POWER10 and POWER11 processors.
2 ;; Copyright (C) 2020-2024 Free Software Foundation, Inc.
3 ;;
4 ;; Contributed by Pat Haugen (pthaugen@us.ibm.com).
5
6 ;; This file is part of GCC.
7 ;;
8 ;; GCC is free software; you can redistribute it and/or modify it
9 ;; under the terms of the GNU General Public License as published
10 ;; by the Free Software Foundation; either version 3, or (at your
11 ;; option) any later version.
12 ;;
13 ;; GCC is distributed in the hope that it will be useful, but WITHOUT
14 ;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 ;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
16 ;; License for more details.
17 ;;
18 ;; You should have received a copy of the GNU General Public License
19 ;; along with GCC; see the file COPYING3. If not see
20 ;; <http://www.gnu.org/licenses/>.
21
22 ; For Power10 we model (and try to pack) the in-order decode/dispatch groups
23 ; which consist of 8 instructions max. We do not try to model the details of
24 ; the out-of-order issue queues and how insns flow to the various execution
25 ; units except for the simple representation of the issue limitation of at
26 ; most 4 insns to the execution units/2 insns to the load units/2 insns to
27 ; the store units.
; Declare the two automata used by this description.  The units defined
; below place the dispatch slots in "power10dispatch" and the execution,
; load and store units in "power10issue".
28 (define_automaton "power10dispatch,power10issue")
29
30 ; Decode/dispatch slots
; Eight dispatch-slot units, du0_power10 .. du7_power10, all belonging to
; the "power10dispatch" automaton.
31 (define_cpu_unit "du0_power10,du1_power10,du2_power10,du3_power10,
32 du4_power10,du5_power10,du6_power10,du7_power10" "power10dispatch")
33
34 ; Four execution units
; exu0_power10 .. exu3_power10, all belonging to the "power10issue"
; automaton.
35 (define_cpu_unit "exu0_power10,exu1_power10,exu2_power10,exu3_power10"
36 "power10issue")
37 ; Two load units and two store units
; lu0/lu1 are the load units and stu0/stu1 the store units; both pairs
; belong to the "power10issue" automaton.
38 (define_cpu_unit "lu0_power10,lu1_power10" "power10issue")
39 (define_cpu_unit "stu0_power10,stu1_power10" "power10issue")
40
41
42 ; Dispatch slots are allocated in order conforming to program order.
; Each absence_set lets slot duN be reserved only when none of the
; higher-numbered slots listed in its second string is reserved.  Seven
; sets cover du0..du6; du7 has no higher slot and therefore needs none.
43 (absence_set "du0_power10" "du1_power10,du2_power10,du3_power10,du4_power10,\
44 du5_power10,du6_power10,du7_power10")
45 (absence_set "du1_power10" "du2_power10,du3_power10,du4_power10,du5_power10,\
46 du6_power10,du7_power10")
47 (absence_set "du2_power10" "du3_power10,du4_power10,du5_power10,du6_power10,\
48 du7_power10")
49 (absence_set "du3_power10" "du4_power10,du5_power10,du6_power10,du7_power10")
50 (absence_set "du4_power10" "du5_power10,du6_power10,du7_power10")
51 (absence_set "du5_power10" "du6_power10,du7_power10")
52 (absence_set "du6_power10" "du7_power10")
53
54
55 ; Dispatch port reservations
56 ;
57 ; Power10 can dispatch a maximum of 8 iops per cycle, with a maximum of
58 ; 4 VSU/2 Load/2 Store per cycle.
59
60 ; Any dispatch slot
; Reserves exactly one of the eight dispatch slots ("|" separates the
; alternatives).
61 (define_reservation "DU_any_power10"
62 "du0_power10|du1_power10|du2_power10|du3_power10|
63 du4_power10|du5_power10|du6_power10|du7_power10")
64
65 ; Even slot: actually takes an adjacent even/odd pair of slots
; Reserves two slots at once ("+"): one of the pairs du0+du1, du2+du3,
; du4+du5 or du6+du7.
66 (define_reservation "DU_even_power10"
67 "du0_power10+du1_power10|du2_power10+du3_power10|
68 du4_power10+du5_power10|du6_power10+du7_power10")
69
70 ; 4-way cracked (consumes whole decode/dispatch cycle)
; Reserves all eight dispatch slots simultaneously, so nothing else can
; take a slot in the same cycle.
71 (define_reservation "DU_all_power10"
72 "du0_power10+du1_power10+du2_power10+du3_power10+
73 du4_power10+du5_power10+du6_power10+du7_power10")
74
75
76 ; Execution unit reservations
; Either one of the two load units.
77 (define_reservation "LU_power10"
78 "lu0_power10|lu1_power10")
79
; Either one of the two store units.
80 (define_reservation "STU_power10"
81 "stu0_power10|stu1_power10")
82
83 ; Certain simple fixed-point insns can execute in the Store-agen pipe
; Expands to the same units as STU_power10 (stu0 or stu1); the separate
; name marks uses where a store unit is taken for non-store work.
84 (define_reservation "SXU_power10"
85 "stu0_power10|stu1_power10")
86
; Any one of the four execution units.
87 (define_reservation "EXU_power10"
88 "exu0_power10|exu1_power10|exu2_power10|exu3_power10")
89
; Two execution units at once: either exu0+exu1 or exu2+exu3.
90 (define_reservation "EXU_super_power10"
91 "exu0_power10+exu1_power10|exu2_power10+exu3_power10")
92
93
94 ; Load Unit
; Type "load" insns that are non-update, non-prefixed and not size 128.
; Default latency 4; takes any one dispatch slot, then either load unit.
95 (define_insn_reservation "power10-load" 4
96 (and (eq_attr "type" "load")
97 (eq_attr "update" "no")
98 (eq_attr "size" "!128")
99 (eq_attr "prefixed" "no")
100 (eq_attr "cpu" "power10,power11"))
101 "DU_any_power10,LU_power10")
102
; Insns whose type is one of the three fused load types listed below.
; Default latency 4; takes an even/odd dispatch slot pair, then either
; load unit.
103 (define_insn_reservation "power10-fused-load" 4
104 (and (eq_attr "type" "fused_load_cmpi,fused_addis_load,fused_load_load")
105 (eq_attr "cpu" "power10,power11"))
106 "DU_even_power10,LU_power10")
107
; Prefixed counterpart of "power10-load": same conditions except
; prefixed is "yes".  Default latency 4; takes an even/odd dispatch slot
; pair rather than a single slot, then either load unit.
108 (define_insn_reservation "power10-prefixed-load" 4
109 (and (eq_attr "type" "load")
110 (eq_attr "update" "yes" "no")
111 (eq_attr "size" "!128")
112 (eq_attr "prefixed" "yes")
113 (eq_attr "cpu" "power10,power11"))
114 "DU_even_power10,LU_power10")
115
; Type "load" insns with update "yes" (size and prefixed are not tested).
; Default latency 4; takes an even/odd dispatch slot pair, then a load
; unit and a store unit (SXU_power10) in the same cycle.
116 (define_insn_reservation "power10-load-update" 4
117 (and (eq_attr "type" "load")
118 (eq_attr "update" "yes")
119 (eq_attr "cpu" "power10,power11"))
120 "DU_even_power10,LU_power10+SXU_power10")
121
; Type "fpload" insns of size 64 that are non-update and non-prefixed.
; Default latency 4; takes any one dispatch slot, then either load unit.
122 (define_insn_reservation "power10-fpload-double" 4
123 (and (eq_attr "type" "fpload")
124 (eq_attr "update" "no")
125 (eq_attr "size" "64")
126 (eq_attr "prefixed" "no")
127 (eq_attr "cpu" "power10,power11"))
128 "DU_any_power10,LU_power10")
129
; Prefixed counterpart of "power10-fpload-double" (prefixed "yes").
; Default latency 4; takes an even/odd dispatch slot pair, then either
; load unit.
130 (define_insn_reservation "power10-prefixed-fpload-double" 4
131 (and (eq_attr "type" "fpload")
132 (eq_attr "update" "no")
133 (eq_attr "size" "64")
134 (eq_attr "prefixed" "yes")
135 (eq_attr "cpu" "power10,power11"))
136 "DU_even_power10,LU_power10")
137
; Type "fpload" insns of size 64 with update "yes".  Default latency 4;
; takes an even/odd dispatch slot pair, then a load unit and a store unit
; (SXU_power10) in the same cycle.
138 (define_insn_reservation "power10-fpload-update-double" 4
139 (and (eq_attr "type" "fpload")
140 (eq_attr "update" "yes")
141 (eq_attr "size" "64")
142 (eq_attr "cpu" "power10,power11"))
143 "DU_even_power10,LU_power10+SXU_power10")
144
145 ; SFmode loads are cracked and have an additional 3 cycles of latency over DFmode
146 ; Prefixed forms behave the same
; Type "fpload" insns of size 32 with update "no"; prefixed is not
; tested, so both forms match.  Default latency 7; takes an even/odd
; dispatch slot pair, then either load unit.
147 (define_insn_reservation "power10-fpload-single" 7
148 (and (eq_attr "type" "fpload")
149 (eq_attr "update" "no")
150 (eq_attr "size" "32")
151 (eq_attr "cpu" "power10,power11"))
152 "DU_even_power10,LU_power10")
153
; Type "fpload" insns of size 32 with update "yes".  Default latency 7;
; takes an even/odd dispatch slot pair, then a load unit and a store unit
; (SXU_power10) in the same cycle.
154 (define_insn_reservation "power10-fpload-update-single" 7
155 (and (eq_attr "type" "fpload")
156 (eq_attr "update" "yes")
157 (eq_attr "size" "32")
158 (eq_attr "cpu" "power10,power11"))
159 "DU_even_power10,LU_power10+SXU_power10")
160
; Type "vecload" insns whose size is not 256.  Default latency 4; takes
; any one dispatch slot, then either load unit.
161 (define_insn_reservation "power10-vecload" 4
162 (and (eq_attr "type" "vecload")
163 (eq_attr "size" "!256")
164 (eq_attr "cpu" "power10,power11"))
165 "DU_any_power10,LU_power10")
166
167 ; lxvp
; Type "vecload" insns of size 256.  Default latency 4; takes an even/odd
; dispatch slot pair, then a load unit and a store unit (SXU_power10) in
; the same cycle.
168 (define_insn_reservation "power10-vecload-pair" 4
169 (and (eq_attr "type" "vecload")
170 (eq_attr "size" "256")
171 (eq_attr "cpu" "power10,power11"))
172 "DU_even_power10,LU_power10+SXU_power10")
173
174 ; Store Unit
; Types "store", "fpstore" and "vecstore" that are non-update,
; non-prefixed and neither size 128 nor size 256.  Default latency 0;
; takes any one dispatch slot, then either store unit.
175 (define_insn_reservation "power10-store" 0
176 (and (eq_attr "type" "store,fpstore,vecstore")
177 (eq_attr "update" "no")
178 (eq_attr "prefixed" "no")
179 (eq_attr "size" "!128")
180 (eq_attr "size" "!256")
181 (eq_attr "cpu" "power10,power11"))
182 "DU_any_power10,STU_power10")
183
; Type "fused_store_store" insns.  Default latency 0; takes an even/odd
; dispatch slot pair, then either store unit.
184 (define_insn_reservation "power10-fused-store" 0
185 (and (eq_attr "type" "fused_store_store")
186 (eq_attr "cpu" "power10,power11"))
187 "DU_even_power10,STU_power10")
188
; Prefixed "store", "fpstore" and "vecstore" insns that are neither size
; 128 nor size 256 (update is not tested here).  Default latency 0; takes
; an even/odd dispatch slot pair, then either store unit.
189 (define_insn_reservation "power10-prefixed-store" 0
190 (and (eq_attr "type" "store,fpstore,vecstore")
191 (eq_attr "prefixed" "yes")
192 (eq_attr "size" "!128")
193 (eq_attr "size" "!256")
194 (eq_attr "cpu" "power10,power11"))
195 "DU_even_power10,STU_power10")
196
197 ; Update forms have 2 cycle latency for updated addr reg
; Types "store" and "fpstore" with update "yes" ("vecstore" is not
; included).  Default latency 2; takes any one dispatch slot, then either
; store unit.
198 (define_insn_reservation "power10-store-update" 2
199 (and (eq_attr "type" "store,fpstore")
200 (eq_attr "update" "yes")
201 (eq_attr "cpu" "power10,power11"))
202 "DU_any_power10,STU_power10")
203
204 ; stxvp
; Type "vecstore" insns of size 256.  Default latency 0; takes an even/odd
; dispatch slot pair, then both store units (stu0 and stu1) in the same
; cycle.
205 (define_insn_reservation "power10-vecstore-pair" 0
206 (and (eq_attr "type" "vecstore")
207 (eq_attr "size" "256")
208 (eq_attr "cpu" "power10,power11"))
209 "DU_even_power10,stu0_power10+stu1_power10")
210
; Type "load_l" insns whose size is not 128.  Default latency 4; takes any
; one dispatch slot, then either load unit.
211 (define_insn_reservation "power10-larx" 4
212 (and (eq_attr "type" "load_l")
213 (eq_attr "size" "!128")
214 (eq_attr "cpu" "power10,power11"))
215 "DU_any_power10,LU_power10")
216
217 ; All load quad forms
; Types "load" and "load_l" of size 128.  Default latency 4; takes an
; even/odd dispatch slot pair, then a load unit and a store unit
; (SXU_power10) in the same cycle.
218 (define_insn_reservation "power10-lq" 4
219 (and (eq_attr "type" "load,load_l")
220 (eq_attr "size" "128")
221 (eq_attr "cpu" "power10,power11"))
222 "DU_even_power10,LU_power10+SXU_power10")
223
; Type "store_c" insns whose size is not 128.  Default latency 0; takes
; any one dispatch slot, then either store unit.
224 (define_insn_reservation "power10-stcx" 0
225 (and (eq_attr "type" "store_c")
226 (eq_attr "size" "!128")
227 (eq_attr "cpu" "power10,power11"))
228 "DU_any_power10,STU_power10")
229
230 ; All store quad forms
; Types "store" and "store_c" of size 128.  Default latency 0; takes an
; even/odd dispatch slot pair, then both store units in the same cycle.
231 (define_insn_reservation "power10-stq" 0
232 (and (eq_attr "type" "store,store_c")
233 (eq_attr "size" "128")
234 (eq_attr "cpu" "power10,power11"))
235 "DU_even_power10,stu0_power10+stu1_power10")
236
; Types "sync" and "isync".  Default latency 1; takes an even/odd dispatch
; slot pair, then either store unit.
237 (define_insn_reservation "power10-sync" 1
238 (and (eq_attr "type" "sync,isync")
239 (eq_attr "cpu" "power10,power11"))
240 "DU_even_power10,STU_power10")
241
242
243 ; VSU Execution Unit
244
245 ; Fixed point ops
246
247 ; Most ALU insns are simple 2 cycle, including record form
; Non-prefixed insns of type add, exts, integer, logical or isel.
; Default latency 2; takes any one dispatch slot, then any one execution
; unit.  The bypass raises the latency to 4 when the consumer is matched
; by power10-crlogical, power10-mfcr or power10-mfcrf.
248 (define_insn_reservation "power10-alu" 2
249 (and (eq_attr "type" "add,exts,integer,logical,isel")
250 (eq_attr "prefixed" "no")
251 (eq_attr "cpu" "power10,power11"))
252 "DU_any_power10,EXU_power10")
253 ; 4 cycle CR latency
254 (define_bypass 4 "power10-alu"
255 "power10-crlogical,power10-mfcr,power10-mfcrf")
256
; Insns of type fused_arith_logical, fused_cmp_isel or fused_carry.
; Default latency 2; takes an even/odd dispatch slot pair, then any one
; execution unit.
257 (define_insn_reservation "power10-fused_alu" 2
258 (and (eq_attr "type" "fused_arith_logical,fused_cmp_isel,fused_carry")
259 (eq_attr "cpu" "power10,power11"))
260 "DU_even_power10,EXU_power10")
261
262 ; paddi
; Prefixed insns of type "add".  Default latency 2; takes an even/odd
; dispatch slot pair, then any one execution unit.
263 (define_insn_reservation "power10-paddi" 2
264 (and (eq_attr "type" "add")
265 (eq_attr "prefixed" "yes")
266 (eq_attr "cpu" "power10,power11"))
267 "DU_even_power10,EXU_power10")
268
269 ; Rotate/shift (non-record form)
; Types "insert" and "shift" with dot "no".  Default latency 2; takes any
; one dispatch slot, then any one execution unit.
270 (define_insn_reservation "power10-rot" 2
271 (and (eq_attr "type" "insert,shift")
272 (eq_attr "dot" "no")
273 (eq_attr "cpu" "power10,power11"))
274 "DU_any_power10,EXU_power10")
275
276 ; Record form rotate/shift
; Types "insert" and "shift" with dot "yes".  Default latency 3; takes any
; one dispatch slot, then any one execution unit.  The bypass raises the
; latency to 5 when the consumer is matched by power10-crlogical,
; power10-mfcr or power10-mfcrf.
277 (define_insn_reservation "power10-rot-compare" 3
278 (and (eq_attr "type" "insert,shift")
279 (eq_attr "dot" "yes")
280 (eq_attr "cpu" "power10,power11"))
281 "DU_any_power10,EXU_power10")
282 ; 5 cycle CR latency
283 (define_bypass 5 "power10-rot-compare"
284 "power10-crlogical,power10-mfcr,power10-mfcrf")
285
; Types cntlz, popcnt and trap.  Default latency 3; takes any one dispatch
; slot, then any one execution unit.  The bypass raises the latency to 5
; when the consumer is matched by power10-crlogical, power10-mfcr or
; power10-mfcrf.
286 (define_insn_reservation "power10-alu2" 3
287 (and (eq_attr "type" "cntlz,popcnt,trap")
288 (eq_attr "cpu" "power10,power11"))
289 "DU_any_power10,EXU_power10")
290 ; 5 cycle CR latency
291 (define_bypass 5 "power10-alu2"
292 "power10-crlogical,power10-mfcr,power10-mfcrf")
293
; Type "cmp" insns.  Default latency 2; takes any one dispatch slot, then
; any one execution unit.
294 (define_insn_reservation "power10-cmp" 2
295 (and (eq_attr "type" "cmp")
296 (eq_attr "cpu" "power10,power11"))
297 "DU_any_power10,EXU_power10")
298
299 ; Treat 'two' and 'three' types as 2 or 3 way cracked
; Type "two" insns.  Default latency 4; takes an even/odd dispatch slot
; pair, then any one execution unit.
300 (define_insn_reservation "power10-two" 4
301 (and (eq_attr "type" "two")
302 (eq_attr "cpu" "power10,power11"))
303 "DU_even_power10,EXU_power10")
304
; Type "three" insns.  Default latency 6; takes all eight dispatch slots
; (DU_all_power10), then any one execution unit.
305 (define_insn_reservation "power10-three" 6
306 (and (eq_attr "type" "three")
307 (eq_attr "cpu" "power10,power11"))
308 "DU_all_power10,EXU_power10")
309
; Type "mul" insns with dot "no".  Default latency 5; takes any one
; dispatch slot, then any one execution unit.  The bypass lowers the
; latency to 4 when the consumer is matched by power10-mul or
; power10-mul-compare.
310 (define_insn_reservation "power10-mul" 5
311 (and (eq_attr "type" "mul")
312 (eq_attr "dot" "no")
313 (eq_attr "cpu" "power10,power11"))
314 "DU_any_power10,EXU_power10")
315 ; 4 cycle MUL->MUL latency
316 (define_bypass 4 "power10-mul"
317 "power10-mul,power10-mul-compare")
318
; Type "mul" insns with dot "yes".  Default latency 5; takes an even/odd
; dispatch slot pair, then any one execution unit.  Two bypasses follow:
; latency 4 into power10-mul / power10-mul-compare consumers, and latency
; 7 into power10-crlogical, power10-mfcr or power10-mfcrf consumers.
319 (define_insn_reservation "power10-mul-compare" 5
320 (and (eq_attr "type" "mul")
321 (eq_attr "dot" "yes")
322 (eq_attr "cpu" "power10,power11"))
323 "DU_even_power10,EXU_power10")
324 ; 4 cycle MUL->MUL latency
325 (define_bypass 4 "power10-mul-compare"
326 "power10-mul,power10-mul-compare")
327 ; 7 cycle CR latency
328 (define_bypass 7 "power10-mul-compare"
329 "power10-crlogical,power10-mfcr,power10-mfcrf")
330
; Type "div" insns with dot "no".  Default latency 12; takes any one
; dispatch slot, then any one execution unit.
331 (define_insn_reservation "power10-div" 12
332 (and (eq_attr "type" "div")
333 (eq_attr "dot" "no")
334 (eq_attr "cpu" "power10,power11"))
335 "DU_any_power10,EXU_power10")
336
; Type "div" insns with dot "yes".  Default latency 12; takes an even/odd
; dispatch slot pair, then any one execution unit.  The bypass raises the
; latency to 14 when the consumer is matched by power10-crlogical,
; power10-mfcr or power10-mfcrf.
337 (define_insn_reservation "power10-div-compare" 12
338 (and (eq_attr "type" "div")
339 (eq_attr "dot" "yes")
340 (eq_attr "cpu" "power10,power11"))
341 "DU_even_power10,EXU_power10")
342 ; 14 cycle CR latency
343 (define_bypass 14 "power10-div-compare"
344 "power10-crlogical,power10-mfcr,power10-mfcrf")
345
; Type "cr_logical" insns.  Default latency 2; takes any one dispatch
; slot, then any one execution unit.  This reservation is named as a
; consumer by the CR-latency bypasses in this file.
346 (define_insn_reservation "power10-crlogical" 2
347 (and (eq_attr "type" "cr_logical")
348 (eq_attr "cpu" "power10,power11"))
349 "DU_any_power10,EXU_power10")
350
; Type "mfcrf" insns.  Default latency 2; takes any one dispatch slot,
; then any one execution unit.  This reservation is named as a consumer
; by the CR-latency bypasses in this file.
351 (define_insn_reservation "power10-mfcrf" 2
352 (and (eq_attr "type" "mfcrf")
353 (eq_attr "cpu" "power10,power11"))
354 "DU_any_power10,EXU_power10")
355
; Type "mfcr" insns.  Default latency 3; takes an even/odd dispatch slot
; pair, then any one execution unit.  This reservation is named as a
; consumer by the CR-latency bypasses in this file.
356 (define_insn_reservation "power10-mfcr" 3
357 (and (eq_attr "type" "mfcr")
358 (eq_attr "cpu" "power10,power11"))
359 "DU_even_power10,EXU_power10")
360
361 ; Should differentiate between 1 cr field and > 1 since target of > 1 cr
362 ; is cracked
; Type "mtcr" insns, all treated alike.  Default latency 3; takes any one
; dispatch slot, then any one execution unit.
363 (define_insn_reservation "power10-mtcr" 3
364 (and (eq_attr "type" "mtcr")
365 (eq_attr "cpu" "power10,power11"))
366 "DU_any_power10,EXU_power10")
367
; Type "mtjmpr" insns.  Default latency 3; takes any one dispatch slot,
; then any one execution unit.
368 (define_insn_reservation "power10-mtjmpr" 3
369 (and (eq_attr "type" "mtjmpr")
370 (eq_attr "cpu" "power10,power11"))
371 "DU_any_power10,EXU_power10")
372
; Type "mfjmpr" insns.  Default latency 2; takes any one dispatch slot,
; then any one execution unit.
373 (define_insn_reservation "power10-mfjmpr" 2
374 (and (eq_attr "type" "mfjmpr")
375 (eq_attr "cpu" "power10,power11"))
376 "DU_any_power10,EXU_power10")
377
378
379 ; Floating point/Vector ops
380
; Type "fpsimple" insns.  Default latency 3; takes any one dispatch slot,
; then any one execution unit.
381 (define_insn_reservation "power10-fpsimple" 3
382 (and (eq_attr "type" "fpsimple")
383 (eq_attr "cpu" "power10,power11"))
384 "DU_any_power10,EXU_power10")
385
; Types "fp" and "dmul".  Default latency 5; takes any one dispatch slot,
; then any one execution unit.
386 (define_insn_reservation "power10-fp" 5
387 (and (eq_attr "type" "fp,dmul")
388 (eq_attr "cpu" "power10,power11"))
389 "DU_any_power10,EXU_power10")
390
; Type "fpcompare" insns.  Default latency 3; takes any one dispatch slot,
; then any one execution unit.
391 (define_insn_reservation "power10-fpcompare" 3
392 (and (eq_attr "type" "fpcompare")
393 (eq_attr "cpu" "power10,power11"))
394 "DU_any_power10,EXU_power10")
395
; Type "sdiv" insns.  Default latency 22; takes any one dispatch slot,
; then any one execution unit.
396 (define_insn_reservation "power10-sdiv" 22
397 (and (eq_attr "type" "sdiv")
398 (eq_attr "cpu" "power10,power11"))
399 "DU_any_power10,EXU_power10")
400
; Type "ddiv" insns.  Default latency 27; takes any one dispatch slot,
; then any one execution unit.
401 (define_insn_reservation "power10-ddiv" 27
402 (and (eq_attr "type" "ddiv")
403 (eq_attr "cpu" "power10,power11"))
404 "DU_any_power10,EXU_power10")
405
; Type "ssqrt" insns (note the reservation is named "power10-sqrt", not
; "power10-ssqrt").  Default latency 26; takes any one dispatch slot,
; then any one execution unit.
406 (define_insn_reservation "power10-sqrt" 26
407 (and (eq_attr "type" "ssqrt")
408 (eq_attr "cpu" "power10,power11"))
409 "DU_any_power10,EXU_power10")
410
; Type "dsqrt" insns.  Default latency 36; takes any one dispatch slot,
; then any one execution unit.
411 (define_insn_reservation "power10-dsqrt" 36
412 (and (eq_attr "type" "dsqrt")
413 (eq_attr "cpu" "power10,power11"))
414 "DU_any_power10,EXU_power10")
415
; Types vecmove, veclogical, vecexts and veccmpfx.  Default latency 2;
; takes any one dispatch slot, then any one execution unit.
416 (define_insn_reservation "power10-vec-2cyc" 2
417 (and (eq_attr "type" "vecmove,veclogical,vecexts,veccmpfx")
418 (eq_attr "cpu" "power10,power11"))
419 "DU_any_power10,EXU_power10")
420
; Type "fused_vector" insns.  Default latency 2; takes an even/odd
; dispatch slot pair, then any one execution unit.
421 (define_insn_reservation "power10-fused-vec" 2
422 (and (eq_attr "type" "fused_vector")
423 (eq_attr "cpu" "power10,power11"))
424 "DU_even_power10,EXU_power10")
425
; Type "veccmp" insns.  Default latency 3; takes any one dispatch slot,
; then any one execution unit.
426 (define_insn_reservation "power10-veccmp" 3
427 (and (eq_attr "type" "veccmp")
428 (eq_attr "cpu" "power10,power11"))
429 "DU_any_power10,EXU_power10")
430
; Type "vecsimple" insns.  Default latency 2; takes any one dispatch slot,
; then any one execution unit.
431 (define_insn_reservation "power10-vecsimple" 2
432 (and (eq_attr "type" "vecsimple")
433 (eq_attr "cpu" "power10,power11"))
434 "DU_any_power10,EXU_power10")
435
; Types "vecfloat" and "vecdouble" whose size is not 128.  Default latency
; 5; takes any one dispatch slot, then any one execution unit.
436 (define_insn_reservation "power10-vecnormal" 5
437 (and (eq_attr "type" "vecfloat,vecdouble")
438 (eq_attr "size" "!128")
439 (eq_attr "cpu" "power10,power11"))
440 "DU_any_power10,EXU_power10")
441
; Types "vecfloat" and "vecdouble" of size 128.  Default latency 12;
; takes any one dispatch slot, then any one execution unit.
442 (define_insn_reservation "power10-qp" 12
443 (and (eq_attr "type" "vecfloat,vecdouble")
444 (eq_attr "size" "128")
445 (eq_attr "cpu" "power10,power11"))
446 "DU_any_power10,EXU_power10")
447
; Type "vecperm" insns that are non-prefixed with dot "no".  Default
; latency 3; takes any one dispatch slot, then any one execution unit.
448 (define_insn_reservation "power10-vecperm" 3
449 (and (eq_attr "type" "vecperm")
450 (eq_attr "prefixed" "no")
451 (eq_attr "dot" "no")
452 (eq_attr "cpu" "power10,power11"))
453 "DU_any_power10,EXU_power10")
454
; Type "vecperm" insns with dot "yes" (prefixed is not tested).  Default
; latency 3; takes an even/odd dispatch slot pair, then any one execution
; unit.
455 (define_insn_reservation "power10-vecperm-compare" 3
456 (and (eq_attr "type" "vecperm")
457 (eq_attr "dot" "yes")
458 (eq_attr "cpu" "power10,power11"))
459 "DU_even_power10,EXU_power10")
460
; Prefixed "vecperm" insns (dot is not tested).  Default latency 3; takes
; an even/odd dispatch slot pair, then any one execution unit.
461 (define_insn_reservation "power10-prefixed-vecperm" 3
462 (and (eq_attr "type" "vecperm")
463 (eq_attr "prefixed" "yes")
464 (eq_attr "cpu" "power10,power11"))
465 "DU_even_power10,EXU_power10")
466
; Type "veccomplex" insns.  Default latency 6; takes any one dispatch
; slot, then any one execution unit.
467 (define_insn_reservation "power10-veccomplex" 6
468 (and (eq_attr "type" "veccomplex")
469 (eq_attr "cpu" "power10,power11"))
470 "DU_any_power10,EXU_power10")
471
; Type "vecfdiv" insns.  Default latency 24; takes any one dispatch slot,
; then any one execution unit.
472 (define_insn_reservation "power10-vecfdiv" 24
473 (and (eq_attr "type" "vecfdiv")
474 (eq_attr "cpu" "power10,power11"))
475 "DU_any_power10,EXU_power10")
476
; Type "vecdiv" insns whose size is not 128.  Default latency 27; takes
; any one dispatch slot, then any one execution unit.
477 (define_insn_reservation "power10-vecdiv" 27
478 (and (eq_attr "type" "vecdiv")
479 (eq_attr "size" "!128")
480 (eq_attr "cpu" "power10,power11"))
481 "DU_any_power10,EXU_power10")
482
; Type "vecdiv" insns of size 128.  Default latency 56; takes any one
; dispatch slot, then any one execution unit.
483 (define_insn_reservation "power10-qpdiv" 56
484 (and (eq_attr "type" "vecdiv")
485 (eq_attr "size" "128")
486 (eq_attr "cpu" "power10,power11"))
487 "DU_any_power10,EXU_power10")
488
; Type "qmul" insns of size 128.  Default latency 24; takes any one
; dispatch slot, then any one execution unit.
489 (define_insn_reservation "power10-qpmul" 24
490 (and (eq_attr "type" "qmul")
491 (eq_attr "size" "128")
492 (eq_attr "cpu" "power10,power11"))
493 "DU_any_power10,EXU_power10")
494
; Type "mtvsr" insns.  Default latency 2; takes any one dispatch slot,
; then any one execution unit.
495 (define_insn_reservation "power10-mtvsr" 2
496 (and (eq_attr "type" "mtvsr")
497 (eq_attr "cpu" "power10,power11"))
498 "DU_any_power10,EXU_power10")
499
; Type "mfvsr" insns.  Default latency 2; takes any one dispatch slot,
; then any one execution unit.
500 (define_insn_reservation "power10-mfvsr" 2
501 (and (eq_attr "type" "mfvsr")
502 (eq_attr "cpu" "power10,power11"))
503 "DU_any_power10,EXU_power10")
504
505
506 ; Branch
507 ; Branch is 2 cycles, grouped with STU for issue
; Types "jmpreg" and "branch".  Default latency 2; takes any one dispatch
; slot, then either store unit (STU_power10) rather than an execution
; unit.
508 (define_insn_reservation "power10-branch" 2
509 (and (eq_attr "type" "jmpreg,branch")
510 (eq_attr "cpu" "power10,power11"))
511 "DU_any_power10,STU_power10")
512
; Type "fused_mtbc" insns.  Default latency 3; takes an even/odd dispatch
; slot pair, then either store unit (STU_power10).
513 (define_insn_reservation "power10-fused-branch" 3
514 (and (eq_attr "type" "fused_mtbc")
515 (eq_attr "cpu" "power10,power11"))
516 "DU_even_power10,STU_power10")
517
518
519 ; Crypto
; Type "crypto" insns.  Default latency 4; takes any one dispatch slot,
; then any one execution unit.
520 (define_insn_reservation "power10-crypto" 4
521 (and (eq_attr "type" "crypto")
522 (eq_attr "cpu" "power10,power11"))
523 "DU_any_power10,EXU_power10")
524
525
526 ; HTM
; Types "htmsimple" and "htm".  Default latency 2; takes any one dispatch
; slot, then any one execution unit.
527 (define_insn_reservation "power10-htm" 2
528 (and (eq_attr "type" "htmsimple,htm")
529 (eq_attr "cpu" "power10,power11"))
530 "DU_any_power10,EXU_power10")
531
532
533 ; DFP
534 ; Use the minimum 12 cycle latency for all DFP insns
; Type "dfp" insns whose size is not 128.  Default latency 12; takes any
; one dispatch slot, then any one execution unit.
535 (define_insn_reservation "power10-dfp" 12
536 (and (eq_attr "type" "dfp")
537 (eq_attr "size" "!128")
538 (eq_attr "cpu" "power10,power11"))
539 "DU_any_power10,EXU_power10")
540
; Type "dfp" insns of size 128.  Same default latency (12) as
; "power10-dfp", but takes an even/odd dispatch slot pair before any one
; execution unit.
541 (define_insn_reservation "power10-dfpq" 12
542 (and (eq_attr "type" "dfp")
543 (eq_attr "size" "128")
544 (eq_attr "cpu" "power10,power11"))
545 "DU_even_power10,EXU_power10")
546
547 ; MMA
; Non-prefixed "mma" insns.  Default latency 9; takes any one dispatch
; slot, then a pair of execution units (EXU_super_power10: exu0+exu1 or
; exu2+exu3).
548 (define_insn_reservation "power10-mma" 9
549 (and (eq_attr "type" "mma")
550 (eq_attr "prefixed" "no")
551 (eq_attr "cpu" "power10,power11"))
552 "DU_any_power10,EXU_super_power10")
553
; Prefixed "mma" insns.  Default latency 9; takes an even/odd dispatch
; slot pair, then a pair of execution units (EXU_super_power10: exu0+exu1
; or exu2+exu3).
554 (define_insn_reservation "power10-prefixed-mma" 9
555 (and (eq_attr "type" "mma")
556 (eq_attr "prefixed" "yes")
557 (eq_attr "cpu" "power10,power11"))
558 "DU_even_power10,EXU_super_power10")
559 ; 4 cycle MMA->MMA latency
; Lowers the latency from the default 9 to 4 whenever both producer and
; consumer are matched by power10-mma or power10-prefixed-mma.
560 (define_bypass 4 "power10-mma,power10-prefixed-mma"
561 "power10-mma,power10-prefixed-mma")
562
563
This page took 0.059147 seconds and 4 git commands to generate.