;; Matrix-Multiply Assist (MMA) patterns.
;; Copyright (C) 2020-2024 Free Software Foundation, Inc.
;; Contributed by Peter Bergner <bergner@linux.ibm.com> and
;; Michael Meissner <meissner@linux.ibm.com>
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published
;; by the Free Software Foundation; either version 3, or (at your
;; option) any later version.
;;
;; GCC is distributed in the hope that it will be useful, but WITHOUT
;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
;; License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.

;; The MMA patterns use the multi-register XOmode and OOmode opaque
;; modes to implement the target specific __vector_quad and
;; __vector_pair types that the MMA built-in functions reference.  We
;; use OPAQUE_MODE to prevent anything from trying to open them up.
27 (define_constants [(MAX_MMA_OPERANDS 7)])
28
29 ;; Constants for creating unspecs
30
31 (define_c_enum "unspec"
32 [UNSPEC_VSX_ASSEMBLE
33 UNSPEC_MMA_EXTRACT
34 UNSPEC_MMA_PMXVBF16GER2
35 UNSPEC_MMA_PMXVBF16GER2NN
36 UNSPEC_MMA_PMXVBF16GER2NP
37 UNSPEC_MMA_PMXVBF16GER2PN
38 UNSPEC_MMA_PMXVBF16GER2PP
39 UNSPEC_MMA_PMXVF16GER2
40 UNSPEC_MMA_PMXVF16GER2NN
41 UNSPEC_MMA_PMXVF16GER2NP
42 UNSPEC_MMA_PMXVF16GER2PN
43 UNSPEC_MMA_PMXVF16GER2PP
44 UNSPEC_MMA_PMXVF32GER
45 UNSPEC_MMA_PMXVF32GERNN
46 UNSPEC_MMA_PMXVF32GERNP
47 UNSPEC_MMA_PMXVF32GERPN
48 UNSPEC_MMA_PMXVF32GERPP
49 UNSPEC_MMA_PMXVF64GER
50 UNSPEC_MMA_PMXVF64GERNN
51 UNSPEC_MMA_PMXVF64GERNP
52 UNSPEC_MMA_PMXVF64GERPN
53 UNSPEC_MMA_PMXVF64GERPP
54 UNSPEC_MMA_PMXVI16GER2
55 UNSPEC_MMA_PMXVI16GER2PP
56 UNSPEC_MMA_PMXVI16GER2S
57 UNSPEC_MMA_PMXVI16GER2SPP
58 UNSPEC_MMA_PMXVI4GER8
59 UNSPEC_MMA_PMXVI4GER8PP
60 UNSPEC_MMA_PMXVI8GER4
61 UNSPEC_MMA_PMXVI8GER4PP
62 UNSPEC_MMA_PMXVI8GER4SPP
63 UNSPEC_MMA_XVBF16GER2
64 UNSPEC_MMA_XVBF16GER2NN
65 UNSPEC_MMA_XVBF16GER2NP
66 UNSPEC_MMA_XVBF16GER2PN
67 UNSPEC_MMA_XVBF16GER2PP
68 UNSPEC_MMA_XVF16GER2
69 UNSPEC_MMA_XVF16GER2NN
70 UNSPEC_MMA_XVF16GER2NP
71 UNSPEC_MMA_XVF16GER2PN
72 UNSPEC_MMA_XVF16GER2PP
73 UNSPEC_MMA_XVF32GER
74 UNSPEC_MMA_XVF32GERNN
75 UNSPEC_MMA_XVF32GERNP
76 UNSPEC_MMA_XVF32GERPN
77 UNSPEC_MMA_XVF32GERPP
78 UNSPEC_MMA_XVF64GER
79 UNSPEC_MMA_XVF64GERNN
80 UNSPEC_MMA_XVF64GERNP
81 UNSPEC_MMA_XVF64GERPN
82 UNSPEC_MMA_XVF64GERPP
83 UNSPEC_MMA_XVI16GER2
84 UNSPEC_MMA_XVI16GER2PP
85 UNSPEC_MMA_XVI16GER2S
86 UNSPEC_MMA_XVI16GER2SPP
87 UNSPEC_MMA_XVI4GER8
88 UNSPEC_MMA_XVI4GER8PP
89 UNSPEC_MMA_XVI8GER4
90 UNSPEC_MMA_XVI8GER4PP
91 UNSPEC_MMA_XVI8GER4SPP
92 UNSPEC_MMA_XXMFACC
93 UNSPEC_MMA_XXMTACC
94 ])
95
96 (define_c_enum "unspecv"
97 [UNSPECV_MMA_ASSEMBLE
98 UNSPECV_MMA_XXSETACCZ
99 ])
100
101 ;; MMA instructions with 1 accumulator argument
102 (define_int_iterator MMA_ACC [UNSPEC_MMA_XXMFACC
103 UNSPEC_MMA_XXMTACC])
104
105 ;; MMA instructions with 2 vector arguments
106 (define_int_iterator MMA_VV [UNSPEC_MMA_XVI4GER8
107 UNSPEC_MMA_XVI8GER4
108 UNSPEC_MMA_XVI16GER2
109 UNSPEC_MMA_XVI16GER2S
110 UNSPEC_MMA_XVF16GER2
111 UNSPEC_MMA_XVBF16GER2
112 UNSPEC_MMA_XVF32GER])
113
114 ;; MMA instructions with 1 accumulator and 2 vector arguments
115 (define_int_iterator MMA_AVV [UNSPEC_MMA_XVI4GER8PP
116 UNSPEC_MMA_XVI8GER4PP
117 UNSPEC_MMA_XVI8GER4SPP
118 UNSPEC_MMA_XVI16GER2PP
119 UNSPEC_MMA_XVI16GER2SPP
120 UNSPEC_MMA_XVF16GER2PP
121 UNSPEC_MMA_XVF16GER2PN
122 UNSPEC_MMA_XVF16GER2NP
123 UNSPEC_MMA_XVF16GER2NN
124 UNSPEC_MMA_XVBF16GER2PP
125 UNSPEC_MMA_XVBF16GER2PN
126 UNSPEC_MMA_XVBF16GER2NP
127 UNSPEC_MMA_XVBF16GER2NN
128 UNSPEC_MMA_XVF32GERPP
129 UNSPEC_MMA_XVF32GERPN
130 UNSPEC_MMA_XVF32GERNP
131 UNSPEC_MMA_XVF32GERNN])
132
133 ;; MMA instructions with 1 vector pair and 1 vector arguments
134 (define_int_iterator MMA_PV [UNSPEC_MMA_XVF64GER])
135
136 ;; MMA instructions with 1 accumulator, 1 vector pair and 1 vector arguments
137 (define_int_iterator MMA_APV [UNSPEC_MMA_XVF64GERPP
138 UNSPEC_MMA_XVF64GERPN
139 UNSPEC_MMA_XVF64GERNP
140 UNSPEC_MMA_XVF64GERNN])
141
142 ;; MMA instructions with 2 vector, 2 4-bit and 1 8-bit arguments
143 (define_int_iterator MMA_VVI4I4I8 [UNSPEC_MMA_PMXVI4GER8])
144
145 ;; MMA instructions with 1 accumulator, 2 vector, 2 4-bit and 1 8-bit arguments
146 (define_int_iterator MMA_AVVI4I4I8 [UNSPEC_MMA_PMXVI4GER8PP])
147
148 ;; MMA instructions with 2 vector, 2 4-bit and 1 2-bit arguments
149 (define_int_iterator MMA_VVI4I4I2 [UNSPEC_MMA_PMXVI16GER2
150 UNSPEC_MMA_PMXVI16GER2S
151 UNSPEC_MMA_PMXVF16GER2
152 UNSPEC_MMA_PMXVBF16GER2])
153
154 ;; MMA instructions with 1 accumulator, 2 vector, 2 4-bit and 1 2-bit arguments
155 (define_int_iterator MMA_AVVI4I4I2 [UNSPEC_MMA_PMXVI16GER2PP
156 UNSPEC_MMA_PMXVI16GER2SPP
157 UNSPEC_MMA_PMXVF16GER2PP
158 UNSPEC_MMA_PMXVF16GER2PN
159 UNSPEC_MMA_PMXVF16GER2NP
160 UNSPEC_MMA_PMXVF16GER2NN
161 UNSPEC_MMA_PMXVBF16GER2PP
162 UNSPEC_MMA_PMXVBF16GER2PN
163 UNSPEC_MMA_PMXVBF16GER2NP
164 UNSPEC_MMA_PMXVBF16GER2NN])
165
166 ;; MMA instructions with 2 vector and 2 4-bit arguments
167 (define_int_iterator MMA_VVI4I4 [UNSPEC_MMA_PMXVF32GER])
168
169 ;; MMA instructions with 1 accumulator, 2 vector and 2 4-bit arguments
170 (define_int_iterator MMA_AVVI4I4 [UNSPEC_MMA_PMXVF32GERPP
171 UNSPEC_MMA_PMXVF32GERPN
172 UNSPEC_MMA_PMXVF32GERNP
173 UNSPEC_MMA_PMXVF32GERNN])
174
175 ;; MMA instructions with 2 vector, 1 4-bit and 1 2-bit arguments
176 (define_int_iterator MMA_PVI4I2 [UNSPEC_MMA_PMXVF64GER])
177
178 ;; MMA instructions with 1 accumulator, 2 vector, 1 4-bit and 1 2-bit arguments
179 (define_int_iterator MMA_APVI4I2 [UNSPEC_MMA_PMXVF64GERPP
180 UNSPEC_MMA_PMXVF64GERPN
181 UNSPEC_MMA_PMXVF64GERNP
182 UNSPEC_MMA_PMXVF64GERNN])
183
184 ;; MMA instructions with 2 vector and 3 4-bit arguments
185 (define_int_iterator MMA_VVI4I4I4 [UNSPEC_MMA_PMXVI8GER4])
186
187 ;; MMA instructions with 1 accumulator, 2 vector and 3 4-bit arguments
188 (define_int_iterator MMA_AVVI4I4I4 [UNSPEC_MMA_PMXVI8GER4PP
189 UNSPEC_MMA_PMXVI8GER4SPP])
190
191 (define_int_attr acc [(UNSPEC_MMA_XXMFACC "xxmfacc")
192 (UNSPEC_MMA_XXMTACC "xxmtacc")])
193
194 (define_int_attr vv [(UNSPEC_MMA_XVI4GER8 "xvi4ger8")
195 (UNSPEC_MMA_XVI8GER4 "xvi8ger4")
196 (UNSPEC_MMA_XVI16GER2 "xvi16ger2")
197 (UNSPEC_MMA_XVI16GER2S "xvi16ger2s")
198 (UNSPEC_MMA_XVF16GER2 "xvf16ger2")
199 (UNSPEC_MMA_XVBF16GER2 "xvbf16ger2")
200 (UNSPEC_MMA_XVF32GER "xvf32ger")])
201
202 (define_int_attr avv [(UNSPEC_MMA_XVI4GER8PP "xvi4ger8pp")
203 (UNSPEC_MMA_XVI8GER4PP "xvi8ger4pp")
204 (UNSPEC_MMA_XVI8GER4SPP "xvi8ger4spp")
205 (UNSPEC_MMA_XVI16GER2PP "xvi16ger2pp")
206 (UNSPEC_MMA_XVI16GER2SPP "xvi16ger2spp")
207 (UNSPEC_MMA_XVF16GER2PP "xvf16ger2pp")
208 (UNSPEC_MMA_XVF16GER2PN "xvf16ger2pn")
209 (UNSPEC_MMA_XVF16GER2NP "xvf16ger2np")
210 (UNSPEC_MMA_XVF16GER2NN "xvf16ger2nn")
211 (UNSPEC_MMA_XVBF16GER2PP "xvbf16ger2pp")
212 (UNSPEC_MMA_XVBF16GER2PN "xvbf16ger2pn")
213 (UNSPEC_MMA_XVBF16GER2NP "xvbf16ger2np")
214 (UNSPEC_MMA_XVBF16GER2NN "xvbf16ger2nn")
215 (UNSPEC_MMA_XVF32GERPP "xvf32gerpp")
216 (UNSPEC_MMA_XVF32GERPN "xvf32gerpn")
217 (UNSPEC_MMA_XVF32GERNP "xvf32gernp")
218 (UNSPEC_MMA_XVF32GERNN "xvf32gernn")])
219
220 (define_int_attr pv [(UNSPEC_MMA_XVF64GER "xvf64ger")])
221
222 (define_int_attr apv [(UNSPEC_MMA_XVF64GERPP "xvf64gerpp")
223 (UNSPEC_MMA_XVF64GERPN "xvf64gerpn")
224 (UNSPEC_MMA_XVF64GERNP "xvf64gernp")
225 (UNSPEC_MMA_XVF64GERNN "xvf64gernn")])
226
227 ;; The "pm" prefix is not in these expansions, so that we can generate
228 ;; pmdmxvi4ger8 on systems with dense math registers and xvi4ger8 on systems
229 ;; without dense math registers.
230 (define_int_attr vvi4i4i8 [(UNSPEC_MMA_PMXVI4GER8 "xvi4ger8")])
231
232 (define_int_attr avvi4i4i8 [(UNSPEC_MMA_PMXVI4GER8PP "xvi4ger8pp")])
233
234 (define_int_attr vvi4i4i2 [(UNSPEC_MMA_PMXVI16GER2 "xvi16ger2")
235 (UNSPEC_MMA_PMXVI16GER2S "xvi16ger2s")
236 (UNSPEC_MMA_PMXVF16GER2 "xvf16ger2")
237 (UNSPEC_MMA_PMXVBF16GER2 "xvbf16ger2")])
238
239 (define_int_attr avvi4i4i2 [(UNSPEC_MMA_PMXVI16GER2PP "xvi16ger2pp")
240 (UNSPEC_MMA_PMXVI16GER2SPP "xvi16ger2spp")
241 (UNSPEC_MMA_PMXVF16GER2PP "xvf16ger2pp")
242 (UNSPEC_MMA_PMXVF16GER2PN "xvf16ger2pn")
243 (UNSPEC_MMA_PMXVF16GER2NP "xvf16ger2np")
244 (UNSPEC_MMA_PMXVF16GER2NN "xvf16ger2nn")
245 (UNSPEC_MMA_PMXVBF16GER2PP "xvbf16ger2pp")
246 (UNSPEC_MMA_PMXVBF16GER2PN "xvbf16ger2pn")
247 (UNSPEC_MMA_PMXVBF16GER2NP "xvbf16ger2np")
248 (UNSPEC_MMA_PMXVBF16GER2NN "xvbf16ger2nn")])
249
250 (define_int_attr vvi4i4 [(UNSPEC_MMA_PMXVF32GER "xvf32ger")])
251
252 (define_int_attr avvi4i4 [(UNSPEC_MMA_PMXVF32GERPP "xvf32gerpp")
253 (UNSPEC_MMA_PMXVF32GERPN "xvf32gerpn")
254 (UNSPEC_MMA_PMXVF32GERNP "xvf32gernp")
255 (UNSPEC_MMA_PMXVF32GERNN "xvf32gernn")])
256
257 (define_int_attr pvi4i2 [(UNSPEC_MMA_PMXVF64GER "xvf64ger")])
258
259 (define_int_attr apvi4i2 [(UNSPEC_MMA_PMXVF64GERPP "xvf64gerpp")
260 (UNSPEC_MMA_PMXVF64GERPN "xvf64gerpn")
261 (UNSPEC_MMA_PMXVF64GERNP "xvf64gernp")
262 (UNSPEC_MMA_PMXVF64GERNN "xvf64gernn")])
263
264 (define_int_attr vvi4i4i4 [(UNSPEC_MMA_PMXVI8GER4 "xvi8ger4")])
265
266 (define_int_attr avvi4i4i4 [(UNSPEC_MMA_PMXVI8GER4PP "xvi8ger4pp")
267 (UNSPEC_MMA_PMXVI8GER4SPP "xvi8ger4spp")])
268
269
270 ;; Vector pair support. OOmode can only live in VSRs.
271 (define_expand "movoo"
272 [(set (match_operand:OO 0 "nonimmediate_operand")
273 (match_operand:OO 1 "input_operand"))]
274 ""
275 {
276 if (TARGET_MMA)
277 {
278 rs6000_emit_move (operands[0], operands[1], OOmode);
279 DONE;
280 }
281 else if (currently_expanding_to_rtl && seen_error ())
282 {
283 /* PR103353 shows we may want to continue to expand the __builtin_vsx_lxvp
284 built-in function, even if we have already emitted error messages about
285 some missing required conditions. As shown in that PR, without one
286 explicit mov optab on OOmode provided, it would call emit_move_insn
287 recursively. So we allow this pattern to be generated when we are
288 expanding to RTL and have seen errors. It would not cause further ICEs
289 as the compilation would stop soon after expanding. */
290 }
291 else if (rs6000_opaque_type_invalid_use_p (currently_expanding_gimple_stmt))
292 ;
293 else
294 /* Catch unexpected cases. */
295 gcc_assert (false);
296 })
297
298 (define_insn_and_split "*movoo"
299 [(set (match_operand:OO 0 "nonimmediate_operand" "=wa,ZwO,wa")
300 (match_operand:OO 1 "input_operand" "ZwO,wa,wa"))]
301 "TARGET_MMA
302 && (gpc_reg_operand (operands[0], OOmode)
303 || gpc_reg_operand (operands[1], OOmode))"
304 "@
305 lxvp%X1 %x0,%1
306 stxvp%X0 %x1,%0
307 #"
308 "&& reload_completed
309 && (!MEM_P (operands[0]) && !MEM_P (operands[1]))"
310 [(const_int 0)]
311 {
312 rs6000_split_multireg_move (operands[0], operands[1]);
313 DONE;
314 }
315 [(set_attr "type" "vecload,vecstore,veclogical")
316 (set_attr "size" "256")
317 (set_attr "length" "*,*,8")])
318
319 \f
320 ;; Vector quad support. Under the original MMA, XOmode can only live in VSX
321 ;; registers 0..31. With dense math, XOmode can live in either VSX registers
322 ;; (0..63) or DMR registers.
323 (define_expand "movxo"
324 [(set (match_operand:XO 0 "nonimmediate_operand")
325 (match_operand:XO 1 "input_operand"))]
326 ""
327 {
328 if (TARGET_MMA)
329 {
330 rs6000_emit_move (operands[0], operands[1], XOmode);
331 DONE;
332 }
333 else if (currently_expanding_to_rtl && seen_error ())
334 {
335 /* PR103353 shows we may want to continue to expand the __builtin_vsx_lxvp
336 built-in function, even if we have already emitted error messages about
337 some missing required conditions. So do the same handlings for XOmode
338 as OOmode here. */
339 }
340 else if (rs6000_opaque_type_invalid_use_p (currently_expanding_gimple_stmt))
341 ;
342 else
343 /* Catch unexpected cases. */
344 gcc_assert (false);
345 })
346
347 (define_insn_and_split "*movxo_nodm"
348 [(set (match_operand:XO 0 "nonimmediate_operand" "=d,ZwO,d")
349 (match_operand:XO 1 "input_operand" "ZwO,d,d"))]
350 "TARGET_MMA_NO_DENSE_MATH
351 && (gpc_reg_operand (operands[0], XOmode)
352 || gpc_reg_operand (operands[1], XOmode))"
353 "@
354 #
355 #
356 #"
357 "&& reload_completed"
358 [(const_int 0)]
359 {
360 rs6000_split_multireg_move (operands[0], operands[1]);
361 DONE;
362 }
363 [(set_attr "type" "vecload,vecstore,veclogical")
364 (set_attr "length" "*,*,16")
365 (set_attr "max_prefixed_insns" "2,2,*")])
366
367 (define_insn_and_split "*movxo_dm"
368 [(set (match_operand:XO 0 "nonimmediate_operand" "=wa,ZwO,wa,wD,wD,wa")
369 (match_operand:XO 1 "input_operand" "ZwO,wa, wa,wa,wD,wD"))]
370 "TARGET_MMA_DENSE_MATH
371 && (gpc_reg_operand (operands[0], XOmode)
372 || gpc_reg_operand (operands[1], XOmode))"
373 "@
374 #
375 #
376 #
377 dmxxinstdmr512 %0,%1,%Y1,0
378 dmmr %0,%1
379 dmxxextfdmr512 %0,%Y0,%1,0"
380 "&& reload_completed
381 && !dmr_operand (operands[0], XOmode)
382 && !dmr_operand (operands[1], XOmode)"
383 [(const_int 0)]
384 {
385 rs6000_split_multireg_move (operands[0], operands[1]);
386 DONE;
387 }
388 [(set_attr "type" "vecload,vecstore,veclogical,mma,mma,mma")
389 (set_attr "length" "*,*,16,*,*,*")
390 (set_attr "max_prefixed_insns" "2,2,*,*,*,*")])
391
392 (define_expand "vsx_assemble_pair"
393 [(match_operand:OO 0 "vsx_register_operand")
394 (match_operand:V16QI 1 "mma_assemble_input_operand")
395 (match_operand:V16QI 2 "mma_assemble_input_operand")]
396 "TARGET_MMA"
397 {
398 rtx src = gen_rtx_UNSPEC (OOmode,
399 gen_rtvec (2, operands[1], operands[2]),
400 UNSPEC_VSX_ASSEMBLE);
401 emit_move_insn (operands[0], src);
402 DONE;
403 })
404
405 ;; We cannot update the two output registers atomically, so mark the output
406 ;; as an early clobber so we don't accidentally clobber the input operands. */
407
408 (define_insn_and_split "*vsx_assemble_pair"
409 [(set (match_operand:OO 0 "vsx_register_operand" "=&wa")
410 (unspec:OO [(match_operand:V16QI 1 "mma_assemble_input_operand" "mwa")
411 (match_operand:V16QI 2 "mma_assemble_input_operand" "mwa")]
412 UNSPEC_VSX_ASSEMBLE))]
413 "TARGET_MMA"
414 "#"
415 "&& reload_completed"
416 [(const_int 0)]
417 {
418 rtx src = gen_rtx_UNSPEC (OOmode,
419 gen_rtvec (2, operands[1], operands[2]),
420 UNSPEC_VSX_ASSEMBLE);
421 rs6000_split_multireg_move (operands[0], src);
422 DONE;
423 })
424
425 (define_expand "vsx_disassemble_pair"
426 [(match_operand:V16QI 0 "mma_disassemble_output_operand")
427 (match_operand:OO 1 "vsx_register_operand")
428 (match_operand 2 "const_0_to_1_operand")]
429 "TARGET_MMA"
430 {
431 rtx src;
432 int regoff = INTVAL (operands[2]);
433 src = gen_rtx_UNSPEC (V16QImode,
434 gen_rtvec (2, operands[1], GEN_INT (regoff)),
435 UNSPEC_MMA_EXTRACT);
436 emit_move_insn (operands[0], src);
437 DONE;
438 })
439
440 (define_insn_and_split "*vsx_disassemble_pair"
441 [(set (match_operand:V16QI 0 "mma_disassemble_output_operand" "=mwa")
442 (unspec:V16QI [(match_operand:OO 1 "vsx_register_operand" "wa")
443 (match_operand 2 "const_0_to_1_operand")]
444 UNSPEC_MMA_EXTRACT))]
445 "TARGET_MMA
446 && vsx_register_operand (operands[1], OOmode)"
447 "#"
448 "&& reload_completed"
449 [(const_int 0)]
450 {
451 int reg = REGNO (operands[1]);
452 int regoff = INTVAL (operands[2]);
453 rtx src = gen_rtx_REG (V16QImode, reg + regoff);
454 emit_move_insn (operands[0], src);
455 DONE;
456 })
457
458 (define_expand "mma_assemble_acc"
459 [(match_operand:XO 0 "fpr_reg_operand")
460 (match_operand:V16QI 1 "mma_assemble_input_operand")
461 (match_operand:V16QI 2 "mma_assemble_input_operand")
462 (match_operand:V16QI 3 "mma_assemble_input_operand")
463 (match_operand:V16QI 4 "mma_assemble_input_operand")]
464 "TARGET_MMA"
465 {
466 rtx src = gen_rtx_UNSPEC_VOLATILE (XOmode,
467 gen_rtvec (4, operands[1], operands[2],
468 operands[3], operands[4]),
469 UNSPECV_MMA_ASSEMBLE);
470 emit_move_insn (operands[0], src);
471 DONE;
472 })
473
474 ;; We cannot update the four output registers atomically, so mark the output
475 ;; as an early clobber so we don't accidentally clobber the input operands. */
476
477 (define_insn_and_split "*mma_assemble_acc"
478 [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
479 (unspec_volatile:XO
480 [(match_operand:V16QI 1 "mma_assemble_input_operand" "mwa")
481 (match_operand:V16QI 2 "mma_assemble_input_operand" "mwa")
482 (match_operand:V16QI 3 "mma_assemble_input_operand" "mwa")
483 (match_operand:V16QI 4 "mma_assemble_input_operand" "mwa")]
484 UNSPECV_MMA_ASSEMBLE))]
485 "TARGET_MMA"
486 "#"
487 "&& reload_completed"
488 [(const_int 0)]
489 {
490 rtx src = gen_rtx_UNSPEC_VOLATILE (XOmode,
491 gen_rtvec (4, operands[1], operands[2],
492 operands[3], operands[4]),
493 UNSPECV_MMA_ASSEMBLE);
494 rs6000_split_multireg_move (operands[0], src);
495 DONE;
496 })
497
498 (define_expand "mma_disassemble_acc"
499 [(match_operand:V16QI 0 "mma_disassemble_output_operand")
500 (match_operand:XO 1 "fpr_reg_operand")
501 (match_operand 2 "const_0_to_3_operand")]
502 "TARGET_MMA"
503 {
504 rtx src;
505 int regoff = INTVAL (operands[2]);
506 src = gen_rtx_UNSPEC (V16QImode,
507 gen_rtvec (2, operands[1], GEN_INT (regoff)),
508 UNSPEC_MMA_EXTRACT);
509 emit_move_insn (operands[0], src);
510 DONE;
511 })
512
513 (define_insn_and_split "*mma_disassemble_acc"
514 [(set (match_operand:V16QI 0 "mma_disassemble_output_operand" "=mwa")
515 (unspec:V16QI [(match_operand:XO 1 "fpr_reg_operand" "d")
516 (match_operand 2 "const_0_to_3_operand")]
517 UNSPEC_MMA_EXTRACT))]
518 "TARGET_MMA"
519 "#"
520 "&& reload_completed"
521 [(const_int 0)]
522 {
523 int reg = REGNO (operands[1]);
524 int regoff = INTVAL (operands[2]);
525 rtx src = gen_rtx_REG (V16QImode, reg + regoff);
526 emit_move_insn (operands[0], src);
527 DONE;
528 })
529
530 ;; MMA instructions that do not use their accumulators as an input, still must
531 ;; not allow their vector operands to overlap the registers used by the
532 ;; accumulator. We enforce this by marking the output as early clobber. The
533 ;; prime and de-prime instructions are not needed on systems with dense math
534 ;; registers.
535
536 (define_insn "mma_<acc>"
537 [(set (match_operand:XO 0 "accumulator_operand" "=&wD")
538 (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")]
539 MMA_ACC))]
540 "TARGET_MMA_NO_DENSE_MATH"
541 "<acc> %A0"
542 [(set_attr "type" "mma")])
543
544 ;; We can't have integer constants in XOmode so we wrap this in an
545 ;; UNSPEC_VOLATILE.
546
547 (define_insn "mma_xxsetaccz"
548 [(set (match_operand:XO 0 "accumulator_operand" "=wD")
549 (unspec_volatile:XO [(const_int 0)]
550 UNSPECV_MMA_XXSETACCZ))]
551 "TARGET_MMA"
552 {
553 return TARGET_DENSE_MATH ? "dmsetdmrz %A0" : "xxsetaccz %A0";
554 }
555 [(set_attr "type" "mma")])
556
557 (define_insn "mma_<vv>"
558 [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
559 (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
560 (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")]
561 MMA_VV))]
562 "TARGET_MMA"
563 {
564 return TARGET_DENSE_MATH ? "dm<vv> %A0,%x1,%x2" : "<vv> %A0,%x1,%x2";
565 }
566 [(set_attr "type" "mma")])
567
568 (define_insn "mma_<avv>"
569 [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
570 (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
571 (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
572 (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")]
573 MMA_AVV))]
574 "TARGET_MMA"
575 {
576 return TARGET_DENSE_MATH ? "dm<avv> %A0,%x2,%x3" : "<avv> %A0,%x2,%x3";
577 }
578 [(set_attr "type" "mma")])
579
580 (define_insn "mma_<pv>"
581 [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
582 (unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,?wa")
583 (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")]
584 MMA_PV))]
585 "TARGET_MMA"
586 {
587 return TARGET_DENSE_MATH ? "dm<pv> %A0,%x1,%x2" : "<pv> %A0,%x1,%x2";
588 }
589 [(set_attr "type" "mma")])
590
591 (define_insn "mma_<apv>"
592 [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
593 (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
594 (match_operand:OO 2 "vsx_register_operand" "v,?wa")
595 (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")]
596 MMA_APV))]
597 "TARGET_MMA"
598 {
599 return TARGET_DENSE_MATH ? "dm<apv> %A0,%x2,%x3" : "<apv> %A0,%x2,%x3";
600 }
601 [(set_attr "type" "mma")])
602
603 (define_insn "mma_pm<vvi4i4i8>"
604 [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
605 (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
606 (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
607 (match_operand:SI 3 "const_0_to_15_operand" "n,n")
608 (match_operand:SI 4 "const_0_to_15_operand" "n,n")
609 (match_operand:SI 5 "u8bit_cint_operand" "n,n")]
610 MMA_VVI4I4I8))]
611 "TARGET_MMA"
612 {
613 return (TARGET_DENSE_MATH
614 ? "pmdm<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5"
615 : "pm<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5");
616 }
617 [(set_attr "type" "mma")
618 (set_attr "prefixed" "yes")])
619
620 (define_insn "mma_pm<avvi4i4i8>"
621 [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
622 (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
623 (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
624 (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
625 (match_operand:SI 4 "const_0_to_15_operand" "n,n")
626 (match_operand:SI 5 "const_0_to_15_operand" "n,n")
627 (match_operand:SI 6 "u8bit_cint_operand" "n,n")]
628 MMA_AVVI4I4I8))]
629 "TARGET_MMA"
630 {
631 return (TARGET_DENSE_MATH
632 ? "pmdm<avvi4i4i8> %A0,%x2,%x3,%4,%5,%6"
633 : "pm<avvi4i4i8> %A0,%x2,%x3,%4,%5,%6");
634 }
635 [(set_attr "type" "mma")
636 (set_attr "prefixed" "yes")])
637
638 (define_insn "mma_pm<vvi4i4i2>"
639 [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
640 (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
641 (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
642 (match_operand:SI 3 "const_0_to_15_operand" "n,n")
643 (match_operand:SI 4 "const_0_to_15_operand" "n,n")
644 (match_operand:SI 5 "const_0_to_3_operand" "n,n")]
645 MMA_VVI4I4I2))]
646 "TARGET_MMA"
647 {
648 return (TARGET_DENSE_MATH
649 ? "pmdm<vvi4i4i2> %A0,%x1,%x2,%3,%4,%5"
650 : "pm<vvi4i4i2> %A0,%x1,%x2,%3,%4,%5");
651 }
652 [(set_attr "type" "mma")
653 (set_attr "prefixed" "yes")])
654
655 (define_insn "mma_pm<avvi4i4i2>"
656 [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
657 (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
658 (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
659 (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
660 (match_operand:SI 4 "const_0_to_15_operand" "n,n")
661 (match_operand:SI 5 "const_0_to_15_operand" "n,n")
662 (match_operand:SI 6 "const_0_to_3_operand" "n,n")]
663 MMA_AVVI4I4I2))]
664 "TARGET_MMA"
665 {
666 return (TARGET_DENSE_MATH
667 ? "pmdm<avvi4i4i2> %A0,%x2,%x3,%4,%5,%6"
668 : "pm<avvi4i4i2> %A0,%x2,%x3,%4,%5,%6");
669 }
670 [(set_attr "type" "mma")
671 (set_attr "prefixed" "yes")])
672
673 (define_insn "mma_pm<vvi4i4>"
674 [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
675 (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
676 (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
677 (match_operand:SI 3 "const_0_to_15_operand" "n,n")
678 (match_operand:SI 4 "const_0_to_15_operand" "n,n")]
679 MMA_VVI4I4))]
680 "TARGET_MMA"
681 {
682 return (TARGET_DENSE_MATH
683 ? "pmdm<vvi4i4> %A0,%x1,%x2,%3,%4"
684 : "pm<vvi4i4> %A0,%x1,%x2,%3,%4");
685 }
686 [(set_attr "type" "mma")
687 (set_attr "prefixed" "yes")])
688
689 (define_insn "mma_pm<avvi4i4>"
690 [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
691 (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
692 (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
693 (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
694 (match_operand:SI 4 "const_0_to_15_operand" "n,n")
695 (match_operand:SI 5 "const_0_to_15_operand" "n,n")]
696 MMA_AVVI4I4))]
697 "TARGET_MMA"
698 {
699 return (TARGET_DENSE_MATH
700 ? "pmdm<avvi4i4> %A0,%x2,%x3,%4,%5"
701 : "pm<avvi4i4> %A0,%x2,%x3,%4,%5");
702 }
703 [(set_attr "type" "mma")
704 (set_attr "prefixed" "yes")])
705
706 (define_insn "mma_pm<pvi4i2>"
707 [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
708 (unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,?wa")
709 (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
710 (match_operand:SI 3 "const_0_to_15_operand" "n,n")
711 (match_operand:SI 4 "const_0_to_3_operand" "n,n")]
712 MMA_PVI4I2))]
713 "TARGET_MMA"
714 {
715 return (TARGET_DENSE_MATH
716 ? "pmdm<pvi4i2> %A0,%x1,%x2,%3,%4"
717 : "pm<pvi4i2> %A0,%x1,%x2,%3,%4");
718 }
719 [(set_attr "type" "mma")
720 (set_attr "prefixed" "yes")])
721
722 (define_insn "mma_pm<apvi4i2>"
723 [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
724 (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
725 (match_operand:OO 2 "vsx_register_operand" "v,?wa")
726 (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
727 (match_operand:SI 4 "const_0_to_15_operand" "n,n")
728 (match_operand:SI 5 "const_0_to_3_operand" "n,n")]
729 MMA_APVI4I2))]
730 "TARGET_MMA"
731 {
732 return (TARGET_DENSE_MATH
733 ? "pmdm<apvi4i2> %A0,%x2,%x3,%4,%5"
734 : "pm<apvi4i2> %A0,%x2,%x3,%4,%5");
735 }
736 [(set_attr "type" "mma")
737 (set_attr "prefixed" "yes")])
738
739 (define_insn "mma_pm<vvi4i4i4>"
740 [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
741 (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
742 (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
743 (match_operand:SI 3 "const_0_to_15_operand" "n,n")
744 (match_operand:SI 4 "const_0_to_15_operand" "n,n")
745 (match_operand:SI 5 "const_0_to_15_operand" "n,n")]
746 MMA_VVI4I4I4))]
747 "TARGET_MMA"
748 {
749 return (TARGET_DENSE_MATH
750 ? "pmdm<vvi4i4i4> %A0,%x1,%x2,%3,%4,%5"
751 : "pm<vvi4i4i4> %A0,%x1,%x2,%3,%4,%5");
752 }
753 [(set_attr "type" "mma")
754 (set_attr "prefixed" "yes")])
755
756 (define_insn "mma_pm<avvi4i4i4>"
757 [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
758 (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
759 (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
760 (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
761 (match_operand:SI 4 "const_0_to_15_operand" "n,n")
762 (match_operand:SI 5 "const_0_to_15_operand" "n,n")
763 (match_operand:SI 6 "const_0_to_15_operand" "n,n")]
764 MMA_AVVI4I4I4))]
765 "TARGET_MMA"
766 {
767 return (TARGET_DENSE_MATH
768 ? "pmdm<avvi4i4i4> %A0,%x2,%x3,%4,%5,%6"
769 : "pm<avvi4i4i4> %A0,%x2,%x3,%4,%5,%6");
770 }
771 [(set_attr "type" "mma")
772 (set_attr "prefixed" "yes")])