;; Matrix-Multiply Assist (MMA) patterns.
;; Copyright (C) 2020-2024 Free Software Foundation, Inc.
;; Contributed by Peter Bergner <bergner@linux.ibm.com> and
;; Michael Meissner <meissner@linux.ibm.com>
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published
;; by the Free Software Foundation; either version 3, or (at your
;; option) any later version.
;;
;; GCC is distributed in the hope that it will be useful, but WITHOUT
;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
;; License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.

;; The MMA patterns use the multi-register XOmode and OOmode opaque
;; modes to implement the target specific __vector_quad and
;; __vector_pair types that the MMA built-in functions reference.  We
;; use OPAQUE_MODE to prevent anything from trying to open them up.

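;; As a rough, illustrative C-level sketch (not a definitive reference; see
;; the GCC PowerPC built-in documentation for the exact prototypes), user code
;; only touches these opaque modes through the __vector_quad/__vector_pair
;; types and the MMA built-ins when compiled for a power10 or later target:
;;
;;   void
;;   ger_example (__vector_quad *dst, vector unsigned char a,
;;                vector unsigned char b)
;;   {
;;     __vector_quad acc;
;;     __builtin_mma_xxsetaccz (&acc);        /* mma_xxsetaccz pattern.  */
;;     __builtin_mma_xvf32gerpp (&acc, a, b); /* mma_xvf32gerpp (mma_<avv>).  */
;;     *dst = acc;                            /* XOmode multi-register move.  */
;;   }
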
(define_constants [(MAX_MMA_OPERANDS 7)])

;; Constants for creating unspecs

(define_c_enum "unspec"
  [UNSPEC_VSX_ASSEMBLE
   UNSPEC_MMA_EXTRACT
   UNSPEC_MMA_PMXVBF16GER2
   UNSPEC_MMA_PMXVBF16GER2NN
   UNSPEC_MMA_PMXVBF16GER2NP
   UNSPEC_MMA_PMXVBF16GER2PN
   UNSPEC_MMA_PMXVBF16GER2PP
   UNSPEC_MMA_PMXVF16GER2
   UNSPEC_MMA_PMXVF16GER2NN
   UNSPEC_MMA_PMXVF16GER2NP
   UNSPEC_MMA_PMXVF16GER2PN
   UNSPEC_MMA_PMXVF16GER2PP
   UNSPEC_MMA_PMXVF32GER
   UNSPEC_MMA_PMXVF32GERNN
   UNSPEC_MMA_PMXVF32GERNP
   UNSPEC_MMA_PMXVF32GERPN
   UNSPEC_MMA_PMXVF32GERPP
   UNSPEC_MMA_PMXVF64GER
   UNSPEC_MMA_PMXVF64GERNN
   UNSPEC_MMA_PMXVF64GERNP
   UNSPEC_MMA_PMXVF64GERPN
   UNSPEC_MMA_PMXVF64GERPP
   UNSPEC_MMA_PMXVI16GER2
   UNSPEC_MMA_PMXVI16GER2PP
   UNSPEC_MMA_PMXVI16GER2S
   UNSPEC_MMA_PMXVI16GER2SPP
   UNSPEC_MMA_PMXVI4GER8
   UNSPEC_MMA_PMXVI4GER8PP
   UNSPEC_MMA_PMXVI8GER4
   UNSPEC_MMA_PMXVI8GER4PP
   UNSPEC_MMA_PMXVI8GER4SPP
   UNSPEC_MMA_XVBF16GER2
   UNSPEC_MMA_XVBF16GER2NN
   UNSPEC_MMA_XVBF16GER2NP
   UNSPEC_MMA_XVBF16GER2PN
   UNSPEC_MMA_XVBF16GER2PP
   UNSPEC_MMA_XVF16GER2
   UNSPEC_MMA_XVF16GER2NN
   UNSPEC_MMA_XVF16GER2NP
   UNSPEC_MMA_XVF16GER2PN
   UNSPEC_MMA_XVF16GER2PP
   UNSPEC_MMA_XVF32GER
   UNSPEC_MMA_XVF32GERNN
   UNSPEC_MMA_XVF32GERNP
   UNSPEC_MMA_XVF32GERPN
   UNSPEC_MMA_XVF32GERPP
   UNSPEC_MMA_XVF64GER
   UNSPEC_MMA_XVF64GERNN
   UNSPEC_MMA_XVF64GERNP
   UNSPEC_MMA_XVF64GERPN
   UNSPEC_MMA_XVF64GERPP
   UNSPEC_MMA_XVI16GER2
   UNSPEC_MMA_XVI16GER2PP
   UNSPEC_MMA_XVI16GER2S
   UNSPEC_MMA_XVI16GER2SPP
   UNSPEC_MMA_XVI4GER8
   UNSPEC_MMA_XVI4GER8PP
   UNSPEC_MMA_XVI8GER4
   UNSPEC_MMA_XVI8GER4PP
   UNSPEC_MMA_XVI8GER4SPP
   UNSPEC_MMA_XXMFACC
   UNSPEC_MMA_XXMTACC
  ])

(define_c_enum "unspecv"
  [UNSPECV_MMA_ASSEMBLE
   UNSPECV_MMA_XXSETACCZ
  ])

;; MMA instructions with 1 accumulator argument
(define_int_iterator MMA_ACC [UNSPEC_MMA_XXMFACC
                              UNSPEC_MMA_XXMTACC])

;; MMA instructions with 2 vector arguments
(define_int_iterator MMA_VV [UNSPEC_MMA_XVI4GER8
                             UNSPEC_MMA_XVI8GER4
                             UNSPEC_MMA_XVI16GER2
                             UNSPEC_MMA_XVI16GER2S
                             UNSPEC_MMA_XVF16GER2
                             UNSPEC_MMA_XVBF16GER2
                             UNSPEC_MMA_XVF32GER])

;; MMA instructions with 1 accumulator and 2 vector arguments
(define_int_iterator MMA_AVV [UNSPEC_MMA_XVI4GER8PP
                              UNSPEC_MMA_XVI8GER4PP
                              UNSPEC_MMA_XVI8GER4SPP
                              UNSPEC_MMA_XVI16GER2PP
                              UNSPEC_MMA_XVI16GER2SPP
                              UNSPEC_MMA_XVF16GER2PP
                              UNSPEC_MMA_XVF16GER2PN
                              UNSPEC_MMA_XVF16GER2NP
                              UNSPEC_MMA_XVF16GER2NN
                              UNSPEC_MMA_XVBF16GER2PP
                              UNSPEC_MMA_XVBF16GER2PN
                              UNSPEC_MMA_XVBF16GER2NP
                              UNSPEC_MMA_XVBF16GER2NN
                              UNSPEC_MMA_XVF32GERPP
                              UNSPEC_MMA_XVF32GERPN
                              UNSPEC_MMA_XVF32GERNP
                              UNSPEC_MMA_XVF32GERNN])

;; MMA instructions with 1 vector pair and 1 vector arguments
(define_int_iterator MMA_PV [UNSPEC_MMA_XVF64GER])

;; MMA instructions with 1 accumulator, 1 vector pair and 1 vector arguments
(define_int_iterator MMA_APV [UNSPEC_MMA_XVF64GERPP
                              UNSPEC_MMA_XVF64GERPN
                              UNSPEC_MMA_XVF64GERNP
                              UNSPEC_MMA_XVF64GERNN])

;; MMA instructions with 2 vector, 2 4-bit and 1 8-bit arguments
(define_int_iterator MMA_VVI4I4I8 [UNSPEC_MMA_PMXVI4GER8])

;; MMA instructions with 1 accumulator, 2 vector, 2 4-bit and 1 8-bit arguments
(define_int_iterator MMA_AVVI4I4I8 [UNSPEC_MMA_PMXVI4GER8PP])

;; MMA instructions with 2 vector, 2 4-bit and 1 2-bit arguments
(define_int_iterator MMA_VVI4I4I2 [UNSPEC_MMA_PMXVI16GER2
                                   UNSPEC_MMA_PMXVI16GER2S
                                   UNSPEC_MMA_PMXVF16GER2
                                   UNSPEC_MMA_PMXVBF16GER2])

;; MMA instructions with 1 accumulator, 2 vector, 2 4-bit and 1 2-bit arguments
(define_int_iterator MMA_AVVI4I4I2 [UNSPEC_MMA_PMXVI16GER2PP
                                    UNSPEC_MMA_PMXVI16GER2SPP
                                    UNSPEC_MMA_PMXVF16GER2PP
                                    UNSPEC_MMA_PMXVF16GER2PN
                                    UNSPEC_MMA_PMXVF16GER2NP
                                    UNSPEC_MMA_PMXVF16GER2NN
                                    UNSPEC_MMA_PMXVBF16GER2PP
                                    UNSPEC_MMA_PMXVBF16GER2PN
                                    UNSPEC_MMA_PMXVBF16GER2NP
                                    UNSPEC_MMA_PMXVBF16GER2NN])

;; MMA instructions with 2 vector and 2 4-bit arguments
(define_int_iterator MMA_VVI4I4 [UNSPEC_MMA_PMXVF32GER])

;; MMA instructions with 1 accumulator, 2 vector and 2 4-bit arguments
(define_int_iterator MMA_AVVI4I4 [UNSPEC_MMA_PMXVF32GERPP
                                  UNSPEC_MMA_PMXVF32GERPN
                                  UNSPEC_MMA_PMXVF32GERNP
                                  UNSPEC_MMA_PMXVF32GERNN])

;; MMA instructions with 2 vector, 1 4-bit and 1 2-bit arguments
(define_int_iterator MMA_PVI4I2 [UNSPEC_MMA_PMXVF64GER])

;; MMA instructions with 1 accumulator, 2 vector, 1 4-bit and 1 2-bit arguments
(define_int_iterator MMA_APVI4I2 [UNSPEC_MMA_PMXVF64GERPP
                                  UNSPEC_MMA_PMXVF64GERPN
                                  UNSPEC_MMA_PMXVF64GERNP
                                  UNSPEC_MMA_PMXVF64GERNN])

;; MMA instructions with 2 vector and 3 4-bit arguments
(define_int_iterator MMA_VVI4I4I4 [UNSPEC_MMA_PMXVI8GER4])

;; MMA instructions with 1 accumulator, 2 vector and 3 4-bit arguments
(define_int_iterator MMA_AVVI4I4I4 [UNSPEC_MMA_PMXVI8GER4PP
                                    UNSPEC_MMA_PMXVI8GER4SPP])

(define_int_attr acc [(UNSPEC_MMA_XXMFACC "xxmfacc")
                      (UNSPEC_MMA_XXMTACC "xxmtacc")])

(define_int_attr vv [(UNSPEC_MMA_XVI4GER8 "xvi4ger8")
                     (UNSPEC_MMA_XVI8GER4 "xvi8ger4")
                     (UNSPEC_MMA_XVI16GER2 "xvi16ger2")
                     (UNSPEC_MMA_XVI16GER2S "xvi16ger2s")
                     (UNSPEC_MMA_XVF16GER2 "xvf16ger2")
                     (UNSPEC_MMA_XVBF16GER2 "xvbf16ger2")
                     (UNSPEC_MMA_XVF32GER "xvf32ger")])

(define_int_attr avv [(UNSPEC_MMA_XVI4GER8PP "xvi4ger8pp")
                      (UNSPEC_MMA_XVI8GER4PP "xvi8ger4pp")
                      (UNSPEC_MMA_XVI8GER4SPP "xvi8ger4spp")
                      (UNSPEC_MMA_XVI16GER2PP "xvi16ger2pp")
                      (UNSPEC_MMA_XVI16GER2SPP "xvi16ger2spp")
                      (UNSPEC_MMA_XVF16GER2PP "xvf16ger2pp")
                      (UNSPEC_MMA_XVF16GER2PN "xvf16ger2pn")
                      (UNSPEC_MMA_XVF16GER2NP "xvf16ger2np")
                      (UNSPEC_MMA_XVF16GER2NN "xvf16ger2nn")
                      (UNSPEC_MMA_XVBF16GER2PP "xvbf16ger2pp")
                      (UNSPEC_MMA_XVBF16GER2PN "xvbf16ger2pn")
                      (UNSPEC_MMA_XVBF16GER2NP "xvbf16ger2np")
                      (UNSPEC_MMA_XVBF16GER2NN "xvbf16ger2nn")
                      (UNSPEC_MMA_XVF32GERPP "xvf32gerpp")
                      (UNSPEC_MMA_XVF32GERPN "xvf32gerpn")
                      (UNSPEC_MMA_XVF32GERNP "xvf32gernp")
                      (UNSPEC_MMA_XVF32GERNN "xvf32gernn")])

(define_int_attr pv [(UNSPEC_MMA_XVF64GER "xvf64ger")])

(define_int_attr apv [(UNSPEC_MMA_XVF64GERPP "xvf64gerpp")
                      (UNSPEC_MMA_XVF64GERPN "xvf64gerpn")
                      (UNSPEC_MMA_XVF64GERNP "xvf64gernp")
                      (UNSPEC_MMA_XVF64GERNN "xvf64gernn")])

;; The "pm" prefix is not in these expansions, so that we can generate
;; pmdmxvi4ger8 on systems with dense math registers and pmxvi4ger8 on
;; systems without dense math registers.
230(define_int_attr vvi4i4i8 [(UNSPEC_MMA_PMXVI4GER8 "xvi4ger8")])
8ee2640b 231
f4c6cc29 232(define_int_attr avvi4i4i8 [(UNSPEC_MMA_PMXVI4GER8PP "xvi4ger8pp")])
8ee2640b 233
f4c6cc29
MM
234(define_int_attr vvi4i4i2 [(UNSPEC_MMA_PMXVI16GER2 "xvi16ger2")
235 (UNSPEC_MMA_PMXVI16GER2S "xvi16ger2s")
236 (UNSPEC_MMA_PMXVF16GER2 "xvf16ger2")
237 (UNSPEC_MMA_PMXVBF16GER2 "xvbf16ger2")])
8ee2640b 238
f4c6cc29
MM
239(define_int_attr avvi4i4i2 [(UNSPEC_MMA_PMXVI16GER2PP "xvi16ger2pp")
240 (UNSPEC_MMA_PMXVI16GER2SPP "xvi16ger2spp")
241 (UNSPEC_MMA_PMXVF16GER2PP "xvf16ger2pp")
242 (UNSPEC_MMA_PMXVF16GER2PN "xvf16ger2pn")
243 (UNSPEC_MMA_PMXVF16GER2NP "xvf16ger2np")
244 (UNSPEC_MMA_PMXVF16GER2NN "xvf16ger2nn")
245 (UNSPEC_MMA_PMXVBF16GER2PP "xvbf16ger2pp")
246 (UNSPEC_MMA_PMXVBF16GER2PN "xvbf16ger2pn")
247 (UNSPEC_MMA_PMXVBF16GER2NP "xvbf16ger2np")
248 (UNSPEC_MMA_PMXVBF16GER2NN "xvbf16ger2nn")])
8ee2640b 249
f4c6cc29 250(define_int_attr vvi4i4 [(UNSPEC_MMA_PMXVF32GER "xvf32ger")])
8ee2640b 251
f4c6cc29
MM
252(define_int_attr avvi4i4 [(UNSPEC_MMA_PMXVF32GERPP "xvf32gerpp")
253 (UNSPEC_MMA_PMXVF32GERPN "xvf32gerpn")
254 (UNSPEC_MMA_PMXVF32GERNP "xvf32gernp")
255 (UNSPEC_MMA_PMXVF32GERNN "xvf32gernn")])
8ee2640b 256
f4c6cc29 257(define_int_attr pvi4i2 [(UNSPEC_MMA_PMXVF64GER "xvf64ger")])
8ee2640b 258
f4c6cc29
MM
259(define_int_attr apvi4i2 [(UNSPEC_MMA_PMXVF64GERPP "xvf64gerpp")
260 (UNSPEC_MMA_PMXVF64GERPN "xvf64gerpn")
261 (UNSPEC_MMA_PMXVF64GERNP "xvf64gernp")
262 (UNSPEC_MMA_PMXVF64GERNN "xvf64gernn")])
8ee2640b 263
f4c6cc29 264(define_int_attr vvi4i4i4 [(UNSPEC_MMA_PMXVI8GER4 "xvi8ger4")])
8ee2640b 265
f4c6cc29
MM
266(define_int_attr avvi4i4i4 [(UNSPEC_MMA_PMXVI8GER4PP "xvi8ger4pp")
267 (UNSPEC_MMA_PMXVI8GER4SPP "xvi8ger4spp")])
8ee2640b
PB
268
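;; To make the naming scheme described above concrete, here is a rough,
;; illustrative C-level sketch (built-in prototype as documented for the GCC
;; PowerPC target; the mask values are arbitrary examples, not a reference):
;;
;;   void
;;   pm_example (__vector_quad *dst, vector unsigned char a,
;;               vector unsigned char b)
;;   {
;;     __vector_quad acc;
;;     /* Masked GER: matches the mma_pm<vvi4i4i8> pattern below, emitting
;;        "pmxvi4ger8" without dense math or "pmdmxvi4ger8" with it.  */
;;     __builtin_mma_pmxvi4ger8 (&acc, a, b, 0xf, 0xf, 0xff);
;;     *dst = acc;
;;   }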

;; Vector pair support.  OOmode can only live in VSRs.
(define_expand "movoo"
  [(set (match_operand:OO 0 "nonimmediate_operand")
        (match_operand:OO 1 "input_operand"))]
  ""
{
  if (TARGET_MMA)
    {
      rs6000_emit_move (operands[0], operands[1], OOmode);
      DONE;
    }
  else if (currently_expanding_to_rtl && seen_error ())
    {
      /* PR103353 shows we may want to continue expanding the
         __builtin_vsx_lxvp built-in function even if we have already
         emitted error messages about some missing required conditions.
         As shown in that PR, without an explicit mov optab provided for
         OOmode, expansion would call emit_move_insn recursively.  So we
         allow this pattern to be generated when we are expanding to RTL
         and have seen errors.  This does not cause further ICEs, as the
         compilation stops soon after expanding.  */
    }
  else if (rs6000_opaque_type_invalid_use_p (currently_expanding_gimple_stmt))
    ;
  else
    /* Catch unexpected cases.  */
    gcc_assert (false);
})

(define_insn_and_split "*movoo"
  [(set (match_operand:OO 0 "nonimmediate_operand" "=wa,ZwO,wa")
        (match_operand:OO 1 "input_operand" "ZwO,wa,wa"))]
  "TARGET_MMA
   && (gpc_reg_operand (operands[0], OOmode)
       || gpc_reg_operand (operands[1], OOmode))"
  "@
   lxvp%X1 %x0,%1
   stxvp%X0 %x1,%0
   #"
  "&& reload_completed
   && (!MEM_P (operands[0]) && !MEM_P (operands[1]))"
  [(const_int 0)]
{
  rs6000_split_multireg_move (operands[0], operands[1]);
  DONE;
}
  [(set_attr "type" "vecload,vecstore,veclogical")
   (set_attr "size" "256")
   (set_attr "length" "*,*,8")])

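;; As a hedged illustration of the moves above, plain assignments through
;; __vector_pair objects are all that is needed at the C level; the lxvp,
;; stxvp and register-to-register forms fall out of the "*movoo" pattern:
;;
;;   void
;;   copy_pair (__vector_pair *dst, const __vector_pair *src)
;;   {
;;     *dst = *src;   /* lxvp followed by stxvp (or a split register copy).  */
;;   }
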

;; Vector quad support.  Under the original MMA, XOmode can only live in VSX
;; registers 0..31.  With dense math, XOmode can live in either VSX registers
;; (0..63) or DMR registers.
(define_expand "movxo"
  [(set (match_operand:XO 0 "nonimmediate_operand")
        (match_operand:XO 1 "input_operand"))]
  ""
{
  if (TARGET_MMA)
    {
      rs6000_emit_move (operands[0], operands[1], XOmode);
      DONE;
    }
  else if (currently_expanding_to_rtl && seen_error ())
    {
      /* PR103353 shows we may want to continue expanding the
         __builtin_vsx_lxvp built-in function even if we have already
         emitted error messages about some missing required conditions.
         So apply the same handling to XOmode as to OOmode here.  */
    }
  else if (rs6000_opaque_type_invalid_use_p (currently_expanding_gimple_stmt))
    ;
  else
    /* Catch unexpected cases.  */
    gcc_assert (false);
})

(define_insn_and_split "*movxo_nodm"
  [(set (match_operand:XO 0 "nonimmediate_operand" "=d,ZwO,d")
        (match_operand:XO 1 "input_operand" "ZwO,d,d"))]
  "TARGET_MMA_NO_DENSE_MATH
   && (gpc_reg_operand (operands[0], XOmode)
       || gpc_reg_operand (operands[1], XOmode))"
  "@
   #
   #
   #"
  "&& reload_completed"
  [(const_int 0)]
{
  rs6000_split_multireg_move (operands[0], operands[1]);
  DONE;
}
  [(set_attr "type" "vecload,vecstore,veclogical")
   (set_attr "length" "*,*,16")
   (set_attr "max_prefixed_insns" "2,2,*")])

(define_insn_and_split "*movxo_dm"
  [(set (match_operand:XO 0 "nonimmediate_operand" "=wa,ZwO,wa,wD,wD,wa")
        (match_operand:XO 1 "input_operand" "ZwO,wa, wa,wa,wD,wD"))]
  "TARGET_MMA_DENSE_MATH
   && (gpc_reg_operand (operands[0], XOmode)
       || gpc_reg_operand (operands[1], XOmode))"
  "@
   #
   #
   #
   dmxxinstdmr512 %0,%1,%Y1,0
   dmmr %0,%1
   dmxxextfdmr512 %0,%Y0,%1,0"
  "&& reload_completed
   && !dmr_operand (operands[0], XOmode)
   && !dmr_operand (operands[1], XOmode)"
  [(const_int 0)]
{
  rs6000_split_multireg_move (operands[0], operands[1]);
  DONE;
}
  [(set_attr "type" "vecload,vecstore,veclogical,mma,mma,mma")
   (set_attr "length" "*,*,16,*,*,*")
   (set_attr "max_prefixed_insns" "2,2,*,*,*,*")])

(define_expand "vsx_assemble_pair"
  [(match_operand:OO 0 "vsx_register_operand")
   (match_operand:V16QI 1 "mma_assemble_input_operand")
   (match_operand:V16QI 2 "mma_assemble_input_operand")]
  "TARGET_MMA"
{
  rtx src = gen_rtx_UNSPEC (OOmode,
                            gen_rtvec (2, operands[1], operands[2]),
                            UNSPEC_VSX_ASSEMBLE);
  emit_move_insn (operands[0], src);
  DONE;
})

;; We cannot update the two output registers atomically, so mark the output
;; as an early clobber so we don't accidentally clobber the input operands.

(define_insn_and_split "*vsx_assemble_pair"
  [(set (match_operand:OO 0 "vsx_register_operand" "=&wa")
        (unspec:OO [(match_operand:V16QI 1 "mma_assemble_input_operand" "mwa")
                    (match_operand:V16QI 2 "mma_assemble_input_operand" "mwa")]
                   UNSPEC_VSX_ASSEMBLE))]
  "TARGET_MMA"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx src = gen_rtx_UNSPEC (OOmode,
                            gen_rtvec (2, operands[1], operands[2]),
                            UNSPEC_VSX_ASSEMBLE);
  rs6000_split_multireg_move (operands[0], src);
  DONE;
})

(define_expand "vsx_disassemble_pair"
  [(match_operand:V16QI 0 "mma_disassemble_output_operand")
   (match_operand:OO 1 "vsx_register_operand")
   (match_operand 2 "const_0_to_1_operand")]
  "TARGET_MMA"
{
  rtx src;
  int regoff = INTVAL (operands[2]);
  src = gen_rtx_UNSPEC (V16QImode,
                        gen_rtvec (2, operands[1], GEN_INT (regoff)),
                        UNSPEC_MMA_EXTRACT);
  emit_move_insn (operands[0], src);
  DONE;
})

(define_insn_and_split "*vsx_disassemble_pair"
  [(set (match_operand:V16QI 0 "mma_disassemble_output_operand" "=mwa")
        (unspec:V16QI [(match_operand:OO 1 "vsx_register_operand" "wa")
                       (match_operand 2 "const_0_to_1_operand")]
                      UNSPEC_MMA_EXTRACT))]
  "TARGET_MMA
   && vsx_register_operand (operands[1], OOmode)"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  int reg = REGNO (operands[1]);
  int regoff = INTVAL (operands[2]);
  rtx src = gen_rtx_REG (V16QImode, reg + regoff);
  emit_move_insn (operands[0], src);
  DONE;
})

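;; A hedged C-level sketch of how the pair assemble/disassemble patterns above
;; are reached (built-in names as documented for the GCC PowerPC target;
;; element ordering is endian-dependent, so this is illustrative only):
;;
;;   void
;;   pair_example (__vector_pair *p, vector unsigned char hi,
;;                 vector unsigned char lo, vector unsigned char out[2])
;;   {
;;     __builtin_vsx_assemble_pair (p, hi, lo);   /* vsx_assemble_pair.  */
;;     __builtin_vsx_disassemble_pair (out, p);   /* vsx_disassemble_pair.  */
;;   }
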
(define_expand "mma_assemble_acc"
  [(match_operand:XO 0 "fpr_reg_operand")
   (match_operand:V16QI 1 "mma_assemble_input_operand")
   (match_operand:V16QI 2 "mma_assemble_input_operand")
   (match_operand:V16QI 3 "mma_assemble_input_operand")
   (match_operand:V16QI 4 "mma_assemble_input_operand")]
  "TARGET_MMA"
{
  rtx src = gen_rtx_UNSPEC_VOLATILE (XOmode,
                                     gen_rtvec (4, operands[1], operands[2],
                                                operands[3], operands[4]),
                                     UNSPECV_MMA_ASSEMBLE);
  emit_move_insn (operands[0], src);
  DONE;
})

;; We cannot update the four output registers atomically, so mark the output
;; as an early clobber so we don't accidentally clobber the input operands.

(define_insn_and_split "*mma_assemble_acc"
  [(set (match_operand:XO 0 "fpr_reg_operand" "=&d")
        (unspec_volatile:XO
         [(match_operand:V16QI 1 "mma_assemble_input_operand" "mwa")
          (match_operand:V16QI 2 "mma_assemble_input_operand" "mwa")
          (match_operand:V16QI 3 "mma_assemble_input_operand" "mwa")
          (match_operand:V16QI 4 "mma_assemble_input_operand" "mwa")]
         UNSPECV_MMA_ASSEMBLE))]
  "TARGET_MMA"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  rtx src = gen_rtx_UNSPEC_VOLATILE (XOmode,
                                     gen_rtvec (4, operands[1], operands[2],
                                                operands[3], operands[4]),
                                     UNSPECV_MMA_ASSEMBLE);
  rs6000_split_multireg_move (operands[0], src);
  DONE;
})

(define_expand "mma_disassemble_acc"
  [(match_operand:V16QI 0 "mma_disassemble_output_operand")
   (match_operand:XO 1 "fpr_reg_operand")
   (match_operand 2 "const_0_to_3_operand")]
  "TARGET_MMA"
{
  rtx src;
  int regoff = INTVAL (operands[2]);
  src = gen_rtx_UNSPEC (V16QImode,
                        gen_rtvec (2, operands[1], GEN_INT (regoff)),
                        UNSPEC_MMA_EXTRACT);
  emit_move_insn (operands[0], src);
  DONE;
})

(define_insn_and_split "*mma_disassemble_acc"
  [(set (match_operand:V16QI 0 "mma_disassemble_output_operand" "=mwa")
        (unspec:V16QI [(match_operand:XO 1 "fpr_reg_operand" "d")
                       (match_operand 2 "const_0_to_3_operand")]
                      UNSPEC_MMA_EXTRACT))]
  "TARGET_MMA"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  int reg = REGNO (operands[1]);
  int regoff = INTVAL (operands[2]);
  rtx src = gen_rtx_REG (V16QImode, reg + regoff);
  emit_move_insn (operands[0], src);
  DONE;
})

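;; A hedged sketch of the accumulator assemble/disassemble built-ins that
;; reach the patterns above (prototypes as documented for the GCC PowerPC
;; target; row ordering is endian-dependent, so this is illustrative only):
;;
;;   void
;;   acc_example (__vector_quad *acc, vector unsigned char rows[4],
;;                vector unsigned char out[4])
;;   {
;;     __builtin_mma_assemble_acc (acc, rows[0], rows[1], rows[2], rows[3]);
;;     __builtin_mma_disassemble_acc (out, acc);  /* mma_disassemble_acc.  */
;;   }
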
;; MMA instructions that do not use their accumulator as an input still must
;; not allow their vector operands to overlap the registers used by the
;; accumulator.  We enforce this by marking the output as an early clobber.
;; The prime (xxmtacc) and de-prime (xxmfacc) instructions are not needed on
;; systems with dense math registers.

(define_insn "mma_<acc>"
  [(set (match_operand:XO 0 "accumulator_operand" "=&wD")
        (unspec:XO [(match_operand:XO 1 "fpr_reg_operand" "0")]
                   MMA_ACC))]
  "TARGET_MMA_NO_DENSE_MATH"
  "<acc> %A0"
  [(set_attr "type" "mma")])

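;; For explicit priming/de-priming on systems without dense math registers,
;; the corresponding built-ins (__builtin_mma_xxmtacc / __builtin_mma_xxmfacc,
;; per the GCC PowerPC built-in documentation) map onto the "mma_<acc>"
;; pattern above; a hedged, illustrative sketch:
;;
;;   void
;;   prime_example (__vector_quad *acc)
;;   {
;;     __builtin_mma_xxmtacc (acc);   /* mma_xxmtacc: prime the accumulator.  */
;;     /* ... GER updates on *acc ...  */
;;     __builtin_mma_xxmfacc (acc);   /* mma_xxmfacc: de-prime (copy back).  */
;;   }
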
;; We can't have integer constants in XOmode so we wrap this in an
;; UNSPEC_VOLATILE.

(define_insn "mma_xxsetaccz"
  [(set (match_operand:XO 0 "accumulator_operand" "=wD")
        (unspec_volatile:XO [(const_int 0)]
                            UNSPECV_MMA_XXSETACCZ))]
  "TARGET_MMA"
{
  return TARGET_DENSE_MATH ? "dmsetdmrz %A0" : "xxsetaccz %A0";
}
  [(set_attr "type" "mma")])

(define_insn "mma_<vv>"
  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
        (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
                    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")]
                   MMA_VV))]
  "TARGET_MMA"
{
  return TARGET_DENSE_MATH ? "dm<vv> %A0,%x1,%x2" : "<vv> %A0,%x1,%x2";
}
  [(set_attr "type" "mma")])

(define_insn "mma_<avv>"
  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
        (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
                    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
                    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")]
                   MMA_AVV))]
  "TARGET_MMA"
{
  return TARGET_DENSE_MATH ? "dm<avv> %A0,%x2,%x3" : "<avv> %A0,%x2,%x3";
}
  [(set_attr "type" "mma")])

(define_insn "mma_<pv>"
  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
        (unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,?wa")
                    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")]
                   MMA_PV))]
  "TARGET_MMA"
{
  return TARGET_DENSE_MATH ? "dm<pv> %A0,%x1,%x2" : "<pv> %A0,%x1,%x2";
}
  [(set_attr "type" "mma")])

(define_insn "mma_<apv>"
  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
        (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
                    (match_operand:OO 2 "vsx_register_operand" "v,?wa")
                    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")]
                   MMA_APV))]
  "TARGET_MMA"
{
  return TARGET_DENSE_MATH ? "dm<apv> %A0,%x2,%x3" : "<apv> %A0,%x2,%x3";
}
  [(set_attr "type" "mma")])

(define_insn "mma_pm<vvi4i4i8>"
  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
        (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
                    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
                    (match_operand:SI 3 "const_0_to_15_operand" "n,n")
                    (match_operand:SI 4 "const_0_to_15_operand" "n,n")
                    (match_operand:SI 5 "u8bit_cint_operand" "n,n")]
                   MMA_VVI4I4I8))]
  "TARGET_MMA"
{
  return (TARGET_DENSE_MATH
          ? "pmdm<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5"
          : "pm<vvi4i4i8> %A0,%x1,%x2,%3,%4,%5");
}
  [(set_attr "type" "mma")
   (set_attr "prefixed" "yes")])

(define_insn "mma_pm<avvi4i4i8>"
  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
        (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
                    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
                    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
                    (match_operand:SI 4 "const_0_to_15_operand" "n,n")
                    (match_operand:SI 5 "const_0_to_15_operand" "n,n")
                    (match_operand:SI 6 "u8bit_cint_operand" "n,n")]
                   MMA_AVVI4I4I8))]
  "TARGET_MMA"
{
  return (TARGET_DENSE_MATH
          ? "pmdm<avvi4i4i8> %A0,%x2,%x3,%4,%5,%6"
          : "pm<avvi4i4i8> %A0,%x2,%x3,%4,%5,%6");
}
  [(set_attr "type" "mma")
   (set_attr "prefixed" "yes")])

(define_insn "mma_pm<vvi4i4i2>"
  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
        (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
                    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
                    (match_operand:SI 3 "const_0_to_15_operand" "n,n")
                    (match_operand:SI 4 "const_0_to_15_operand" "n,n")
                    (match_operand:SI 5 "const_0_to_3_operand" "n,n")]
                   MMA_VVI4I4I2))]
  "TARGET_MMA"
{
  return (TARGET_DENSE_MATH
          ? "pmdm<vvi4i4i2> %A0,%x1,%x2,%3,%4,%5"
          : "pm<vvi4i4i2> %A0,%x1,%x2,%3,%4,%5");
}
  [(set_attr "type" "mma")
   (set_attr "prefixed" "yes")])

(define_insn "mma_pm<avvi4i4i2>"
  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
        (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
                    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
                    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
                    (match_operand:SI 4 "const_0_to_15_operand" "n,n")
                    (match_operand:SI 5 "const_0_to_15_operand" "n,n")
                    (match_operand:SI 6 "const_0_to_3_operand" "n,n")]
                   MMA_AVVI4I4I2))]
  "TARGET_MMA"
{
  return (TARGET_DENSE_MATH
          ? "pmdm<avvi4i4i2> %A0,%x2,%x3,%4,%5,%6"
          : "pm<avvi4i4i2> %A0,%x2,%x3,%4,%5,%6");
}
  [(set_attr "type" "mma")
   (set_attr "prefixed" "yes")])

(define_insn "mma_pm<vvi4i4>"
  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
        (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
                    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
                    (match_operand:SI 3 "const_0_to_15_operand" "n,n")
                    (match_operand:SI 4 "const_0_to_15_operand" "n,n")]
                   MMA_VVI4I4))]
  "TARGET_MMA"
{
  return (TARGET_DENSE_MATH
          ? "pmdm<vvi4i4> %A0,%x1,%x2,%3,%4"
          : "pm<vvi4i4> %A0,%x1,%x2,%3,%4");
}
  [(set_attr "type" "mma")
   (set_attr "prefixed" "yes")])

(define_insn "mma_pm<avvi4i4>"
  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
        (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
                    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
                    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
                    (match_operand:SI 4 "const_0_to_15_operand" "n,n")
                    (match_operand:SI 5 "const_0_to_15_operand" "n,n")]
                   MMA_AVVI4I4))]
  "TARGET_MMA"
{
  return (TARGET_DENSE_MATH
          ? "pmdm<avvi4i4> %A0,%x2,%x3,%4,%5"
          : "pm<avvi4i4> %A0,%x2,%x3,%4,%5");
}
  [(set_attr "type" "mma")
   (set_attr "prefixed" "yes")])

(define_insn "mma_pm<pvi4i2>"
  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
        (unspec:XO [(match_operand:OO 1 "vsx_register_operand" "v,?wa")
                    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
                    (match_operand:SI 3 "const_0_to_15_operand" "n,n")
                    (match_operand:SI 4 "const_0_to_3_operand" "n,n")]
                   MMA_PVI4I2))]
  "TARGET_MMA"
{
  return (TARGET_DENSE_MATH
          ? "pmdm<pvi4i2> %A0,%x1,%x2,%3,%4"
          : "pm<pvi4i2> %A0,%x1,%x2,%3,%4");
}
  [(set_attr "type" "mma")
   (set_attr "prefixed" "yes")])

(define_insn "mma_pm<apvi4i2>"
  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
        (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
                    (match_operand:OO 2 "vsx_register_operand" "v,?wa")
                    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
                    (match_operand:SI 4 "const_0_to_15_operand" "n,n")
                    (match_operand:SI 5 "const_0_to_3_operand" "n,n")]
                   MMA_APVI4I2))]
  "TARGET_MMA"
{
  return (TARGET_DENSE_MATH
          ? "pmdm<apvi4i2> %A0,%x2,%x3,%4,%5"
          : "pm<apvi4i2> %A0,%x2,%x3,%4,%5");
}
  [(set_attr "type" "mma")
   (set_attr "prefixed" "yes")])

(define_insn "mma_pm<vvi4i4i4>"
  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
        (unspec:XO [(match_operand:V16QI 1 "vsx_register_operand" "v,?wa")
                    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
                    (match_operand:SI 3 "const_0_to_15_operand" "n,n")
                    (match_operand:SI 4 "const_0_to_15_operand" "n,n")
                    (match_operand:SI 5 "const_0_to_15_operand" "n,n")]
                   MMA_VVI4I4I4))]
  "TARGET_MMA"
{
  return (TARGET_DENSE_MATH
          ? "pmdm<vvi4i4i4> %A0,%x1,%x2,%3,%4,%5"
          : "pm<vvi4i4i4> %A0,%x1,%x2,%3,%4,%5");
}
  [(set_attr "type" "mma")
   (set_attr "prefixed" "yes")])

(define_insn "mma_pm<avvi4i4i4>"
  [(set (match_operand:XO 0 "accumulator_operand" "=&wD,&wD")
        (unspec:XO [(match_operand:XO 1 "accumulator_operand" "0,0")
                    (match_operand:V16QI 2 "vsx_register_operand" "v,?wa")
                    (match_operand:V16QI 3 "vsx_register_operand" "v,?wa")
                    (match_operand:SI 4 "const_0_to_15_operand" "n,n")
                    (match_operand:SI 5 "const_0_to_15_operand" "n,n")
                    (match_operand:SI 6 "const_0_to_15_operand" "n,n")]
                   MMA_AVVI4I4I4))]
  "TARGET_MMA"
{
  return (TARGET_DENSE_MATH
          ? "pmdm<avvi4i4i4> %A0,%x2,%x3,%4,%5,%6"
          : "pm<avvi4i4i4> %A0,%x2,%x3,%4,%5,%6");
}
  [(set_attr "type" "mma")
   (set_attr "prefixed" "yes")])