[rs6000] Avoid rotates of floating-point modes
Richard Sandiford
richard.sandiford@linaro.org
Wed Jul 12 16:33:00 GMT 2017
The little-endian VSX code uses rotates to swap the two 64-bit halves of
128-bit scalar modes. This is fine for TImode and V1TImode, but it
isn't really valid to use RTL rotates on floating-point modes like
KFmode and TFmode, and doing that triggered an assert added by the
SVE series. This patch uses bit-casts to V1TImode instead.
Tested on powerpc64le-linux-gnu. OK to install?
Richard
2017-07-12 Richard Sandiford <richard.sandiford@linaro.org>
gcc/
* config/rs6000/rs6000-protos.h (rs6000_emit_le_vsx_permute): Declare.
* config/rs6000/rs6000.c (rs6000_gen_le_vsx_permute): Replace with...
(rs6000_emit_le_vsx_permute): ...this. Take the destination as input.
Emit instructions rather than returning an expression. Handle TFmode
and KFmode by casting to V1TImode.
(rs6000_emit_le_vsx_load): Update to use rs6000_emit_le_vsx_permute.
(rs6000_emit_le_vsx_store): Likewise.
* config/rs6000/vsx.md (VSX_LE_128I): New iterator.
(*vsx_le_permute_<mode>): Use it instead of VSX_LE_128.
(*vsx_le_undo_permute_<mode>): Likewise.
(*vsx_le_perm_load_<mode>): Use rs6000_emit_le_vsx_permute to
emit the split sequence.
(*vsx_le_perm_store_<mode>): Likewise.
Index: gcc/config/rs6000/rs6000-protos.h
===================================================================
--- gcc/config/rs6000/rs6000-protos.h 2017-06-30 12:50:38.886633045 +0100
+++ gcc/config/rs6000/rs6000-protos.h 2017-07-12 16:30:38.728631839 +0100
@@ -151,6 +151,7 @@ extern rtx rs6000_longcall_ref (rtx);
extern void rs6000_fatal_bad_address (rtx);
extern rtx create_TOC_reference (rtx, rtx);
extern void rs6000_split_multireg_move (rtx, rtx);
+extern void rs6000_emit_le_vsx_permute (rtx, rtx, machine_mode);
extern void rs6000_emit_le_vsx_move (rtx, rtx, machine_mode);
extern bool valid_sf_si_move (rtx, rtx, machine_mode);
extern void rs6000_emit_move (rtx, rtx, machine_mode);
Index: gcc/config/rs6000/rs6000.c
===================================================================
--- gcc/config/rs6000/rs6000.c 2017-07-08 11:37:45.740795846 +0100
+++ gcc/config/rs6000/rs6000.c 2017-07-12 16:30:38.732631678 +0100
@@ -10503,17 +10503,24 @@ rs6000_const_vec (machine_mode mode)
/* Generate a permute rtx that represents an lxvd2x, stxvd2x, or xxpermdi
for a VSX load or store operation. */
-rtx
-rs6000_gen_le_vsx_permute (rtx source, machine_mode mode)
+void
+rs6000_emit_le_vsx_permute (rtx dest, rtx source, machine_mode mode)
{
/* Use ROTATE instead of VEC_SELECT on IEEE 128-bit floating point, and
128-bit integers if they are allowed in VSX registers. */
- if (FLOAT128_VECTOR_P (mode) || mode == TImode || mode == V1TImode)
- return gen_rtx_ROTATE (mode, source, GEN_INT (64));
+ if (FLOAT128_VECTOR_P (mode))
+ {
+ dest = gen_lowpart (V1TImode, dest);
+ source = gen_lowpart (V1TImode, source);
+ mode = V1TImode;
+ }
+ if (mode == TImode || mode == V1TImode)
+ emit_insn (gen_rtx_SET (dest, gen_rtx_ROTATE (mode, source,
+ GEN_INT (64))));
else
{
rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
- return gen_rtx_VEC_SELECT (mode, source, par);
+ emit_insn (gen_rtx_SET (dest, gen_rtx_VEC_SELECT (mode, source, par)));
}
}
@@ -10523,8 +10530,6 @@ rs6000_gen_le_vsx_permute (rtx source, m
void
rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
{
- rtx tmp, permute_mem, permute_reg;
-
/* Use V2DImode to do swaps of types with 128-bit scalare parts (TImode,
V1TImode). */
if (mode == TImode || mode == V1TImode)
@@ -10534,11 +10539,9 @@ rs6000_emit_le_vsx_load (rtx dest, rtx s
source = adjust_address (source, V2DImode, 0);
}
- tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
- permute_mem = rs6000_gen_le_vsx_permute (source, mode);
- permute_reg = rs6000_gen_le_vsx_permute (tmp, mode);
- emit_insn (gen_rtx_SET (tmp, permute_mem));
- emit_insn (gen_rtx_SET (dest, permute_reg));
+ rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
+ rs6000_emit_le_vsx_permute (tmp, source, mode);
+ rs6000_emit_le_vsx_permute (dest, tmp, mode);
}
/* Emit a little-endian store to vector memory location DEST from VSX
@@ -10547,8 +10550,6 @@ rs6000_emit_le_vsx_load (rtx dest, rtx s
void
rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
{
- rtx tmp, permute_src, permute_tmp;
-
/* This should never be called during or after reload, because it does
not re-permute the source register. It is intended only for use
during expand. */
@@ -10563,11 +10564,9 @@ rs6000_emit_le_vsx_store (rtx dest, rtx
source = gen_lowpart (V2DImode, source);
}
- tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source;
- permute_src = rs6000_gen_le_vsx_permute (source, mode);
- permute_tmp = rs6000_gen_le_vsx_permute (tmp, mode);
- emit_insn (gen_rtx_SET (tmp, permute_src));
- emit_insn (gen_rtx_SET (dest, permute_tmp));
+ rtx tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source;
+ rs6000_emit_le_vsx_permute (tmp, source, mode);
+ rs6000_emit_le_vsx_permute (dest, tmp, mode);
}
/* Emit a sequence representing a little-endian VSX load or store,
Index: gcc/config/rs6000/vsx.md
===================================================================
--- gcc/config/rs6000/vsx.md 2017-06-30 12:50:38.889632907 +0100
+++ gcc/config/rs6000/vsx.md 2017-07-12 16:30:38.734631598 +0100
@@ -37,6 +37,10 @@ (define_mode_iterator VSX_LE_128 [(KF
(TI "TARGET_VSX_TIMODE")
V1TI])
+;; Same, but with just the integer modes.
+(define_mode_iterator VSX_LE_128I [(TI "TARGET_VSX_TIMODE")
+ V1TI])
+
;; Iterator for the 2 32-bit vector types
(define_mode_iterator VSX_W [V4SF V4SI])
@@ -750,9 +754,9 @@ (define_split
;; special V1TI container class, which it is not appropriate to use vec_select
;; for the type.
(define_insn "*vsx_le_permute_<mode>"
- [(set (match_operand:VSX_LE_128 0 "nonimmediate_operand" "=<VSa>,<VSa>,Z")
- (rotate:VSX_LE_128
- (match_operand:VSX_LE_128 1 "input_operand" "<VSa>,Z,<VSa>")
+ [(set (match_operand:VSX_LE_128I 0 "nonimmediate_operand" "=<VSa>,<VSa>,Z")
+ (rotate:VSX_LE_128I
+ (match_operand:VSX_LE_128I 1 "input_operand" "<VSa>,Z,<VSa>")
(const_int 64)))]
"!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
"@
@@ -763,10 +767,10 @@ (define_insn "*vsx_le_permute_<mode>"
(set_attr "type" "vecperm,vecload,vecstore")])
(define_insn_and_split "*vsx_le_undo_permute_<mode>"
- [(set (match_operand:VSX_LE_128 0 "vsx_register_operand" "=<VSa>,<VSa>")
- (rotate:VSX_LE_128
- (rotate:VSX_LE_128
- (match_operand:VSX_LE_128 1 "vsx_register_operand" "0,<VSa>")
+ [(set (match_operand:VSX_LE_128I 0 "vsx_register_operand" "=<VSa>,<VSa>")
+ (rotate:VSX_LE_128I
+ (rotate:VSX_LE_128I
+ (match_operand:VSX_LE_128I 1 "vsx_register_operand" "0,<VSa>")
(const_int 64))
(const_int 64)))]
"!BYTES_BIG_ENDIAN && TARGET_VSX"
@@ -791,16 +795,15 @@ (define_insn_and_split "*vsx_le_perm_loa
"!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
"#"
"!BYTES_BIG_ENDIAN && TARGET_VSX && !TARGET_P9_VECTOR"
- [(set (match_dup 2)
- (rotate:VSX_LE_128 (match_dup 1)
- (const_int 64)))
- (set (match_dup 0)
- (rotate:VSX_LE_128 (match_dup 2)
- (const_int 64)))]
+ [(const_int 0)]
"
{
- operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
- : operands[0];
+ rtx tmp = (can_create_pseudo_p ()
+ ? gen_reg_rtx_and_attrs (operands[0])
+ : operands[0]);
+ rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode);
+ rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode);
+ DONE;
}
"
[(set_attr "type" "vecload")
@@ -818,15 +821,14 @@ (define_split
[(set (match_operand:VSX_LE_128 0 "memory_operand" "")
(match_operand:VSX_LE_128 1 "vsx_register_operand" ""))]
"!BYTES_BIG_ENDIAN && TARGET_VSX && !reload_completed && !TARGET_P9_VECTOR"
- [(set (match_dup 2)
- (rotate:VSX_LE_128 (match_dup 1)
- (const_int 64)))
- (set (match_dup 0)
- (rotate:VSX_LE_128 (match_dup 2)
- (const_int 64)))]
+ [(const_int 0)]
{
- operands[2] = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (operands[0])
- : operands[0];
+ rtx tmp = (can_create_pseudo_p ()
+ ? gen_reg_rtx_and_attrs (operands[0])
+ : operands[0]);
+ rs6000_emit_le_vsx_permute (tmp, operands[1], <MODE>mode);
+ rs6000_emit_le_vsx_permute (operands[0], tmp, <MODE>mode);
+ DONE;
})
;; Peephole to catch memory to memory transfers for TImode if TImode landed in
@@ -850,16 +852,13 @@ (define_split
[(set (match_operand:VSX_LE_128 0 "memory_operand" "")
(match_operand:VSX_LE_128 1 "vsx_register_operand" ""))]
"!BYTES_BIG_ENDIAN && TARGET_VSX && reload_completed && !TARGET_P9_VECTOR"
- [(set (match_dup 1)
- (rotate:VSX_LE_128 (match_dup 1)
- (const_int 64)))
- (set (match_dup 0)
- (rotate:VSX_LE_128 (match_dup 1)
- (const_int 64)))
- (set (match_dup 1)
- (rotate:VSX_LE_128 (match_dup 1)
- (const_int 64)))]
- "")
+ [(const_int 0)]
+{
+ rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode);
+ rs6000_emit_le_vsx_permute (operands[0], operands[1], <MODE>mode);
+ rs6000_emit_le_vsx_permute (operands[1], operands[1], <MODE>mode);
+ DONE;
+})
;; Vector constants that can be generated with XXSPLTIB that was added in ISA
;; 3.0. Both (const_vector [..]) and (vec_duplicate ...) forms are recognized.
More information about the Gcc-patches
mailing list