This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[Patch]: Splitter for Altivec moves
- From: "Hartmut Penner" <HPENNER at de dot ibm dot com>
- To: gcc-patches at gcc dot gnu dot org
- Cc: aldyh at redhat dot com
- Date: Wed, 18 Jun 2003 08:36:09 +0200
- Subject: [Patch]: Splitter for Altivec moves
Hello,
The AltiVec move insns currently emit multi-instruction assembler sequences
for moving vectors between memory and general-purpose registers.
This implementation has three shortcomings:
- The sequences may be wrong when the source and target register ranges
overlap destructively.
- In the load case, one register of the target range might also be used as
the base register; the subsequent loads will then load from a garbage
location.
- The sequences are wrong in 64-bit mode.
To fix this, I propose the following patch, which uses post-reload
splitters.
Index: altivec.md
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/rs6000/altivec.md,v
retrieving revision 1.7
diff -c -p -r1.7 altivec.md
*** altivec.md 13 Apr 2003 17:51:06 -0000 1.7
--- altivec.md 18 Jun 2003 06:14:10 -0000
***************
*** 101,115 ****
case 0: return \"stvx %1,%y0\";
case 1: return \"lvx %0,%y1\";
case 2: return \"vor %0,%1,%1\";
! case 3: return \"stw%U0 %1,%0\;stw %L1,%L0\;stw %Y1,%Y0\;stw %Z1,%Z0\";
! case 4: return \"lwz%U1 %0,%1\;lwz %L0,%L1\;lwz %Y0,%Y1\;lwz %Z0,%Z1\";
! case 5: return \"mr %0,%1\;mr %L0,%L1\;mr %Y0,%Y1\;mr %Z0,%Z1\";
case 6: return output_vec_const_move (operands);
default: abort();
}
}"
! [(set_attr "type" "vecstore,vecload,vecsimple,store,load,*,*")
! (set_attr "length" "*,*,*,16,16,16,*")])
(define_split
[(set (match_operand:V4SI 0 "altivec_register_operand" "")
--- 101,138 ----
case 0: return \"stvx %1,%y0\";
case 1: return \"lvx %0,%y1\";
case 2: return \"vor %0,%1,%1\";
! case 3: return \"#\";
! case 4: return \"#\";
! case 5: return \"#\";
case 6: return output_vec_const_move (operands);
default: abort();
}
}"
! [(set_attr "type" "vecstore,vecload,vecsimple,store,load,*,*")])
!
! (define_split
! [(set (match_operand:V4SI 0 "nonimmediate_operand" "")
! (match_operand:V4SI 1 "input_operand" ""))]
! "TARGET_ALTIVEC && reload_completed && TARGET_POWERPC64
! && altivecs_in_gpr_p (operands[0], operands[1])"
! [(set (match_dup 2) (match_dup 4))
! (set (match_dup 3) (match_dup 5))]
! "{
! rs6000_split_altivecs_in_gpr (&operands[0], 6);
! }")
!
! (define_split
! [(set (match_operand:V4SI 0 "nonimmediate_operand" "")
! (match_operand:V4SI 1 "input_operand" ""))]
! "TARGET_ALTIVEC && reload_completed && !TARGET_POWERPC64
! && altivecs_in_gpr_p (operands[0], operands[1])"
! [(set (match_dup 2) (match_dup 6))
! (set (match_dup 3) (match_dup 7))
! (set (match_dup 4) (match_dup 8))
! (set (match_dup 5) (match_dup 9))]
! "{
! rs6000_split_altivecs_in_gpr (&operands[0], 10);
! }")
(define_split
[(set (match_operand:V4SI 0 "altivec_register_operand" "")
***************
*** 140,154 ****
case 0: return \"stvx %1,%y0\";
case 1: return \"lvx %0,%y1\";
case 2: return \"vor %0,%1,%1\";
! case 3: return \"stw%U0 %1,%0\;stw %L1,%L0\;stw %Y1,%Y0\;stw %Z1,%Z0\";
! case 4: return \"lwz%U1 %0,%1\;lwz %L0,%L1\;lwz %Y0,%Y1\;lwz %Z0,%Z1\";
! case 5: return \"mr %0,%1\;mr %L0,%L1\;mr %Y0,%Y1\;mr %Z0,%Z1\";
case 6: return output_vec_const_move (operands);
default: abort ();
}
}"
! [(set_attr "type" "vecstore,vecload,vecsimple,store,load,*,*")
! (set_attr "length" "*,*,*,16,16,16,*")])
(define_split
[(set (match_operand:V8HI 0 "altivec_register_operand" "")
--- 163,200 ----
case 0: return \"stvx %1,%y0\";
case 1: return \"lvx %0,%y1\";
case 2: return \"vor %0,%1,%1\";
! case 3: return \"#\";
! case 4: return \"#\";
! case 5: return \"#\";
case 6: return output_vec_const_move (operands);
default: abort ();
}
}"
! [(set_attr "type" "vecstore,vecload,vecsimple,store,load,*,*")])
!
! (define_split
! [(set (match_operand:V8HI 0 "nonimmediate_operand" "")
! (match_operand:V8HI 1 "input_operand" ""))]
! "TARGET_ALTIVEC && reload_completed && TARGET_POWERPC64
! && altivecs_in_gpr_p (operands[0], operands[1])"
! [(set (match_dup 2) (match_dup 4))
! (set (match_dup 3) (match_dup 5))]
! "{
! rs6000_split_altivecs_in_gpr (&operands[0], 6);
! }")
!
! (define_split
! [(set (match_operand:V8HI 0 "nonimmediate_operand" "")
! (match_operand:V8HI 1 "input_operand" ""))]
! "TARGET_ALTIVEC && reload_completed && !TARGET_POWERPC64
! && altivecs_in_gpr_p (operands[0], operands[1])"
! [(set (match_dup 2) (match_dup 6))
! (set (match_dup 3) (match_dup 7))
! (set (match_dup 4) (match_dup 8))
! (set (match_dup 5) (match_dup 9))]
! "{
! rs6000_split_altivecs_in_gpr (&operands[0], 10);
! }")
(define_split
[(set (match_operand:V8HI 0 "altivec_register_operand" "")
***************
*** 179,193 ****
case 0: return \"stvx %1,%y0\";
case 1: return \"lvx %0,%y1\";
case 2: return \"vor %0,%1,%1\";
! case 3: return \"stw%U0 %1,%0\;stw %L1,%L0\;stw %Y1,%Y0\;stw %Z1,%Z0\";
! case 4: return \"lwz%U1 %0,%1\;lwz %L0,%L1\;lwz %Y0,%Y1\;lwz %Z0,%Z1\";
! case 5: return \"mr %0,%1\;mr %L0,%L1\;mr %Y0,%Y1\;mr %Z0,%Z1\";
case 6: return output_vec_const_move (operands);
default: abort ();
}
}"
! [(set_attr "type" "vecstore,vecload,vecsimple,store,load,*,*")
! (set_attr "length" "*,*,*,16,16,16,*")])
(define_split
[(set (match_operand:V16QI 0 "altivec_register_operand" "")
--- 225,262 ----
case 0: return \"stvx %1,%y0\";
case 1: return \"lvx %0,%y1\";
case 2: return \"vor %0,%1,%1\";
! case 3: return \"#\";
! case 4: return \"#\";
! case 5: return \"#\";
case 6: return output_vec_const_move (operands);
default: abort ();
}
}"
! [(set_attr "type" "vecstore,vecload,vecsimple,store,load,*,*")])
!
! (define_split
! [(set (match_operand:V16QI 0 "nonimmediate_operand" "")
! (match_operand:V16QI 1 "input_operand" ""))]
! "TARGET_ALTIVEC && reload_completed && TARGET_POWERPC64
! && altivecs_in_gpr_p (operands[0], operands[1])"
! [(set (match_dup 2) (match_dup 4))
! (set (match_dup 3) (match_dup 5))]
! "{
! rs6000_split_altivecs_in_gpr (&operands[0], 6);
! }")
!
! (define_split
! [(set (match_operand:V16QI 0 "nonimmediate_operand" "")
! (match_operand:V16QI 1 "input_operand" ""))]
! "TARGET_ALTIVEC && reload_completed && !TARGET_POWERPC64
! && altivecs_in_gpr_p (operands[0], operands[1])"
! [(set (match_dup 2) (match_dup 6))
! (set (match_dup 3) (match_dup 7))
! (set (match_dup 4) (match_dup 8))
! (set (match_dup 5) (match_dup 9))]
! "{
! rs6000_split_altivecs_in_gpr (&operands[0], 10);
! }")
(define_split
[(set (match_operand:V16QI 0 "altivec_register_operand" "")
***************
*** 218,232 ****
case 0: return \"stvx %1,%y0\";
case 1: return \"lvx %0,%y1\";
case 2: return \"vor %0,%1,%1\";
! case 3: return \"stw%U0 %1,%0\;stw %L1,%L0\;stw %Y1,%Y0\;stw %Z1,%Z0\";
! case 4: return \"lwz%U1 %0,%1\;lwz %L0,%L1\;lwz %Y0,%Y1\;lwz %Z0,%Z1\";
! case 5: return \"mr %0,%1\;mr %L0,%L1\;mr %Y0,%Y1\;mr %Z0,%Z1\";
case 6: return output_vec_const_move (operands);
default: abort ();
}
}"
! [(set_attr "type" "vecstore,vecload,vecsimple,store,load,*,*")
! (set_attr "length" "*,*,*,16,16,16,*")])
(define_insn "get_vrsave_internal"
[(set (match_operand:SI 0 "register_operand" "=r")
--- 287,324 ----
case 0: return \"stvx %1,%y0\";
case 1: return \"lvx %0,%y1\";
case 2: return \"vor %0,%1,%1\";
! case 3: return \"#\";
! case 4: return \"#\";
! case 5: return \"#\";
case 6: return output_vec_const_move (operands);
default: abort ();
}
}"
! [(set_attr "type" "vecstore,vecload,vecsimple,store,load,*,*")])
!
! (define_split
! [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
! (match_operand:V4SF 1 "input_operand" ""))]
! "TARGET_ALTIVEC && reload_completed && TARGET_POWERPC64
! && altivecs_in_gpr_p (operands[0], operands[1])"
! [(set (match_dup 2) (match_dup 4))
! (set (match_dup 3) (match_dup 5))]
! "{
! rs6000_split_altivecs_in_gpr (&operands[0], 6);
! }")
!
! (define_split
! [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
! (match_operand:V4SF 1 "input_operand" ""))]
! "TARGET_ALTIVEC && reload_completed && !TARGET_POWERPC64
! && altivecs_in_gpr_p (operands[0], operands[1])"
! [(set (match_dup 2) (match_dup 6))
! (set (match_dup 3) (match_dup 7))
! (set (match_dup 4) (match_dup 8))
! (set (match_dup 5) (match_dup 9))]
! "{
! rs6000_split_altivecs_in_gpr (&operands[0], 10);
! }")
(define_insn "get_vrsave_internal"
[(set (match_operand:SI 0 "register_operand" "=r")
Index: rs6000-protos.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/rs6000/rs6000-protos.h,v
retrieving revision 1.56
diff -c -p -r1.56 rs6000-protos.h
*** rs6000-protos.h 4 Jun 2003 15:23:42 -0000 1.56
--- rs6000-protos.h 18 Jun 2003 06:14:10 -0000
*************** extern int includes_rshift_p PARAMS ((rt
*** 100,105 ****
--- 100,106 ----
extern int includes_rldic_lshift_p PARAMS ((rtx, rtx));
extern int includes_rldicr_lshift_p PARAMS ((rtx, rtx));
extern int registers_ok_for_quad_peep PARAMS ((rtx, rtx));
+ extern int altivecs_in_gpr_p PARAMS ((rtx, rtx));
extern int addrs_ok_for_quad_peep PARAMS ((rtx, rtx));
extern enum reg_class secondary_reload_class PARAMS ((enum reg_class,
enum machine_mode, rtx));
*************** extern int mtcrf_operation PARAMS ((rtx,
*** 125,130 ****
--- 126,132 ----
extern int lmw_operation PARAMS ((rtx, enum machine_mode));
extern struct rtx_def *create_TOC_reference PARAMS ((rtx));
extern void rs6000_emit_eh_toc_restore PARAMS ((rtx));
+ extern void rs6000_split_altivecs_in_gpr PARAMS ((rtx*, int));
extern void rs6000_emit_move PARAMS ((rtx, rtx, enum machine_mode));
extern rtx rs6000_legitimize_address PARAMS ((rtx, rtx, enum machine_mode));
extern rtx rs6000_legitimize_reload_address PARAMS ((rtx, enum machine_mode,
Index: rs6000.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/rs6000/rs6000.c,v
retrieving revision 1.493
diff -c -p -r1.493 rs6000.c
*** rs6000.c 15 Jun 2003 18:01:55 -0000 1.493
--- rs6000.c 18 Jun 2003 06:14:11 -0000
*************** small_data_operand (op, mode)
*** 2293,2298 ****
--- 2293,2314 ----
return 0;
#endif
}
+
+ /* Predicate used by the AltiVec move splitters.  Return 0 if either
+    OP0 or OP1 is a register whose number satisfies ALTIVEC_REGNO_P,
+    and 1 otherwise, i.e. for every remaining operand combination of
+    a vector move.  */
+
+ int
+ altivecs_in_gpr_p (op0, op1)
+      rtx op0, op1;
+ {
+   int dest_is_vr = REG_P (op0) && ALTIVEC_REGNO_P (REGNO (op0));
+   int src_is_vr = REG_P (op1) && ALTIVEC_REGNO_P (REGNO (op1));
+
+   return !(dest_is_vr || src_is_vr);
+ }
+
/* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address. */
*************** rs6000_emit_minmax (dest, code, op0, op1
*** 9622,9627 ****
--- 9638,9710 ----
if (target != dest)
emit_move_insn (dest, target);
}
+
+ /* Called by the AltiVec move splitters to choose the word moves.
+    Input:
+      operands[0] : Destination of move
+      operands[1] : Source of move
+      noperands   : Size of operands vector
+    Output:
+      operands[2-5] ([2-3] in 64 bit) : Destination slots
+      operands[6-9] ([4-5] in 64 bit) : Source slots
+    Slots are moved in ascending order: dest slot K from source slot K.  */
+
+ void
+ rs6000_split_altivecs_in_gpr (operands, noperands)
+      rtx *operands;
+      int noperands;
+ {
+   int nregs, i;
+   enum machine_mode mode;
+
+   /* Number of words to move (4/2 for 32/64 bit).  */
+   nregs = (noperands - 2) / 2;
+   mode = GET_MODE (operands[0]);
+
+   if (REG_P (operands[1])
+       && REG_P (operands[0])
+       && (REGNO (operands[1]) < REGNO (operands[0])))
+     {
+       /* Destination above source: move the highest subword first so
+          that no source register is clobbered before it is read.  */
+       for (i = 0; i < nregs; i++)
+         {
+           operands[i + 2]
+             = operand_subword (operands[0], nregs - 1 - i, 0, mode);
+           operands[i + 2 + nregs]
+             = operand_subword (operands[1], nregs - 1 - i, 0, mode);
+         }
+     }
+   else
+     {
+       int j = 0;
+       rtx breg;
+
+       if (GET_CODE (operands[1]) == MEM && REG_P (operands[0]))
+         {
+           /* The base register is the address itself or the first
+              operand of a PLUS.  If it is one of the destination
+              registers it must be loaded last, so start with the
+              subword following it and wrap around.  */
+           breg = (GET_CODE (XEXP (operands[1], 0)) == PLUS
+                   ? XEXP (XEXP (operands[1], 0), 0)
+                   : XEXP (operands[1], 0));
+
+           if (GET_CODE (breg) == REG
+               && REGNO (breg) >= REGNO (operands[0])
+               && REGNO (breg) < REGNO (operands[0]) + nregs)
+             j = (REGNO (breg) - REGNO (operands[0]) + 1) % nregs;
+         }
+
+       for (i = 0; i < nregs; i++, j = (j + 1) % nregs)
+         {
+           operands[i + 2] = operand_subword (operands[0], j, 0, mode);
+           operands[i + 2 + nregs]
+             = operand_subword (operands[1], j, 0, mode);
+         }
+     }
+ }
+
/* This page contains routines that are used to determine what the
function prologue and epilogue code will do and write them out. */