This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [Patch]: Splitter for Altivec moves
- From: "Hartmut Penner" <HPENNER at de dot ibm dot com>
- To: gcc-patches at gcc dot gnu dot org
- Cc: Aldy Hernandez <aldyh at redhat dot com>
- Date: Thu, 26 Jun 2003 08:48:30 +0200
- Subject: Re: [Patch]: Splitter for Altivec moves
> Ok, then test on a cross powerpc-eabialtivec configuration. When you
> repost the patch with my suggestions, I can test on a native altivec
> platform.
I reworked the patch and tested it with a cross powerpc-eabialtivec
configuration.
My testing was done by emitting critical move insns into the instruction
stream.
In particular, I checked whether the splitter works correctly for destructive
overlap of register
ranges and when the base register is changed in a load. The assembler code
looked all right.
ChangeLog:
2003-06-26 Hartmut Penner <hpenner@de.ibm.com>
* config/rs6000/rs6000.c (rs6000_split_altivec_in_gprs): New function.
(altivec_in_gprs_p): New function.
* config/rs6000/rs6000-protos.h (rs6000_split_altivec_in_gprs): New
prototype.
(altivec_in_gprs_p): New prototype.
* config/rs6000/altivec.md (*movv4si_internal): Change
multi-instruction assembler alternatives to '#'. Add post-reload
splitters to handle these cases.
(*movv4hi_internal): Likewise.
(*movv4qi_internal): Likewise.
(*movv4sf_internal): Likewise.
Index: altivec.md
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/rs6000/altivec.md,v
retrieving revision 1.7
diff -c -p -r1.7 altivec.md
*** altivec.md 13 Apr 2003 17:51:06 -0000 1.7
--- altivec.md 26 Jun 2003 06:22:12 -0000
***************
*** 101,115 ****
case 0: return \"stvx %1,%y0\";
case 1: return \"lvx %0,%y1\";
case 2: return \"vor %0,%1,%1\";
! case 3: return \"stw%U0 %1,%0\;stw %L1,%L0\;stw %Y1,%Y0\;stw %Z1,%Z0\";
! case 4: return \"lwz%U1 %0,%1\;lwz %L0,%L1\;lwz %Y0,%Y1\;lwz %Z0,%Z1\";
! case 5: return \"mr %0,%1\;mr %L0,%L1\;mr %Y0,%Y1\;mr %Z0,%Z1\";
case 6: return output_vec_const_move (operands);
default: abort();
}
}"
! [(set_attr "type" "vecstore,vecload,vecsimple,store,load,*,*")
! (set_attr "length" "*,*,*,16,16,16,*")])
(define_split
[(set (match_operand:V4SI 0 "altivec_register_operand" "")
--- 101,138 ----
case 0: return \"stvx %1,%y0\";
case 1: return \"lvx %0,%y1\";
case 2: return \"vor %0,%1,%1\";
! case 3: return \"#\";
! case 4: return \"#\";
! case 5: return \"#\";
case 6: return output_vec_const_move (operands);
default: abort();
}
}"
! [(set_attr "type" "vecstore,vecload,vecsimple,store,load,*,*")])
!
! (define_split
! [(set (match_operand:V4SI 0 "nonimmediate_operand" "")
! (match_operand:V4SI 1 "input_operand" ""))]
! "TARGET_ALTIVEC && reload_completed && TARGET_POWERPC64
! && altivec_in_gprs_p (operands[0], operands[1])"
! [(set (match_dup 2) (match_dup 4))
! (set (match_dup 3) (match_dup 5))]
! "{
! rs6000_split_altivec_in_gprs (operands);
! }")
!
! (define_split
! [(set (match_operand:V4SI 0 "nonimmediate_operand" "")
! (match_operand:V4SI 1 "input_operand" ""))]
! "TARGET_ALTIVEC && reload_completed && !TARGET_POWERPC64
! && altivec_in_gprs_p (operands[0], operands[1])"
! [(set (match_dup 2) (match_dup 6))
! (set (match_dup 3) (match_dup 7))
! (set (match_dup 4) (match_dup 8))
! (set (match_dup 5) (match_dup 9))]
! "{
! rs6000_split_altivec_in_gprs (operands);
! }")
(define_split
[(set (match_operand:V4SI 0 "altivec_register_operand" "")
***************
*** 140,154 ****
case 0: return \"stvx %1,%y0\";
case 1: return \"lvx %0,%y1\";
case 2: return \"vor %0,%1,%1\";
! case 3: return \"stw%U0 %1,%0\;stw %L1,%L0\;stw %Y1,%Y0\;stw %Z1,%Z0\";
! case 4: return \"lwz%U1 %0,%1\;lwz %L0,%L1\;lwz %Y0,%Y1\;lwz %Z0,%Z1\";
! case 5: return \"mr %0,%1\;mr %L0,%L1\;mr %Y0,%Y1\;mr %Z0,%Z1\";
case 6: return output_vec_const_move (operands);
default: abort ();
}
}"
! [(set_attr "type" "vecstore,vecload,vecsimple,store,load,*,*")
! (set_attr "length" "*,*,*,16,16,16,*")])
(define_split
[(set (match_operand:V8HI 0 "altivec_register_operand" "")
--- 163,200 ----
case 0: return \"stvx %1,%y0\";
case 1: return \"lvx %0,%y1\";
case 2: return \"vor %0,%1,%1\";
! case 3: return \"#\";
! case 4: return \"#\";
! case 5: return \"#\";
case 6: return output_vec_const_move (operands);
default: abort ();
}
}"
! [(set_attr "type" "vecstore,vecload,vecsimple,store,load,*,*")])
!
! (define_split
! [(set (match_operand:V8HI 0 "nonimmediate_operand" "")
! (match_operand:V8HI 1 "input_operand" ""))]
! "TARGET_ALTIVEC && reload_completed && TARGET_POWERPC64
! && altivec_in_gprs_p (operands[0], operands[1])"
! [(set (match_dup 2) (match_dup 4))
! (set (match_dup 3) (match_dup 5))]
! "{
! rs6000_split_altivec_in_gprs (operands);
! }")
!
! (define_split
! [(set (match_operand:V8HI 0 "nonimmediate_operand" "")
! (match_operand:V8HI 1 "input_operand" ""))]
! "TARGET_ALTIVEC && reload_completed && !TARGET_POWERPC64
! && altivec_in_gprs_p (operands[0], operands[1])"
! [(set (match_dup 2) (match_dup 6))
! (set (match_dup 3) (match_dup 7))
! (set (match_dup 4) (match_dup 8))
! (set (match_dup 5) (match_dup 9))]
! "{
! rs6000_split_altivec_in_gprs (operands);
! }")
(define_split
[(set (match_operand:V8HI 0 "altivec_register_operand" "")
***************
*** 179,193 ****
case 0: return \"stvx %1,%y0\";
case 1: return \"lvx %0,%y1\";
case 2: return \"vor %0,%1,%1\";
! case 3: return \"stw%U0 %1,%0\;stw %L1,%L0\;stw %Y1,%Y0\;stw %Z1,%Z0\";
! case 4: return \"lwz%U1 %0,%1\;lwz %L0,%L1\;lwz %Y0,%Y1\;lwz %Z0,%Z1\";
! case 5: return \"mr %0,%1\;mr %L0,%L1\;mr %Y0,%Y1\;mr %Z0,%Z1\";
case 6: return output_vec_const_move (operands);
default: abort ();
}
}"
! [(set_attr "type" "vecstore,vecload,vecsimple,store,load,*,*")
! (set_attr "length" "*,*,*,16,16,16,*")])
(define_split
[(set (match_operand:V16QI 0 "altivec_register_operand" "")
--- 225,262 ----
case 0: return \"stvx %1,%y0\";
case 1: return \"lvx %0,%y1\";
case 2: return \"vor %0,%1,%1\";
! case 3: return \"#\";
! case 4: return \"#\";
! case 5: return \"#\";
case 6: return output_vec_const_move (operands);
default: abort ();
}
}"
! [(set_attr "type" "vecstore,vecload,vecsimple,store,load,*,*")])
!
! (define_split
! [(set (match_operand:V16QI 0 "nonimmediate_operand" "")
! (match_operand:V16QI 1 "input_operand" ""))]
! "TARGET_ALTIVEC && reload_completed && TARGET_POWERPC64
! && altivec_in_gprs_p (operands[0], operands[1])"
! [(set (match_dup 2) (match_dup 4))
! (set (match_dup 3) (match_dup 5))]
! "{
! rs6000_split_altivec_in_gprs (operands);
! }")
!
! (define_split
! [(set (match_operand:V16QI 0 "nonimmediate_operand" "")
! (match_operand:V16QI 1 "input_operand" ""))]
! "TARGET_ALTIVEC && reload_completed && !TARGET_POWERPC64
! && altivec_in_gprs_p (operands[0], operands[1])"
! [(set (match_dup 2) (match_dup 6))
! (set (match_dup 3) (match_dup 7))
! (set (match_dup 4) (match_dup 8))
! (set (match_dup 5) (match_dup 9))]
! "{
! rs6000_split_altivec_in_gprs (operands);
! }")
(define_split
[(set (match_operand:V16QI 0 "altivec_register_operand" "")
***************
*** 218,232 ****
case 0: return \"stvx %1,%y0\";
case 1: return \"lvx %0,%y1\";
case 2: return \"vor %0,%1,%1\";
! case 3: return \"stw%U0 %1,%0\;stw %L1,%L0\;stw %Y1,%Y0\;stw %Z1,%Z0\";
! case 4: return \"lwz%U1 %0,%1\;lwz %L0,%L1\;lwz %Y0,%Y1\;lwz %Z0,%Z1\";
! case 5: return \"mr %0,%1\;mr %L0,%L1\;mr %Y0,%Y1\;mr %Z0,%Z1\";
case 6: return output_vec_const_move (operands);
default: abort ();
}
}"
! [(set_attr "type" "vecstore,vecload,vecsimple,store,load,*,*")
! (set_attr "length" "*,*,*,16,16,16,*")])
(define_insn "get_vrsave_internal"
[(set (match_operand:SI 0 "register_operand" "=r")
--- 287,324 ----
case 0: return \"stvx %1,%y0\";
case 1: return \"lvx %0,%y1\";
case 2: return \"vor %0,%1,%1\";
! case 3: return \"#\";
! case 4: return \"#\";
! case 5: return \"#\";
case 6: return output_vec_const_move (operands);
default: abort ();
}
}"
! [(set_attr "type" "vecstore,vecload,vecsimple,store,load,*,*")])
!
! (define_split
! [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
! (match_operand:V4SF 1 "input_operand" ""))]
! "TARGET_ALTIVEC && reload_completed && TARGET_POWERPC64
! && altivec_in_gprs_p (operands[0], operands[1])"
! [(set (match_dup 2) (match_dup 4))
! (set (match_dup 3) (match_dup 5))]
! "{
! rs6000_split_altivec_in_gprs (operands);
! }")
!
! (define_split
! [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
! (match_operand:V4SF 1 "input_operand" ""))]
! "TARGET_ALTIVEC && reload_completed && !TARGET_POWERPC64
! && altivec_in_gprs_p (operands[0], operands[1])"
! [(set (match_dup 2) (match_dup 6))
! (set (match_dup 3) (match_dup 7))
! (set (match_dup 4) (match_dup 8))
! (set (match_dup 5) (match_dup 9))]
! "{
! rs6000_split_altivec_in_gprs (operands);
! }")
(define_insn "get_vrsave_internal"
[(set (match_operand:SI 0 "register_operand" "=r")
Index: rs6000-protos.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/rs6000/rs6000-protos.h,v
retrieving revision 1.58
diff -c -p -r1.58 rs6000-protos.h
*** rs6000-protos.h 19 Jun 2003 21:47:21 -0000 1.58
--- rs6000-protos.h 26 Jun 2003 06:22:13 -0000
*************** extern int includes_rldic_lshift_p PARAM
*** 101,106 ****
--- 101,107 ----
extern int includes_rldicr_lshift_p PARAMS ((rtx, rtx));
extern int registers_ok_for_quad_peep PARAMS ((rtx, rtx));
extern int addrs_ok_for_quad_peep PARAMS ((rtx, rtx));
+ extern int altivec_in_gprs_p (rtx, rtx);
extern enum reg_class secondary_reload_class PARAMS ((enum reg_class,
enum machine_mode, rtx));
extern int ccr_bit PARAMS ((rtx, int));
*************** extern int mtcrf_operation PARAMS ((rtx,
*** 125,130 ****
--- 126,132 ----
extern int lmw_operation PARAMS ((rtx, enum machine_mode));
extern struct rtx_def *create_TOC_reference PARAMS ((rtx));
extern void rs6000_emit_eh_toc_restore PARAMS ((rtx));
+ extern void rs6000_split_altivec_in_gprs (rtx*);
extern void rs6000_emit_move PARAMS ((rtx, rtx, enum machine_mode));
extern rtx rs6000_legitimize_address PARAMS ((rtx, rtx, enum machine_mode));
extern rtx rs6000_legitimize_reload_address PARAMS ((rtx, enum machine_mode,
Index: rs6000.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/rs6000/rs6000.c,v
retrieving revision 1.497
diff -c -p -r1.497 rs6000.c
*** rs6000.c 19 Jun 2003 21:47:21 -0000 1.497
--- rs6000.c 26 Jun 2003 06:22:17 -0000
*************** small_data_operand (op, mode)
*** 2305,2310 ****
--- 2305,2325 ----
return 0;
#endif
}
+
+ /* Return 1 if OP0 or OP1 is a register that is not an AltiVec register
+ (the splitters treat this as a vector held in gprs); else return 0. */
+
+ int
+ altivec_in_gprs_p (rtx op0, rtx op1)
+ {
+ if (REG_P (op0) && !ALTIVEC_REGNO_P (REGNO (op0)))
+ return 1;
+
+ if (REG_P (op1) && !ALTIVEC_REGNO_P (REGNO (op1)))
+ return 1;
+ return 0;
+ }
+
/* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address. */
*************** rs6000_emit_minmax (dest, code, op0, op1
*** 9637,9642 ****
--- 9652,9734 ----
if (target != dest)
emit_move_insn (dest, target);
}
+
+ /* Called by the AltiVec move splitters in altivec.md.
+ Input:
+ operands[0] : Destination of move
+ operands[1] : Source of move
+ (the number of word moves, nregs, is computed here, not passed in)
+ Output:
+ operands[2-5] ([2-3] in 64 bit) : Destination subwords
+ operands[6-9] ([4-5] in 64 bit) : Source subwords
+
+ Splits the move of operands[1] to operands[0].
+ This is done if at least one of the operands is in gprs. In this case
+ a sequence of simple move insns has to be issued. These move insns
+ have to be emitted in the right order to avoid early clobber of
+ the base register or destructive overlap of registers.
+ */
+
+ void
+ rs6000_split_altivec_in_gprs (rtx *operands)
+ {
+ int nregs, reg, i, j;
+ enum machine_mode mode;
+
+ /* Calculate number of word moves (4/2 for 32/64 bit mode). */
+
+ reg = REG_P (operands[0]) ? REGNO (operands[0]) : REGNO (operands[1]);
+ mode = GET_MODE (operands[0]);
+ nregs = HARD_REGNO_NREGS(reg, mode);
+
+ if (REG_P (operands[1])
+ && REG_P (operands[0])
+ && (REGNO (operands[1]) < REGNO (operands[0])))
+ {
+ /* Source regno is below destination: fill slots from the last subword down. */
+
+ j = nregs;
+ for (i = 0; i < nregs; i++)
+ {
+ j--;
+ operands[i + 2] = operand_subword (operands[0], j, 0, mode);
+ operands[i + 2 + nregs] =
+ operand_subword (operands[1], j, 0, mode);
+ }
+ }
+ else
+ {
+ j = -1;
+
+ if (GET_CODE (operands[1]) == MEM)
+ {
+ rtx breg;
+ /* We have offsettable addresses only. If one of the destination
+ registers is used to address memory, we have to change that register last. */
+ breg = GET_CODE (XEXP (operands[1], 0)) == PLUS ?
+ XEXP (XEXP (operands[1], 0), 0) :
+ XEXP (operands[1], 0);
+
+ if (REGNO (breg) >= REGNO (operands[0])
+ && REGNO (breg) < REGNO (operands[0]) + nregs)
+ j = REGNO (breg) - REGNO (operands[0]);
+ }
+
+ for (i = 0; i < nregs; i++)
+ {
+ /* Advance to the next subword index, wrapping around to 0. */
+ j++;
+ if (j == nregs)
+ j = 0;
+
+ operands[i + 2] = operand_subword (operands[0], j, 0, mode);
+ operands[i + 2 + nregs] =
+ operand_subword (operands[1], j, 0, mode);
+
+ }
+ }
+ }
+
/* This page contains routines that are used to determine what the
function prologue and epilogue code will do and write them out. */