This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
ia64 shift patch
- To: gcc-patches at gcc dot gnu dot org
- Subject: ia64 shift patch
- From: Richard Henderson <rth at cygnus dot com>
- Date: Sat, 19 Aug 2000 00:20:08 -0700
Mostly cleans up the way SImode shifts are handled. Constant shifts
can be done directly with bitfield extractions; variable shifts require
expansion to DImode. Though that in and of itself can be simplified.
There is also a tweak to ashldi3 to use shladd when possible; it's
an A slot insn instead of an I0 slot insn.
This is worth about 2% to spec95.
r~
* config/ia64/ia64.c (reg_or_5bit_operand): New.
(ia64_depz_field_mask): New.
* config/ia64/ia64.h (CONSTRAINT_OK_FOR_R): New.
(PREDICATE_CODES): Update.
* config/ia64/ia64.md: Update commentary.
(depz_internal): New.
(ashlsi3): Implement directly.
(ashrsi3, lshrsi3): Simplify; rely on extv and extzv for constants.
(ashldi3): Use shladd.
* config/ia64/ia64-protos.h: Update.
Index: ia64-protos.h
===================================================================
RCS file: /cvs/gcc/egcs/gcc/config/ia64/ia64-protos.h,v
retrieving revision 1.17
diff -c -p -d -r1.17 ia64-protos.h
*** ia64-protos.h 2000/08/14 21:01:24 1.17
--- ia64-protos.h 2000/08/19 07:10:26
*************** extern int function_operand PARAMS((rtx,
*** 35,40 ****
--- 35,41 ----
extern int setjmp_operand PARAMS((rtx, enum machine_mode));
extern int move_operand PARAMS((rtx, enum machine_mode));
extern int reg_or_0_operand PARAMS((rtx, enum machine_mode));
+ extern int reg_or_5bit_operand PARAMS((rtx, enum machine_mode));
extern int reg_or_6bit_operand PARAMS((rtx, enum machine_mode));
extern int reg_or_8bit_operand PARAMS((rtx, enum machine_mode));
extern int reg_or_8bit_adjusted_operand PARAMS((rtx, enum machine_mode));
*************** extern int normal_comparison_operator PA
*** 50,61 ****
extern int adjusted_comparison_operator PARAMS((rtx, enum machine_mode));
extern int call_multiple_values_operation PARAMS((rtx, enum machine_mode));
extern int destination_operand PARAMS((rtx, enum machine_mode));
- extern HOST_WIDE_INT ia64_initial_elimination_offset PARAMS((int, int));
- extern void ia64_expand_prologue PARAMS((void));
- extern void ia64_expand_epilogue PARAMS((void));
- extern void ia64_function_prologue PARAMS((FILE *, int));
- extern void ia64_function_epilogue PARAMS((FILE *, int));
- extern int ia64_direct_return PARAMS((void));
extern int predicate_operator PARAMS((rtx, enum machine_mode));
extern int ar_lc_reg_operand PARAMS((rtx, enum machine_mode));
extern int ar_ccv_reg_operand PARAMS((rtx, enum machine_mode));
--- 51,56 ----
*************** extern int destination_tfmode_operand PA
*** 64,73 ****
--- 59,76 ----
extern int tfreg_or_fp01_operand PARAMS((rtx, enum machine_mode));
extern int ia64_move_ok PARAMS((rtx, rtx));
+ extern int ia64_depz_field_mask PARAMS((rtx, rtx));
extern rtx ia64_gp_save_reg PARAMS((int));
extern rtx ia64_split_timode PARAMS((rtx[], rtx, rtx));
extern rtx spill_tfmode_operand PARAMS((rtx, int));
+ extern HOST_WIDE_INT ia64_initial_elimination_offset PARAMS((int, int));
+ extern void ia64_expand_prologue PARAMS((void));
+ extern void ia64_expand_epilogue PARAMS((void));
+ extern void ia64_function_prologue PARAMS((FILE *, int));
+ extern void ia64_function_epilogue PARAMS((FILE *, int));
+
+ extern int ia64_direct_return PARAMS((void));
extern void ia64_expand_load_address PARAMS((rtx, rtx));
extern void ia64_expand_fetch_and_op PARAMS ((enum fetchop_code,
enum machine_mode, rtx []));
Index: ia64.c
===================================================================
RCS file: /cvs/gcc/egcs/gcc/config/ia64/ia64.c,v
retrieving revision 1.43
diff -c -p -d -r1.43 ia64.c
*** ia64.c 2000/08/18 03:03:50 1.43
--- ia64.c 2000/08/19 07:10:26
*************** reg_or_0_operand (op, mode)
*** 316,321 ****
--- 316,333 ----
return (op == const0_rtx || register_operand (op, mode));
}
+ /* Return 1 if OP is a register operand, or a 5 bit immediate operand. */
+
+ int
+ reg_or_5bit_operand (op, mode)
+ rtx op;
+ enum machine_mode mode;
+ {
+ return ((GET_CODE (op) == CONST_INT && INTVAL (op) >= 0 && INTVAL (op) < 32)
+ || GET_CODE (op) == CONSTANT_P_RTX
+ || register_operand (op, mode));
+ }
+
/* Return 1 if OP is a register operand, or a 6 bit immediate operand. */
int
*************** ia64_move_ok (dst, src)
*** 633,638 ****
--- 645,667 ----
return src == const0_rtx;
else
return GET_CODE (src) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (src);
+ }
+
+ /* Check if OP is a mask suitable for use with SHIFT in a dep.z instruction.
+ Return the length of the field, or <= 0 on failure. */
+
+ int
+ ia64_depz_field_mask (rop, rshift)
+ rtx rop, rshift;
+ {
+ unsigned HOST_WIDE_INT op = INTVAL (rop);
+ unsigned HOST_WIDE_INT shift = INTVAL (rshift);
+
+ /* Get rid of the zero bits we're shifting in. */
+ op >>= shift;
+
+ /* We must now have a solid block of 1's at bit 0. */
+ return exact_log2 (op + 1);
}
/* Expand a symbolic constant load. */
Index: ia64.h
===================================================================
RCS file: /cvs/gcc/egcs/gcc/config/ia64/ia64.h,v
retrieving revision 1.36
diff -c -p -d -r1.36 ia64.h
*** ia64.h 2000/08/14 21:01:24 1.36
--- ia64.h 2000/08/19 07:10:26
*************** enum reg_class
*** 1108,1118 ****
letters (`Q', `R', `S', `T', `U') that can be used to segregate specific
types of operands, usually memory references, for the target machine. */
#define CONSTRAINT_OK_FOR_Q(VALUE) \
(memory_operand((VALUE), VOIDmode) && ! MEM_VOLATILE_P (VALUE))
#define EXTRA_CONSTRAINT(VALUE, C) \
! ((C) == 'Q' ? CONSTRAINT_OK_FOR_Q (VALUE) : 0)
/* Basic Stack Layout */
--- 1108,1124 ----
letters (`Q', `R', `S', `T', `U') that can be used to segregate specific
types of operands, usually memory references, for the target machine. */
+ /* Non-volatile memory for FP_REG loads/stores. */
#define CONSTRAINT_OK_FOR_Q(VALUE) \
(memory_operand((VALUE), VOIDmode) && ! MEM_VOLATILE_P (VALUE))
+ /* 1..4 for shladd arguments. */
+ #define CONSTRAINT_OK_FOR_R(VALUE) \
+ (GET_CODE (VALUE) == CONST_INT && INTVAL (VALUE) >= 1 && INTVAL (VALUE) <= 4)
#define EXTRA_CONSTRAINT(VALUE, C) \
! ((C) == 'Q' ? CONSTRAINT_OK_FOR_Q (VALUE) \
! : (C) == 'R' ? CONSTRAINT_OK_FOR_R (VALUE) \
! : 0)
/* Basic Stack Layout */
*************** do { \
*** 2639,2644 ****
--- 2645,2651 ----
{ "move_operand", {SUBREG, REG, MEM, CONST_INT, CONST_DOUBLE, \
CONSTANT_P_RTX, SYMBOL_REF, CONST, LABEL_REF}}, \
{ "reg_or_0_operand", {SUBREG, REG, CONST_INT}}, \
+ { "reg_or_5bit_operand", {SUBREG, REG, CONST_INT, CONSTANT_P_RTX}}, \
{ "reg_or_6bit_operand", {SUBREG, REG, CONST_INT, CONSTANT_P_RTX}}, \
{ "reg_or_8bit_operand", {SUBREG, REG, CONST_INT, CONSTANT_P_RTX}}, \
{ "reg_or_8bit_adjusted_operand", {SUBREG, REG, CONST_INT, \
Index: ia64.md
===================================================================
RCS file: /cvs/gcc/egcs/gcc/config/ia64/ia64.md,v
retrieving revision 1.34
diff -c -p -d -r1.34 ia64.md
*** ia64.md 2000/08/14 21:01:24 1.34
--- ia64.md 2000/08/19 07:10:26
***************
*** 1113,1121 ****
;; ::
;; ::::::::::::::::::::
- ;; ??? It would be useful to have SImode versions of the extract and insert
- ;; patterns.
-
(define_insn "extv"
[(set (match_operand:DI 0 "register_operand" "=r")
(sign_extract:DI (match_operand:DI 1 "register_operand" "r")
--- 1113,1118 ----
***************
*** 1229,1234 ****
--- 1226,1246 ----
"dep %0 = %3, %0, %2, %1"
[(set_attr "type" "I")])
+ ;; Combine doesn't like to create bitfield insertions into zero.
+ (define_insn "*depz_internal"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (and:DI (ashift:DI (match_operand:DI 1 "register_operand" "r")
+ (match_operand:DI 2 "const_int_operand" "n"))
+ (match_operand:DI 3 "const_int_operand" "n")))]
+ "CONST_OK_FOR_M (INTVAL (operands[2]))
+ && ia64_depz_field_mask (operands[3], operands[2]) > 0"
+ "*
+ {
+ operands[3] = GEN_INT (ia64_depz_field_mask (operands[3], operands[2]));
+ return \"%,dep.z %0 = %1, %2, %3\";
+ }"
+ [(set_attr "type" "I")])
+
(define_insn "shift_mix4left"
[(set (zero_extract:DI (match_operand:DI 0 "register_operand" "+r")
(const_int 32) (const_int 0))
***************
*** 1238,1246 ****
"#"
[(set_attr "type" "unknown")])
- ;; ??? Need to emit an instruction group barrier here because this gets split
- ;; after md_reorg.
-
(define_split
[(set (zero_extract:DI (match_operand:DI 0 "register_operand" "")
(const_int 32) (const_int 0))
--- 1250,1255 ----
***************
*** 2002,2091 ****
;; :: 32 bit Integer Shifts and Rotates
;; ::
;; ::::::::::::::::::::
-
- ;; There is no sign-extend form of dep, so we only get 32 bits of valid result
- ;; instead of 64 like the patterns below.
-
- ;; Using a predicate that accepts only constants doesn't work, because optabs
- ;; will load the operand into a register and call the pattern if the predicate
- ;; did not accept it on the first try. So we use nonmemory_operand and then
- ;; verify that we have an appropriate constant in the expander.
-
- (define_expand "ashlsi3"
- [(set (match_operand:SI 0 "register_operand" "")
- (ashift:SI (match_operand:SI 1 "register_operand" "")
- (match_operand:SI 2 "nonmemory_operand" "")))]
- ""
- "
- {
- if (! shift_32bit_count_operand (operands[2], SImode))
- FAIL;
- }")
! (define_insn "*ashlsi3_internal"
! [(set (match_operand:SI 0 "register_operand" "=r")
! (ashift:SI (match_operand:SI 1 "register_operand" "r")
! (match_operand:SI 2 "shift_32bit_count_operand" "n")))]
""
! "dep.z %0 = %1, %2, %E2"
! [(set_attr "type" "I")])
!
! ;; This is really an extract, but this is how combine canonicalizes the
! ;; operation.
(define_expand "ashrsi3"
! [(set (match_dup 3)
! (ashiftrt:DI (sign_extend:DI
! (match_operand:SI 1 "register_operand" ""))
! (match_operand:DI 2 "nonmemory_operand" "")))
! (set (match_operand:SI 0 "register_operand" "") (match_dup 4))]
""
"
{
! if (! shift_32bit_count_operand (operands[2], SImode))
! FAIL;
!
! operands[3] = gen_reg_rtx (DImode);
! operands[4] = gen_lowpart (SImode, operands[3]);
}")
- (define_insn "*ashrsi3_internal"
- [(set (match_operand:DI 0 "register_operand" "=r")
- (ashiftrt:DI (sign_extend:DI
- (match_operand:SI 1 "register_operand" "r"))
- (match_operand:DI 2 "shift_32bit_count_operand" "n")))]
- ""
- "extr %0 = %1, %2, %E2"
- [(set_attr "type" "I")])
-
- ;; This is really an extract, but this is how combine canonicalizes the
- ;; operation.
-
(define_expand "lshrsi3"
! [(set (match_dup 3)
! (lshiftrt:DI (zero_extend:DI
! (match_operand:SI 1 "register_operand" ""))
! (match_operand:DI 2 "nonmemory_operand" "")))
! (set (match_operand:SI 0 "register_operand" "") (match_dup 4))]
""
"
{
! if (! shift_32bit_count_operand (operands[2], SImode))
! FAIL;
!
! operands[3] = gen_reg_rtx (DImode);
! operands[4] = gen_lowpart (SImode, operands[3]);
}")
- (define_insn "*lshrsi3_internal"
- [(set (match_operand:DI 0 "register_operand" "=r")
- (lshiftrt:DI (zero_extend:DI
- (match_operand:SI 1 "register_operand" "r"))
- (match_operand:DI 2 "shift_32bit_count_operand" "n")))]
- ""
- "extr.u %0 = %1, %2, %E2"
- [(set_attr "type" "I")])
-
;; Use mix4.r/shr to implement rotrsi3. We only get 32 bits of valid result
;; here, instead of 64 like the patterns above.
--- 2011,2070 ----
;; :: 32 bit Integer Shifts and Rotates
;; ::
;; ::::::::::::::::::::
! (define_insn "ashlsi3"
! [(set (match_operand:SI 0 "register_operand" "=r,r,r")
! (ashift:SI (match_operand:SI 1 "register_operand" "r,r,r")
! (match_operand:SI 2 "reg_or_5bit_operand" "R,n,r")))]
""
! "@
! shladd %0 = %1, %2, r0
! dep.z %0 = %1, %2, %E2
! shl %0 = %1, %2"
! [(set_attr "type" "A,I,I")])
(define_expand "ashrsi3"
! [(set (match_operand:SI 0 "register_operand" "")
! (ashiftrt:SI (match_operand:SI 1 "register_operand" "")
! (match_operand:SI 2 "reg_or_5bit_operand" "")))]
""
"
{
! rtx subtarget = gen_reg_rtx (DImode);
! if (GET_CODE (operands[2]) == CONST_INT)
! emit_insn (gen_extv (subtarget, gen_lowpart (DImode, operands[1]),
! GEN_INT (32 - INTVAL (operands[2])), operands[2]));
! else
! {
! emit_insn (gen_extendsidi2 (subtarget, operands[1]));
! emit_insn (gen_ashrdi3 (subtarget, subtarget,
! gen_lowpart (DImode, operands[2])));
! }
! emit_move_insn (gen_lowpart (DImode, operands[0]), subtarget);
! DONE;
}")
(define_expand "lshrsi3"
! [(set (match_operand:SI 0 "register_operand" "")
! (lshiftrt:SI (match_operand:SI 1 "register_operand" "")
! (match_operand:SI 2 "reg_or_5bit_operand" "")))]
""
"
{
! rtx subtarget = gen_reg_rtx (DImode);
! if (GET_CODE (operands[2]) == CONST_INT)
! emit_insn (gen_extzv (subtarget, gen_lowpart (DImode, operands[1]),
! GEN_INT (32 - INTVAL (operands[2])), operands[2]));
! else
! {
! emit_insn (gen_zero_extendsidi2 (subtarget, operands[1]));
! emit_insn (gen_lshrdi3 (subtarget, subtarget,
! gen_lowpart (DImode, operands[2])));
! }
! emit_move_insn (gen_lowpart (DImode, operands[0]), subtarget);
! DONE;
}")
;; Use mix4.r/shr to implement rotrsi3. We only get 32 bits of valid result
;; here, instead of 64 like the patterns above.
***************
*** 2102,2112 ****
{
if (! shift_32bit_count_operand (operands[2], SImode))
FAIL;
-
operands[3] = gen_reg_rtx (DImode);
operands[4] = gen_lowpart (SImode, operands[3]);
}")
-
;; ::::::::::::::::::::
;; ::
--- 2081,2089 ----
***************
*** 2115,2126 ****
;; ::::::::::::::::::::
(define_insn "ashldi3"
! [(set (match_operand:DI 0 "register_operand" "=r")
! (ashift:DI (match_operand:DI 1 "register_operand" "r")
! (match_operand:DI 2 "reg_or_6bit_operand" "rM")))]
""
! "shl %0 = %1, %2"
! [(set_attr "type" "I")])
;; ??? Maybe combine this with the multiply and add instruction?
--- 2092,2105 ----
;; ::::::::::::::::::::
(define_insn "ashldi3"
! [(set (match_operand:DI 0 "register_operand" "=r,r")
! (ashift:DI (match_operand:DI 1 "register_operand" "r,r")
! (match_operand:DI 2 "reg_or_6bit_operand" "R,rM")))]
""
! "@
! shladd %0 = %1, %2, r0
! shl %0 = %1, %2"
! [(set_attr "type" "A,I")])
;; ??? Maybe combine this with the multiply and add instruction?