2005-10-03  Michael Meissner

	* i386.c (ix86_split_rotate): New function to split double word
	rotates.
	* i386-protos.h (ix86_split_rotate): Add declaration.
	* i386.md (swapsi): Remove '*', create generator function.
	(swapdi_rex64): Ditto.
	(rotlti3): Define on 64-bit systems to provide 128 bit rotate.
	(rotrti3): Ditto.
	(rotldi3): On 32-bit systems, define multi-instruction sequence.
	(rotrdi3): Ditto.
	(x86_64_rotate_adj_1): New rotlti3/rotrti3 helper functions.
	(x86_64_rotate_adj_2): Ditto.
	(rotlti3 define_split): Ditto.
	(rotrti3 define_split): Ditto.
	(x86_rotate_adj_1): New rotldi3/rotrdi3 helper functions.
	(x86_rotate_adj_2): Ditto.
	(rotldi3_32bit): Ditto.
	(rotrdi3_32bit): Ditto.
	(rotldi3 define_split for 32-bit): Ditto.
	(rotrdi3 define_split for 32-bit): Ditto.

--- gcc/config/i386/i386.c.~1~	2005-10-03 16:48:08.000000000 -0400
+++ gcc/config/i386/i386.c	2005-10-03 16:35:20.000000000 -0400
@@ -11670,6 +11670,99 @@ ix86_split_lshr (rtx *operands, rtx scra
     }
 }
 
+/* Split the double-word rotate CODE (ROTATE or ROTATERT) of operands[1] by
+   operands[2] into word-sized insns, leaving the result in operands[0].
+   MODE is DImode or TImode; the count must be a CONST_INT or a REG.
+   SCRATCH is a word-sized register that holds the original high half.  */
+void
+ix86_split_rotate (enum rtx_code code, rtx *operands, rtx scratch,
+                   enum machine_mode mode)
+{
+  rtx count = operands[2];
+  rtx low_rtx, high_rtx;
+  rtx (*swap_func)(rtx arg1, rtx arg2);
+  void (*split_func) (rtx operands[], int num, rtx lo_half[], rtx hi_half[]);
+  rtx (*adj1_func) (rtx, rtx, rtx, rtx);
+  rtx (*adj2_func) (rtx, rtx, rtx);
+  rtx (*shift_func) (rtx, rtx, rtx);
+  bool swap_p = false;
+
+  gcc_assert (code == ROTATE || code == ROTATERT);
+  gcc_assert (mode == DImode || mode == TImode);
+  gcc_assert (GET_CODE (count) == CONST_INT || GET_CODE (count) == REG);
+
+  if (GET_CODE (count) == CONST_INT)
+    {
+      unsigned reg_size = (mode == TImode) ? 64 : 32;
+      unsigned count_val = (unsigned)(INTVAL (count) & ((HOST_WIDE_INT) (2*reg_size - 1)));
+
+      /* If the rotate is large enough that we would need an exchange at the
+         end, convert it to a rotate in the other direction.  */
+      if (count_val > reg_size)
+        {
+          count_val = 2*reg_size - count_val;
+          code = (code == ROTATE) ? ROTATERT : ROTATE;
+        }
+
+      /* Check for a rotate that is just a swap.  */
+      else if (count_val == reg_size)
+        {
+          swap_p = true;
+          count_val = 0;
+        }
+
+      count = GEN_INT ((HOST_WIDE_INT)count_val);
+    }
+
+  if (mode == TImode)
+    {
+      split_func = split_ti;
+      swap_func = gen_swapdi_rex64;
+      adj1_func = gen_x86_64_rotate_adj_1;
+      adj2_func = gen_x86_64_rotate_adj_2;
+      shift_func = (code == ROTATE) ? gen_x86_64_shld : gen_x86_64_shrd;
+    }
+  else
+    {
+      split_func = split_di;
+      swap_func = gen_swapsi;
+      adj1_func = gen_x86_rotate_adj_1;
+      adj2_func = gen_x86_rotate_adj_2;
+      shift_func = (code == ROTATE) ? gen_x86_shld_1 : gen_x86_shrd_1;
+    }
+
+  /* In theory operands[0] should equal operands[1], but just in case it
+     doesn't, move it now.  */
+  if (! rtx_equal_p (operands[0], operands[1]))
+    emit_move_insn (operands[0], operands[1]);
+
+  split_func (operands, 1, &low_rtx, &high_rtx);
+
+  /* Deal with pure swaps or no rotate at all.  */
+  if (GET_CODE (count) == CONST_INT && INTVAL (count) == 0)
+    {
+      if (swap_p)
+        emit_insn (swap_func (low_rtx, high_rtx));
+
+      return;
+    }
+
+  /* Do the actual rotate using two double sized rotates, using a scratch
+     register to hold the initial high value.  */
+  emit_move_insn (scratch, high_rtx);
+  emit_insn (shift_func (high_rtx, low_rtx, count));
+  emit_insn (shift_func (low_rtx, scratch, count));
+
+  /* Swap values if the rotate size is greater than the register size.  */
+  if (GET_CODE (count) != CONST_INT)
+    {
+      if (TARGET_CMOVE)
+        emit_insn (adj1_func (high_rtx, low_rtx, scratch, count));
+      else
+        emit_insn (adj2_func (high_rtx, low_rtx, count));
+    }
+}
+
 /* Helper function for the string operations below.  Dest VARIABLE whether
    it is aligned to VALUE bytes.  If true, jump to the label.  */
 static rtx
--- gcc/config/i386/i386.md.~1~	2005-10-03 16:48:09.000000000 -0400
+++ gcc/config/i386/i386.md	2005-10-03 16:26:56.000000000 -0400
@@ -1249,7 +1249,7 @@
    (set_attr "memory" "load")
    (set_attr "mode" "SI")])
 
-(define_insn "*swapsi"
+(define_insn "swapsi"
   [(set (match_operand:SI 0 "register_operand" "+r")
         (match_operand:SI 1 "register_operand" "+r"))
    (set (match_dup 1)
@@ -2092,7 +2092,7 @@
    (set (match_dup 4) (match_dup 5))]
   "split_di (operands, 2, operands + 2, operands + 4);")
 
-(define_insn "*swapdi_rex64"
+(define_insn "swapdi_rex64"
   [(set (match_operand:DI 0 "register_operand" "+r")
         (match_operand:DI 1 "register_operand" "+r"))
    (set (match_dup 1)
@@ -12003,13 +12003,186 @@
 
 ;; Rotate instructions
 
+(define_insn "rotlti3"
+  [(set (match_operand:TI 0 "register_operand" "=r")
+        (rotate:TI (match_operand:TI 1 "register_operand" "0")
+                   (match_operand:QI 2 "nonmemory_operand" "Kc")))
+   (clobber (match_scratch:DI 3 "=&r"))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT"
+  "#"
+  [(set_attr "type" "multi")])
+
+;; Fix up a variable-count TImode rotate.  The 64-bit shld/shrd only use
+;; the low 6 bits of the count, so the two halves must be exchanged when
+;; bit 6 (value 64) of the count is set.
+(define_expand "x86_64_rotate_adj_1"
+  [(set (match_operand:DI 2 "register_operand" "")
+        (match_operand:DI 0 "register_operand" ""))
+   (set (reg:CCZ FLAGS_REG)
+        (compare:CCZ (and:QI (match_operand:QI 3 "register_operand" "")
+                             (const_int 64))
+                     (const_int 0)))
+   (set (match_dup 0)
+        (if_then_else:DI (ne (reg:CCZ FLAGS_REG) (const_int 0))
+                         (match_operand:DI 1 "register_operand" "")
+                         (match_dup 0)))
+   (set (match_dup 1)
+        (if_then_else:DI (ne (reg:CCZ FLAGS_REG) (const_int 0))
+                         (match_dup 2)
+                         (match_dup 1)))]
+  "TARGET_64BIT && TARGET_CMOVE"
+  "")
+
+(define_expand "x86_64_rotate_adj_2"
+  [(use (match_operand:DI 0 "register_operand" ""))
+   (use (match_operand:DI 1 "register_operand" ""))
+   (use (match_operand:QI 2 "register_operand" ""))]
+  "TARGET_64BIT"
+{
+  rtx label = gen_label_rtx ();
+  rtx tmp;
+
+  emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (64)));
+
+  tmp = gen_rtx_REG (CCZmode, FLAGS_REG);
+  tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
+  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
+                              gen_rtx_LABEL_REF (VOIDmode, label),
+                              pc_rtx);
+  tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
+  JUMP_LABEL (tmp) = label;
+
+  emit_insn (gen_swapdi_rex64 (operands[0], operands[1]));
+
+  emit_label (label);
+  LABEL_NUSES (label) = 1;
+
+  DONE;
+})
+
+(define_split
+  [(set (match_operand:TI 0 "register_operand" "")
+        (rotate:TI (match_operand:TI 1 "register_operand" "")
+                   (match_operand:QI 2 "nonmemory_operand" "")))
+   (clobber (match_scratch:DI 3 ""))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && reload_completed"
+  [(const_int 0)]
+  "ix86_split_rotate (ROTATE, operands, operands[3], TImode); DONE;")
+
+(define_insn "rotrti3"
+  [(set (match_operand:TI 0 "register_operand" "=r")
+        (rotatert:TI (match_operand:TI 1 "register_operand" "0")
+                     (match_operand:QI 2 "nonmemory_operand" "Kc")))
+   (clobber (match_scratch:DI 3 "=&r"))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT"
+  "#"
+  [(set_attr "type" "multi")])
+
+(define_split
+  [(set (match_operand:TI 0 "register_operand" "")
+        (rotatert:TI (match_operand:TI 1 "register_operand" "")
+                     (match_operand:QI 2 "nonmemory_operand" "")))
+   (clobber (match_scratch:DI 3 ""))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && reload_completed"
+  [(const_int 0)]
+  "ix86_split_rotate (ROTATERT, operands, operands[3], TImode); DONE;")
+
 (define_expand "rotldi3"
   [(set (match_operand:DI 0 "nonimmediate_operand" "")
         (rotate:DI (match_operand:DI 1 "nonimmediate_operand" "")
                    (match_operand:QI 2 "nonmemory_operand" "")))
    (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT"
-  "ix86_expand_binary_operator (ROTATE, DImode, operands); DONE;")
+  ""
+  "
+{
+  if (TARGET_64BIT)
+    ix86_expand_binary_operator (ROTATE, DImode, operands);
+
+  else
+    {
+      rtx dst;
+      operands[1] = force_reg (DImode, operands[1]);
+      dst = ix86_fixup_binary_operands (ROTATE, DImode, operands);
+      emit_insn (gen_rotldi3_32bit (dst, operands[1], operands[2]));
+      if (dst != operands[0])
+        emit_move_insn (operands[0], dst);
+    }
+
+  DONE;
+}
+")
+
+(define_insn "rotldi3_32bit"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+        (rotate:DI (match_operand:DI 1 "register_operand" "0")
+                   (match_operand:QI 2 "nonmemory_operand" "Jc")))
+   (clobber (match_scratch:SI 3 "=&r"))
+   (clobber (reg:CC FLAGS_REG))]
+  "!TARGET_64BIT"
+  "#"
+  [(set_attr "type" "multi")])
+
+;; Fix up a variable-count DImode rotate on 32-bit targets.  The 32-bit
+;; shld/shrd only use the low 5 bits of the count, so the two halves must
+;; be exchanged when bit 5 (value 32) of the count is set.
+(define_expand "x86_rotate_adj_1"
+  [(set (match_operand:SI 2 "register_operand" "")
+        (match_operand:SI 0 "register_operand" ""))
+   (set (reg:CCZ FLAGS_REG)
+        (compare:CCZ (and:QI (match_operand:QI 3 "register_operand" "")
+                             (const_int 32))
+                     (const_int 0)))
+   (set (match_dup 0)
+        (if_then_else:SI (ne (reg:CCZ FLAGS_REG) (const_int 0))
+                         (match_operand:SI 1 "register_operand" "")
+                         (match_dup 0)))
+   (set (match_dup 1)
+        (if_then_else:SI (ne (reg:CCZ FLAGS_REG) (const_int 0))
+                         (match_dup 2)
+                         (match_dup 1)))]
+  "TARGET_CMOVE"
+  "")
+
+(define_expand "x86_rotate_adj_2"
+  [(use (match_operand:SI 0 "register_operand" ""))
+   (use (match_operand:SI 1 "register_operand" ""))
+   (use (match_operand:QI 2 "register_operand" ""))]
+  ""
+{
+  rtx label = gen_label_rtx ();
+  rtx tmp;
+
+  emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (32)));
+
+  tmp = gen_rtx_REG (CCZmode, FLAGS_REG);
+  tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
+  tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
+                              gen_rtx_LABEL_REF (VOIDmode, label),
+                              pc_rtx);
+  tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
+  JUMP_LABEL (tmp) = label;
+
+  emit_insn (gen_swapsi (operands[0], operands[1]));
+
+  emit_label (label);
+  LABEL_NUSES (label) = 1;
+
+  DONE;
+})
+
+(define_split
+  [(set (match_operand:DI 0 "register_operand" "")
+        (rotate:DI (match_operand:DI 1 "register_operand" "")
+                   (match_operand:QI 2 "nonmemory_operand" "")))
+   (clobber (match_scratch:SI 3 ""))
+   (clobber (reg:CC FLAGS_REG))]
+  "!TARGET_64BIT && reload_completed"
+  [(const_int 0)]
+  "ix86_split_rotate (ROTATE, operands, operands[3], DImode); DONE;")
 
 (define_insn "*rotlsi3_1_one_bit_rex64"
   [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
@@ -12196,8 +12369,44 @@
         (rotatert:DI (match_operand:DI 1 "nonimmediate_operand" "")
                      (match_operand:QI 2 "nonmemory_operand" "")))
    (clobber (reg:CC FLAGS_REG))]
-  "TARGET_64BIT"
-  "ix86_expand_binary_operator (ROTATERT, DImode, operands); DONE;")
+  ""
+  "
+{
+  if (TARGET_64BIT)
+    ix86_expand_binary_operator (ROTATERT, DImode, operands);
+
+  else
+    {
+      rtx dst;
+      operands[1] = force_reg (DImode, operands[1]);
+      dst = ix86_fixup_binary_operands (ROTATERT, DImode, operands);
+      emit_insn (gen_rotrdi3_32bit (dst, operands[1], operands[2]));
+      if (dst != operands[0])
+        emit_move_insn (operands[0], dst);
+    }
+
+  DONE;
+}")
+
+(define_insn "rotrdi3_32bit"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+        (rotatert:DI (match_operand:DI 1 "register_operand" "0")
+                     (match_operand:QI 2 "nonmemory_operand" "Jc")))
+   (clobber (match_scratch:SI 3 "=&r"))
+   (clobber (reg:CC FLAGS_REG))]
+  "!TARGET_64BIT"
+  "#"
+  [(set_attr "type" "multi")])
+
+(define_split
+  [(set (match_operand:DI 0 "register_operand" "")
+        (rotatert:DI (match_operand:DI 1 "register_operand" "")
+                     (match_operand:QI 2 "nonmemory_operand" "")))
+   (clobber (match_scratch:SI 3 ""))
+   (clobber (reg:CC FLAGS_REG))]
+  "!TARGET_64BIT && reload_completed"
+  [(const_int 0)]
+  "ix86_split_rotate (ROTATERT, operands, operands[3], DImode); DONE;")
 
 (define_insn "*rotrdi3_1_one_bit_rex64"
   [(set (match_operand:DI 0 "nonimmediate_operand" "=rm")
--- gcc/config/i386/i386-protos.h.~1~	2005-10-03 16:48:08.000000000 -0400
+++ gcc/config/i386/i386-protos.h	2005-10-03 12:18:35.000000000 -0400
@@ -158,6 +158,7 @@ extern void ix86_split_long_move (rtx[])
 extern void ix86_split_ashl (rtx *, rtx, enum machine_mode);
 extern void ix86_split_ashr (rtx *, rtx, enum machine_mode);
 extern void ix86_split_lshr (rtx *, rtx, enum machine_mode);
+extern void ix86_split_rotate (enum rtx_code, rtx *, rtx, enum machine_mode);
 extern rtx ix86_find_base_term (rtx);
 extern int ix86_check_movabs (rtx, int);