This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: PATCH: Replace TARGET_64BIT with a pointer
- From: "Richard Guenther" <richard dot guenther at gmail dot com>
- To: "H.J. Lu" <hjl dot tools at gmail dot com>
- Cc: gcc-patches at gcc dot gnu dot org, ubizjak at gmail dot com
- Date: Mon, 2 Jun 2008 10:26:39 +0200
- Subject: Re: PATCH: Replace TARGET_64BIT with a pointer
- References: <20080602025258.GA24807@lucon.org>
On Mon, Jun 2, 2008 at 4:52 AM, H.J. Lu <hjl.tools@gmail.com> wrote:
> i386.c has much code like
>
> if (TARGET_64BIT)
> output_asm_insn ("pop{q}\t%0", xops);
> else
> output_asm_insn ("pop{l}\t%0", xops);
>
> On the stack branch, there is more code like that. This patch changes
> it to
>
> output_asm_insn (ix86_pop1, xops);
>
> It will simplify i386.c. There are no regressions on Linux/Intel64.
> OK for trunk?
IMHO it is less readable not to see the asm directly in the insn. Can't you
use a new % specifier that expands to either {q} or {l} depending on
TARGET_64BIT?
Thanks,
Richard.
> Thanks.
>
>
> H.J.
> ---
> 2008-06-01 H.J. Lu <hongjiu.lu@intel.com>
>
> * config/i386/i386.c (ix86_mov2_1): New.
> (ix86_mov2_2): Likewise.
> (ix86_mov3): Likewise.
> (ix86_pop1): Likewise.
> (ix86_add2_1): Likewise.
> (ix86_add2_2): Likewise.
> (ix86_add3): Likewise.
> (ix86_gen_leave): Likewise.
> (ix86_gen_pop1): Likewise.
> (ix86_gen_add3): Likewise.
> (ix86_gen_sub3): Likewise.
> (ix86_gen_sub3_carry): Likewise.
> (ix86_gen_one_cmpl2): Likewise.
> (ix86_gen_monitor): Likewise.
> (override_options): Initialize ix86_mov2_1, ix86_mov2_2,
> ix86_mov3, ix86_pop1, ix86_add2_1, ix86_add2_2, ix86_add3,
> ix86_gen_leave, ix86_gen_pop1, ix86_gen_add3, ix86_gen_sub3,
> ix86_gen_sub3_carry, ix86_gen_one_cmpl2 and ix86_gen_monitor.
> (ix86_file_end): Updated.
> (output_set_got): Likewise.
> (ix86_expand_epilogue): Likewise.
> (ix86_expand_strlensi_unroll_1): Likewise.
> (ix86_expand_strlen): Likewise.
> (ix86_expand_builtin): Likewise.
> (x86_output_mi_thunk): Likewise.
>
> --- config/i386/i386.c.gen 2008-06-01 13:28:12.000000000 -0700
> +++ config/i386/i386.c 2008-06-01 17:53:04.000000000 -0700
> @@ -1697,6 +1697,21 @@ static int ix86_regparm;
> extern int ix86_force_align_arg_pointer;
> static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
>
> +static const char *ix86_mov2_1;
> +static const char *ix86_mov2_2;
> +static const char *ix86_mov3;
> +static const char *ix86_pop1;
> +static const char *ix86_add2_1;
> +static const char *ix86_add2_2;
> +static const char *ix86_add3;
> +static rtx (*ix86_gen_leave) (void);
> +static rtx (*ix86_gen_pop1) (rtx);
> +static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
> +static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
> +static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx);
> +static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
> +static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
> +
> /* Preferred alignment for stack boundary in bits. */
> unsigned int ix86_preferred_stack_boundary;
>
> @@ -2765,6 +2780,41 @@ override_options (void)
> if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
> targetm.expand_builtin_va_start = NULL;
>
> + if (TARGET_64BIT)
> + {
> + ix86_mov2_1 = "mov{q}\t{%1, %0|%0, %1}";
> + ix86_mov2_2 = "mov{q}\t{%0, %1|%1, %0}";
> + ix86_mov3 = "mov{q}\t{%2, %0|%0, %2}";
> + ix86_pop1 = "pop{q}\t%0";
> + ix86_add2_1 = "add{q}\t{%1, %0|%0, %1}";
> + ix86_add2_2 = "add{q}\t{%0, %1|%1, %0}";
> + ix86_add3 = "add{q}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}";
> + ix86_gen_leave = gen_leave_rex64;
> + ix86_gen_pop1 = gen_popdi1;
> + ix86_gen_add3 = gen_adddi3;
> + ix86_gen_sub3 = gen_subdi3;
> + ix86_gen_sub3_carry = gen_subdi3_carry_rex64;
> + ix86_gen_one_cmpl2 = gen_one_cmpldi2;
> + ix86_gen_monitor = gen_sse3_monitor64;
> + }
> + else
> + {
> + ix86_mov2_1 = "mov{l}\t{%1, %0|%0, %1}";
> + ix86_mov2_2 = "mov{l}\t{%0, %1|%1, %0}";
> + ix86_mov3 = "mov{l}\t{%2, %0|%0, %2}";
> + ix86_pop1 = "pop{l}\t%0";
> + ix86_add2_1 = "add{l}\t{%1, %0|%0, %1}";
> + ix86_add2_2 = "add{l}\t{%0, %1|%1, %0}";
> + ix86_add3 = "add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}";
> + ix86_gen_leave = gen_leave;
> + ix86_gen_pop1 = gen_popsi1;
> + ix86_gen_add3 = gen_addsi3;
> + ix86_gen_sub3 = gen_subsi3;
> + ix86_gen_sub3_carry = gen_subsi3_carry;
> + ix86_gen_one_cmpl2 = gen_one_cmplsi2;
> + ix86_gen_monitor = gen_sse3_monitor;
> + }
> +
> #ifdef USE_IX86_CLD
> /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
> if (!TARGET_64BIT)
> @@ -5901,10 +5951,7 @@ ix86_file_end (void)
>
> xops[0] = gen_rtx_REG (Pmode, regno);
> xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
> - if (TARGET_64BIT)
> - output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
> - else
> - output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
> + output_asm_insn (ix86_mov2_1, xops);
> output_asm_insn ("ret", xops);
> }
>
> @@ -5944,12 +5991,7 @@ output_set_got (rtx dest, rtx label ATTR
> xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
>
> if (!flag_pic)
> - {
> - if (TARGET_64BIT)
> - output_asm_insn ("mov{q}\t{%2, %0|%0, %2}", xops);
> - else
> - output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
> - }
> + output_asm_insn (ix86_mov3, xops);
> else
> output_asm_insn ("call\t%a2", xops);
>
> @@ -5964,12 +6006,7 @@ output_set_got (rtx dest, rtx label ATTR
> CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
>
> if (flag_pic)
> - {
> - if (TARGET_64BIT)
> - output_asm_insn ("pop{q}\t%0", xops);
> - else
> - output_asm_insn ("pop{l}\t%0", xops);
> - }
> + output_asm_insn (ix86_pop1, xops);
> }
> else
> {
> @@ -5995,19 +6032,9 @@ output_set_got (rtx dest, rtx label ATTR
> return "";
>
> if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
> - {
> - if (TARGET_64BIT)
> - output_asm_insn ("add{q}\t{%1, %0|%0, %1}", xops);
> - else
> - output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
> - }
> + output_asm_insn (ix86_add2_1, xops);
> else
> - {
> - if (TARGET_64BIT)
> - output_asm_insn ("add{q}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
> - else
> - output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
> - }
> + output_asm_insn (ix86_add3, xops);
>
> return "";
> }
> @@ -6732,16 +6759,14 @@ ix86_expand_epilogue (int style)
> /* If not an i386, mov & pop is faster than "leave". */
> else if (TARGET_USE_LEAVE || optimize_size
> || !cfun->machine->use_fast_prologue_epilogue)
> - emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
> + emit_insn ((*ix86_gen_leave) ());
> else
> {
> pro_epilogue_adjust_stack (stack_pointer_rtx,
> hard_frame_pointer_rtx,
> const0_rtx, style);
> - if (TARGET_64BIT)
> - emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
> - else
> - emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
> +
> + emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx));
> }
> }
> else
> @@ -6761,22 +6786,15 @@ ix86_expand_epilogue (int style)
>
> for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
> if (ix86_save_reg (regno, false))
> - {
> - if (TARGET_64BIT)
> - emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
> - else
> - emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
> - }
> + emit_insn ((*ix86_gen_pop1) (gen_rtx_REG (Pmode, regno)));
> if (frame_pointer_needed)
> {
> /* Leave results in shorter dependency chains on CPUs that are
> able to grok it fast. */
> if (TARGET_USE_LEAVE)
> - emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
> - else if (TARGET_64BIT)
> - emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
> + emit_insn ((*ix86_gen_leave) ());
> else
> - emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
> + emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx));
> }
> }
>
> @@ -16130,10 +16148,7 @@ ix86_expand_strlensi_unroll_1 (rtx out,
> QImode, 1, end_0_label);
>
> /* Increment the address. */
> - if (TARGET_64BIT)
> - emit_insn (gen_adddi3 (out, out, const1_rtx));
> - else
> - emit_insn (gen_addsi3 (out, out, const1_rtx));
> + emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
>
> /* Not needed with an alignment of 2 */
> if (align != 2)
> @@ -16143,10 +16158,7 @@ ix86_expand_strlensi_unroll_1 (rtx out,
> emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
> end_0_label);
>
> - if (TARGET_64BIT)
> - emit_insn (gen_adddi3 (out, out, const1_rtx));
> - else
> - emit_insn (gen_addsi3 (out, out, const1_rtx));
> + emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
>
> emit_label (align_3_label);
> }
> @@ -16154,10 +16166,7 @@ ix86_expand_strlensi_unroll_1 (rtx out,
> emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
> end_0_label);
>
> - if (TARGET_64BIT)
> - emit_insn (gen_adddi3 (out, out, const1_rtx));
> - else
> - emit_insn (gen_addsi3 (out, out, const1_rtx));
> + emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
> }
>
> /* Generate loop to check 4 bytes at a time. It is not a good idea to
> @@ -16167,10 +16176,7 @@ ix86_expand_strlensi_unroll_1 (rtx out,
>
> mem = change_address (src, SImode, out);
> emit_move_insn (scratch, mem);
> - if (TARGET_64BIT)
> - emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
> - else
> - emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
> + emit_insn ((*ix86_gen_add3) (out, out, GEN_INT (4)));
>
> /* This formula yields a nonzero result iff one of the bytes is zero.
> This saves three branches inside loop and many cycles. */
> @@ -16226,10 +16232,7 @@ ix86_expand_strlensi_unroll_1 (rtx out,
>
> /* Not in the first two. Move two bytes forward. */
> emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
> - if (TARGET_64BIT)
> - emit_insn (gen_adddi3 (out, out, const2_rtx));
> - else
> - emit_insn (gen_addsi3 (out, out, const2_rtx));
> + emit_insn ((*ix86_gen_add3) (out, out, const2_rtx));
>
> emit_label (end_2_label);
>
> @@ -16239,10 +16242,7 @@ ix86_expand_strlensi_unroll_1 (rtx out,
> tmpreg = gen_lowpart (QImode, tmpreg);
> emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
> cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
> - if (TARGET_64BIT)
> - emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
> - else
> - emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
> + emit_insn ((*ix86_gen_sub3_carry) (out, out, GEN_INT (3), cmp));
>
> emit_label (end_0_label);
> }
> @@ -16284,10 +16284,7 @@ ix86_expand_strlen (rtx out, rtx src, rt
> /* strlensi_unroll_1 returns the address of the zero at the end of
> the string, like memchr(), so compute the length by subtracting
> the start address. */
> - if (TARGET_64BIT)
> - emit_insn (gen_subdi3 (out, out, addr));
> - else
> - emit_insn (gen_subsi3 (out, out, addr));
> + emit_insn ((*ix86_gen_sub3) (out, out, addr));
> }
> else
> {
> @@ -16310,16 +16307,8 @@ ix86_expand_strlen (rtx out, rtx src, rt
> unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
> scratch4), UNSPEC_SCAS);
> emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
> - if (TARGET_64BIT)
> - {
> - emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
> - emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
> - }
> - else
> - {
> - emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
> - emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
> - }
> + emit_insn ((*ix86_gen_one_cmpl2) (scratch2, scratch1));
> + emit_insn ((*ix86_gen_add3) (out, scratch2, constm1_rtx));
> }
> return 1;
> }
> @@ -21413,10 +21402,7 @@ ix86_expand_builtin (tree exp, rtx targe
> op1 = copy_to_mode_reg (SImode, op1);
> if (!REG_P (op2))
> op2 = copy_to_mode_reg (SImode, op2);
> - if (!TARGET_64BIT)
> - emit_insn (gen_sse3_monitor (op0, op1, op2));
> - else
> - emit_insn (gen_sse3_monitor64 (op0, op1, op2));
> + emit_insn ((*ix86_gen_monitor) (op0, op1, op2));
> return 0;
>
> case IX86_BUILTIN_MWAIT:
> @@ -23092,10 +23078,7 @@ x86_output_mi_thunk (FILE *file ATTRIBUT
> /* Put the this parameter into %eax. */
> xops[0] = this_param;
> xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
> - if (TARGET_64BIT)
> - output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
> - else
> - output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
> + output_asm_insn (ix86_mov2_2, xops);
> }
> else
> this_reg = NULL_RTX;
> @@ -23137,10 +23120,7 @@ x86_output_mi_thunk (FILE *file ATTRIBUT
>
> xops[0] = gen_rtx_MEM (Pmode, this_reg);
> xops[1] = tmp;
> - if (TARGET_64BIT)
> - output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
> - else
> - output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
> + output_asm_insn (ix86_mov2_2, xops);
>
> /* Adjust the this parameter. */
> xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
> @@ -23153,10 +23133,7 @@ x86_output_mi_thunk (FILE *file ATTRIBUT
> xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
> }
> xops[1] = this_reg;
> - if (TARGET_64BIT)
> - output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
> - else
> - output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
> + output_asm_insn (ix86_add2_2, xops);
> }
>
> /* If necessary, drop THIS back to its stack slot. */
> @@ -23164,10 +23141,7 @@ x86_output_mi_thunk (FILE *file ATTRIBUT
> {
> xops[0] = this_reg;
> xops[1] = this_param;
> - if (TARGET_64BIT)
> - output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
> - else
> - output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
> + output_asm_insn (ix86_mov2_2, xops);
> }
>
> xops[0] = XEXP (DECL_RTL (function), 0);
>