This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [PATCH][AArch64][2/5] Improve immediate generation
- From: James Greenhalgh <james dot greenhalgh at arm dot com>
- To: Wilco Dijkstra <wdijkstr at arm dot com>
- Cc: "'GCC Patches'" <gcc-patches at gcc dot gnu dot org>
- Date: Fri, 18 Sep 2015 15:26:12 +0100
- Subject: Re: [PATCH][AArch64][2/5] Improve immediate generation
- Authentication-results: sourceware.org; auth=none
- References: <000a01d0e57b$d435d420$7ca17c60$ at com>
On Wed, Sep 02, 2015 at 01:35:19PM +0100, Wilco Dijkstra wrote:
> aarch64_internal_mov_immediate uses loops iterating over all legal bitmask
> immediates to find 2-instruction immediate combinations. One loop is
> quadratic and, despite being extremely expensive, very rarely finds a matching
> immediate (43 matches in all of SPEC2006 but none are emitted in final code),
> so it can be removed without any effect on code quality. The other loop can
> be replaced by a constant-time search: rather than iterating over all legal
> bitmask values, reconstruct a potential bitmask and query the fast
> aarch64_bitmask_imm.
>
> No change in generated code, passes GCC regression tests/bootstrap.
Well, presumably those 43 cases in SPEC2006 changed...
The code looks OK, and I haven't seen any objections from Marcus or
Richard on the overall direction of the patch set.
OK for trunk.
Thanks,
James
>
> ChangeLog:
> 2015-09-02 Wilco Dijkstra <wdijkstr@arm.com>
>
> * gcc/config/aarch64/aarch64.c (aarch64_internal_mov_immediate):
> Replace slow immediate matching loops with a faster algorithm.
>
> ---
> gcc/config/aarch64/aarch64.c | 96 +++++++++++---------------------------------
> 1 file changed, 23 insertions(+), 73 deletions(-)
>
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index c0280e6..d6f7cb0 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -1376,7 +1376,7 @@ aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
> unsigned HOST_WIDE_INT mask;
> int i;
> bool first;
> - unsigned HOST_WIDE_INT val;
> + unsigned HOST_WIDE_INT val, val2;
> bool subtargets;
> rtx subtarget;
> int one_match, zero_match, first_not_ffff_match;
> @@ -1503,85 +1503,35 @@ aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
> }
> }
>
> - /* See if we can do it by arithmetically combining two
> - immediates. */
> - for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
> + if (zero_match != 2 && one_match != 2)
> {
> - int j;
> - mask = 0xffff;
> + /* Try emitting a bitmask immediate with a movk replacing 16 bits.
> + For a 64-bit bitmask try whether changing 16 bits to all ones or
> + zeroes creates a valid bitmask. To check any repeated bitmask,
> + try using 16 bits from the other 32-bit half of val. */
>
> - if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
> - || aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
> + for (i = 0; i < 64; i += 16, mask <<= 16)
> {
> - if (generate)
> - {
> - subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
> - emit_insn (gen_rtx_SET (subtarget,
> - GEN_INT (aarch64_bitmasks[i])));
> - emit_insn (gen_adddi3 (dest, subtarget,
> - GEN_INT (val - aarch64_bitmasks[i])));
> - }
> - num_insns += 2;
> - return num_insns;
> + val2 = val & ~mask;
> + if (val2 != val && aarch64_bitmask_imm (val2, mode))
> + break;
> + val2 = val | mask;
> + if (val2 != val && aarch64_bitmask_imm (val2, mode))
> + break;
> + val2 = val2 & ~mask;
> + val2 = val2 | (((val2 >> 32) | (val2 << 32)) & mask);
> + if (val2 != val && aarch64_bitmask_imm (val2, mode))
> + break;
> }
> -
> - for (j = 0; j < 64; j += 16, mask <<= 16)
> + if (i != 64)
> {
> - if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
> + if (generate)
> {
> - if (generate)
> - {
> - emit_insn (gen_rtx_SET (dest,
> - GEN_INT (aarch64_bitmasks[i])));
> - emit_insn (gen_insv_immdi (dest, GEN_INT (j),
> - GEN_INT ((val >> j) & 0xffff)));
> - }
> - num_insns += 2;
> - return num_insns;
> + emit_insn (gen_rtx_SET (dest, GEN_INT (val2)));
> + emit_insn (gen_insv_immdi (dest, GEN_INT (i),
> + GEN_INT ((val >> i) & 0xffff)));
> }
> - }
> - }
> -
> - /* See if we can do it by logically combining two immediates. */
> - for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
> - {
> - if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
> - {
> - int j;
> -
> - for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
> - if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
> - {
> - if (generate)
> - {
> - subtarget = subtargets ? gen_reg_rtx (mode) : dest;
> - emit_insn (gen_rtx_SET (subtarget,
> - GEN_INT (aarch64_bitmasks[i])));
> - emit_insn (gen_iordi3 (dest, subtarget,
> - GEN_INT (aarch64_bitmasks[j])));
> - }
> - num_insns += 2;
> - return num_insns;
> - }
> - }
> - else if ((val & aarch64_bitmasks[i]) == val)
> - {
> - int j;
> -
> - for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
> - if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
> - {
> - if (generate)
> - {
> - subtarget = subtargets ? gen_reg_rtx (mode) : dest;
> - emit_insn (gen_rtx_SET (subtarget,
> - GEN_INT (aarch64_bitmasks[j])));
> - emit_insn (gen_anddi3 (dest, subtarget,
> - GEN_INT (aarch64_bitmasks[i])));
> - }
> - num_insns += 2;
> - return num_insns;
> - }
> + return 2;
> }
> }
>
> --
> 1.8.3
>
>