This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: [AArch64] Use arrays and loops rather than numbered variables in aarch64_operands_adjust_ok_for_ldpstp [1/2]
- From: "Richard Earnshaw (lists)" <Richard dot Earnshaw at arm dot com>
- To: Jackson Woodruff <jackson dot woodruff at arm dot com>, Sudakshina <sudi dot das at arm dot com>, gcc-patches at gcc dot gnu dot org
- Cc: james dot greenhalgh at arm dot com, marcus dot shawcroft at arm dot com, nd at arm dot com, kyrylo dot tkachov at foss dot arm dot com, christophe dot lyon at linaro dot org
- Date: Thu, 12 Jul 2018 11:28:57 +0100
- Subject: Re: [AArch64] Use arrays and loops rather than numbered variables in aarch64_operands_adjust_ok_for_ldpstp [1/2]
- References: <df81498b-ae67-c274-5349-c4952ca300bd@arm.com> <8c083bfc-daa9-dc7d-2c1e-35e1fc130dee@arm.com> <07d6686b-5615-bd71-bd2e-3f5dec3b4847@arm.com>
On 11/07/18 17:48, Jackson Woodruff wrote:
> Hi Sudi,
>
> Thanks for the review.
>
>
> On 07/10/2018 10:56 AM, Sudakshina wrote:
>> Hi Jackson
>>
>>
>> - if (!MEM_P (mem_1) || aarch64_mem_pair_operand (mem_1, mode))
>> + if (!MEM_P (mem[1]) || aarch64_mem_pair_operand (mem[1], mode))
>>
>> mem_1 == mem[1]?
> Oops, yes... That should be mem[0].
>>
>> return false;
>>
>> - /* The mems cannot be volatile. */
>> ...
>>
>> /* If we have SImode and slow unaligned ldp,
>> check the alignment to be at least 8 byte. */
>> if (mode == SImode
>> && (aarch64_tune_params.extra_tuning_flags
>> - & AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW)
>> + & AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW)
>> && !optimize_size
>> - && MEM_ALIGN (mem_1) < 8 * BITS_PER_UNIT)
>> + && MEM_ALIGN (mem[1]) < 8 * BITS_PER_UNIT)
>>
>> Likewise
> Done
>> ...
>> /* Check if the registers are of same class. */
>> - if (rclass_1 != rclass_2 || rclass_2 != rclass_3 || rclass_3 !=
>> rclass_4)
>> - return false;
>> + for (int i = 0; i < 3; i++)
>>
>> num_instructions -1 instead of 3 would be more consistent.
> Done
>>
>> + if (rclass[i] != rclass[i + 1])
>> + return false;
>>
>> It looks good otherwise.
>>
>> Thanks
>> Sudi
>
> Re-regtested and bootstrapped.
>
> OK for trunk?
>
> Thanks,
>
> Jackson
>
> loops.patch
>
>
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index 01f35f8e8525adb455780269757452c8c3eb20be..da44b33b2bc12f9aa2122cf5194e244437fb31a5 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -17026,23 +17026,21 @@ bool
> aarch64_operands_adjust_ok_for_ldpstp (rtx *operands, bool load,
> scalar_mode mode)
> {
> - enum reg_class rclass_1, rclass_2, rclass_3, rclass_4;
> - HOST_WIDE_INT offvals[4], msize;
> - rtx mem_1, mem_2, mem_3, mem_4, reg_1, reg_2, reg_3, reg_4;
> - rtx base_1, base_2, base_3, base_4, offset_1, offset_2, offset_3, offset_4;
> + const int num_instructions = 4;
It's conventional in GCC to use "insn(s)" rather than "instruction(s)";
saves a lot of typing.
> + enum reg_class rclass[num_instructions];
> + HOST_WIDE_INT offvals[num_instructions], msize;
> + rtx mem[num_instructions], reg[num_instructions],
> + base[num_instructions], offset[num_instructions];
>
> if (load)
> {
> - reg_1 = operands[0];
> - mem_1 = operands[1];
> - reg_2 = operands[2];
> - mem_2 = operands[3];
> - reg_3 = operands[4];
> - mem_3 = operands[5];
> - reg_4 = operands[6];
> - mem_4 = operands[7];
> - gcc_assert (REG_P (reg_1) && REG_P (reg_2)
> - && REG_P (reg_3) && REG_P (reg_4));
> + for (int i = 0; i < num_instructions; i++)
> + {
> + reg[i] = operands[2 * i];
> + mem[i] = operands[2 * i + 1];
> +
> + gcc_assert (REG_P (reg[i]));
> + }
>
> /* Do not attempt to merge the loads if the loads clobber each other. */
> for (int i = 0; i < 8; i += 2)
> @@ -17051,53 +17049,47 @@ aarch64_operands_adjust_ok_for_ldpstp (rtx *operands, bool load,
> return false;
> }
> else
> - {
> - mem_1 = operands[0];
> - reg_1 = operands[1];
> - mem_2 = operands[2];
> - reg_2 = operands[3];
> - mem_3 = operands[4];
> - reg_3 = operands[5];
> - mem_4 = operands[6];
> - reg_4 = operands[7];
> - }
> + for (int i = 0; i < num_instructions; i++)
> + {
> + mem[i] = operands[2 * i];
> + reg[i] = operands[2 * i + 1];
> + }
> +
> /* Skip if memory operand is by itslef valid for ldp/stp. */
Not technically a bug in your patch, but please fix the typo here when
you commit: itslef -> itself.
> - if (!MEM_P (mem_1) || aarch64_mem_pair_operand (mem_1, mode))
> + if (!MEM_P (mem[0]) || aarch64_mem_pair_operand (mem[0], mode))
> return false;
>
> - /* The mems cannot be volatile. */
> - if (MEM_VOLATILE_P (mem_1) || MEM_VOLATILE_P (mem_2)
> - || MEM_VOLATILE_P (mem_3) ||MEM_VOLATILE_P (mem_4))
> - return false;
> + for (int i = 0; i < num_instructions; i++)
> + {
> + /* The mems cannot be volatile. */
> + if (MEM_VOLATILE_P (mem[i]))
> + return false;
>
> - /* Check if the addresses are in the form of [base+offset]. */
> - extract_base_offset_in_addr (mem_1, &base_1, &offset_1);
> - if (base_1 == NULL_RTX || offset_1 == NULL_RTX)
> - return false;
> - extract_base_offset_in_addr (mem_2, &base_2, &offset_2);
> - if (base_2 == NULL_RTX || offset_2 == NULL_RTX)
> - return false;
> - extract_base_offset_in_addr (mem_3, &base_3, &offset_3);
> - if (base_3 == NULL_RTX || offset_3 == NULL_RTX)
> - return false;
> - extract_base_offset_in_addr (mem_4, &base_4, &offset_4);
> - if (base_4 == NULL_RTX || offset_4 == NULL_RTX)
> - return false;
> + /* Check if the addresses are in the form of [base+offset]. */
> + extract_base_offset_in_addr (mem[i], base + i, offset + i);
> + if (base[i] == NULL_RTX || offset[i] == NULL_RTX)
> + return false;
> + }
> +
> + /* Check if addresses are clobbered by load. */
> + if (load)
> + for (int i = 0; i < num_instructions; i++)
> + if (reg_mentioned_p (reg[i], mem[i]))
> + return false;
>
> /* Check if the bases are same. */
> - if (!rtx_equal_p (base_1, base_2)
> - || !rtx_equal_p (base_2, base_3)
> - || !rtx_equal_p (base_3, base_4))
> - return false;
> + for (int i = 0; i < num_instructions - 1; i++)
> + if (!rtx_equal_p (base[i], base[i + 1]))
> + return false;
> +
> + for (int i = 0; i < num_instructions; i++)
> + offvals[i] = INTVAL (offset[i]);
>
> - offvals[0] = INTVAL (offset_1);
> - offvals[1] = INTVAL (offset_2);
> - offvals[2] = INTVAL (offset_3);
> - offvals[3] = INTVAL (offset_4);
> msize = GET_MODE_SIZE (mode);
>
> /* Check if the offsets can be put in the right order to do a ldp/stp. */
> - qsort (offvals, 4, sizeof (HOST_WIDE_INT), aarch64_host_wide_int_compare);
> + qsort (offvals, num_instructions, sizeof (HOST_WIDE_INT),
> + aarch64_host_wide_int_compare);
>
> if (!(offvals[1] == offvals[0] + msize
> && offvals[3] == offvals[2] + msize))
> @@ -17112,45 +17104,25 @@ aarch64_operands_adjust_ok_for_ldpstp (rtx *operands, bool load,
> if (offvals[0] % msize != offvals[2] % msize)
> return false;
>
> - /* Check if the addresses are clobbered by load. */
> - if (load && (reg_mentioned_p (reg_1, mem_1)
> - || reg_mentioned_p (reg_2, mem_2)
> - || reg_mentioned_p (reg_3, mem_3)
> - || reg_mentioned_p (reg_4, mem_4)))
> - return false;
> -
> /* If we have SImode and slow unaligned ldp,
> check the alignment to be at least 8 byte. */
> if (mode == SImode
> && (aarch64_tune_params.extra_tuning_flags
> - & AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW)
> + & AARCH64_EXTRA_TUNE_SLOW_UNALIGNED_LDPW)
> && !optimize_size
> - && MEM_ALIGN (mem_1) < 8 * BITS_PER_UNIT)
> + && MEM_ALIGN (mem[0]) < 8 * BITS_PER_UNIT)
> return false;
>
> - if (REG_P (reg_1) && FP_REGNUM_P (REGNO (reg_1)))
> - rclass_1 = FP_REGS;
> - else
> - rclass_1 = GENERAL_REGS;
> -
> - if (REG_P (reg_2) && FP_REGNUM_P (REGNO (reg_2)))
> - rclass_2 = FP_REGS;
> - else
> - rclass_2 = GENERAL_REGS;
> -
> - if (REG_P (reg_3) && FP_REGNUM_P (REGNO (reg_3)))
> - rclass_3 = FP_REGS;
> - else
> - rclass_3 = GENERAL_REGS;
> -
> - if (REG_P (reg_4) && FP_REGNUM_P (REGNO (reg_4)))
> - rclass_4 = FP_REGS;
> - else
> - rclass_4 = GENERAL_REGS;
> + for (int i = 0; i < num_instructions; i++)
> + if (REG_P (reg[i]) && FP_REGNUM_P (REGNO (reg[i])))
> + rclass[i] = FP_REGS;
> + else
> + rclass[i] = GENERAL_REGS;
>
> /* Check if the registers are of same class. */
> - if (rclass_1 != rclass_2 || rclass_2 != rclass_3 || rclass_3 != rclass_4)
> - return false;
> + for (int i = 0; i < num_instructions - 1; i++)
> + if (rclass[i] != rclass[i + 1])
> + return false;
>
Why not initialize rclass to the class of the first register and then
loop once over the remaining elements, checking that each matches?  Then
you don't need an array for rclass and you have one fewer loop.
> return true;
> }
>
OK with those changes.
Thanks.
R.