This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
Re: Small multiplier support in Cortex-M0/1/+
- From: Ramana Radhakrishnan <ramana dot gcc at googlemail dot com>
- To: Hale Wang <hale dot wang at arm dot com>
- Cc: gcc-patches <gcc-patches at gcc dot gnu dot org>
- Date: Wed, 5 Nov 2014 10:07:51 +0000
- Subject: Re: Small multiplier support in Cortex-M0/1/+
- Authentication-results: sourceware.org; auth=none
- References: <002f01cfed16$09e771a0$1db654e0$ at arm dot com>
- Reply-to: ramrad01 at arm dot com
On Tue, Oct 21, 2014 at 11:01 AM, Hale Wang <hale.wang@arm.com> wrote:
> Hi,
>
> Some configurations of the Cortex-M0 and Cortex-M1 come with a high latency
> multiplier. This patch adds support for such configurations.
>
> "Small multiplier" support means using add/sub/shift instructions in place of
> the mul instruction on MCUs that have no fast multiplier.
>
> The following strategies are adopted in this patch:
> 1. Define new CPUs as
> -mcpu=cortex-m0.small-multiply, cortex-m0plus.small-multiply and
> cortex-m1.small-multiply to support the small multiplier.
> 2. -Os means size is preferred. A threshold of 5 is set, which means it will
> prevent splitting if that would end up with more than 5 instructions. For
> levels other than -Os, there is no such limit.
>
> Some test cases are also added to the testsuite to verify this functionality.
>
> Is it ok for trunk?
This is OK.
Ramana
>
> Thanks and Best Regards,
> Hale Wang
>
> gcc/ChangeLog:
>
> 2014-08-29 Hale Wang <Hale.Wang@arm.com>
>
> * config/arm/arm-cores.def: Add support for
> -mcpu=cortex-m0.small-multiply,cortex-m0plus.small-multiply,
> cortex-m1.small-multiply.
> * config/arm/arm-tables.opt: Regenerate.
> * config/arm/arm-tune.md: Regenerate.
> * config/arm/arm.c: Update the rtx-costs for MUL.
> * config/arm/bpabi.h: Handle
> -mcpu=cortex-m0.small-multiply,cortex-m0plus.small-multiply,
> cortex-m1.small-multiply.
> * doc/invoke.texi: Document
> -mcpu=cortex-m0.small-multiply,cortex-m0plus.small-multiply,
> cortex-m1.small-multiply.
> * testsuite/gcc.target/arm/small-multiply-m0-1.c: New test case.
> * testsuite/gcc.target/arm/small-multiply-m0-2.c: Likewise.
> * testsuite/gcc.target/arm/small-multiply-m0-3.c: Likewise.
> * testsuite/gcc.target/arm/small-multiply-m0plus-1.c: Likewise.
> * testsuite/gcc.target/arm/small-multiply-m0plus-2.c: Likewise.
> * testsuite/gcc.target/arm/small-multiply-m0plus-3.c: Likewise.
> * testsuite/gcc.target/arm/small-multiply-m1-1.c: Likewise.
> * testsuite/gcc.target/arm/small-multiply-m1-2.c: Likewise.
> * testsuite/gcc.target/arm/small-multiply-m1-3.c: Likewise.
>
> ===================================================================
> diff --git a/gcc/config/arm/arm-cores.def b/gcc/config/arm/arm-cores.def
> index a830a83..af4b373 100644
> --- a/gcc/config/arm/arm-cores.def
> +++ b/gcc/config/arm/arm-cores.def
> @@ -137,6 +137,11 @@ ARM_CORE("cortex-m1", cortexm1, cortexm1,
> 6M, FL_LDSCHED, v6m)
> ARM_CORE("cortex-m0", cortexm0, cortexm0, 6M,
> FL_LDSCHED, v6m)
> ARM_CORE("cortex-m0plus", cortexm0plus, cortexm0plus, 6M,
> FL_LDSCHED, v6m)
>
> +/* V6M Architecture Processors for small-multiply implementations. */
> +ARM_CORE("cortex-m1.small-multiply", cortexm1smallmultiply, cortexm1,
> 6M, FL_LDSCHED | FL_SMALLMUL, v6m)
> +ARM_CORE("cortex-m0.small-multiply", cortexm0smallmultiply, cortexm0,
> 6M, FL_LDSCHED | FL_SMALLMUL, v6m)
> +ARM_CORE("cortex-m0plus.small-multiply",cortexm0plussmallmultiply,
> cortexm0plus,6M, FL_LDSCHED | FL_SMALLMUL, v6m)
> +
> /* V7 Architecture Processors */
> ARM_CORE("generic-armv7-a", genericv7a, genericv7a, 7A,
> FL_LDSCHED, cortex)
> ARM_CORE("cortex-a5", cortexa5, cortexa5, 7A,
> FL_LDSCHED, cortex_a5)
> diff --git a/gcc/config/arm/arm-tables.opt b/gcc/config/arm/arm-tables.opt
> index bc046a0..bd65bd2 100644
> --- a/gcc/config/arm/arm-tables.opt
> +++ b/gcc/config/arm/arm-tables.opt
> @@ -241,6 +241,15 @@ EnumValue
> Enum(processor_type) String(cortex-m0plus) Value(cortexm0plus)
>
> EnumValue
> +Enum(processor_type) String(cortex-m1.small-multiply)
> Value(cortexm1smallmultiply)
> +
> +EnumValue
> +Enum(processor_type) String(cortex-m0.small-multiply)
> Value(cortexm0smallmultiply)
> +
> +EnumValue
> +Enum(processor_type) String(cortex-m0plus.small-multiply)
> Value(cortexm0plussmallmultiply)
> +
> +EnumValue
> Enum(processor_type) String(generic-armv7-a) Value(genericv7a)
>
> EnumValue
> diff --git a/gcc/config/arm/arm-tune.md b/gcc/config/arm/arm-tune.md
> index 954cab8..8b5c778 100644
> --- a/gcc/config/arm/arm-tune.md
> +++ b/gcc/config/arm/arm-tune.md
> @@ -25,6 +25,7 @@
> arm1176jzs,arm1176jzfs,mpcorenovfp,
> mpcore,arm1156t2s,arm1156t2fs,
> cortexm1,cortexm0,cortexm0plus,
> +
> cortexm1smallmultiply,cortexm0smallmultiply,cortexm0plussmallmultiply,
> genericv7a,cortexa5,cortexa7,
> cortexa8,cortexa9,cortexa12,
> cortexa15,cortexr4,cortexr4f,
> diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
> index 93b989d..5062c85 100644
> --- a/gcc/config/arm/arm.c
> +++ b/gcc/config/arm/arm.c
> @@ -751,6 +751,8 @@ static int thumb_call_reg_needed;
> #define FL_ARCH8 (1 << 24) /* Architecture 8. */
> #define FL_CRC32 (1 << 25) /* ARMv8 CRC32 instructions.
> */
>
> +#define FL_SMALLMUL (1 << 26) /* Small multiply supported. */
> +
> #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel
> Wireless MMX technology". */
> #define FL_IWMMXT2 (1 << 30) /* "Intel Wireless MMX2 technology".
> */
>
> @@ -914,6 +916,9 @@ int arm_condexec_masklen = 0;
> /* Nonzero if chip supports the ARMv8 CRC instructions. */
> int arm_arch_crc = 0;
>
> +/* Nonzero if the core has a very small, high-latency, multiply unit. */
> +int arm_m_profile_small_mul = 0;
> +
> /* The condition codes of the ARM, and the inverse function. */
> static const char * const arm_condition_codes[] =
> {
> @@ -2784,6 +2789,7 @@ arm_option_override (void)
> arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
> arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
> arm_arch_crc = (insn_flags & FL_CRC32) != 0;
> + arm_m_profile_small_mul = (insn_flags & FL_SMALLMUL) != 0;
> if (arm_restrict_it == 2)
> arm_restrict_it = arm_arch8 && TARGET_THUMB2;
>
> @@ -8920,7 +8926,13 @@ thumb1_size_rtx_costs (rtx x, enum rtx_code code,
> enum rtx_code outer)
> /* Thumb1 mul instruction can't operate on const. We must Load it
> into a register first. */
> int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT,
> SET);
> - return COSTS_N_INSNS (1) + const_size;
> + /* For the targets which have a very small and high-latency
> multiply
> + unit, we prefer to synthesize the mult with up to 5
> instructions,
> + giving a good balance between size and performance. */
> + if (arm_arch6m && arm_m_profile_small_mul)
> + return COSTS_N_INSNS (5);
> + else
> + return COSTS_N_INSNS (1) + const_size;
> }
> return COSTS_N_INSNS (1);
>
> @@ -11338,7 +11350,11 @@ arm_9e_rtx_costs (rtx x, enum rtx_code code, enum
> rtx_code outer_code,
> switch (code)
> {
> case MULT:
> - *total = COSTS_N_INSNS (3);
> + /* Small multiply: 32 cycles for an integer multiply inst. */
> + if (arm_arch6m && arm_m_profile_small_mul)
> + *total = COSTS_N_INSNS (32);
> + else
> + *total = COSTS_N_INSNS (3);
> return true;
>
> default:
> diff --git a/gcc/config/arm/bpabi.h b/gcc/config/arm/bpabi.h
> index 7a576ac..06b9805 100644
> --- a/gcc/config/arm/bpabi.h
> +++ b/gcc/config/arm/bpabi.h
> @@ -70,6 +70,9 @@
> |mcpu=cortex-a53 \
> |mcpu=cortex-a57 \
> |mcpu=cortex-a57.cortex-a53 \
> + |mcpu=cortex-m1.small-multiply \
> + |mcpu=cortex-m0.small-multiply \
> + |mcpu=cortex-m0plus.small-multiply \
> |mcpu=generic-armv7-a \
> |march=armv7ve \
> |march=armv7-m|mcpu=cortex-m3 \
> @@ -87,6 +90,9 @@
> |mcpu=cortex-a53 \
> |mcpu=cortex-a57 \
> |mcpu=cortex-a57.cortex-a53 \
> + |mcpu=cortex-m1.small-multiply \
> + |mcpu=cortex-m0.small-multiply \
> + |mcpu=cortex-m0plus.small-multiply \
> |mcpu=marvell-pj4 \
> |mcpu=generic-armv7-a \
> |march=armv7ve \
> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
> index eae4ab1..5dc8f26 100644
> --- a/gcc/doc/invoke.texi
> +++ b/gcc/doc/invoke.texi
> @@ -12632,6 +12632,9 @@ Permissible names are: @samp{arm2}, @samp{arm250},
> @samp{cortex-m1},
> @samp{cortex-m0},
> @samp{cortex-m0plus},
> +@samp{cortex-m1.small-multiply},
> +@samp{cortex-m0.small-multiply},
> +@samp{cortex-m0plus.small-multiply},
> @samp{marvell-pj4},
> @samp{xscale}, @samp{iwmmxt}, @samp{iwmmxt2}, @samp{ep9312},
> @samp{fa526}, @samp{fa626},
> diff --git a/gcc/testsuite/gcc.target/arm/small-multiply-m0-1.c
> b/gcc/testsuite/gcc.target/arm/small-multiply-m0-1.c
> new file mode 100644
> index 0000000..77ec603
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/small-multiply-m0-1.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target arm_thumb1_ok } */
> +/* { dg-skip-if "Test is specific to cortex-m0.small-multiply" { arm*-*-* }
> { "-mcpu=*" } { "-mcpu=cortex-m0.small-multiply" } } */
> +/* { dg-options "-mcpu=cortex-m0.small-multiply -mthumb -O2" } */
> +
> +int
> +test (int a)
> +{
> + return a * 0x123456;
> +}
> +
> +/* { dg-final { scan-assembler-not "\[\\t \]+mul" } } */
> diff --git a/gcc/testsuite/gcc.target/arm/small-multiply-m0-2.c
> b/gcc/testsuite/gcc.target/arm/small-multiply-m0-2.c
> new file mode 100644
> index 0000000..c89b3ba
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/small-multiply-m0-2.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target arm_thumb1_ok } */
> +/* { dg-skip-if "Test is specific to cortex-m0.small-multiply" { arm*-*-* }
> { "-mcpu=*" } { "-mcpu=cortex-m0.small-multiply" } } */
> +/* { dg-options "-mcpu=cortex-m0.small-multiply -mthumb -Os" } */
> +
> +int
> +test (int a)
> +{
> + return a * 0x123456;
> +}
> +
> +/* { dg-final { scan-assembler "\[\\t \]+mul" } } */
> diff --git a/gcc/testsuite/gcc.target/arm/small-multiply-m0-3.c
> b/gcc/testsuite/gcc.target/arm/small-multiply-m0-3.c
> new file mode 100644
> index 0000000..b2df109
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/small-multiply-m0-3.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target arm_thumb1_ok } */
> +/* { dg-skip-if "Test is specific to cortex-m0.small-multiply" { arm*-*-* }
> { "-mcpu=*" } { "-mcpu=cortex-m0.small-multiply" } } */
> +/* { dg-options "-mcpu=cortex-m0.small-multiply -mthumb -Os" } */
> +
> +int
> +test (int a)
> +{
> + return a * 0x13;
> +}
> +
> +/* { dg-final { scan-assembler-not "\[\\t \]+mul" } } */
> diff --git a/gcc/testsuite/gcc.target/arm/small-multiply-m0plus-1.c
> b/gcc/testsuite/gcc.target/arm/small-multiply-m0plus-1.c
> new file mode 100644
> index 0000000..08a450b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/small-multiply-m0plus-1.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target arm_thumb1_ok } */
> +/* { dg-skip-if "Test is specific to cortex-m0plus.small-multiply" {
> arm*-*-* } { "-mcpu=*" } { "-mcpu=cortex-m0plus.small-multiply" } } */
> +/* { dg-options "-mcpu=cortex-m0plus.small-multiply -mthumb -O2" } */
> +
> +int
> +test (int a)
> +{
> + return a * 0x123456;
> +}
> +
> +/* { dg-final { scan-assembler-not "\[\\t \]+mul" } } */
> diff --git a/gcc/testsuite/gcc.target/arm/small-multiply-m0plus-2.c
> b/gcc/testsuite/gcc.target/arm/small-multiply-m0plus-2.c
> new file mode 100644
> index 0000000..17b52d3
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/small-multiply-m0plus-2.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target arm_thumb1_ok } */
> +/* { dg-skip-if "Test is specific to cortex-m0plus.small-multiply" {
> arm*-*-* } { "-mcpu=*" } { "-mcpu=cortex-m0plus.small-multiply" } } */
> +/* { dg-options "-mcpu=cortex-m0plus.small-multiply -mthumb -Os" } */
> +
> +int
> +test (int a)
> +{
> + return a * 0x123456;
> +}
> +
> +/* { dg-final { scan-assembler "\[\\t \]+mul" } } */
> diff --git a/gcc/testsuite/gcc.target/arm/small-multiply-m0plus-3.c
> b/gcc/testsuite/gcc.target/arm/small-multiply-m0plus-3.c
> new file mode 100644
> index 0000000..af69c75
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/small-multiply-m0plus-3.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target arm_thumb1_ok } */
> +/* { dg-skip-if "Test is specific to cortex-m0plus.small-multiply" {
> arm*-*-* } { "-mcpu=*" } { "-mcpu=cortex-m0plus.small-multiply" } } */
> +/* { dg-options "-mcpu=cortex-m0plus.small-multiply -mthumb -Os" } */
> +
> +int
> +test (int a)
> +{
> + return a * 0x13;
> +}
> +
> +/* { dg-final { scan-assembler-not "\[\\t \]+mul" } } */
> diff --git a/gcc/testsuite/gcc.target/arm/small-multiply-m1-1.c
> b/gcc/testsuite/gcc.target/arm/small-multiply-m1-1.c
> new file mode 100644
> index 0000000..d265aaf
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/small-multiply-m1-1.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target arm_thumb1_ok } */
> +/* { dg-skip-if "Test is specific to cortex-m1.small-multiply" { arm*-*-* }
> { "-mcpu=*" } { "-mcpu=cortex-m1.small-multiply" } } */
> +/* { dg-options "-mcpu=cortex-m1.small-multiply -mthumb -O2" } */
> +
> +int
> +test (int a)
> +{
> + return a * 0x123456;
> +}
> +
> +/* { dg-final { scan-assembler-not "\[\\t \]+mul" } } */
> diff --git a/gcc/testsuite/gcc.target/arm/small-multiply-m1-2.c
> b/gcc/testsuite/gcc.target/arm/small-multiply-m1-2.c
> new file mode 100644
> index 0000000..c50891c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/small-multiply-m1-2.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target arm_thumb1_ok } */
> +/* { dg-skip-if "Test is specific to cortex-m1.small-multiply" { arm*-*-* }
> { "-mcpu=*" } { "-mcpu=cortex-m1.small-multiply" } } */
> +/* { dg-options "-mcpu=cortex-m1.small-multiply -mthumb -Os" } */
> +
> +int
> +test (int a)
> +{
> + return a * 0x123456;
> +}
> +
> +/* { dg-final { scan-assembler "\[\\t \]+mul" } } */
> diff --git a/gcc/testsuite/gcc.target/arm/small-multiply-m1-3.c
> b/gcc/testsuite/gcc.target/arm/small-multiply-m1-3.c
> new file mode 100644
> index 0000000..1da21a6
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/small-multiply-m1-3.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target arm_thumb1_ok } */
> +/* { dg-skip-if "Test is specific to cortex-m1.small-multiply" { arm*-*-* }
> { "-mcpu=*" } { "-mcpu=cortex-m1.small-multiply" } } */
> +/* { dg-options "-mcpu=cortex-m1.small-multiply -mthumb -Os" } */
> +
> +int
> +test (int a)
> +{
> + return a * 0x13;
> +}
> +
> +/* { dg-final { scan-assembler-not "\[\\t \]+mul" } } */