[PATCH][MIPS] Implement O32 FPXX ABI (GCC)

Sun May 18 20:52:00 GMT 2014

Matthew Fortune <Matthew.Fortune@imgtec.com> writes:
> *) Dwarf debug for 64-bit values in floating point values for FPXX can't
>    be strictly correct for both 32-bit and 64-bit registers but opts to
>    describe one 64-bit register as that is what the FPXX ABI is emulating.
>    I have not yet checked what exactly happens in GDB when confronted with
>    this and 32-bit registers. This also impacts frame information described
>    via mips_save_reg and mips_restore_reg. Advice on this would be
>    appreciated.

I'm not sure what's best either.  Clearly it's something that needs
to be spelled out in the ABI, but I can imagine it would be dictated
by what consumers like the unwinder and gdb find easiest to handle.

> *) ISA_HAS_MXHC1 could be defined as true for all three O32 FP ABIs but
>    I left out FP32 to maintain historic behaviour. It should be safe to
>    Include it though. Thoughts?

Sounds like the right call to me FWIW.  Enabling it for FP32 is a separate
change really.

> *) Because GCC can be built to have mfpxx or mfp64 as the default option
>    the ASM_SPEC has to handle these specially such that they are not
>    passed in conjunction with -msingle-float. Depending on how all this
>    option handling pans out then this may also need to account for
>    msoft-float as well. It is an error to have -msoft-float and -mfp64 in
>    the assembler.

The assembler and GCC shouldn't treat the options differently though.
Either it should be OK for both or neither.

> @@ -5141,7 +5141,7 @@ mips_get_arg_info (struct mips_arg_info *info, const CUMULATIVE_ARGS *cum,
>  			 || SCALAR_FLOAT_TYPE_P (type)
>  			 || VECTOR_FLOAT_TYPE_P (type))
>  		     && (GET_MODE_CLASS (mode) == MODE_FLOAT
> -			 || mode == V2SFmode)
> +			 || (TARGET_PAIRED_SINGLE_FLOAT && mode == V2SFmode))
>  		     && GET_MODE_SIZE (mode) <= UNITS_PER_FPVALUE);
>        break;

This looks odd.  We shouldn't have V2SF values if there's no ISA support
for them.

> @@ -5636,7 +5636,7 @@ mips_return_fpr_pair (enum machine_mode mode,
>  {
>    int inc;
>  
> -  inc = (TARGET_NEWABI ? 2 : MAX_FPRS_PER_FMT);
> +  inc = ((TARGET_NEWABI || mips_abi == ABI_32) ? 2 : MAX_FPRS_PER_FMT);

Formatting nit: no extra brackets here.

> @@ -6508,13 +6508,27 @@ mips_output_64bit_xfer (char direction, unsigned int gpreg, unsigned int fpreg)
>    if (TARGET_64BIT)
>      fprintf (asm_out_file, "\tdm%cc1\t%s,%s\n", direction,
>   	     reg_names[gpreg], reg_names[fpreg]);
> -  else if (TARGET_FLOAT64)
> +  else if (ISA_HAS_MXHC1)
>      {
>        fprintf (asm_out_file, "\tm%cc1\t%s,%s\n", direction,
>   	       reg_names[gpreg + TARGET_BIG_ENDIAN], reg_names[fpreg]);
>        fprintf (asm_out_file, "\tm%chc1\t%s,%s\n", direction,
>   	       reg_names[gpreg + TARGET_LITTLE_ENDIAN], reg_names[fpreg]);
>      }
> +  else if (TARGET_FLOATXX && direction == 't')
> +    {
> +      /* Use the argument save area to move via memory.  */
> +      fprintf (asm_out_file, "\tsw\t%s,0($sp)\n", reg_names[gpreg]);
> +      fprintf (asm_out_file, "\tsw\t%s,4($sp)\n", reg_names[gpreg + 1]);
> +      fprintf (asm_out_file, "\tldc1\t%s,0($sp)\n", reg_names[fpreg]);
> +    }
> +  else if (TARGET_FLOATXX && direction == 'f')
> +    {
> +      /* Use the argument save area to move via memory.  */
> +      fprintf (asm_out_file, "\tsdc1\t%s,0($sp)\n", reg_names[fpreg]);
> +      fprintf (asm_out_file, "\tlw\t%s,0($sp)\n", reg_names[gpreg]);
> +      fprintf (asm_out_file, "\tlw\t%s,4($sp)\n", reg_names[gpreg + 1]);
> +    }

The argument save area might be in use.  E.g. if an argument register
gets spilled, we'll generally try to spill it to the save area rather
than create a new stack slot for it.

This case should always be handled via SECONDARY_MEMORY_NEEDED.

> @@ -10499,7 +10544,7 @@ mips_for_each_saved_acc (HOST_WIDE_INT sp_offset, mips_save_restore_fn fn)
>  static void
>  mips_save_reg (rtx reg, rtx mem)
>  {
> -  if (GET_MODE (reg) == DFmode && !TARGET_FLOAT64)
> +  if (GET_MODE (reg) == DFmode && !TARGET_FLOAT64 && !TARGET_FLOATXX)

TARGET_FLOAT32, and elsewhere.

> @@ -12202,7 +12247,8 @@ mips_secondary_reload_class (enum reg_class rclass,
>  	return NO_REGS;
>  
>        /* Otherwise, we need to reload through an integer register.  */
> -      return GR_REGS;
> +      if (regno >= 0)
> +        return GR_REGS;
>      }
>    if (FP_REG_P (regno))
>      return reg_class_subset_p (rclass, GR_REGS) ? NO_REGS : GR_REGS;

Why's this change needed?  Although I assume it's dead code if you tested
against LRA.

> @@ -12210,6 +12256,22 @@ mips_secondary_reload_class (enum reg_class rclass,
>    return NO_REGS;
>  }
>  
> +/* Implement HARD_REGNO_CALLER_SAVE_MODE.
> +   Always save floating-point registers using their current mode to avoid
> +   using a 64-bit load/store when a 64-bit FP register only contains a 32-bit
> +   mode.  */
> +
> +enum machine_mode
> +mips_hard_regno_caller_save_mode (unsigned int regno, unsigned int nregs,
> +				  enum machine_mode mode)
> +{
> +  return ((FP_REG_P (regno) && (mode) != VOIDmode
> +	  && (unsigned) hard_regno_nregs[regno][mode] == nregs
> +	  && HARD_REGNO_MODE_OK (regno, mode))
> +	 ? mode
> +	 : choose_hard_reg_mode (regno, nregs, false));
> +}

I'd rather go with Chao-Ying's patch:

   https://gcc.gnu.org/ml/gcc/2013-01/msg00180.html

if there are no known downsides.

> @@ -17041,6 +17103,14 @@ mips_option_override (void)
>  	target_flags &= ~MASK_FLOAT64;
>      }
>  
> +  if (mips_abi != ABI_32 && TARGET_FLOATXX)
> +    error ("%<-mfpxx%> can only be used with the o32 ABI");
> +  else if (ISA_MIPS1
> +           && (TARGET_FLOATXX
> +               || TARGET_FLOAT64))
> +	error ("%<-march=%s%> requires %<-mfp32%>",
> +               mips_arch_info->name);

!TARGET_FLOAT32.  No need for the line split.

> @@ -1737,6 +1753,15 @@ struct mips_cpu_info {
>  
>  #define MODES_TIEABLE_P mips_modes_tieable_p
>  
> +/* Odd numbered single precision registers are not considered call saved
> +   for O32 FPXX as they will be clobbered when run on an FR=1 FPU.  */

Nit: Odd-numbered, single-precision

> +#define HARD_REGNO_CALL_PART_CLOBBERED(REGNO, MODE)			\
> +  (TARGET_FLOATXX && ((MODE) == SFmode || (MODE) == SImode)		\
> +   && FP_REG_P (REGNO) && (REGNO & 1))

hard_regno_nregs[REGNO][MODE] == 1 or GET_MODE_SIZE (MODE) <= 4
might be better than explicit mode checks.

> @@ -2069,6 +2094,16 @@ enum reg_class
>  #define SECONDARY_OUTPUT_RELOAD_CLASS(CLASS, MODE, X)			\
>    mips_secondary_reload_class (CLASS, MODE, X, false)
>  
> +/* When targetting the O32 FPXX ABI then all doubleword or greater moves
> +   to/from FP registers must be performed by FR mode aware instructions.

FR-mode-aware

> diff --git a/gcc/config/mips/mips.md b/gcc/config/mips/mips.md
> index f914ab6..9acc565 100644
> --- a/gcc/config/mips/mips.md
> +++ b/gcc/config/mips/mips.md
> @@ -431,9 +431,12 @@
>    (const_string "none"))
>  
>  (define_attr "enabled" "no,yes"
> -  (if_then_else (ior (eq_attr "compression" "all,none")
> -		     (and (eq_attr "compression" "micromips")
> -	                  (match_test "TARGET_MICROMIPS")))
> +  (if_then_else (and (not (and (eq_attr "move_type" "mtc,mfc")
> +			       (match_test "TARGET_FLOATXX && !ISA_HAS_MXHC1")
> +			       (eq_attr "dword_mode" "yes")))
> +		     (ior (eq_attr "compression" "all,none")
> +			  (and (eq_attr "compression" "micromips")
> +			       (match_test "TARGET_MICROMIPS"))))
>  	        (const_string "yes")
>  	        (const_string "no")))

Probably time for this to become a (cond [...]).  Took me a couple of
goes to read it properly :-)

> diff --git a/gcc/testsuite/gcc.target/mips/call-clobbered-2.c b/gcc/testsuite/gcc.target/mips/call-clobbered-2.c
> new file mode 100644
> index 0000000..f47cdda
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/mips/call-clobbered-2.c
> @@ -0,0 +1,21 @@
> +/* Check that we handle call-clobbered FPRs correctly.  */
> +/* { dg-skip-if "code quality test" { *-*-* } { "-O0" } { "" } } */
> +/* { dg-options "-mabi=32 -modd-spreg -mfp32 -ffixed-f0 -ffixed-f1 -ffixed-f2 -ffixed-f3 -ffixed-f4 -ffixed-f5 -ffixed-f6 -ffixed-f7 -ffixed-f8 -ffixed-f9 -ffixed-f10 -ffixed-f11 -ffixed-f12 -ffixed-f13 -ffixed-f14 -ffixed-f15 -ffixed-f16 -ffixed-f17 -ffixed-f18 -ffixed-f19 -ffixed-f20 -ffixed-f22 -ffixed-f24 -ffixed-f26 -ffixed-f28 -ffixed-f30" } */
> +
> +void bar (void);
> +float a;
> +float
> +foo ()
> +{
> +  float b = a + 1.0f;
> +  bar();
> +  return b;
> +}
> +/* { dg-final { scan-assembler-times "lwc1" 2 } } */
> +/* { dg-final { scan-assembler-not "swc1" } } */
> +/* { dg-final { scan-assembler-times "sdc1" 2 } } */
> +/* { dg-final { scan-assembler-not "mtc" } } */
> +/* { dg-final { scan-assembler-not "mfc" } } */
> +/* { dg-final { scan-assembler-not "mthc" } } */
> +/* { dg-final { scan-assembler-not "mfhc" } } */

Why require sdc1 here?  Would Chao-Ying's patch make this use SWC1 and LWC1
exclusively?

> diff --git a/gcc/testsuite/gcc.target/mips/call-clobbered-3.c b/gcc/testsuite/gcc.target/mips/call-clobbered-3.c
> new file mode 100644
> index 0000000..3b30ce6
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/mips/call-clobbered-3.c
> @@ -0,0 +1,21 @@
> +/* Check that we handle call-clobbered FPRs correctly.  */
> +/* { dg-skip-if "code quality test" { *-*-* } { "-O0" "-Os" } { "" } } */
> +/* { dg-options "-mabi=32 -modd-spreg -mfpxx -ffixed-f0 -ffixed-f1 -ffixed-f2 -ffixed-f3 -ffixed-f4 -ffixed-f5 -ffixed-f6 -ffixed-f7 -ffixed-f8 -ffixed-f9 -ffixed-f10 -ffixed-f11 -ffixed-f12 -ffixed-f13 -ffixed-f14 -ffixed-f15 -ffixed-f16 -ffixed-f17 -ffixed-f18 -ffixed-f19 -ffixed-f20 -ffixed-f22 -ffixed-f24 -ffixed-f26 -ffixed-f28 -ffixed-f30" } */
> +
> +void bar (void);
> +float a;
> +float
> +foo ()
> +{
> +  float b = a + 1.0f;
> +  bar();
> +  return b;
> +}
> +/* { dg-final { scan-assembler-times "lwc1" 3 } } */
> +/* { dg-final { scan-assembler-times "swc1" 1 } } */
> +/* { dg-final { scan-assembler-times "sdc1" 2 } } */
> +/* { dg-final { scan-assembler-not "mtc" } } */
> +/* { dg-final { scan-assembler-not "mfc" } } */
> +/* { dg-final { scan-assembler-not "mthc" } } */
> +/* { dg-final { scan-assembler-not "mfhc" } } */

Does there need to be an ldc1 as well?  Please explain the skips in more
detail.  "code quality test" only applies for -O0 skips.  Anything skipped
on -Os or restricted to -Os should be justified.

> diff --git a/gcc/testsuite/gcc.target/mips/call-clobbered-4.c b/gcc/testsuite/gcc.target/mips/call-clobbered-4.c
> new file mode 100644
> index 0000000..872fd5e
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/mips/call-clobbered-4.c
> @@ -0,0 +1,20 @@
> +/* Check that we handle call-clobbered FPRs correctly.  */
> +/* { dg-skip-if "code quality test" { *-*-* } { "*" } { "-Os" } } */
> +/* { dg-options "-mabi=32 -modd-spreg -mfpxx -ffixed-f0 -ffixed-f1 -ffixed-f2 -ffixed-f3 -ffixed-f4 -ffixed-f5 -ffixed-f6 -ffixed-f7 -ffixed-f8 -ffixed-f9 -ffixed-f10 -ffixed-f11 -ffixed-f12 -ffixed-f13 -ffixed-f14 -ffixed-f15 -ffixed-f16 -ffixed-f17 -ffixed-f18 -ffixed-f19 -ffixed-f20 -ffixed-f22 -ffixed-f24 -ffixed-f26 -ffixed-f28 -ffixed-f30" } */
> +
> +void bar (void);
> +float a;
> +float
> +foo ()
> +{
> +  float b = a + 1.0f;
> +  bar();
> +  return b;
> +}
> +/* { dg-final { scan-assembler-times "lwc1" 2 } } */
> +/* { dg-final { scan-assembler-times "sdc1" 2 } } */
> +/* { dg-final { scan-assembler-times "mtc" 1 } } */
> +/* { dg-final { scan-assembler-times "mfc" 1 } } */
> +/* { dg-final { scan-assembler-not "mthc" } } */
> +/* { dg-final { scan-assembler-not "mfhc" } } */

What's the difference between this and call-clobbered-3.c?

> @@ -248,6 +248,15 @@ set mips_option_groups {
>      dump "-fdump-.*"
>  }
>  
> +foreach option {
> +    0 1 2 3 4 5 6 7 8 9
> +    10 11 12 13 14 15 16 17 18 19
> +    20 21 22 23 24 25 26 27 28 29
> +    30 31
> +} {

for { set option 0 } { $option < 32 } { incr option } {

> @@ -1083,7 +1096,8 @@ proc mips-dg-options { args } {
>  	# (*) needed by both -mbranch-likely and -mfix-r10000
>  	} elseif { $isa < 2
>  		   && ([mips_have_test_option_p options "-mbranch-likely"]
> -		       || [mips_have_test_option_p options "-mfix-r10000"]) } {
> +		       || [mips_have_test_option_p options "-mfix-r10000"]
> +		       || ($gp_size == 32 && [mips_have_test_option_p options "-mfpxx"])) } {

Long line

> diff --git a/gcc/testsuite/gcc.target/mips/movdf-1.c b/gcc/testsuite/gcc.target/mips/movdf-1.c
> new file mode 100644
> index 0000000..d08b616
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/mips/movdf-1.c
> @@ -0,0 +1,13 @@
> +/* Check that we move DFmode values via memory between FP and GP.  */
> +/* { dg-options "-mabi=32 -mfpxx isa=2" } */
> +
> +void bar (void);
> +
> +double
> +foo (int x, double a)
> +{
> +  return a;
> +}
> +/* { dg-final { scan-assembler-not "mthc1" } } */
> +/* { dg-final { scan-assembler-not "mtc1" } } */

Probably worth adding some positive tests too, to make sure that the load
happens with ldc1, etc.

> +#elif __mips_fpr == 0
> +#define MOVE_DF_BYTE0t sw $4, 0($29); sw $5, 4($29); ldc1 $f12, 0($29)
> +#define MOVE_DF_BYTE0f sdc1 $f12, 0($29); lw $4, 0($29); lw $5, 4($29)
> +#define MOVE_DF_BYTE0(D) MOVE_DF_BYTE0##D
> +#define MOVE_DF_BYTE8t sw $6, 8($29); sw $7, 12($29); ldc1 $f14, 8($29)
> +#define MOVE_DF_BYTE8f sdc1 $f14, 8($29); lw $6, 8($29); lw $7, 12($29)
> +#define MOVE_DF_BYTE8(D) MOVE_DF_BYTE8##D
> +#define MOVE_DF_RETt(T) sw $2, 0($29); sw $3, 4($29); DELAYt (T, ldc1 $f0, 0($29))
> +#define MOVE_DF_RETf(T) sdc1 $f0, 0($29); lw $2, 0($29); DELAYf (T, lw $3, 4($29))
> +#define MOVE_DF_RET(D, T) MOVE_DF_RET##D(T)
> +#define MOVE_DC_RETt(T) sw $4, 8($29); sw $5, 12($29); ldc1 $f2, 8($29); MOVE_DF_RETt(T)
> +#define MOVE_DC_RETf(T) sdc1 $f2, 8($29); lw $4, 8($29); lw $5, 12($29); MOVE_DF_RETf(T)
> +#define MOVE_DC_RET(D, T) MOVE_DF_RET##D(T)
>  #elif defined(__MIPSEB__)
>  /* FPRs are little-endian.  */
>  #define MOVE_DF_BYTE0(D) m##D##c1 $4,$f13; m##D##c1 $5,$f12

Is this always safe?  CALL_STUB_RET in particular uses a special ABI
rather than the standard one, so I don't think we can assume that
it's safe to clobber the parameter save area.

Thanks,
Richard