[csl/arm-4.1] Improve loads to and stores from VFP registers

Kazu Hirata kazu@codesourcery.com
Fri Apr 21 18:23:00 GMT 2006


Hi,

Attached is a patch to improve loads to and stores from VFP
registers.

Tested on arm-none-eabi with the csl/arm-4.1 branch.  Committed to
csl/arm-4_1.

Kazu Hirata

2006-04-21  Kazu Hirata  <kazu@codesourcery.com>

	* config/arm/arm-protos.h: Add a prototype for
	output_move_vfp.
	* config/arm/arm.c (arm_coproc_mem_operand): When WB is false,
	accept PRE_DEC and POST_INC.
	(output_move_vfp): New.
	* config/arm/vfp.md (*arm_movsi_vfp, *thumb2_movsi_vfp,
	*arm_movdi_vfp, *thumb2_movdi_vfp, *movsf_vfp,
	*thumb2_movsf_vfp, *movdf_vfp, *thumb2_movdf_vfp): Call
	output_move_vfp on loads to and stores from VFP registers.

2006-04-21  Kazu Hirata  <kazu@codesourcery.com>

	* testsuite/gcc.target/arm/arm.exp,
	testsuite/gcc.target/arm/vfp-ldmdbd.c,
	testsuite/gcc.target/arm/vfp-ldmdbs.c,
	testsuite/gcc.target/arm/vfp-ldmiad.c,
	testsuite/gcc.target/arm/vfp-ldmias.c,
	testsuite/gcc.target/arm/vfp-stmdbd.c,
	testsuite/gcc.target/arm/vfp-stmdbs.c,
	testsuite/gcc.target/arm/vfp-stmiad.c,
	testsuite/gcc.target/arm/vfp-stmias.c: New.

Index: config/arm/arm-protos.h
===================================================================
--- config/arm/arm-protos.h	(revision 112913)
+++ config/arm/arm-protos.h	(working copy)
@@ -110,6 +110,7 @@ extern const char *output_mov_long_doubl
 extern const char *output_mov_double_fpa_from_arm (rtx *);
 extern const char *output_mov_double_arm_from_fpa (rtx *);
 extern const char *output_move_double (rtx *);
+extern const char *output_move_vfp (rtx *operands);
 extern const char *output_add_immediate (rtx *);
 extern const char *arithmetic_instr (rtx, int);
 extern void output_ascii_pseudo_op (FILE *, const unsigned char *, int);
Index: config/arm/arm.c
===================================================================
--- config/arm/arm.c	(revision 112913)
+++ config/arm/arm.c	(working copy)
@@ -5593,7 +5593,9 @@ cirrus_memory_offset (rtx op)
 }
 
 /* Return TRUE if OP is a valid coprocessor memory address pattern.
-   WB if true if writeback address modes are allowed.  */
+   WB is true if full writeback address modes are allowed and is false
+   if limited writeback address modes (POST_INC and PRE_DEC) are
+   allowed.  */
 
 int
 arm_coproc_mem_operand (rtx op, bool wb)
@@ -5628,12 +5630,15 @@ arm_coproc_mem_operand (rtx op, bool wb)
   if (GET_CODE (ind) == REG)
     return arm_address_register_rtx_p (ind, 0);
 
-  /* Autoincremment addressing modes.  */
-  if (wb
-      && (GET_CODE (ind) == PRE_INC
-	  || GET_CODE (ind) == POST_INC
-	  || GET_CODE (ind) == PRE_DEC
-	  || GET_CODE (ind) == POST_DEC))
+  /* Autoincrement addressing modes.  POST_INC and PRE_DEC are
+     acceptable in any case (subject to verification by
+     arm_address_register_rtx_p).  We need WB to be true to accept
+     PRE_INC and POST_DEC.  */
+  if (GET_CODE (ind) == POST_INC
+      || GET_CODE (ind) == PRE_DEC
+      || (wb
+	  && (GET_CODE (ind) == PRE_INC
+	      || GET_CODE (ind) == POST_DEC)))
     return arm_address_register_rtx_p (XEXP (ind, 0), 0);
 
   if (wb
@@ -9220,6 +9225,62 @@ output_move_double (rtx *operands)
   return "";
 }
 
+/* Output the VFP load or store described by OPERANDS, exactly one of
+   which must be a VFP register and the other a MEM.  PRE_DEC and
+   POST_INC addresses use fldm/fstm with base writeback; any other
+   address uses fld/fst.  Always returns "".  */
+
+const char *
+output_move_vfp (rtx *operands)
+{
+  rtx reg, mem, addr, ops[2];
+  int load = REG_P (operands[0]);
+  int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
+  int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
+  const char *template;
+  char buff[50];
+
+  reg = operands[!load];
+  mem = operands[load];
+
+  gcc_assert (REG_P (reg));
+  gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
+  gcc_assert (GET_MODE (reg) == SFmode
+	      || GET_MODE (reg) == DFmode
+	      || GET_MODE (reg) == SImode
+	      || GET_MODE (reg) == DImode);
+  gcc_assert (MEM_P (mem));
+
+  addr = XEXP (mem, 0);
+  switch (GET_CODE (addr))
+    {
+    case PRE_DEC:
+      template = "f%smdb%c\t%%0!, {%%%s1}%s";
+      ops[0] = XEXP (addr, 0);
+      ops[1] = reg;
+      break;
+
+    case POST_INC:
+      template = "f%smia%c\t%%0!, {%%%s1}%s";
+      ops[0] = XEXP (addr, 0);
+      ops[1] = reg;
+      break;
+
+    default:
+      template = "f%s%c\t%%%s0, %%1%s";
+      ops[0] = reg;
+      ops[1] = mem;
+      break;
+    }
+
+  /* This is a C string, not an .md template, so the tab is "\t".  */
+  sprintf (buff, template, load ? "ld" : "st", dp ? 'd' : 's',
+	   dp ? "P" : "", integer_p ? "\t%@ int" : "");
+  output_asm_insn (buff, ops);
+
+  return "";
+}
+
 /* Output an ADD r, s, #n where n may be too big for one instruction.
    If adding zero to one register, output nothing.  */
 const char *
Index: config/arm/vfp.md
===================================================================
--- config/arm/vfp.md	(revision 112913)
+++ config/arm/vfp.md	(working copy)
@@ -126,16 +126,29 @@ (define_insn "*arm_movsi_vfp"
   "TARGET_ARM && TARGET_VFP && TARGET_HARD_FLOAT
    && (   s_register_operand (operands[0], SImode)
        || s_register_operand (operands[1], SImode))"
-  "@
-  mov%?\\t%0, %1
-  mvn%?\\t%0, #%B1
-  ldr%?\\t%0, %1
-  str%?\\t%1, %0
-  fmsr%?\\t%0, %1\\t%@ int
-  fmrs%?\\t%0, %1\\t%@ int
-  fcpys%?\\t%0, %1\\t%@ int
-  flds%?\\t%0, %1\\t%@ int
-  fsts%?\\t%1, %0\\t%@ int"
+  "*
+  switch (which_alternative)
+    {
+    case 0:
+      return \"mov%?\\t%0, %1\";
+    case 1:
+      return \"mvn%?\\t%0, #%B1\";
+    case 2:
+      return \"ldr%?\\t%0, %1\";
+    case 3:
+      return \"str%?\\t%1, %0\";
+    case 4:
+      return \"fmsr%?\\t%0, %1\\t%@ int\";
+    case 5:
+      return \"fmrs%?\\t%0, %1\\t%@ int\";
+    case 6:
+      return \"fcpys%?\\t%0, %1\\t%@ int\";
+    case 7: case 8:
+      return output_move_vfp (operands);
+    default:
+      gcc_unreachable ();
+    }
+  "
   [(set_attr "predicable" "yes")
    (set_attr "type" "*,*,load1,store1,r_2_f,f_2_r,ffarith,f_loads,f_stores")
    (set_attr "pool_range"     "*,*,4096,*,*,*,*,1020,*")
@@ -148,16 +161,29 @@ (define_insn "*thumb2_movsi_vfp"
   "TARGET_THUMB2 && TARGET_VFP && TARGET_HARD_FLOAT
    && (   s_register_operand (operands[0], SImode)
        || s_register_operand (operands[1], SImode))"
-  "@
-  mov%?\\t%0, %1
-  mvn%?\\t%0, #%B1
-  ldr%?\\t%0, %1
-  str%?\\t%1, %0
-  fmsr%?\\t%0, %1\\t%@ int
-  fmrs%?\\t%0, %1\\t%@ int
-  fcpys%?\\t%0, %1\\t%@ int
-  flds%?\\t%0, %1\\t%@ int
-  fsts%?\\t%1, %0\\t%@ int"
+  "*
+  switch (which_alternative)
+    {
+    case 0:
+      return \"mov%?\\t%0, %1\";
+    case 1:
+      return \"mvn%?\\t%0, #%B1\";
+    case 2:
+      return \"ldr%?\\t%0, %1\";
+    case 3:
+      return \"str%?\\t%1, %0\";
+    case 4:
+      return \"fmsr%?\\t%0, %1\\t%@ int\";
+    case 5:
+      return \"fmrs%?\\t%0, %1\\t%@ int\";
+    case 6:
+      return \"fcpys%?\\t%0, %1\\t%@ int\";
+    case 7: case 8:
+      return output_move_vfp (operands);
+    default:
+      gcc_unreachable ();
+    }
+  "
   [(set_attr "predicable" "yes")
    (set_attr "type" "*,*,load1,store1,r_2_f,f_2_r,ffarith,f_load,f_store")
    (set_attr "pool_range"     "*,*,4096,*,*,*,*,1020,*")
@@ -187,10 +213,8 @@ (define_insn "*arm_movdi_vfp"
       return \"fmrrd%?\\t%0, %1\\t%@ int\";
     case 5:
       return \"fcpyd%?\\t%P0, %P1\\t%@ int\";
-    case 6:
-      return \"fldd%?\\t%P0, %1\\t%@ int\";
-    case 7:
-      return \"fstd%?\\t%P1, %0\\t%@ int\";
+    case 6: case 7:
+      return output_move_vfp (operands);
     default:
       gcc_unreachable ();
     }
@@ -216,10 +240,8 @@ (define_insn "*thumb2_movdi_vfp"
       return \"fmrrd%?\\t%0, %1\\t%@ int\";
     case 5:
       return \"fcpyd%?\\t%P0, %P1\\t%@ int\";
-    case 6:
-      return \"fldd%?\\t%P0, %1\\t%@ int\";
-    case 7:
-      return \"fstd%?\\t%P1, %0\\t%@ int\";
+    case 6: case 7:
+      return output_move_vfp (operands);
     default:
       abort ();
     }
@@ -241,15 +263,27 @@ (define_insn "*movsf_vfp"
   "TARGET_ARM && TARGET_HARD_FLOAT && TARGET_VFP
    && (   s_register_operand (operands[0], SFmode)
        || s_register_operand (operands[1], SFmode))"
-  "@
-  fmsr%?\\t%0, %1
-  fmrs%?\\t%0, %1
-  flds%?\\t%0, %1
-  fsts%?\\t%1, %0
-  ldr%?\\t%0, %1\\t%@ float
-  str%?\\t%1, %0\\t%@ float
-  fcpys%?\\t%0, %1
-  mov%?\\t%0, %1\\t%@ float"
+  "*
+  switch (which_alternative)
+    {
+    case 0:
+      return \"fmsr%?\\t%0, %1\";
+    case 1:
+      return \"fmrs%?\\t%0, %1\";
+    case 2: case 3:
+      return output_move_vfp (operands);
+    case 4:
+      return \"ldr%?\\t%0, %1\\t%@ float\";
+    case 5:
+      return \"str%?\\t%1, %0\\t%@ float\";
+    case 6:
+      return \"fcpys%?\\t%0, %1\";
+    case 7:
+      return \"mov%?\\t%0, %1\\t%@ float\";
+    default:
+      gcc_unreachable ();
+    }
+  "
   [(set_attr "predicable" "yes")
    (set_attr "type" "r_2_f,f_2_r,ffarith,*,f_loads,f_stores,load1,store1")
    (set_attr "pool_range" "*,*,1020,*,4096,*,*,*")
@@ -262,15 +296,27 @@ (define_insn "*thumb2_movsf_vfp"
   "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP
    && (   s_register_operand (operands[0], SFmode)
        || s_register_operand (operands[1], SFmode))"
-  "@
-  fmsr%?\\t%0, %1
-  fmrs%?\\t%0, %1
-  flds%?\\t%0, %1
-  fsts%?\\t%1, %0
-  ldr%?\\t%0, %1\\t%@ float
-  str%?\\t%1, %0\\t%@ float
-  fcpys%?\\t%0, %1
-  mov%?\\t%0, %1\\t%@ float"
+  "*
+  switch (which_alternative)
+    {
+    case 0:
+      return \"fmsr%?\\t%0, %1\";
+    case 1:
+      return \"fmrs%?\\t%0, %1\";
+    case 2: case 3:
+      return output_move_vfp (operands);
+    case 4:
+      return \"ldr%?\\t%0, %1\\t%@ float\";
+    case 5:
+      return \"str%?\\t%1, %0\\t%@ float\";
+    case 6:
+      return \"fcpys%?\\t%0, %1\";
+    case 7:
+      return \"mov%?\\t%0, %1\\t%@ float\";
+    default:
+      gcc_unreachable ();
+    }
+  "
   [(set_attr "predicable" "yes")
    (set_attr "type" "r_2_f,f_2_r,ffarith,*,f_load,f_store,load1,store1")
    (set_attr "pool_range" "*,*,1020,*,4092,*,*,*")
@@ -296,10 +342,8 @@ (define_insn "*movdf_vfp"
 	return \"fmrrd%?\\t%Q0, %R0, %P1\";
       case 2: case 3:
 	return output_move_double (operands);
-      case 4:
-	return \"fldd%?\\t%P0, %1\";
-      case 5:
-	return \"fstd%?\\t%P1, %0\";
+      case 4: case 5:
+	return output_move_vfp (operands);
       case 6:
 	return \"fcpyd%?\\t%P0, %P1\";
       case 7:
@@ -329,10 +373,8 @@ (define_insn "*thumb2_movdf_vfp"
 	return \"fmrrd%?\\t%Q0, %R0, %P1\";
       case 2: case 3: case 7:
 	return output_move_double (operands);
-      case 4:
-	return \"fldd%?\\t%P0, %1\";
-      case 5:
-	return \"fstd%?\\t%P1, %0\";
+      case 4: case 5:
+	return output_move_vfp (operands);
       case 6:
 	return \"fcpyd%?\\t%P0, %P1\";
       default:
Index: testsuite/gcc.target/arm/arm.exp
===================================================================
--- /dev/null	2006-03-11 08:41:44.866675760 -0800
+++ testsuite/gcc.target/arm/arm.exp	2006-04-11 21:58:01.000000000 -0700
@@ -0,0 +1,41 @@
+# Copyright (C) 1997, 2004, 2006 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+# 
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  
+
+# GCC testsuite that uses the `dg.exp' driver.
+
+# Exit immediately if this isn't an ARM target.
+if ![istarget arm*-*-*] then {
+  return
+}
+
+# Load support procs.
+load_lib gcc-dg.exp
+
+# If a testcase doesn't have special options, use these.
+global DEFAULT_CFLAGS
+if ![info exists DEFAULT_CFLAGS] then {
+    set DEFAULT_CFLAGS " -ansi -pedantic-errors"
+}
+
+# Initialize `dg'.
+dg-init
+
+# Main loop.
+dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cS\]]] \
+	"" $DEFAULT_CFLAGS
+
+# All done.
+dg-finish
Index: testsuite/gcc.target/arm/vfp-ldmdbd.c
===================================================================
--- /dev/null	2006-03-11 08:41:44.866675760 -0800
+++ testsuite/gcc.target/arm/vfp-ldmdbd.c	2006-04-12 15:46:53.000000000 -0700
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm32 } */
+/* { dg-options "-O2 -mfpu=vfp -mfloat-abi=softfp" } */
+
+extern void bar (double);
+
+void
+foo (double *p, double a, int n)
+{
+  do
+    bar (*--p + a);
+  while (n--);
+}
+
+/* { dg-final { scan-assembler "fldmdbd" } } */
Index: testsuite/gcc.target/arm/vfp-ldmdbs.c
===================================================================
--- /dev/null	2006-03-11 08:41:44.866675760 -0800
+++ testsuite/gcc.target/arm/vfp-ldmdbs.c	2006-04-12 15:46:53.000000000 -0700
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm32 } */
+/* { dg-options "-O2 -mfpu=vfp -mfloat-abi=softfp" } */
+
+/* Check that a float load through a pre-decremented pointer is
+   emitted as fldmdbs.  */
+
+extern void bar (float);
+
+void
+foo (float *p, float a, int n)
+{
+  do
+    bar (*--p + a);
+  while (n--);
+}
+
+/* { dg-final { scan-assembler "fldmdbs" } } */
Index: testsuite/gcc.target/arm/vfp-ldmiad.c
===================================================================
--- /dev/null	2006-03-11 08:41:44.866675760 -0800
+++ testsuite/gcc.target/arm/vfp-ldmiad.c	2006-04-12 15:46:53.000000000 -0700
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm32 } */
+/* { dg-options "-O2 -mfpu=vfp -mfloat-abi=softfp" } */
+
+extern void bar (double);
+
+void
+foo (double *p, double a, int n)
+{
+  do
+    bar (*p++ + a);
+  while (n--);
+}
+
+/* { dg-final { scan-assembler "fldmiad" } } */
Index: testsuite/gcc.target/arm/vfp-ldmias.c
===================================================================
--- /dev/null	2006-03-11 08:41:44.866675760 -0800
+++ testsuite/gcc.target/arm/vfp-ldmias.c	2006-04-12 15:46:53.000000000 -0700
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm32 } */
+/* { dg-options "-O2 -mfpu=vfp -mfloat-abi=softfp" } */
+
+/* Check that a float load through a post-incremented pointer is
+   emitted as fldmias.  */
+
+extern void bar (float);
+
+void
+foo (float *p, float a, int n)
+{
+  do
+    bar (*p++ + a);
+  while (n--);
+}
+
+/* { dg-final { scan-assembler "fldmias" } } */
Index: testsuite/gcc.target/arm/vfp-stmdbd.c
===================================================================
--- /dev/null	2006-03-11 08:41:44.866675760 -0800
+++ testsuite/gcc.target/arm/vfp-stmdbd.c	2006-04-12 15:46:53.000000000 -0700
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm32 } */
+/* { dg-options "-O2 -mfpu=vfp -mfloat-abi=softfp" } */
+
+void
+foo (double *p, double a, double b, int n)
+{
+  double c = a + b;
+  do
+    *--p = c;
+  while (n--);
+}
+
+/* { dg-final { scan-assembler "fstmdbd" } } */
Index: testsuite/gcc.target/arm/vfp-stmdbs.c
===================================================================
--- /dev/null	2006-03-11 08:41:44.866675760 -0800
+++ testsuite/gcc.target/arm/vfp-stmdbs.c	2006-04-12 15:46:53.000000000 -0700
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm32 } */
+/* { dg-options "-O2 -mfpu=vfp -mfloat-abi=softfp" } */
+
+void
+foo (float *p, float a, float b, int n)
+{
+  float c = a + b;
+  do
+    *--p = c;
+  while (n--);
+}
+
+/* { dg-final { scan-assembler "fstmdbs" } } */
Index: testsuite/gcc.target/arm/vfp-stmiad.c
===================================================================
--- /dev/null	2006-03-11 08:41:44.866675760 -0800
+++ testsuite/gcc.target/arm/vfp-stmiad.c	2006-04-12 15:46:53.000000000 -0700
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm32 } */
+/* { dg-options "-O2 -mfpu=vfp -mfloat-abi=softfp" } */
+
+void
+foo (double *p, double a, double b, int n)
+{
+  double c = a + b;
+  do
+    *p++ = c;
+  while (n--);
+}
+
+/* { dg-final { scan-assembler "fstmiad" } } */
Index: testsuite/gcc.target/arm/vfp-stmias.c
===================================================================
--- /dev/null	2006-03-11 08:41:44.866675760 -0800
+++ testsuite/gcc.target/arm/vfp-stmias.c	2006-04-12 15:46:53.000000000 -0700
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm32 } */
+/* { dg-options "-O2 -mfpu=vfp -mfloat-abi=softfp" } */
+
+void
+foo (float *p, float a, float b, int n)
+{
+  float c = a + b;
+  do
+    *p++ = c;
+  while (n--);
+}
+
+/* { dg-final { scan-assembler "fstmias" } } */



More information about the Gcc-patches mailing list