ARM Cortex-R4F scheduler.

Mon Sep 1 13:39:00 GMT 2008

The patch below adds a scheduler for the ARM Cortex-R4F VFP unit.
This unit has very limited dual-issue, which means splitting up the 
VFP "farith" and "ffarith" types. Other than that it's fairly 
straightforward.

Tested on arm-none-eabi.
Applied to svn trunk.

Paul

2008-09-01  Paul Brook  <paul@codesourcery.com>

	gcc/
	* config/arm/arm.md: Include cortex-r4f.md.
	(attr fpu): Update type list.
	(attr type): Add fcpys, ffariths, ffarithd, fadds, faddd, fconsts,
	fconstd, fcmps and fcmpd. 
	(attr tune_cortexr4): Define.
	(attr generic_sched, attr generic_vfp): Use tune_cortexr4.
	* config/arm/vfp.md: Document fcpys, ffariths, ffarithd, fadds, faddd,
	fconsts, fconstd, fcmps and fcmpd.  Use them in insn patterns.
	* config/arm/arm.c (arm_issue_rate): Add cortexr4f.
	* config/arm/arm1020e.md (v10_ffarith, v10_farith): Use new insn
	types.
	* config/arm/cortex-a8-neon.md (cortex_a8_vfp_add_sub,
	cortex_a8_vfp_farith): Ditto.
	* config/arm/vfp11.md (vfp_ffarith, vfp_farith): Ditto.
	* config/arm/cortex-r4.md: Use tune_cortexr4.
	* config/arm/cortex-r4f.md: New file.

Index: gcc/config/arm/arm1020e.md
===================================================================

--- gcc/config/arm/arm1020e.md	(revision 139860)
+++ gcc/config/arm/arm1020e.md	(working copy)
@@ -269,12 +269,12 @@ (define_attr "vfp10" "yes,no" 
 ;; first execute state.  We model this by using 1020a_e in the first cycle.
 (define_insn_reservation "v10_ffarith" 5
  (and (eq_attr "vfp10" "yes")
-      (eq_attr "type" "ffarith"))
+      (eq_attr "type" "fcpys,ffariths,ffarithd,fcmps,fcmpd"))
  "1020a_e+v10_fmac")
 
 (define_insn_reservation "v10_farith" 5
  (and (eq_attr "vfp10" "yes")
-      (eq_attr "type" "farith"))
+      (eq_attr "type" "faddd,fadds"))
  "1020a_e+v10_fmac")
 
 (define_insn_reservation "v10_cvt" 5
Index: gcc/config/arm/arm.c
===================================================================
--- gcc/config/arm/arm.c	(revision 139860)
+++ gcc/config/arm/arm.c	(working copy)
@@ -19024,6 +19024,7 @@ arm_issue_rate (void)
   switch (arm_tune)
     {
     case cortexr4:
+    case cortexr4f:
     case cortexa8:
       return 2;
 
Index: gcc/config/arm/cortex-a8-neon.md
===================================================================
--- gcc/config/arm/cortex-a8-neon.md	(revision 139860)
+++ gcc/config/arm/cortex-a8-neon.md	(working copy)
@@ -134,7 +134,7 @@ (define_reservation "cortex_a8_vfp"
 
 (define_insn_reservation "cortex_a8_vfp_add_sub" 10
   (and (eq_attr "tune" "cortexa8")
-       (eq_attr "type" "farith"))
+       (eq_attr "type" "fconsts,fconstd,fadds,faddd"))
   "cortex_a8_vfp,cortex_a8_vfplite*9")
 
 (define_insn_reservation "cortex_a8_vfp_muls" 12
@@ -172,7 +172,7 @@ (define_insn_reservation "cortex_a8_vfp_
 ;; take four cycles, we pick that latency.
 (define_insn_reservation "cortex_a8_vfp_farith" 4
   (and (eq_attr "tune" "cortexa8")
-       (eq_attr "type" "ffarith"))
+       (eq_attr "type" "fcpys,ffariths,ffarithd,fconsts,fconstd,fcmps,fcmpd"))
   "cortex_a8_vfp,cortex_a8_vfplite*3")
 
 (define_insn_reservation "cortex_a8_vfp_cvt" 7
Index: gcc/config/arm/cortex-r4.md
===================================================================
--- gcc/config/arm/cortex-r4.md	(revision 139860)
+++ gcc/config/arm/cortex-r4.md	(working copy)
@@ -77,24 +77,24 @@ (define_reservation "cortex_r4_branch" "
 ;; Data processing instructions.  Moves without shifts are kept separate
 ;; for the purposes of the dual-issue constraints above.
 (define_insn_reservation "cortex_r4_alu" 2
-  (and (eq_attr "tune" "cortexr4")
+  (and (eq_attr "tune_cortexr4" "yes")
        (and (eq_attr "type" "alu")
             (not (eq_attr "insn" "mov"))))
   "cortex_r4_alu")
 
 (define_insn_reservation "cortex_r4_mov" 2
-  (and (eq_attr "tune" "cortexr4")
+  (and (eq_attr "tune_cortexr4" "yes")
        (and (eq_attr "type" "alu")
             (eq_attr "insn" "mov")))
   "cortex_r4_mov")
 
 (define_insn_reservation "cortex_r4_alu_shift" 2
-  (and (eq_attr "tune" "cortexr4")
+  (and (eq_attr "tune_cortexr4" "yes")
        (eq_attr "type" "alu_shift"))
   "cortex_r4_alu")
 
 (define_insn_reservation "cortex_r4_alu_shift_reg" 2
-  (and (eq_attr "tune" "cortexr4")
+  (and (eq_attr "tune_cortexr4" "yes")
        (eq_attr "type" "alu_shift_reg"))
   "cortex_r4_alu_shift_reg")
 
@@ -127,32 +127,32 @@ (define_bypass 1 "cortex_r4_alu,cortex_r
 ;; Multiplication instructions.
 
 (define_insn_reservation "cortex_r4_mul_4" 4
-  (and (eq_attr "tune" "cortexr4")
+  (and (eq_attr "tune_cortexr4" "yes")
        (eq_attr "insn" "mul,smmul"))
   "cortex_r4_mul_2")
 
 (define_insn_reservation "cortex_r4_mul_3" 3
-  (and (eq_attr "tune" "cortexr4")
+  (and (eq_attr "tune_cortexr4" "yes")
        (eq_attr "insn" "smulxy,smulwy,smuad,smusd"))
   "cortex_r4_mul")
 
 (define_insn_reservation "cortex_r4_mla_4" 4
-  (and (eq_attr "tune" "cortexr4")
+  (and (eq_attr "tune_cortexr4" "yes")
        (eq_attr "insn" "mla,smmla"))
   "cortex_r4_mul_2")
 
 (define_insn_reservation "cortex_r4_mla_3" 3
-  (and (eq_attr "tune" "cortexr4")
+  (and (eq_attr "tune_cortexr4" "yes")
        (eq_attr "insn" "smlaxy,smlawy,smlad,smlsd"))
   "cortex_r4_mul")
 
 (define_insn_reservation "cortex_r4_smlald" 3
-  (and (eq_attr "tune" "cortexr4")
+  (and (eq_attr "tune_cortexr4" "yes")
        (eq_attr "insn" "smlald,smlsld"))
   "cortex_r4_mul")
 
 (define_insn_reservation "cortex_r4_mull" 4
-  (and (eq_attr "tune" "cortexr4")
+  (and (eq_attr "tune_cortexr4" "yes")
        (eq_attr "insn" "smull,umull,umlal,umaal"))
   "cortex_r4_mul_2")
 
@@ -195,19 +195,19 @@ (define_bypass 2 "cortex_r4_mul_4,cortex
 ;; is performed with B having ten more leading zeros than A.
 ;; This gives a latency of nine for udiv and ten for sdiv.
 (define_insn_reservation "cortex_r4_udiv" 9
-  (and (eq_attr "tune" "cortexr4")
+  (and (eq_attr "tune_cortexr4" "yes")
        (eq_attr "insn" "udiv"))
   "cortex_r4_div_9")
 
 (define_insn_reservation "cortex_r4_sdiv" 10
-  (and (eq_attr "tune" "cortexr4")
+  (and (eq_attr "tune_cortexr4" "yes")
        (eq_attr "insn" "sdiv"))
   "cortex_r4_div_10")
 
 ;; Branches.  We assume correct prediction.
 
 (define_insn_reservation "cortex_r4_branch" 0
-  (and (eq_attr "tune" "cortexr4")
+  (and (eq_attr "tune_cortexr4" "yes")
        (eq_attr "type" "branch"))
   "cortex_r4_branch")
 
@@ -215,7 +215,7 @@ (define_insn_reservation "cortex_r4_bran
 ;; number is used as "positive infinity" so that everything should be
 ;; finished by the time of return.
 (define_insn_reservation "cortex_r4_call" 32
-  (and (eq_attr "tune" "cortexr4")
+  (and (eq_attr "tune_cortexr4" "yes")
        (eq_attr "type" "call"))
   "nothing")
 
@@ -226,12 +226,12 @@ (define_insn_reservation "cortex_r4_call
 ;; accesses following are correctly aligned.
 
 (define_insn_reservation "cortex_r4_load_1_2" 3
-  (and (eq_attr "tune" "cortexr4")
+  (and (eq_attr "tune_cortexr4" "yes")
        (eq_attr "type" "load1,load2"))
   "cortex_r4_load_store")
 
 (define_insn_reservation "cortex_r4_load_3_4" 4
-  (and (eq_attr "tune" "cortexr4")
+  (and (eq_attr "tune_cortexr4" "yes")
        (eq_attr "type" "load3,load4"))
   "cortex_r4_load_store_2")
 
@@ -281,12 +281,12 @@ (define_bypass 5 "cortex_r4_mul_4,cortex
 ;; Store instructions.
 
 (define_insn_reservation "cortex_r4_store_1_2" 0
-  (and (eq_attr "tune" "cortexr4")
+  (and (eq_attr "tune_cortexr4" "yes")
        (eq_attr "type" "store1,store2"))
   "cortex_r4_load_store")
 
 (define_insn_reservation "cortex_r4_store_3_4" 0
-  (and (eq_attr "tune" "cortexr4")
+  (and (eq_attr "tune_cortexr4" "yes")
        (eq_attr "type" "store3,store4"))
   "cortex_r4_load_store_2")
 
Index: gcc/config/arm/vfp.md
===================================================================
--- gcc/config/arm/vfp.md	(revision 139860)
+++ gcc/config/arm/vfp.md	(working copy)
@@ -24,8 +24,15 @@ (define_constants
 )
 
 ;; The VFP "type" attributes differ from those used in the FPA model.
-;; ffarith	Fast floating point insns, e.g. abs, neg, cpy, cmp.
-;; farith	Most arithmetic insns.
+;; fcpys	Single precision cpy.
+;; ffariths	Single precision abs, neg.
+;; ffarithd	Double precision abs, neg, cpy.
+;; fadds	Single precision add/sub.
+;; faddd	Double precision add/sub.
+;; fconsts	Single precision load immediate.
+;; fconstd	Double precision load immediate.
+;; fcmps	Single precision comparison.
+;; fcmpd	Double precision comparison.
 ;; fmuls	Single precision multiply.
 ;; fmuld	Double precision multiply.
 ;; fmacs	Single precision multiply-accumulate.
@@ -74,7 +81,7 @@ (define_insn "*arm_movsi_vfp"
     }
   "
   [(set_attr "predicable" "yes")
-   (set_attr "type" "*,*,*,*,load1,store1,r_2_f,f_2_r,ffarith,f_loads,f_stores")
+   (set_attr "type" "*,*,*,*,load1,store1,r_2_f,f_2_r,fcpys,f_loads,f_stores")
    (set_attr "pool_range"     "*,*,*,*,4096,*,*,*,*,1020,*")
    (set_attr "neg_pool_range" "*,*,*,*,4084,*,*,*,*,1008,*")]
 )
@@ -111,7 +118,7 @@ (define_insn "*thumb2_movsi_vfp"
     }
   "
   [(set_attr "predicable" "yes")
-   (set_attr "type" "*,*,*,*,load1,store1,r_2_f,f_2_r,ffarith,f_load,f_store")
+   (set_attr "type" "*,*,*,*,load1,store1,r_2_f,f_2_r,fcpys,f_load,f_store")
    (set_attr "pool_range"     "*,*,*,*,4096,*,*,*,*,1020,*")
    (set_attr "neg_pool_range" "*,*,*,*,   0,*,*,*,*,1008,*")]
 )
@@ -145,7 +152,7 @@ (define_insn "*arm_movdi_vfp"
       gcc_unreachable ();
     }
   "
-  [(set_attr "type" "*,load2,store2,r_2_f,f_2_r,ffarith,f_loadd,f_stored")
+  [(set_attr "type" "*,load2,store2,r_2_f,f_2_r,ffarithd,f_loadd,f_stored")
    (set_attr "length" "8,8,8,4,4,4,4,4")
    (set_attr "pool_range"     "*,1020,*,*,*,*,1020,*")
    (set_attr "neg_pool_range" "*,1008,*,*,*,*,1008,*")]
@@ -172,7 +179,7 @@ (define_insn "*thumb2_movdi_vfp"
       abort ();
     }
   "
-  [(set_attr "type" "*,load2,store2,r_2_f,f_2_r,ffarith,f_load,f_store")
+  [(set_attr "type" "*,load2,store2,r_2_f,f_2_r,ffarithd,f_load,f_store")
    (set_attr "length" "8,8,8,4,4,4,4,4")
    (set_attr "pool_range"     "*,4096,*,*,*,*,1020,*")
    (set_attr "neg_pool_range" "*,   0,*,*,*,*,1008,*")]
@@ -214,7 +221,7 @@ (define_insn "*movsf_vfp"
   "
   [(set_attr "predicable" "yes")
    (set_attr "type"
-     "r_2_f,f_2_r,farith,f_loads,f_stores,load1,store1,ffarith,*")
+     "r_2_f,f_2_r,fconsts,f_loads,f_stores,load1,store1,fcpys,*")
    (set_attr "pool_range" "*,*,*,1020,*,4096,*,*,*")
    (set_attr "neg_pool_range" "*,*,*,1008,*,4080,*,*,*")]
 )
@@ -250,7 +257,7 @@ (define_insn "*thumb2_movsf_vfp"
   "
   [(set_attr "predicable" "yes")
    (set_attr "type"
-     "r_2_f,f_2_r,farith,f_load,f_store,load1,store1,ffarith,*")
+     "r_2_f,f_2_r,fconsts,f_load,f_store,load1,store1,fcpys,*")
    (set_attr "pool_range" "*,*,*,1020,*,4092,*,*,*")
    (set_attr "neg_pool_range" "*,*,*,1008,*,0,*,*,*")]
 )
@@ -288,7 +295,7 @@ (define_insn "*movdf_vfp"
     }
   "
   [(set_attr "type"
-     "r_2_f,f_2_r,farith,f_loadd,f_stored,load2,store2,ffarith,*")
+     "r_2_f,f_2_r,fconstd,f_loadd,f_stored,load2,store2,ffarithd,*")
    (set_attr "length" "4,4,4,8,8,4,4,4,8")
    (set_attr "pool_range" "*,*,*,1020,*,1020,*,*,*")
    (set_attr "neg_pool_range" "*,*,*,1008,*,1008,*,*,*")]
@@ -320,7 +327,7 @@ (define_insn "*thumb2_movdf_vfp"
     }
   "
   [(set_attr "type"
-     "r_2_f,f_2_r,farith,load2,store2,f_load,f_store,ffarith,*")
+     "r_2_f,f_2_r,fconstd,load2,store2,f_load,f_store,ffarithd,*")
    (set_attr "length" "4,4,4,8,8,4,4,4,8")
    (set_attr "pool_range" "*,*,*,4096,*,1020,*,*,*")
    (set_attr "neg_pool_range" "*,*,*,0,*,1008,*,*,*")]
@@ -349,7 +356,7 @@ (define_insn "*movsfcc_vfp"
    fmrs%D3\\t%0, %2\;fmrs%d3\\t%0, %1"
    [(set_attr "conds" "use")
     (set_attr "length" "4,4,8,4,4,8,4,4,8")
-    (set_attr "type" "ffarith,ffarith,ffarith,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r")]
+    (set_attr "type" "fcpys,fcpys,fcpys,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r")]
 )
 
 (define_insn "*thumb2_movsfcc_vfp"
@@ -372,7 +379,7 @@ (define_insn "*thumb2_movsfcc_vfp"
    ite\\t%D3\;fmrs%D3\\t%0, %2\;fmrs%d3\\t%0, %1"
    [(set_attr "conds" "use")
     (set_attr "length" "6,6,10,6,6,10,6,6,10")
-    (set_attr "type" "ffarith,ffarith,ffarith,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r")]
+    (set_attr "type" "fcpys,fcpys,fcpys,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r")]
 )
 
 (define_insn "*movdfcc_vfp"
@@ -395,7 +402,7 @@ (define_insn "*movdfcc_vfp"
    fmrrd%D3\\t%Q0, %R0, %P2\;fmrrd%d3\\t%Q0, %R0, %P1"
    [(set_attr "conds" "use")
     (set_attr "length" "4,4,8,4,4,8,4,4,8")
-    (set_attr "type" "ffarith,ffarith,ffarith,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r")]
+    (set_attr "type" "ffarithd,ffarithd,ffarithd,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r")]
 )
 
 (define_insn "*thumb2_movdfcc_vfp"
@@ -418,7 +425,7 @@ (define_insn "*thumb2_movdfcc_vfp"
    ite\\t%D3\;fmrrd%D3\\t%Q0, %R0, %P2\;fmrrd%d3\\t%Q0, %R0, %P1"
    [(set_attr "conds" "use")
     (set_attr "length" "6,6,10,6,6,10,6,6,10")
-    (set_attr "type" "ffarith,ffarith,ffarith,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r")]
+    (set_attr "type" "ffarithd,ffarithd,ffarithd,r_2_f,r_2_f,r_2_f,f_2_r,f_2_r,f_2_r")]
 )
 
 
@@ -430,7 +437,7 @@ (define_insn "*abssf2_vfp"
   "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
   "fabss%?\\t%0, %1"
   [(set_attr "predicable" "yes")
-   (set_attr "type" "ffarith")]
+   (set_attr "type" "ffariths")]
 )
 
 (define_insn "*absdf2_vfp"
@@ -439,7 +446,7 @@ (define_insn "*absdf2_vfp"
   "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
   "fabsd%?\\t%P0, %P1"
   [(set_attr "predicable" "yes")
-   (set_attr "type" "ffarith")]
+   (set_attr "type" "ffarithd")]
 )
 
 (define_insn "*negsf2_vfp"
@@ -450,7 +457,7 @@ (define_insn "*negsf2_vfp"
    fnegs%?\\t%0, %1
    eor%?\\t%0, %1, #-2147483648"
   [(set_attr "predicable" "yes")
-   (set_attr "type" "ffarith")]
+   (set_attr "type" "ffariths")]
 )
 
 (define_insn_and_split "*negdf2_vfp"
@@ -496,7 +503,7 @@ (define_insn_and_split "*negdf2_vfp"
   "
   [(set_attr "predicable" "yes")
    (set_attr "length" "4,4,8")
-   (set_attr "type" "ffarith")]
+   (set_attr "type" "ffarithd")]
 )
 
 
@@ -509,7 +516,7 @@ (define_insn "*addsf3_vfp"
   "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
   "fadds%?\\t%0, %1, %2"
   [(set_attr "predicable" "yes")
-   (set_attr "type" "farith")]
+   (set_attr "type" "fadds")]
 )
 
 (define_insn "*adddf3_vfp"
@@ -519,7 +526,7 @@ (define_insn "*adddf3_vfp"
   "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
   "faddd%?\\t%P0, %P1, %P2"
   [(set_attr "predicable" "yes")
-   (set_attr "type" "farith")]
+   (set_attr "type" "faddd")]
 )
 
 
@@ -530,7 +537,7 @@ (define_insn "*subsf3_vfp"
   "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
   "fsubs%?\\t%0, %1, %2"
   [(set_attr "predicable" "yes")
-   (set_attr "type" "farith")]
+   (set_attr "type" "fadds")]
 )
 
 (define_insn "*subdf3_vfp"
@@ -540,7 +547,7 @@ (define_insn "*subdf3_vfp"
   "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
   "fsubd%?\\t%P0, %P1, %P2"
   [(set_attr "predicable" "yes")
-   (set_attr "type" "farith")]
+   (set_attr "type" "faddd")]
 )
 
 
@@ -909,7 +916,7 @@ (define_insn "*cmpsf_vfp"
    fcmps%?\\t%0, %1
    fcmpzs%?\\t%0"
   [(set_attr "predicable" "yes")
-   (set_attr "type" "ffarith")]
+   (set_attr "type" "fcmps")]
 )
 
 (define_insn "*cmpsf_trap_vfp"
@@ -921,7 +928,7 @@ (define_insn "*cmpsf_trap_vfp"
    fcmpes%?\\t%0, %1
    fcmpezs%?\\t%0"
   [(set_attr "predicable" "yes")
-   (set_attr "type" "ffarith")]
+   (set_attr "type" "fcmps")]
 )
 
 (define_insn "*cmpdf_vfp"
@@ -933,7 +940,7 @@ (define_insn "*cmpdf_vfp"
    fcmpd%?\\t%P0, %P1
    fcmpzd%?\\t%P0"
   [(set_attr "predicable" "yes")
-   (set_attr "type" "ffarith")]
+   (set_attr "type" "fcmpd")]
 )
 
 (define_insn "*cmpdf_trap_vfp"
@@ -945,7 +952,7 @@ (define_insn "*cmpdf_trap_vfp"
    fcmped%?\\t%P0, %P1
    fcmpezd%?\\t%P0"
   [(set_attr "predicable" "yes")
-   (set_attr "type" "ffarith")]
+   (set_attr "type" "fcmpd")]
 )
 
 
Index: gcc/config/arm/cortex-r4f.md
===================================================================
--- gcc/config/arm/cortex-r4f.md	(revision 0)
+++ gcc/config/arm/cortex-r4f.md	(revision 0)
@@ -0,0 +1,161 @@
+;; ARM Cortex-R4F VFP pipeline description
+;; Copyright (C) 2007 Free Software Foundation, Inc.
+;; Written by CodeSourcery.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;; General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3.  If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; With the exception of simple VMOV <freg>, <freg> instructions and
+;; the accumulate operand of a multiply-accumulate instruction, all
+;; registers are early registers.  Thus base latencies are 1 more than
+;; those listed in the TRM.
+
+;; We use the A, B and C units from the integer core, plus two additional
+;; units to enforce VFP dual issue constraints.
+
+;;		  A B C	    V1	VMLA
+;; fcpy		  1 2
+;; farith	  1 2	    1
+;; fmrc		  1 2
+;; fconst	  1 2 *	    *
+;; ffarith	  1 2 *	    *
+;; fmac		  1 2	    1	2
+;; fdiv		  1 2	    *
+;; f_loads	  *   *	    *
+;; f_stores	  *   *	    	*
+
+(define_cpu_unit "cortex_r4_v1" "cortex_r4")
+
+(define_cpu_unit "cortex_r4_vmla" "cortex_r4")
+
+(define_reservation "cortex_r4_issue_ab"
+		    "(cortex_r4_issue_a|cortex_r4_issue_b)")
+(define_reservation "cortex_r4_single_issue"
+		    "cortex_r4_issue_a+cortex_r4_issue_b")
+
+(define_insn_reservation "cortex_r4_fcpys" 2
+ (and (eq_attr "tune_cortexr4" "yes")
+      (eq_attr "type" "fcpys"))
+ "cortex_r4_issue_ab")
+
+(define_insn_reservation "cortex_r4_ffariths" 2
+ (and (eq_attr "tune_cortexr4" "yes")
+      (eq_attr "type" "ffariths,fconsts,fcmps"))
+ "cortex_r4_issue_ab+cortex_r4_issue_c+cortex_r4_v1")
+
+(define_insn_reservation "cortex_r4_fariths" 3
+ (and (eq_attr "tune_cortexr4" "yes")
+      (eq_attr "type" "fadds,fmuls"))
+ "(cortex_r4_issue_a+cortex_r4_v1)|cortex_r4_issue_b")
+
+(define_insn_reservation "cortex_r4_fmacs" 6
+ (and (eq_attr "tune_cortexr4" "yes")
+      (eq_attr "type" "fmacs"))
+ "(cortex_r4_issue_a+cortex_r4_v1)|(cortex_r4_issue_b+cortex_r4_vmla)")
+
+(define_insn_reservation "cortex_r4_fdivs" 17
+ (and (eq_attr "tune_cortexr4" "yes")
+      (eq_attr "type" "fdivs"))
+ "cortex_r4_issue_ab+cortex_r4_v1,cortex_r4_issue_a+cortex_r4_v1")
+
+(define_insn_reservation "cortex_r4_floads" 2
+ (and (eq_attr "tune_cortexr4" "yes")
+      (eq_attr "type" "f_loads"))
+ "cortex_r4_issue_a+cortex_r4_issue_c+cortex_r4_v1")
+
+(define_insn_reservation "cortex_r4_fstores" 1
+ (and (eq_attr "tune_cortexr4" "yes")
+      (eq_attr "type" "f_stores"))
+ "cortex_r4_issue_a+cortex_r4_issue_c+cortex_r4_vmla")
+
+(define_insn_reservation "cortex_r4_mcr" 2
+ (and (eq_attr "tune_cortexr4" "yes")
+      (eq_attr "type" "r_2_f"))
+ "cortex_r4_issue_ab")
+
+(define_insn_reservation "cortex_r4_mrc" 3
+ (and (eq_attr "tune_cortexr4" "yes")
+      (eq_attr "type" "f_2_r"))
+ "cortex_r4_issue_ab")
+
+;; Bypasses for normal (not early) regs.
+(define_bypass 1 "cortex_r4_ffariths,cortex_r4_fcpys,cortex_r4_mcr"
+		 "cortex_r4_fcpys")
+(define_bypass 2 "cortex_r4_fariths"
+		 "cortex_r4_fcpys")
+(define_bypass 5 "cortex_r4_fmacs"
+		 "cortex_r4_fcpys")
+(define_bypass 16 "cortex_r4_fdivs"
+		  "cortex_r4_fcpys")
+
+(define_bypass 1 "cortex_r4_ffariths,cortex_r4_fcpys,cortex_r4_mcr"
+		 "cortex_r4_fmacs"
+		 "arm_no_early_mul_dep")
+(define_bypass 2 "cortex_r4_fariths"
+		 "cortex_r4_fmacs"
+		 "arm_no_early_mul_dep")
+;; mac->mac has an extra forwarding path.
+(define_bypass 3 "cortex_r4_fmacs"
+		 "cortex_r4_fmacs"
+		 "arm_no_early_mul_dep")
+(define_bypass 16 "cortex_r4_fdivs"
+		  "cortex_r4_fmacs"
+		  "arm_no_early_mul_dep")
+
+;; Double precision operations.  These can not dual issue.
+
+(define_insn_reservation "cortex_r4_fmacd" 20
+ (and (eq_attr "tune_cortexr4" "yes")
+      (eq_attr "type" "fmacd"))
+ "cortex_r4_single_issue*13")
+
+(define_insn_reservation "cortex_r4_farith" 10
+ (and (eq_attr "tune_cortexr4" "yes")
+      (eq_attr "type" "faddd,fmuld"))
+ "cortex_r4_single_issue*3")
+
+;; FIXME: The short cycle count suggests these instructions complete
+;; out of order.  Chances are this is not a pipelined operation.
+(define_insn_reservation "cortex_r4_fdivd" 97
+ (and (eq_attr "tune_cortexr4" "yes")
+      (eq_attr "type" "fdivd"))
+ "cortex_r4_single_issue*3")
+
+(define_insn_reservation "cortex_r4_ffarithd" 2
+ (and (eq_attr "tune_cortexr4" "yes")
+      (eq_attr "type" "ffarithd,fconstd"))
+ "cortex_r4_single_issue")
+
+(define_insn_reservation "cortex_r4_fcmpd" 2
+ (and (eq_attr "tune_cortexr4" "yes")
+      (eq_attr "type" "fcmpd"))
+ "cortex_r4_single_issue*2")
+
+(define_insn_reservation "cortex_r4_f_cvt" 8
+ (and (eq_attr "tune_cortexr4" "yes")
+      (eq_attr "type" "f_cvt"))
+ "cortex_r4_single_issue*3")
+
+(define_insn_reservation "cortex_r4_f_memd" 8
+ (and (eq_attr "tune_cortexr4" "yes")
+      (eq_attr "type" "f_loadd,f_stored"))
+ "cortex_r4_single_issue")
+
+(define_insn_reservation "cortex_r4_f_flag" 1
+ (and (eq_attr "tune_cortexr4" "yes")
+      (eq_attr "type" "f_flag"))
+ "cortex_r4_single_issue")
+
Index: gcc/config/arm/arm.md
===================================================================
--- gcc/config/arm/arm.md	(revision 139860)
+++ gcc/config/arm/arm.md	(working copy)
@@ -157,7 +157,7 @@ (define_attr "shift" "" (const_int 0))
 ; Floating Point Unit.  If we only have floating point emulation, then there
 ; is no point in scheduling the floating point insns.  (Well, for best
 ; performance we should try and group them together).
-(define_attr "fpu" "none,fpa,fpe2,fpe3,maverick,vfp"
+(define_attr "fpu" "none,fpa,fpe2,fpe3,maverick,vfp,vfpv3d16,vfpv3,neon"
   (const (symbol_ref "arm_fpu_attr")))
 
 ; LENGTH of an instruction (in bytes)
@@ -239,7 +239,7 @@ (define_attr "insn"
 ;
 
 (define_attr "type"
-	"alu,alu_shift,alu_shift_reg,mult,block,float,fdivx,fdivd,fdivs,fmul,fmuls,fmuld,fmacs,fmacd,ffmul,farith,ffarith,f_flag,float_em,f_load,f_store,f_loads,f_loadd,f_stores,f_stored,f_mem_r,r_mem_f,f_2_r,r_2_f,f_cvt,branch,call,load_byte,load1,load2,load3,load4,store1,store2,store3,store4,mav_farith,mav_dmult"
+	"alu,alu_shift,alu_shift_reg,mult,block,float,fdivx,fdivd,fdivs,fmul,fmuls,fmuld,fmacs,fmacd,ffmul,farith,ffarith,f_flag,float_em,f_load,f_store,f_loads,f_loadd,f_stores,f_stored,f_mem_r,r_mem_f,f_2_r,r_2_f,f_cvt,branch,call,load_byte,load1,load2,load3,load4,store1,store2,store3,store4,mav_farith,mav_dmult,fconsts,fconstd,fadds,faddd,ffariths,ffarithd,fcmps,fcmpd,fcpys"
 	(if_then_else 
 	 (eq_attr "insn" "smulxy,smlaxy,smlalxy,smulwy,smlawx,mul,muls,mla,mlas,umull,umulls,umlal,umlals,smull,smulls,smlal,smlals")
 	 (const_string "mult")
@@ -331,18 +331,26 @@ (define_mode_iterator ANY64 [DI DF V8QI 
 ;; Processor type.  This is created automatically from arm-cores.def.
 (include "arm-tune.md")
 
+(define_attr "tune_cortexr4" "yes,no"
+  (const (if_then_else
+	  (eq_attr "tune" "cortexr4,cortexr4f")
+	  (const_string "yes")
+	  (const_string "no"))))
+
 ;; True if the generic scheduling description should be used.
 
 (define_attr "generic_sched" "yes,no"
   (const (if_then_else 
-          (eq_attr "tune" "arm926ejs,arm1020e,arm1026ejs,arm1136js,arm1136jfs,cortexa8,cortexr4")
+          (ior (eq_attr "tune" "arm926ejs,arm1020e,arm1026ejs,arm1136js,arm1136jfs,cortexa8")
+	      (eq_attr "tune_cortexr4" "yes"))
           (const_string "no")
           (const_string "yes"))))
 
 (define_attr "generic_vfp" "yes,no"
   (const (if_then_else
 	  (and (eq_attr "fpu" "vfp")
-	       (eq_attr "tune" "!arm1020e,arm1022e,cortexa8"))
+	       (eq_attr "tune" "!arm1020e,arm1022e,cortexa8")
+	       (eq_attr "tune_cortexr4" "no"))
 	  (const_string "yes")
 	  (const_string "no"))))
 
@@ -353,6 +361,7 @@ (define_attr "generic_vfp" "yes,no"
 (include "arm1136jfs.md")
 (include "cortex-a8.md")
 (include "cortex-r4.md")
+(include "cortex-r4f.md")
 (include "vfp11.md")
 
 
Index: gcc/config/arm/vfp11.md
===================================================================
--- gcc/config/arm/vfp11.md	(revision 139860)
+++ gcc/config/arm/vfp11.md	(working copy)
@@ -51,12 +51,12 @@ (define_cpu_unit "fmstat" "vfp11")
 
 (define_insn_reservation "vfp_ffarith" 4
  (and (eq_attr "generic_vfp" "yes")
-      (eq_attr "type" "ffarith"))
+      (eq_attr "type" "fcpys,ffariths,ffarithd,fcmps,fcmpd"))
  "fmac")
 
 (define_insn_reservation "vfp_farith" 8
  (and (eq_attr "generic_vfp" "yes")
-      (eq_attr "type" "farith,f_cvt,fmuls,fmacs"))
+      (eq_attr "type" "fadds,faddd,fconsts,fconstd,f_cvt,fmuls,fmacs"))
  "fmac")
 
 (define_insn_reservation "vfp_fmul" 9