This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

PATCH: Provide ARM 1026EJ-S pipeline description


This patch adds a pipeline description for the ARM 1026EJ-S core.

Applied on the csl-arm-branch.

--
Mark Mitchell
CodeSourcery, LLC
mark@codesourcery.com

2003-10-28  Mark Mitchell  <mark@codesourcery.com>

	* config/arm/arm.h (processor_type): New enumeration type.
	(CPP_ARCH_DEFAULT_SPEC): Set appropriately for ARM 926EJ-S,
	ARM1026EJ-S, ARM1136J-S, and ARM1136JF-S processor cores.
	(CPP_CPU_ARCH_SPEC): Likewise.
	* config/arm/arm.c (arm_tune): New variable.
	(all_cores): Use cores.def.
	(all_architectures): Add representative processor.
	(arm_override_options): Restructure way in which tuning
	information is deduced.
	* arm.md: Update "insn" and "type" attributes throughout.
	(insn): New attribute.
	(type): Compute "mult" from "insn" attribute.  Add load2,
	load3, load4 alternatives.
	(arm automaton): Move to arm-generic.md.
	* config/arm/arm-cores.def: New file.
	* config/arm/arm-generic.md: Likewise.
	* config/arm/arm1026ejs.md: Likewise.

Index: arm-cores.def
===================================================================
RCS file: arm-cores.def
diff -N arm-cores.def
*** /dev/null	1 Jan 1970 00:00:00 -0000
--- arm-cores.def	28 Oct 2003 07:58:30 -0000
***************
*** 0 ****
--- 1,86 ----
+ /* ARM CPU Cores
+    Copyright (C) 2003 Free Software Foundation, Inc.
+    Written by CodeSourcery, LLC
+ 
+    This file is part of GCC.
+ 
+    GCC is free software; you can redistribute it and/or modify it
+    under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2, or (at your option)
+    any later version.
+ 
+    GCC is distributed in the hope that it will be useful, but
+    WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    General Public License for more details.
+ 
+    You should have received a copy of the GNU General Public License
+    along with GCC; see the file COPYING.  If not, write to the Free
+    Software Foundation, 59 Temple Place - Suite 330, Boston, MA
+    02111-1307, USA.  */
+ 
+ /* Before using #include to read this file, define a macro:
+ 	
+       ARM_CORE(CORE_NAME, FLAGS)
+ 
+    The CORE_NAME is the name of the core, represented as an identifier
+    rather than a string constant.  The FLAGS are the bitwise-or of the
+    traits that apply to that core.
+ 
+    If you update this table, you must update the "tune" attribute in
+    arm.md.  */
+ 
+ ARM_CORE(arm2,		FL_CO_PROC | FL_MODE26)
+ ARM_CORE(arm250,	FL_CO_PROC | FL_MODE26)
+ ARM_CORE(arm3,		FL_CO_PROC | FL_MODE26)
+ ARM_CORE(arm6,		FL_CO_PROC | FL_MODE26 | FL_MODE32)
+ ARM_CORE(arm60,		FL_CO_PROC | FL_MODE26 | FL_MODE32)
+ ARM_CORE(arm600,	FL_CO_PROC | FL_MODE26 | FL_MODE32)
+ ARM_CORE(arm610,	             FL_MODE26 | FL_MODE32)
+ ARM_CORE(arm620,	FL_CO_PROC | FL_MODE26 | FL_MODE32)
+ ARM_CORE(arm7,		FL_CO_PROC | FL_MODE26 | FL_MODE32)
+ /* arm7m doesn't exist on its own, but only with D, (and I), but
+    those don't alter the code, so arm7m is sometimes used.  */
+ ARM_CORE(arm7m,		FL_CO_PROC | FL_MODE26 | FL_MODE32 | FL_FAST_MULT)
+ ARM_CORE(arm7d,		FL_CO_PROC | FL_MODE26 | FL_MODE32)
+ ARM_CORE(arm7dm,	FL_CO_PROC | FL_MODE26 | FL_MODE32 | FL_FAST_MULT)
+ ARM_CORE(arm7di,	FL_CO_PROC | FL_MODE26 | FL_MODE32)
+ ARM_CORE(arm7dmi,	FL_CO_PROC | FL_MODE26 | FL_MODE32 | FL_FAST_MULT)
+ ARM_CORE(arm70,		FL_CO_PROC | FL_MODE26 | FL_MODE32)
+ ARM_CORE(arm700,	FL_CO_PROC | FL_MODE26 | FL_MODE32)
+ ARM_CORE(arm700i,	FL_CO_PROC | FL_MODE26 | FL_MODE32)
+ ARM_CORE(arm710,	             FL_MODE26 | FL_MODE32)
+ ARM_CORE(arm720,	             FL_MODE26 | FL_MODE32)
+ ARM_CORE(arm710c,	             FL_MODE26 | FL_MODE32)
+ ARM_CORE(arm7100,	             FL_MODE26 | FL_MODE32)
+ ARM_CORE(arm7500,	             FL_MODE26 | FL_MODE32)
+ /* Doesn't have an external co-proc, but does have embedded fpa.  */
+ ARM_CORE(arm7500fe,	FL_CO_PROC | FL_MODE26 | FL_MODE32)
+ /* V4 Architecture Processors */
+ ARM_CORE(arm7tdmi,	FL_CO_PROC |             FL_MODE32 | FL_FAST_MULT | FL_ARCH4 | FL_THUMB)
+ ARM_CORE(arm710t,	                         FL_MODE32 | FL_FAST_MULT | FL_ARCH4 | FL_THUMB)
+ ARM_CORE(arm720t,	                         FL_MODE32 | FL_FAST_MULT | FL_ARCH4 | FL_THUMB)
+ ARM_CORE(arm740t,	                         FL_MODE32 | FL_FAST_MULT | FL_ARCH4 | FL_THUMB)
+ ARM_CORE(arm8,	                     FL_MODE26 | FL_MODE32 | FL_FAST_MULT | FL_ARCH4 |            FL_LDSCHED)
+ ARM_CORE(arm810,	             FL_MODE26 | FL_MODE32 | FL_FAST_MULT | FL_ARCH4 |            FL_LDSCHED)
+ ARM_CORE(arm9,	                                 FL_MODE32 | FL_FAST_MULT | FL_ARCH4 | FL_THUMB | FL_LDSCHED)
+ ARM_CORE(arm920,	                         FL_MODE32 | FL_FAST_MULT | FL_ARCH4 |            FL_LDSCHED)
+ ARM_CORE(arm920t,	                         FL_MODE32 | FL_FAST_MULT | FL_ARCH4 | FL_THUMB | FL_LDSCHED)
+ ARM_CORE(arm940t,	                         FL_MODE32 | FL_FAST_MULT | FL_ARCH4 | FL_THUMB | FL_LDSCHED)
+ ARM_CORE(arm9tdmi,	                         FL_MODE32 | FL_FAST_MULT | FL_ARCH4 | FL_THUMB | FL_LDSCHED)
+ ARM_CORE(arm9e,	       	      		         FL_MODE32 | FL_FAST_MULT | FL_ARCH4 |            FL_LDSCHED)
+ ARM_CORE(ep9312,	   			 FL_MODE32 | FL_FAST_MULT | FL_ARCH4 |            FL_LDSCHED |             FL_CIRRUS)
+ ARM_CORE(strongarm,	             FL_MODE26 | FL_MODE32 | FL_FAST_MULT | FL_ARCH4 |            FL_LDSCHED | FL_STRONG)
+ ARM_CORE(strongarm110,               FL_MODE26 | FL_MODE32 | FL_FAST_MULT | FL_ARCH4 |            FL_LDSCHED | FL_STRONG)
+ ARM_CORE(strongarm1100,              FL_MODE26 | FL_MODE32 | FL_FAST_MULT | FL_ARCH4 |            FL_LDSCHED | FL_STRONG)
+ ARM_CORE(strongarm1110,              FL_MODE26 | FL_MODE32 | FL_FAST_MULT | FL_ARCH4 |            FL_LDSCHED | FL_STRONG)
+ /* V5 Architecture Processors */
+ ARM_CORE(arm10tdmi,	                         FL_MODE32 | FL_FAST_MULT | FL_ARCH4 | FL_THUMB | FL_LDSCHED             | FL_ARCH5)
+ ARM_CORE(arm1020t,	                         FL_MODE32 | FL_FAST_MULT | FL_ARCH4 | FL_THUMB | FL_LDSCHED             | FL_ARCH5)
+ ARM_CORE(arm926ejs,                              FL_MODE32 | FL_FAST_MULT | FL_ARCH4 | FL_THUMB                          | FL_ARCH5 | FL_ARCH5E)
+ ARM_CORE(arm1026ejs,                             FL_MODE32 | FL_FAST_MULT | FL_ARCH4 | FL_THUMB                          | FL_ARCH5 | FL_ARCH5E)
+ ARM_CORE(xscale,                                 FL_MODE32 | FL_FAST_MULT | FL_ARCH4 | FL_THUMB | FL_LDSCHED | FL_STRONG | FL_ARCH5 | FL_ARCH5E | FL_XSCALE)
+ ARM_CORE(iwmmxt,                                 FL_MODE32 | FL_FAST_MULT | FL_ARCH4 | FL_THUMB | FL_LDSCHED | FL_STRONG | FL_ARCH5 | FL_ARCH5E | FL_XSCALE | FL_IWMMXT)
+ /* V6 Architecture Processors */
+ ARM_CORE(arm1136js,                              FL_MODE32 | FL_FAST_MULT | FL_ARCH4 | FL_THUMB                          | FL_ARCH5 | FL_ARCH5E | FL_ARCH6J)
+ ARM_CORE(arm1136jfs,                             FL_MODE32 | FL_FAST_MULT | FL_ARCH4 | FL_THUMB                          | FL_ARCH5 | FL_ARCH5E | FL_ARCH6J | FL_VFPV2)
Index: arm-generic.md
===================================================================
RCS file: arm-generic.md
diff -N arm-generic.md
*** /dev/null	1 Jan 1970 00:00:00 -0000
--- arm-generic.md	28 Oct 2003 07:58:30 -0000
***************
*** 0 ****
--- 1,152 ----
+ ;; Generic ARM Pipeline Description
+ ;; Copyright (C) 2003 Free Software Foundation, Inc.
+ ;;
+ ;; This file is part of GCC.
+ ;;
+ ;; GCC is free software; you can redistribute it and/or modify it
+ ;; under the terms of the GNU General Public License as published by
+ ;; the Free Software Foundation; either version 2, or (at your option)
+ ;; any later version.
+ ;;
+ ;; GCC is distributed in the hope that it will be useful, but
+ ;; WITHOUT ANY WARRANTY; without even the implied warranty of
+ ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ ;; General Public License for more details.
+ ;;
+ ;; You should have received a copy of the GNU General Public License
+ ;; along with GCC; see the file COPYING.  If not, write to the Free
+ ;; Software Foundation, 59 Temple Place - Suite 330, Boston, MA
+ ;; 02111-1307, USA.
+ 
+ (define_automaton "arm")
+ 
+ ;; Write buffer
+ ;
+ ; Strictly, we should model a 4-deep write buffer for ARM7xx based chips
+ ;
+ ; The write buffer on some of the arm6 processors is hard to model exactly.
+ ; There is room in the buffer for up to two addresses and up to eight words
+ ; of memory, but the two needn't be split evenly.  When writing the two
+ ; addresses are fully pipelined.  However, a read from memory that is not
+ ; currently in the cache will block until the writes have completed.
+ ; It is normally the case that FCLK and MCLK will be in the ratio 2:1, so
+ ; writes will take 2 FCLK cycles per word, if FCLK and MCLK are asynchronous
+ ; (they aren't allowed to be at present) then there is a startup cost of 1MCLK
+ ; cycle to add as well.
+ (define_cpu_unit "write_buf" "arm")
+ 
+ ;; Write blockage unit
+ ;
+ ; The write_blockage unit models (partially), the fact that reads will stall
+ ; until the write buffer empties.
+ ; The f_mem_r and r_mem_f could also block, but they are to the stack,
+ ; so we don't model them here
+ (define_cpu_unit "write_blockage" "arm")
+ 
+ ;; Core
+ ;
+ (define_cpu_unit "core" "arm")
+ 
+ (define_insn_reservation "r_mem_f_wbuf" 5
+   (and (eq_attr "tune" "!arm1026ejs")
+        (and (eq_attr "model_wbuf" "yes")
+ 	    (eq_attr "type" "r_mem_f")))
+   "core+write_buf*3")
+ 
+ (define_insn_reservation "store_wbuf" 5
+   (and (eq_attr "tune" "!arm1026ejs")
+        (and (eq_attr "model_wbuf" "yes")
+        	    (eq_attr "type" "store1")))
+   "core+write_buf*3+write_blockage*5")
+ 
+ (define_insn_reservation "store2_wbuf" 7
+   (and (eq_attr "tune" "!arm1026ejs")
+        (and (eq_attr "model_wbuf" "yes")
+ 	    (eq_attr "type" "store2")))
+   "core+write_buf*4+write_blockage*7")
+ 
+ (define_insn_reservation "store3_wbuf" 9
+   (and (eq_attr "tune" "!arm1026ejs")
+        (and (eq_attr "model_wbuf" "yes")
+ 	    (eq_attr "type" "store3")))
+   "core+write_buf*5+write_blockage*9")
+ 
+ (define_insn_reservation "store4_wbuf" 11
+   (and (eq_attr "tune" "!arm1026ejs")
+        (and (eq_attr "model_wbuf" "yes")
+             (eq_attr "type" "store4")))
+   "core+write_buf*6+write_blockage*11")
+ 
+ (define_insn_reservation "store2" 3
+   (and (eq_attr "tune" "!arm1026ejs")
+        (and (eq_attr "model_wbuf" "no")
+             (eq_attr "type" "store2")))
+   "core*3")
+ 
+ (define_insn_reservation "store3" 4
+   (and (eq_attr "tune" "!arm1026ejs")
+        (and (eq_attr "model_wbuf" "no")
+             (eq_attr "type" "store3")))
+   "core*4")
+ 
+ (define_insn_reservation "store4" 5
+   (and (eq_attr "tune" "!arm1026ejs")
+        (and (eq_attr "model_wbuf" "no")
+ 	    (eq_attr "type" "store4")))
+   "core*5")
+ 
+ (define_insn_reservation "store_ldsched" 1
+   (and (eq_attr "tune" "!arm1026ejs")
+        (and (eq_attr "ldsched" "yes") 
+ 	    (eq_attr "type" "store1")))
+   "core")
+ 
+ (define_insn_reservation "load_ldsched_xscale" 3
+   (and (eq_attr "tune" "!arm1026ejs")
+        (and (eq_attr "ldsched" "yes") 
+ 	    (and (eq_attr "type" "load")
+ 	         (eq_attr "is_xscale" "yes"))))
+   "core")
+ 
+ (define_insn_reservation "load_ldsched" 2
+   (and (eq_attr "tune" "!arm1026ejs")
+        (and (eq_attr "ldsched" "yes") 
+ 	    (and (eq_attr "type" "load")
+ 	         (eq_attr "is_xscale" "no"))))
+   "core")
+ 
+ (define_insn_reservation "load_or_store" 2
+   (and (eq_attr "tune" "!arm1026ejs")
+        (and (eq_attr "ldsched" "!yes") 
+ 	    (eq_attr "type" "load,load2,load3,load4,store1")))
+   "core*2")
+ 
+ (define_insn_reservation "mult" 16
+   (and (eq_attr "tune" "!arm1026ejs")
+        (and (eq_attr "ldsched" "no") (eq_attr "type" "mult")))
+   "core*16")
+ 
+ (define_insn_reservation "mult_ldsched_strongarm" 3
+   (and (eq_attr "tune" "!arm1026ejs")
+        (and (eq_attr "ldsched" "yes") 
+ 	    (and (eq_attr "is_strongarm" "yes")
+ 	         (eq_attr "type" "mult"))))
+   "core*2")
+ 
+ (define_insn_reservation "mult_ldsched" 4
+   (and (eq_attr "tune" "!arm1026ejs")
+        (and (eq_attr "ldsched" "yes") 
+ 	    (and (eq_attr "is_strongarm" "no")
+ 	         (eq_attr "type" "mult"))))
+   "core*4")
+ 
+ (define_insn_reservation "multi_cycle" 32
+   (and (eq_attr "tune" "!arm1026ejs")
+        (and (eq_attr "core_cycles" "multi")
+             (eq_attr "type" "!mult,load,load2,load3,load4,store1,store2,store3,store4")))
+   "core*32")
+ 
+ (define_insn_reservation "single_cycle" 1
+   (and (eq_attr "tune" "!arm1026ejs")
+        (eq_attr "core_cycles" "single"))
+   "core")
Index: arm.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/arm/arm.c,v
retrieving revision 1.303
diff -c -5 -p -r1.303 arm.c
*** arm.c	24 Oct 2003 09:25:30 -0000	1.303
--- arm.c	28 Oct 2003 07:58:31 -0000
*************** int making_const_table;
*** 249,258 ****
--- 249,261 ----
  
  /* Define the information needed to generate branch insns.  This is
     stored from the compare operation.  */
  rtx arm_compare_op0, arm_compare_op1;
  
+ /* The processor for which instructions should be scheduled.  */
+ enum processor_type arm_tune = arm_none;
+ 
  /* What type of floating point are we tuning for?  */
  enum fputype arm_fpu_tune;
  
  /* What type of floating point instructions are available?  */
  enum fputype arm_fpu_arch;
*************** static const char * const arm_condition_
*** 372,466 ****
  /* Initialization code.  */
  
  struct processors
  {
    const char *const name;
    const unsigned long flags;
  };
  
  /* Not all of these give usefully different compilation alternatives,
     but there is no simple way of generalizing them.  */
  static const struct processors all_cores[] =
  {
    /* ARM Cores */
!   
!   {"arm2",	FL_CO_PROC | FL_MODE26 },
!   {"arm250",	FL_CO_PROC | FL_MODE26 },
!   {"arm3",	FL_CO_PROC | FL_MODE26 },
!   {"arm6",	FL_CO_PROC | FL_MODE26 | FL_MODE32 },
!   {"arm60",	FL_CO_PROC | FL_MODE26 | FL_MODE32 },
!   {"arm600",	FL_CO_PROC | FL_MODE26 | FL_MODE32 },
!   {"arm610",	             FL_MODE26 | FL_MODE32 },
!   {"arm620",	FL_CO_PROC | FL_MODE26 | FL_MODE32 },
!   {"arm7",	FL_CO_PROC | FL_MODE26 | FL_MODE32 },
!   /* arm7m doesn't exist on its own, but only with D, (and I), but
!      those don't alter the code, so arm7m is sometimes used.  */
!   {"arm7m",	FL_CO_PROC | FL_MODE26 | FL_MODE32 | FL_FAST_MULT },
!   {"arm7d",	FL_CO_PROC | FL_MODE26 | FL_MODE32 },
!   {"arm7dm",	FL_CO_PROC | FL_MODE26 | FL_MODE32 | FL_FAST_MULT },
!   {"arm7di",	FL_CO_PROC | FL_MODE26 | FL_MODE32 },
!   {"arm7dmi",	FL_CO_PROC | FL_MODE26 | FL_MODE32 | FL_FAST_MULT },
!   {"arm70",	FL_CO_PROC | FL_MODE26 | FL_MODE32 },
!   {"arm700",	FL_CO_PROC | FL_MODE26 | FL_MODE32 },
!   {"arm700i",	FL_CO_PROC | FL_MODE26 | FL_MODE32 },
!   {"arm710",	             FL_MODE26 | FL_MODE32 },
!   {"arm720",	             FL_MODE26 | FL_MODE32 },
!   {"arm710c",	             FL_MODE26 | FL_MODE32 },
!   {"arm7100",	             FL_MODE26 | FL_MODE32 },
!   {"arm7500",	             FL_MODE26 | FL_MODE32 },
!   /* Doesn't have an external co-proc, but does have embedded fpa.  */
!   {"arm7500fe",	FL_CO_PROC | FL_MODE26 | FL_MODE32 },
!   /* V4 Architecture Processors */
!   {"arm7tdmi",	FL_CO_PROC |             FL_MODE32 | FL_FAST_MULT | FL_ARCH4 | FL_THUMB },
!   {"arm710t",	                         FL_MODE32 | FL_FAST_MULT | FL_ARCH4 | FL_THUMB },
!   {"arm720t",	                         FL_MODE32 | FL_FAST_MULT | FL_ARCH4 | FL_THUMB },
!   {"arm740t",	                         FL_MODE32 | FL_FAST_MULT | FL_ARCH4 | FL_THUMB },
!   {"arm8",	             FL_MODE26 | FL_MODE32 | FL_FAST_MULT | FL_ARCH4 |            FL_LDSCHED },
!   {"arm810",	             FL_MODE26 | FL_MODE32 | FL_FAST_MULT | FL_ARCH4 |            FL_LDSCHED },
!   {"arm9",	                         FL_MODE32 | FL_FAST_MULT | FL_ARCH4 | FL_THUMB | FL_LDSCHED },
!   {"arm920",	                         FL_MODE32 | FL_FAST_MULT | FL_ARCH4 |            FL_LDSCHED },
!   {"arm920t",	                         FL_MODE32 | FL_FAST_MULT | FL_ARCH4 | FL_THUMB | FL_LDSCHED },
!   {"arm940t",	                         FL_MODE32 | FL_FAST_MULT | FL_ARCH4 | FL_THUMB | FL_LDSCHED },
!   {"arm9tdmi",	                         FL_MODE32 | FL_FAST_MULT | FL_ARCH4 | FL_THUMB | FL_LDSCHED },
!   {"arm9e",	       	      		 FL_MODE32 | FL_FAST_MULT | FL_ARCH4 |            FL_LDSCHED },
!   {"ep9312",	   			 FL_MODE32 | FL_FAST_MULT | FL_ARCH4 |            FL_LDSCHED |             FL_CIRRUS },
!   {"strongarm",	             FL_MODE26 | FL_MODE32 | FL_FAST_MULT | FL_ARCH4 |            FL_LDSCHED | FL_STRONG },
!   {"strongarm110",           FL_MODE26 | FL_MODE32 | FL_FAST_MULT | FL_ARCH4 |            FL_LDSCHED | FL_STRONG },
!   {"strongarm1100",          FL_MODE26 | FL_MODE32 | FL_FAST_MULT | FL_ARCH4 |            FL_LDSCHED | FL_STRONG },
!   {"strongarm1110",          FL_MODE26 | FL_MODE32 | FL_FAST_MULT | FL_ARCH4 |            FL_LDSCHED | FL_STRONG },
!   /* V5 Architecture Processors */
!   {"arm10tdmi",	                         FL_MODE32 | FL_FAST_MULT | FL_ARCH4 | FL_THUMB | FL_LDSCHED             | FL_ARCH5 },
!   {"arm1020t",	                         FL_MODE32 | FL_FAST_MULT | FL_ARCH4 | FL_THUMB | FL_LDSCHED             | FL_ARCH5 },
!   {"arm926ejs",                          FL_MODE32 | FL_FAST_MULT | FL_ARCH4 | FL_THUMB                          | FL_ARCH5 | FL_ARCH5E },
!   {"arm1026ejs",                         FL_MODE32 | FL_FAST_MULT | FL_ARCH4 | FL_THUMB                          | FL_ARCH5 | FL_ARCH5E },
!   {"xscale",                             FL_MODE32 | FL_FAST_MULT | FL_ARCH4 | FL_THUMB | FL_LDSCHED | FL_STRONG | FL_ARCH5 | FL_ARCH5E | FL_XSCALE },
!   {"iwmmxt",                             FL_MODE32 | FL_FAST_MULT | FL_ARCH4 | FL_THUMB | FL_LDSCHED | FL_STRONG | FL_ARCH5 | FL_ARCH5E | FL_XSCALE | FL_IWMMXT },
!   /* V6 Architecture Processors */
!   {"arm1136js",                          FL_MODE32 | FL_FAST_MULT | FL_ARCH4 | FL_THUMB                          | FL_ARCH5 | FL_ARCH5E | FL_ARCH6J },
!   {"arm1136jfs",                         FL_MODE32 | FL_FAST_MULT | FL_ARCH4 | FL_THUMB                          | FL_ARCH5 | FL_ARCH5E | FL_ARCH6J | FL_VFPV2 },
!   {NULL, 0}
  };
  
  static const struct processors all_architectures[] =
  {
    /* ARM Architectures */
    
!   { "armv2",     FL_CO_PROC | FL_MODE26 },
!   { "armv2a",    FL_CO_PROC | FL_MODE26 },
!   { "armv3",     FL_CO_PROC | FL_MODE26 | FL_MODE32 },
!   { "armv3m",    FL_CO_PROC | FL_MODE26 | FL_MODE32 | FL_FAST_MULT },
!   { "armv4",     FL_CO_PROC | FL_MODE26 | FL_MODE32 | FL_FAST_MULT | FL_ARCH4 },
    /* Strictly, FL_MODE26 is a permitted option for v4t, but there are no
       implementations that support it, so we will leave it out for now.  */
!   { "armv4t",    FL_CO_PROC |             FL_MODE32 | FL_FAST_MULT | FL_ARCH4 | FL_THUMB },
!   { "armv5",     FL_CO_PROC |             FL_MODE32 | FL_FAST_MULT | FL_ARCH4 | FL_THUMB | FL_ARCH5 },
!   { "armv5t",    FL_CO_PROC |             FL_MODE32 | FL_FAST_MULT | FL_ARCH4 | FL_THUMB | FL_ARCH5 },
!   { "armv5te",   FL_CO_PROC |             FL_MODE32 | FL_FAST_MULT | FL_ARCH4 | FL_THUMB | FL_ARCH5 | FL_ARCH5E },
!   { "armv6j",    FL_CO_PROC |             FL_MODE32 | FL_FAST_MULT | FL_ARCH4 | FL_THUMB | FL_ARCH5 | FL_ARCH5E | FL_ARCH6J },
!   { "ep9312",				  FL_MODE32 | FL_FAST_MULT | FL_ARCH4 | FL_LDSCHED | FL_CIRRUS },
!   {"iwmmxt",                             FL_MODE32 | FL_FAST_MULT | FL_ARCH4 | FL_THUMB | FL_LDSCHED | FL_STRONG | FL_ARCH5 | FL_ARCH5E | FL_XSCALE | FL_IWMMXT },
!   { NULL, 0 }
  };
  
  /* This is a magic structure.  The 'string' field is magically filled in
     with a pointer to the value specified by the user on the command line
     assuming that the user has specified such a value.  */
--- 375,419 ----
  /* Initialization code.  */
  
  struct processors
  {
    const char *const name;
+   enum processor_type core;
    const unsigned long flags;
  };
  
  /* Not all of these give usefully different compilation alternatives,
     but there is no simple way of generalizing them.  */
  static const struct processors all_cores[] =
  {
    /* ARM Cores */
! #define ARM_CORE(NAME, FLAGS) \
!   {#NAME, arm_none, FLAGS},
! #include "arm-cores.def"
! #undef ARM_CORE
!   {NULL, arm_none, 0}
  };
  
  static const struct processors all_architectures[] =
  {
    /* ARM Architectures */
    
!   { "armv2",     arm2,       FL_CO_PROC | FL_MODE26 },
!   { "armv2a",    arm2,       FL_CO_PROC | FL_MODE26 },
!   { "armv3",     arm6,       FL_CO_PROC | FL_MODE26 | FL_MODE32 },
!   { "armv3m",    arm7m,      FL_CO_PROC | FL_MODE26 | FL_MODE32 | FL_FAST_MULT },
!   { "armv4",     arm7tdmi,   FL_CO_PROC | FL_MODE26 | FL_MODE32 | FL_FAST_MULT | FL_ARCH4 },
    /* Strictly, FL_MODE26 is a permitted option for v4t, but there are no
       implementations that support it, so we will leave it out for now.  */
!   { "armv4t",    arm7tdmi,   FL_CO_PROC |             FL_MODE32 | FL_FAST_MULT | FL_ARCH4 | FL_THUMB },
!   { "armv5",     arm10tdmi,  FL_CO_PROC |             FL_MODE32 | FL_FAST_MULT | FL_ARCH4 | FL_THUMB | FL_ARCH5 },
!   { "armv5t",    arm10tdmi,  FL_CO_PROC |             FL_MODE32 | FL_FAST_MULT | FL_ARCH4 | FL_THUMB | FL_ARCH5 },
!   { "armv5te",   arm1026ejs, FL_CO_PROC |             FL_MODE32 | FL_FAST_MULT | FL_ARCH4 | FL_THUMB | FL_ARCH5 | FL_ARCH5E },
!   { "armv6j",    arm1136js,  FL_CO_PROC |             FL_MODE32 | FL_FAST_MULT | FL_ARCH4 | FL_THUMB | FL_ARCH5 | FL_ARCH5E | FL_ARCH6J },
!   { "ep9312",	 ep9312, 			      FL_MODE32 | FL_FAST_MULT | FL_ARCH4 | FL_LDSCHED | FL_CIRRUS },
!   {"iwmmxt",     iwmmxt,                              FL_MODE32 | FL_FAST_MULT | FL_ARCH4 | FL_THUMB | FL_LDSCHED | FL_STRONG | FL_ARCH5 | FL_ARCH5E | FL_XSCALE | FL_IWMMXT },
!   { NULL, arm_none, 0 }
  };
  
  /* This is a magic structure.  The 'string' field is magically filled in
     with a pointer to the value specified by the user on the command line
     assuming that the user has specified such a value.  */
*************** bit_count (unsigned long value)
*** 492,502 ****
     This has now turned into a maze.  */
  void
  arm_override_options (void)
  {
    unsigned i;
!   
    /* Set up the flags based on the cpu/architecture selected by the user.  */
    for (i = ARRAY_SIZE (arm_select); i--;)
      {
        struct arm_cpu_select * ptr = arm_select + i;
        
--- 445,455 ----
     This has now turned into a maze.  */
  void
  arm_override_options (void)
  {
    unsigned i;
! 
    /* Set up the flags based on the cpu/architecture selected by the user.  */
    for (i = ARRAY_SIZE (arm_select); i--;)
      {
        struct arm_cpu_select * ptr = arm_select + i;
        
*************** arm_override_options (void)
*** 505,517 ****
  	  const struct processors * sel;
  
            for (sel = ptr->processors; sel->name != NULL; sel++)
              if (streq (ptr->string, sel->name))
                {
! 		if (i == 2)
! 		  tune_flags = sel->flags;
! 		else
  		  {
  		    /* If we have been given an architecture and a processor
  		       make sure that they are compatible.  We only generate
  		       a warning though, and we prefer the CPU over the
  		       architecture.  */
--- 458,480 ----
  	  const struct processors * sel;
  
            for (sel = ptr->processors; sel->name != NULL; sel++)
              if (streq (ptr->string, sel->name))
                {
! 		/* Determine the processor core for which we should
! 		   tune code-generation.  */
! 		if (/* -mcpu= is a sensible default.  */
! 		    i == 0
! 		    /* If -march= is used, and -mcpu= has not been used,
! 		       assume that we should tune for a representative
! 		       CPU from that architecture.  */
! 		    || i == 1
! 		    /* -mtune= overrides -mcpu= and -march=.  */
! 		    || i == 2)
! 		  arm_tune = (enum processor_type) (sel - ptr->processors);
! 
! 		if (i != 2)
  		  {
  		    /* If we have been given an architecture and a processor
  		       make sure that they are compatible.  We only generate
  		       a warning though, and we prefer the CPU over the
  		       architecture.  */
*************** arm_override_options (void)
*** 581,591 ****
        
        if (sel->name == NULL)
  	abort ();
  
        insn_flags = sel->flags;
!       
        /* Now check to see if the user has specified some command line
  	 switch that require certain abilities from the cpu.  */
        sought = 0;
        
        if (TARGET_INTERWORK || TARGET_THUMB)
--- 544,554 ----
        
        if (sel->name == NULL)
  	abort ();
  
        insn_flags = sel->flags;
! 
        /* Now check to see if the user has specified some command line
  	 switch that require certain abilities from the cpu.  */
        sought = 0;
        
        if (TARGET_INTERWORK || TARGET_THUMB)
*************** arm_override_options (void)
*** 651,666 ****
  		sel = best_fit;
  	    }
  
  	  insn_flags = sel->flags;
  	}
      }
    
!   /* If tuning has not been specified, tune for whichever processor or
!      architecture has been selected.  */
!   if (tune_flags == 0)
!     tune_flags = insn_flags;
  
    /* Make sure that the processor choice does not conflict with any of the
       other command line choices.  */
    if (TARGET_APCS_32 && !(insn_flags & FL_MODE32))
      {
--- 614,633 ----
  		sel = best_fit;
  	    }
  
  	  insn_flags = sel->flags;
  	}
+       if (arm_tune == arm_none)
+ 	arm_tune = (enum processor_type) (sel - all_cores);
      }
    
!   /* The processor for which we should tune should now have been
!      chosen.  */
!   if (arm_tune == arm_none)
!     abort ();
!   
!   tune_flags = all_cores[(int)arm_tune].flags;
  
    /* Make sure that the processor choice does not conflict with any of the
       other command line choices.  */
    if (TARGET_APCS_32 && !(insn_flags & FL_MODE32))
      {
Index: arm.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/arm/arm.h,v
retrieving revision 1.210
diff -c -5 -p -r1.210 arm.h
*** arm.h	24 Oct 2003 09:25:30 -0000	1.210
--- arm.h	28 Oct 2003 07:58:31 -0000
***************
*** 103,112 ****
--- 103,126 ----
  #define TARGET_CPU_arm1136j_s   0x2000
  #define TARGET_CPU_arm1136jf_s  0x4000
  /* Configure didn't specify.  */
  #define TARGET_CPU_generic	0x8000
  
+ /* The various ARM cores.  */
+ enum processor_type
+ {
+ #define ARM_CORE(NAME, FLAGS) \
+   NAME,
+ #include "arm-cores.def"
+ #undef ARM_CORE
+   /* Used to indicate that no processor has been specified.  */
+   arm_none
+ };
+ 
+ /* The processor for which instructions should be scheduled.  */
+ extern enum processor_type arm_tune;
+ 
  typedef enum arm_cond_code
  {
    ARM_EQ = 0, ARM_NE, ARM_CS, ARM_CC, ARM_MI, ARM_PL, ARM_VS, ARM_VC,
    ARM_HI, ARM_LS, ARM_GE, ARM_LT, ARM_GT, ARM_LE, ARM_AL, ARM_NV
  }
*************** extern GTY(()) rtx aof_pic_label;
*** 179,197 ****
--- 193,221 ----
  #endif
  #else
  #if TARGET_CPU_DEFAULT == TARGET_CPU_iwmmxt
  #define CPP_ARCH_DEFAULT_SPEC "-D__ARM_ARCH_5TE__ -D__XSCALE__ -D__IWMMXT__"
  #else
+ #if (TARGET_CPU_DEFAULT == TARGET_CPU_arm926ej_s || \
+      TARGET_CPU_DEFAULT == TARGET_CPU_arm1026ej_s)
+ #define CPP_ARCH_DEFAULT_SPEC "-D__ARM_ARCH_5TEJ__"
+ #else
+ #if (TARGET_CPU_DEFAULT == TARGET_CPU_arm1136j_s || \
+      TARGET_CPU_DEFAULT == TARGET_CPU_arm1136jf_s)
+ #define CPP_ARCH_DEFAULT_SPEC "-D__ARM_ARCH_6J__"
+ #else
  #error Unrecognized value in TARGET_CPU_DEFAULT.
  #endif
  #endif
  #endif
  #endif
  #endif
  #endif
  #endif
  #endif
+ #endif
+ #endif
  
  #undef  CPP_SPEC
  #define CPP_SPEC "%(cpp_cpu_arch) %(subtarget_cpp_spec)			\
  %{mapcs-32:%{mapcs-26:							\
  	%e-mapcs-26 and -mapcs-32 may not be used together}}		\
*************** extern GTY(()) rtx aof_pic_label;
*** 223,233 ****
--- 247,261 ----
  %{march=arm8:-D__ARM_ARCH_4__} \
  %{march=arm810:-D__ARM_ARCH_4__} \
  %{march=arm9:-D__ARM_ARCH_4T__} \
  %{march=arm920:-D__ARM_ARCH_4__} \
  %{march=arm920t:-D__ARM_ARCH_4T__} \
+ %{march=arm926ejs:-D__ARM_ARCH_5TEJ__} \
  %{march=arm9tdmi:-D__ARM_ARCH_4T__} \
+ %{march=arm1026ejs:-D__ARM_ARCH_5TEJ__} \
+ %{march=arm1136js:-D__ARM_ARCH_6J__} \
+ %{march=arm1136jfs:-D__ARM_ARCH_6J__} \
  %{march=strongarm:-D__ARM_ARCH_4__} \
  %{march=strongarm110:-D__ARM_ARCH_4__} \
  %{march=strongarm1100:-D__ARM_ARCH_4__} \
  %{march=xscale:-D__ARM_ARCH_5TE__} \
  %{march=xscale:-D__XSCALE__} \
*************** extern GTY(()) rtx aof_pic_label;
*** 241,250 ****
--- 269,279 ----
  %{march=armv4t:-D__ARM_ARCH_4T__} \
  %{march=armv5:-D__ARM_ARCH_5__} \
  %{march=armv5t:-D__ARM_ARCH_5T__} \
  %{march=armv5e:-D__ARM_ARCH_5E__} \
  %{march=armv5te:-D__ARM_ARCH_5TE__} \
+ %{march=armv6j:-D__ARM_ARCH_6J__} \
  %{!march=*: \
   %{mcpu=arm2:-D__ARM_ARCH_2__} \
   %{mcpu=arm250:-D__ARM_ARCH_2__} \
   %{mcpu=arm3:-D__ARM_ARCH_2__} \
   %{mcpu=arm6:-D__ARM_ARCH_3__} \
*************** extern GTY(()) rtx aof_pic_label;
*** 264,274 ****
--- 293,307 ----
   %{mcpu=arm8:-D__ARM_ARCH_4__} \
   %{mcpu=arm810:-D__ARM_ARCH_4__} \
   %{mcpu=arm9:-D__ARM_ARCH_4T__} \
   %{mcpu=arm920:-D__ARM_ARCH_4__} \
   %{mcpu=arm920t:-D__ARM_ARCH_4T__} \
+  %{mcpu=arm926ejs:-D__ARM_ARCH_5TEJ__} \
   %{mcpu=arm9tdmi:-D__ARM_ARCH_4T__} \
+  %{mcpu=arm1026ejs:-D__ARM_ARCH_5TEJ__} \
+  %{mcpu=arm1136js:-D__ARM_ARCH_6J__} \
+  %{mcpu=arm1136jfs:-D__ARM_ARCH_6J__} \
   %{mcpu=strongarm:-D__ARM_ARCH_4__} \
   %{mcpu=strongarm110:-D__ARM_ARCH_4__} \
   %{mcpu=strongarm1100:-D__ARM_ARCH_4__} \
   %{mcpu=xscale:-D__ARM_ARCH_5TE__} \
   %{mcpu=xscale:-D__XSCALE__} \
Index: arm.md
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/arm/arm.md,v
retrieving revision 1.145
diff -c -5 -p -r1.145 arm.md
*** arm.md	24 Oct 2003 09:25:30 -0000	1.145
--- arm.md	28 Oct 2003 07:58:31 -0000
***************
*** 165,174 ****
--- 165,182 ----
  (define_asm_attributes
   [(set_attr "conds" "clob")
    (set_attr "length" "4")
    (set_attr "pool_range" "250")])
  
+ ;; The instruction used to implement a particular pattern.  This
+ ;; information is used by pipeline descriptions to provide accurate
+ ;; scheduling information.
+ 
+ (define_attr "insn"
+         "smulxy,smlaxy,smlalxy,smulwy,smlawx,mul,muls,mla,mlas,umull,umulls,umlal,umlals,smull,smulls,smlal,smlals,other"
+         (const_string "other"))
+ 
  ; TYPE attribute is used to detect floating point instructions which, if
  ; running on a co-processor can run in parallel with other, basic instructions
  ; If write-buffer scheduling is enabled then it can also be used in the
  ; scheduling of writes.
  
***************
*** 190,211 ****
  ; f_mem_r	a transfer of a floating point register to a real reg via mem
  ; r_mem_f	the reverse of f_mem_r
  ; f_2_r		fast transfer float to arm (no memory needed)
  ; r_2_f		fast transfer arm to float
  ; call		a subroutine call
! ; load		any load from memory
! ; store1	store 1 word to memory from arm registers
  ; store2	store 2 words
  ; store3	store 3 words
! ; store4	store 4 words
  ;  Additions for Cirrus Maverick co-processor:
  ; mav_farith	Floating point arithmetic (4 cycle)
  ; mav_dmult	Double multiplies (7 cycle)
  ;
  (define_attr "type"
! 	"normal,mult,block,float,fdivx,fdivd,fdivs,fmul,ffmul,farith,ffarith,float_em,f_load,f_store,f_mem_r,r_mem_f,f_2_r,r_2_f,call,load,store1,store2,store3,store4,mav_farith,mav_dmult" 
! 	(const_string "normal"))
  
  ; Load scheduling, set from the arm_ld_sched variable
  ; initialized by arm_override_options() 
  (define_attr "ldsched" "no,yes" (const (symbol_ref "arm_ld_sched")))
  
--- 198,225 ----
  ; f_mem_r	a transfer of a floating point register to a real reg via mem
  ; r_mem_f	the reverse of f_mem_r
  ; f_2_r		fast transfer float to arm (no memory needed)
  ; r_2_f		fast transfer arm to float
  ; call		a subroutine call
! ; load		load 1 word from memory to arm registers
! ; load2         load 2 words from memory to arm registers
! ; load3         load 3 words from memory to arm registers
! ; load4         load 4 words from memory to arm registers
! ; store1	store 1 word to memory from arm registers
  ; store2	store 2 words
  ; store3	store 3 words
! ; store4	store 4 (or more) words
  ;  Additions for Cirrus Maverick co-processor:
  ; mav_farith	Floating point arithmetic (4 cycle)
  ; mav_dmult	Double multiplies (7 cycle)
  ;
  (define_attr "type"
! 	"normal,mult,block,float,fdivx,fdivd,fdivs,fmul,ffmul,farith,ffarith,float_em,f_load,f_store,f_mem_r,r_mem_f,f_2_r,r_2_f,branch,call,load,load2,load3,load4,store1,store2,store3,store4,mav_farith,mav_dmult" 
! 	(if_then_else 
! 	 (eq_attr "insn" "smulxy,smlaxy,smlalxy,smulwy,smlawx,mul,muls,mla,mlas,umull,umulls,umlal,umlals,smull,smulls,smlal,smlals")
! 	 (const_string "mult")
! 	 (const_string "normal")))
  
  ; Load scheduling, set from the arm_ld_sched variable
  ; initialized by arm_override_options() 
  (define_attr "ldsched" "no,yes" (const (symbol_ref "arm_ld_sched")))
  
***************
*** 265,383 ****
  
  ;; FAR_JUMP is "yes" if a BL instruction is used to generate a branch to a
  ;; distant label.  Only applicable to Thumb code.
  (define_attr "far_jump" "yes,no" (const_string "no"))
  
! (define_automaton "arm")
! 
! ;; Write buffer
! ;
! ; Strictly, we should model a 4-deep write buffer for ARM7xx based chips
! ;
! ; The write buffer on some of the arm6 processors is hard to model exactly.
! ; There is room in the buffer for up to two addresses and up to eight words
! ; of memory, but the two needn't be split evenly.  When writing the two
! ; addresses are fully pipelined.  However, a read from memory that is not
! ; currently in the cache will block until the writes have completed.
! ; It is normally the case that FCLK and MCLK will be in the ratio 2:1, so
! ; writes will take 2 FCLK cycles per word, if FCLK and MCLK are asynchronous
! ; (they aren't allowed to be at present) then there is a startup cost of 1MCLK
! ; cycle to add as well.
! (define_cpu_unit "write_buf" "arm")
! 
! ;; Write blockage unit
! ;
! ; The write_blockage unit models (partially), the fact that reads will stall
! ; until the write buffer empties.
! ; The f_mem_r and r_mem_f could also block, but they are to the stack,
! ; so we don't model them here
! (define_cpu_unit "write_blockage" "arm")
  
! ;; Core
! ;
! (define_cpu_unit "core" "arm")
  
! (define_insn_reservation "r_mem_f_wbuf" 5
!   (and (eq_attr "model_wbuf" "yes")
!        (eq_attr "type" "r_mem_f"))
!   "core+write_buf*3")
! 
! (define_insn_reservation "store1_wbuf" 5
!   (and (eq_attr "model_wbuf" "yes")
!        (eq_attr "type" "store1"))
!   "core+write_buf*3+write_blockage*5")
! 
! (define_insn_reservation "store2_wbuf" 7
!   (and (eq_attr "model_wbuf" "yes")
!        (eq_attr "type" "store2"))
!   "core+write_buf*4+write_blockage*7")
! 
! (define_insn_reservation "store3_wbuf" 9
!   (and (eq_attr "model_wbuf" "yes")
!        (eq_attr "type" "store3"))
!   "core+write_buf*5+write_blockage*9")
! 
! (define_insn_reservation "store4_wbuf" 11
!   (and (eq_attr "model_wbuf" "yes")
!        (eq_attr "type" "store4"))
!   "core+write_buf*6+write_blockage*11")
! 
! (define_insn_reservation "store2" 3
!   (and (eq_attr "model_wbuf" "no")
!        (eq_attr "type" "store2"))
!   "core*3")
! 
! (define_insn_reservation "store3" 4
!   (and (eq_attr "model_wbuf" "no")
!        (eq_attr "type" "store3"))
!   "core*4")
! 
! (define_insn_reservation "store4" 5
!   (and (eq_attr "model_wbuf" "no")
!        (eq_attr "type" "store4"))
!   "core*5")
! 
! (define_insn_reservation "store1_ldsched" 1
!   (and (eq_attr "ldsched" "yes") (eq_attr "type" "store1"))
!   "core")
! 
! (define_insn_reservation "load_ldsched_xscale" 3
!   (and (and (eq_attr "ldsched" "yes") (eq_attr "type" "load"))
!        (eq_attr "is_xscale" "yes"))
!   "core")
! 
! (define_insn_reservation "load_ldsched" 2
!   (and (and (eq_attr "ldsched" "yes") (eq_attr "type" "load"))
!        (eq_attr "is_xscale" "no"))
!   "core")
! 
! (define_insn_reservation "load_or_store" 2
!   (and (eq_attr "ldsched" "!yes") (eq_attr "type" "load,store1"))
!   "core*2")
! 
! (define_insn_reservation "mult" 16
!   (and (eq_attr "ldsched" "no") (eq_attr "type" "mult"))
!   "core*16")
! 
! (define_insn_reservation "mult_ldsched_strongarm" 3
!   (and (and (eq_attr "ldsched" "yes") (eq_attr "is_strongarm" "yes"))
!        (eq_attr "type" "mult"))
!   "core*2")
! 
! (define_insn_reservation "mult_ldsched" 4
!   (and (and (eq_attr "ldsched" "yes") (eq_attr "is_strongarm" "no"))
!        (eq_attr "type" "mult"))
!   "core*4")
! 
! (define_insn_reservation "multi_cycle" 32
!   (and (eq_attr "core_cycles" "multi")
!        (eq_attr "type" "!mult,load,store1,store2,store3,store4"))
!   "core*32")
! 
! (define_insn_reservation "single_cycle" 1
!   (eq_attr "core_cycles" "single")
!   "core")
  
  
  ;;---------------------------------------------------------------------------
  ;; Insn patterns
  ;;
--- 279,299 ----
  
  ;; FAR_JUMP is "yes" if a BL instruction is used to generate a branch to a
  ;; distant label.  Only applicable to Thumb code.
  (define_attr "far_jump" "yes,no" (const_string "no"))
  
! ;;---------------------------------------------------------------------------
! ;; Pipeline descriptions
  
! ;; Processor type.  This attribute must exactly match the table in 
! ;; arm-cores.def.
! (define_attr "tune" 
! 	     "arm2,arm250,arm3,arm6,arm60,arm600,arm610,arm620,arm7,arm7m,arm7d,arm7dm,arm7di,arm7dmi,arm70,arm700,arm700i,arm710,arm720,arm710c,arm7100,arm7500,arm7500fe,arm7tdmi,arm710t,arm720t,arm740t,arm8,arm810,arm9,arm920,arm920t,arm940t,arm9tdmi,arm9e,ep9312,strongarm,strongarm110,strongarm1100,strongarm1110,arm10tdmi,arm1020t,arm926ejs,arm1026ejs,xscale,iwmmxt,arm1136js,arm1136jfs"
! 	     (const (symbol_ref "arm_tune")))
  
! (include "arm-generic.md")
! (include "arm1026ejs.md")
  
  
  ;;---------------------------------------------------------------------------
  ;; Insn patterns
  ;;
***************
*** 1117,1127 ****
    [(set (match_operand:SI          0 "s_register_operand" "=&r,&r")
  	(mult:SI (match_operand:SI 2 "s_register_operand" "r,r")
  		 (match_operand:SI 1 "s_register_operand" "%?r,0")))]
    "TARGET_ARM"
    "mul%?\\t%0, %2, %1"
!   [(set_attr "type" "mult")
     (set_attr "predicable" "yes")]
  )
  
  ; Unfortunately with the Thumb the '&'/'0' trick can fails when operands 
  ; 1 and 2; are the same, because reload will make operand 0 match 
--- 1033,1043 ----
    [(set (match_operand:SI          0 "s_register_operand" "=&r,&r")
  	(mult:SI (match_operand:SI 2 "s_register_operand" "r,r")
  		 (match_operand:SI 1 "s_register_operand" "%?r,0")))]
    "TARGET_ARM"
    "mul%?\\t%0, %2, %1"
!   [(set_attr "insn" "mul")
     (set_attr "predicable" "yes")]
  )
  
  ; Unfortunately with the Thumb the '&'/'0' trick can fails when operands 
  ; 1 and 2; are the same, because reload will make operand 0 match 
***************
*** 1138,1148 ****
      return \"mov\\t%0, %1\;mul\\t%0, %0, %2\";
    else
      return \"mul\\t%0, %0, %2\";
    "
    [(set_attr "length" "4,4,2")
!    (set_attr "type" "mult")]
  )
  
  (define_insn "*mulsi3_compare0"
    [(set (reg:CC_NOOV CC_REGNUM)
  	(compare:CC_NOOV (mult:SI
--- 1054,1064 ----
      return \"mov\\t%0, %1\;mul\\t%0, %0, %2\";
    else
      return \"mul\\t%0, %0, %2\";
    "
    [(set_attr "length" "4,4,2")
!    (set_attr "insn" "mul")]
  )
  
  (define_insn "*mulsi3_compare0"
    [(set (reg:CC_NOOV CC_REGNUM)
  	(compare:CC_NOOV (mult:SI
***************
*** 1152,1162 ****
     (set (match_operand:SI 0 "s_register_operand" "=&r,&r")
  	(mult:SI (match_dup 2) (match_dup 1)))]
    "TARGET_ARM && !arm_arch_xscale"
    "mul%?s\\t%0, %2, %1"
    [(set_attr "conds" "set")
!    (set_attr "type" "mult")]
  )
  
  (define_insn "*mulsi_compare0_scratch"
    [(set (reg:CC_NOOV CC_REGNUM)
  	(compare:CC_NOOV (mult:SI
--- 1068,1078 ----
     (set (match_operand:SI 0 "s_register_operand" "=&r,&r")
  	(mult:SI (match_dup 2) (match_dup 1)))]
    "TARGET_ARM && !arm_arch_xscale"
    "mul%?s\\t%0, %2, %1"
    [(set_attr "conds" "set")
!    (set_attr "insn" "muls")]
  )
  
  (define_insn "*mulsi_compare0_scratch"
    [(set (reg:CC_NOOV CC_REGNUM)
  	(compare:CC_NOOV (mult:SI
***************
*** 1165,1175 ****
  			 (const_int 0)))
     (clobber (match_scratch:SI 0 "=&r,&r"))]
    "TARGET_ARM && !arm_arch_xscale"
    "mul%?s\\t%0, %2, %1"
    [(set_attr "conds" "set")
!    (set_attr "type" "mult")]
  )
  
  ;; Unnamed templates to match MLA instruction.
  
  (define_insn "*mulsi3addsi"
--- 1081,1091 ----
  			 (const_int 0)))
     (clobber (match_scratch:SI 0 "=&r,&r"))]
    "TARGET_ARM && !arm_arch_xscale"
    "mul%?s\\t%0, %2, %1"
    [(set_attr "conds" "set")
!    (set_attr "insn" "muls")]
  )
  
  ;; Unnamed templates to match MLA instruction.
  
  (define_insn "*mulsi3addsi"
***************
*** 1178,1188 ****
  	  (mult:SI (match_operand:SI 2 "s_register_operand" "r,r,r,r")
  		   (match_operand:SI 1 "s_register_operand" "%r,0,r,0"))
  	  (match_operand:SI 3 "s_register_operand" "?r,r,0,0")))]
    "TARGET_ARM"
    "mla%?\\t%0, %2, %1, %3"
!   [(set_attr "type" "mult")
     (set_attr "predicable" "yes")]
  )
  
  (define_insn "*mulsi3addsi_compare0"
    [(set (reg:CC_NOOV CC_REGNUM)
--- 1094,1104 ----
  	  (mult:SI (match_operand:SI 2 "s_register_operand" "r,r,r,r")
  		   (match_operand:SI 1 "s_register_operand" "%r,0,r,0"))
  	  (match_operand:SI 3 "s_register_operand" "?r,r,0,0")))]
    "TARGET_ARM"
    "mla%?\\t%0, %2, %1, %3"
!   [(set_attr "insn" "mla")
     (set_attr "predicable" "yes")]
  )
  
  (define_insn "*mulsi3addsi_compare0"
    [(set (reg:CC_NOOV CC_REGNUM)
***************
*** 1196,1206 ****
  	(plus:SI (mult:SI (match_dup 2) (match_dup 1))
  		 (match_dup 3)))]
    "TARGET_ARM && !arm_arch_xscale"
    "mla%?s\\t%0, %2, %1, %3"
    [(set_attr "conds" "set")
!    (set_attr "type" "mult")]
  )
  
  (define_insn "*mulsi3addsi_compare0_scratch"
    [(set (reg:CC_NOOV CC_REGNUM)
  	(compare:CC_NOOV
--- 1112,1122 ----
  	(plus:SI (mult:SI (match_dup 2) (match_dup 1))
  		 (match_dup 3)))]
    "TARGET_ARM && !arm_arch_xscale"
    "mla%?s\\t%0, %2, %1, %3"
    [(set_attr "conds" "set")
!    (set_attr "insn" "mlas")]
  )
  
  (define_insn "*mulsi3addsi_compare0_scratch"
    [(set (reg:CC_NOOV CC_REGNUM)
  	(compare:CC_NOOV
***************
*** 1211,1221 ****
  	 (const_int 0)))
     (clobber (match_scratch:SI 0 "=&r,&r,&r,&r"))]
    "TARGET_ARM && !arm_arch_xscale"
    "mla%?s\\t%0, %2, %1, %3"
    [(set_attr "conds" "set")
!    (set_attr "type" "mult")]
  )
  
  ;; Unnamed template to match long long multiply-accumlate (smlal)
  
  (define_insn "*mulsidi3adddi"
--- 1127,1137 ----
  	 (const_int 0)))
     (clobber (match_scratch:SI 0 "=&r,&r,&r,&r"))]
    "TARGET_ARM && !arm_arch_xscale"
    "mla%?s\\t%0, %2, %1, %3"
    [(set_attr "conds" "set")
!    (set_attr "insn" "mlas")]
  )
  
  ;; Unnamed template to match long long multiply-accumlate (smlal)
  
  (define_insn "*mulsidi3adddi"
***************
*** 1225,1257 ****
  	  (sign_extend:DI (match_operand:SI 2 "s_register_operand" "%r"))
  	  (sign_extend:DI (match_operand:SI 3 "s_register_operand" "r")))
  	 (match_operand:DI 1 "s_register_operand" "0")))]
    "TARGET_ARM && arm_fast_multiply"
    "smlal%?\\t%Q0, %R0, %3, %2"
!   [(set_attr "type" "mult")
     (set_attr "predicable" "yes")]
  )
  
  (define_insn "mulsidi3"
    [(set (match_operand:DI 0 "s_register_operand" "=&r")
  	(mult:DI
  	 (sign_extend:DI (match_operand:SI 1 "s_register_operand" "%r"))
  	 (sign_extend:DI (match_operand:SI 2 "s_register_operand" "r"))))]
    "TARGET_ARM && arm_fast_multiply"
    "smull%?\\t%Q0, %R0, %1, %2"
!   [(set_attr "type" "mult")
     (set_attr "predicable" "yes")]
  )
  
  (define_insn "umulsidi3"
    [(set (match_operand:DI 0 "s_register_operand" "=&r")
  	(mult:DI
  	 (zero_extend:DI (match_operand:SI 1 "s_register_operand" "%r"))
  	 (zero_extend:DI (match_operand:SI 2 "s_register_operand" "r"))))]
    "TARGET_ARM && arm_fast_multiply"
    "umull%?\\t%Q0, %R0, %1, %2"
!   [(set_attr "type" "mult")
     (set_attr "predicable" "yes")]
  )
  
  ;; Unnamed template to match long long unsigned multiply-accumlate (umlal)
  
--- 1141,1173 ----
  	  (sign_extend:DI (match_operand:SI 2 "s_register_operand" "%r"))
  	  (sign_extend:DI (match_operand:SI 3 "s_register_operand" "r")))
  	 (match_operand:DI 1 "s_register_operand" "0")))]
    "TARGET_ARM && arm_fast_multiply"
    "smlal%?\\t%Q0, %R0, %3, %2"
!   [(set_attr "insn" "smlal")
     (set_attr "predicable" "yes")]
  )
  
  (define_insn "mulsidi3"
    [(set (match_operand:DI 0 "s_register_operand" "=&r")
  	(mult:DI
  	 (sign_extend:DI (match_operand:SI 1 "s_register_operand" "%r"))
  	 (sign_extend:DI (match_operand:SI 2 "s_register_operand" "r"))))]
    "TARGET_ARM && arm_fast_multiply"
    "smull%?\\t%Q0, %R0, %1, %2"
!   [(set_attr "insn" "smull")
     (set_attr "predicable" "yes")]
  )
  
  (define_insn "umulsidi3"
    [(set (match_operand:DI 0 "s_register_operand" "=&r")
  	(mult:DI
  	 (zero_extend:DI (match_operand:SI 1 "s_register_operand" "%r"))
  	 (zero_extend:DI (match_operand:SI 2 "s_register_operand" "r"))))]
    "TARGET_ARM && arm_fast_multiply"
    "umull%?\\t%Q0, %R0, %1, %2"
!   [(set_attr "insn" "umull")
     (set_attr "predicable" "yes")]
  )
  
  ;; Unnamed template to match long long unsigned multiply-accumlate (umlal)
  
***************
*** 1262,1272 ****
  	  (zero_extend:DI (match_operand:SI 2 "s_register_operand" "%r"))
  	  (zero_extend:DI (match_operand:SI 3 "s_register_operand" "r")))
  	 (match_operand:DI 1 "s_register_operand" "0")))]
    "TARGET_ARM && arm_fast_multiply"
    "umlal%?\\t%Q0, %R0, %3, %2"
!   [(set_attr "type" "mult")
     (set_attr "predicable" "yes")]
  )
  
  (define_insn "smulsi3_highpart"
    [(set (match_operand:SI 0 "s_register_operand" "=&r,&r")
--- 1178,1188 ----
  	  (zero_extend:DI (match_operand:SI 2 "s_register_operand" "%r"))
  	  (zero_extend:DI (match_operand:SI 3 "s_register_operand" "r")))
  	 (match_operand:DI 1 "s_register_operand" "0")))]
    "TARGET_ARM && arm_fast_multiply"
    "umlal%?\\t%Q0, %R0, %3, %2"
!   [(set_attr "insn" "umlal")
     (set_attr "predicable" "yes")]
  )
  
  (define_insn "smulsi3_highpart"
    [(set (match_operand:SI 0 "s_register_operand" "=&r,&r")
***************
*** 1277,1287 ****
  	   (sign_extend:DI (match_operand:SI 2 "s_register_operand" "r,r")))
  	  (const_int 32))))
     (clobber (match_scratch:SI 3 "=&r,&r"))]
    "TARGET_ARM && arm_fast_multiply"
    "smull%?\\t%3, %0, %2, %1"
!   [(set_attr "type" "mult")
     (set_attr "predicable" "yes")]
  )
  
  (define_insn "umulsi3_highpart"
    [(set (match_operand:SI 0 "s_register_operand" "=&r,&r")
--- 1193,1203 ----
  	   (sign_extend:DI (match_operand:SI 2 "s_register_operand" "r,r")))
  	  (const_int 32))))
     (clobber (match_scratch:SI 3 "=&r,&r"))]
    "TARGET_ARM && arm_fast_multiply"
    "smull%?\\t%3, %0, %2, %1"
!   [(set_attr "insn" "smull")
     (set_attr "predicable" "yes")]
  )
  
  (define_insn "umulsi3_highpart"
    [(set (match_operand:SI 0 "s_register_operand" "=&r,&r")
***************
*** 1292,1302 ****
  	   (zero_extend:DI (match_operand:SI 2 "s_register_operand" "r,r")))
  	  (const_int 32))))
     (clobber (match_scratch:SI 3 "=&r,&r"))]
    "TARGET_ARM && arm_fast_multiply"
    "umull%?\\t%3, %0, %2, %1"
!   [(set_attr "type" "mult")
     (set_attr "predicable" "yes")]
  )
  
  (define_insn "mulhisi3"
    [(set (match_operand:SI 0 "s_register_operand" "=r")
--- 1208,1218 ----
  	   (zero_extend:DI (match_operand:SI 2 "s_register_operand" "r,r")))
  	  (const_int 32))))
     (clobber (match_scratch:SI 3 "=&r,&r"))]
    "TARGET_ARM && arm_fast_multiply"
    "umull%?\\t%3, %0, %2, %1"
!   [(set_attr "insn" "umull")
     (set_attr "predicable" "yes")]
  )
  
  (define_insn "mulhisi3"
    [(set (match_operand:SI 0 "s_register_operand" "=r")
***************
*** 1304,1314 ****
  		  (match_operand:HI 1 "s_register_operand" "%r"))
  		 (sign_extend:SI
  		  (match_operand:HI 2 "s_register_operand" "r"))))]
    "TARGET_ARM && arm_arch5e"
    "smulbb%?\\t%0, %1, %2"
!   [(set_attr "type" "mult")
     (set_attr "predicable" "yes")]
  )
  
  (define_insn "*mulhisi3tb"
    [(set (match_operand:SI 0 "s_register_operand" "=r")
--- 1220,1230 ----
  		  (match_operand:HI 1 "s_register_operand" "%r"))
  		 (sign_extend:SI
  		  (match_operand:HI 2 "s_register_operand" "r"))))]
    "TARGET_ARM && arm_arch5e"
    "smulbb%?\\t%0, %1, %2"
!   [(set_attr "insn" "smulxy")
     (set_attr "predicable" "yes")]
  )
  
  (define_insn "*mulhisi3tb"
    [(set (match_operand:SI 0 "s_register_operand" "=r")
***************
*** 1317,1327 ****
  		  (const_int 16))
  		 (sign_extend:SI
  		  (match_operand:HI 2 "s_register_operand" "r"))))]
    "TARGET_ARM && arm_arch5e"
    "smultb%?\\t%0, %1, %2"
!   [(set_attr "type" "mult")
     (set_attr "predicable" "yes")]
  )
  
  (define_insn "*mulhisi3bt"
    [(set (match_operand:SI 0 "s_register_operand" "=r")
--- 1233,1243 ----
  		  (const_int 16))
  		 (sign_extend:SI
  		  (match_operand:HI 2 "s_register_operand" "r"))))]
    "TARGET_ARM && arm_arch5e"
    "smultb%?\\t%0, %1, %2"
!   [(set_attr "insn" "smulxy")
     (set_attr "predicable" "yes")]
  )
  
  (define_insn "*mulhisi3bt"
    [(set (match_operand:SI 0 "s_register_operand" "=r")
***************
*** 1330,1340 ****
  		 (ashiftrt:SI
  		  (match_operand:SI 2 "s_register_operand" "r")
  		  (const_int 16))))]
    "TARGET_ARM && arm_arch5e"
    "smulbt%?\\t%0, %1, %2"
!   [(set_attr "type" "mult")
     (set_attr "predicable" "yes")]
  )
  
  (define_insn "*mulhisi3tt"
    [(set (match_operand:SI 0 "s_register_operand" "=r")
--- 1246,1256 ----
  		 (ashiftrt:SI
  		  (match_operand:SI 2 "s_register_operand" "r")
  		  (const_int 16))))]
    "TARGET_ARM && arm_arch5e"
    "smulbt%?\\t%0, %1, %2"
!   [(set_attr "insn" "smulxy")
     (set_attr "predicable" "yes")]
  )
  
  (define_insn "*mulhisi3tt"
    [(set (match_operand:SI 0 "s_register_operand" "=r")
***************
*** 1344,1354 ****
  		 (ashiftrt:SI
  		  (match_operand:SI 2 "s_register_operand" "r")
  		  (const_int 16))))]
    "TARGET_ARM && arm_arch5e"
    "smultt%?\\t%0, %1, %2"
!   [(set_attr "type" "mult")
     (set_attr "predicable" "yes")]
  )
  
  (define_insn "*mulhisi3addsi"
    [(set (match_operand:SI 0 "s_register_operand" "=r")
--- 1260,1270 ----
  		 (ashiftrt:SI
  		  (match_operand:SI 2 "s_register_operand" "r")
  		  (const_int 16))))]
    "TARGET_ARM && arm_arch5e"
    "smultt%?\\t%0, %1, %2"
!   [(set_attr "insn" "smulxy")
     (set_attr "predicable" "yes")]
  )
  
  (define_insn "*mulhisi3addsi"
    [(set (match_operand:SI 0 "s_register_operand" "=r")
***************
*** 1357,1367 ****
  			   (match_operand:HI 2 "s_register_operand" "%r"))
  			  (sign_extend:SI
  			   (match_operand:HI 3 "s_register_operand" "r")))))]
    "TARGET_ARM && arm_arch5e"
    "smlabb%?\\t%0, %2, %3, %1"
!   [(set_attr "type" "mult")
     (set_attr "predicable" "yes")]
  )
  
  (define_insn "*mulhidi3adddi"
    [(set (match_operand:DI 0 "s_register_operand" "=r")
--- 1273,1283 ----
  			   (match_operand:HI 2 "s_register_operand" "%r"))
  			  (sign_extend:SI
  			   (match_operand:HI 3 "s_register_operand" "r")))))]
    "TARGET_ARM && arm_arch5e"
    "smlabb%?\\t%0, %2, %3, %1"
!   [(set_attr "insn" "smlaxy")
     (set_attr "predicable" "yes")]
  )
  
  (define_insn "*mulhidi3adddi"
    [(set (match_operand:DI 0 "s_register_operand" "=r")
***************
*** 1371,1381 ****
  	 	    (match_operand:HI 2 "s_register_operand" "%r"))
  		   (sign_extend:DI
  		    (match_operand:HI 3 "s_register_operand" "r")))))]
    "TARGET_ARM && arm_arch5e"
    "smlalbb%?\\t%Q0, %R0, %2, %3"
!   [(set_attr "type" "mult")
     (set_attr "predicable" "yes")])
  
  (define_expand "mulsf3"
    [(set (match_operand:SF          0 "s_register_operand" "")
  	(mult:SF (match_operand:SF 1 "s_register_operand" "")
--- 1287,1297 ----
  	 	    (match_operand:HI 2 "s_register_operand" "%r"))
  		   (sign_extend:DI
  		    (match_operand:HI 3 "s_register_operand" "r")))))]
    "TARGET_ARM && arm_arch5e"
    "smlalbb%?\\t%Q0, %R0, %2, %3"
!   [(set_attr "insn" "smlalxy")
     (set_attr "predicable" "yes")])
  
  (define_expand "mulsf3"
    [(set (match_operand:SF          0 "s_register_operand" "")
  	(mult:SF (match_operand:SF 1 "s_register_operand" "")
***************
*** 4914,4924 ****
  	  (mem:SI (plus:SI (match_dup 2) (const_int 8))))
       (set (match_operand:SI 6 "arm_hard_register_operand" "")
  	  (mem:SI (plus:SI (match_dup 2) (const_int 12))))])]
    "TARGET_ARM && XVECLEN (operands[0], 0) == 5"
    "ldm%?ia\\t%1!, {%3, %4, %5, %6}"
!   [(set_attr "type" "load")
     (set_attr "predicable" "yes")]
  )
  
  (define_insn "*ldmsi_postinc3"
    [(match_parallel 0 "load_multiple_operation"
--- 4830,4840 ----
  	  (mem:SI (plus:SI (match_dup 2) (const_int 8))))
       (set (match_operand:SI 6 "arm_hard_register_operand" "")
  	  (mem:SI (plus:SI (match_dup 2) (const_int 12))))])]
    "TARGET_ARM && XVECLEN (operands[0], 0) == 5"
    "ldm%?ia\\t%1!, {%3, %4, %5, %6}"
!   [(set_attr "type" "load4")
     (set_attr "predicable" "yes")]
  )
  
  (define_insn "*ldmsi_postinc3"
    [(match_parallel 0 "load_multiple_operation"
***************
*** 4931,4941 ****
  	  (mem:SI (plus:SI (match_dup 2) (const_int 4))))
       (set (match_operand:SI 5 "arm_hard_register_operand" "")
  	  (mem:SI (plus:SI (match_dup 2) (const_int 8))))])]
    "TARGET_ARM && XVECLEN (operands[0], 0) == 4"
    "ldm%?ia\\t%1!, {%3, %4, %5}"
!   [(set_attr "type" "load")
     (set_attr "predicable" "yes")]
  )
  
  (define_insn "*ldmsi_postinc2"
    [(match_parallel 0 "load_multiple_operation"
--- 4847,4857 ----
  	  (mem:SI (plus:SI (match_dup 2) (const_int 4))))
       (set (match_operand:SI 5 "arm_hard_register_operand" "")
  	  (mem:SI (plus:SI (match_dup 2) (const_int 8))))])]
    "TARGET_ARM && XVECLEN (operands[0], 0) == 4"
    "ldm%?ia\\t%1!, {%3, %4, %5}"
!   [(set_attr "type" "load3")
     (set_attr "predicable" "yes")]
  )
  
  (define_insn "*ldmsi_postinc2"
    [(match_parallel 0 "load_multiple_operation"
***************
*** 4946,4956 ****
  	  (mem:SI (match_dup 2)))
       (set (match_operand:SI 4 "arm_hard_register_operand" "")
  	  (mem:SI (plus:SI (match_dup 2) (const_int 4))))])]
    "TARGET_ARM && XVECLEN (operands[0], 0) == 3"
    "ldm%?ia\\t%1!, {%3, %4}"
!   [(set_attr "type" "load")
     (set_attr "predicable" "yes")]
  )
  
  ;; Ordinary load multiple
  
--- 4862,4872 ----
  	  (mem:SI (match_dup 2)))
       (set (match_operand:SI 4 "arm_hard_register_operand" "")
  	  (mem:SI (plus:SI (match_dup 2) (const_int 4))))])]
    "TARGET_ARM && XVECLEN (operands[0], 0) == 3"
    "ldm%?ia\\t%1!, {%3, %4}"
!   [(set_attr "type" "load2")
     (set_attr "predicable" "yes")]
  )
  
  ;; Ordinary load multiple
  
***************
*** 4964,4974 ****
  	  (mem:SI (plus:SI (match_dup 1) (const_int 8))))
       (set (match_operand:SI 5 "arm_hard_register_operand" "")
  	  (mem:SI (plus:SI (match_dup 1) (const_int 12))))])]
    "TARGET_ARM && XVECLEN (operands[0], 0) == 4"
    "ldm%?ia\\t%1, {%2, %3, %4, %5}"
!   [(set_attr "type" "load")
     (set_attr "predicable" "yes")]
  )
  
  (define_insn "*ldmsi3"
    [(match_parallel 0 "load_multiple_operation"
--- 4880,4890 ----
  	  (mem:SI (plus:SI (match_dup 1) (const_int 8))))
       (set (match_operand:SI 5 "arm_hard_register_operand" "")
  	  (mem:SI (plus:SI (match_dup 1) (const_int 12))))])]
    "TARGET_ARM && XVECLEN (operands[0], 0) == 4"
    "ldm%?ia\\t%1, {%2, %3, %4, %5}"
!   [(set_attr "type" "load4")
     (set_attr "predicable" "yes")]
  )
  
  (define_insn "*ldmsi3"
    [(match_parallel 0 "load_multiple_operation"
***************
*** 4978,4988 ****
  	  (mem:SI (plus:SI (match_dup 1) (const_int 4))))
       (set (match_operand:SI 4 "arm_hard_register_operand" "")
  	  (mem:SI (plus:SI (match_dup 1) (const_int 8))))])]
    "TARGET_ARM && XVECLEN (operands[0], 0) == 3"
    "ldm%?ia\\t%1, {%2, %3, %4}"
!   [(set_attr "type" "load")
     (set_attr "predicable" "yes")]
  )
  
  (define_insn "*ldmsi2"
    [(match_parallel 0 "load_multiple_operation"
--- 4894,4904 ----
  	  (mem:SI (plus:SI (match_dup 1) (const_int 4))))
       (set (match_operand:SI 4 "arm_hard_register_operand" "")
  	  (mem:SI (plus:SI (match_dup 1) (const_int 8))))])]
    "TARGET_ARM && XVECLEN (operands[0], 0) == 3"
    "ldm%?ia\\t%1, {%2, %3, %4}"
!   [(set_attr "type" "load3")
     (set_attr "predicable" "yes")]
  )
  
  (define_insn "*ldmsi2"
    [(match_parallel 0 "load_multiple_operation"
***************
*** 4990,5000 ****
  	  (mem:SI (match_operand:SI 1 "s_register_operand" "r")))
       (set (match_operand:SI 3 "arm_hard_register_operand" "")
  	  (mem:SI (plus:SI (match_dup 1) (const_int 4))))])]
    "TARGET_ARM && XVECLEN (operands[0], 0) == 2"
    "ldm%?ia\\t%1, {%2, %3}"
!   [(set_attr "type" "load")
     (set_attr "predicable" "yes")]
  )
  
  (define_expand "store_multiple"
    [(match_par_dup 3 [(set (match_operand:SI 0 "" "")
--- 4906,4916 ----
  	  (mem:SI (match_operand:SI 1 "s_register_operand" "r")))
       (set (match_operand:SI 3 "arm_hard_register_operand" "")
  	  (mem:SI (plus:SI (match_dup 1) (const_int 4))))])]
    "TARGET_ARM && XVECLEN (operands[0], 0) == 2"
    "ldm%?ia\\t%1, {%2, %3}"
!   [(set_attr "type" "load2")
     (set_attr "predicable" "yes")]
  )
  
  (define_expand "store_multiple"
    [(match_par_dup 3 [(set (match_operand:SI 0 "" "")
***************
*** 6595,6605 ****
        arm_ccfsm_state += 2;
        return \"\";
      }
    return \"b%d1\\t%l0\";
    "
!   [(set_attr "conds" "use")]
  )
  
  ; Special pattern to match reversed UNEQ.
  (define_insn "*arm_buneq_reversed"
    [(set (pc)
--- 6511,6522 ----
        arm_ccfsm_state += 2;
        return \"\";
      }
    return \"b%d1\\t%l0\";
    "
!   [(set_attr "conds" "use")
!    (set_attr "type" "branch")]
  )
  
  ; Special pattern to match reversed UNEQ.
  (define_insn "*arm_buneq_reversed"
    [(set (pc)
***************
*** 6647,6657 ****
        arm_ccfsm_state += 2;
        return \"\";
      }
    return \"b%D1\\t%l0\";
    "
!   [(set_attr "conds" "use")]
  )
  
  
  
  ; scc insns
--- 6564,6575 ----
        arm_ccfsm_state += 2;
        return \"\";
      }
    return \"b%D1\\t%l0\";
    "
!   [(set_attr "conds" "use")
!    (set_attr "type" "branch")]
  )
  
  
  
  ; scc insns
Index: arm1026ejs.md
===================================================================
RCS file: arm1026ejs.md
diff -N arm1026ejs.md
*** /dev/null	1 Jan 1970 00:00:00 -0000
--- arm1026ejs.md	28 Oct 2003 07:58:31 -0000
***************
*** 0 ****
--- 1,200 ----
+ ;; ARM 1026EJ-S Pipeline Description
+ ;; Copyright (C) 2003 Free Software Foundation, Inc.
+ ;; Written by CodeSourcery, LLC.
+ ;;
+ ;; This file is part of GCC.
+ ;;
+ ;; GCC is free software; you can redistribute it and/or modify it
+ ;; under the terms of the GNU General Public License as published by
+ ;; the Free Software Foundation; either version 2, or (at your option)
+ ;; any later version.
+ ;;
+ ;; GCC is distributed in the hope that it will be useful, but
+ ;; WITHOUT ANY WARRANTY; without even the implied warranty of
+ ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ ;; General Public License for more details.
+ ;;
+ ;; You should have received a copy of the GNU General Public License
+ ;; along with GCC; see the file COPYING.  If not, write to the Free
+ ;; Software Foundation, 59 Temple Place - Suite 330, Boston, MA
+ ;; 02111-1307, USA.
+ 
+ ;; These descriptions are based on the information contained in the
+ ;; ARM1026EJ-S Technical Reference Manual, Copyright (c) 2003 ARM
+ ;; Limited.
+ ;;
+ 
+ ;; This automaton provides a pipeline description for the ARM
+ ;; 1026EJ-S core.
+ ;;
+ ;; The model given here assumes that the condition for all conditional
+ ;; instructions is "true", i.e., that all of the instructions are
+ ;; actually executed.
+ 
+ (define_automaton "arm1026ejs")
+ 
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+ ;; Pipelines
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+ 
+ ;; There are two pipelines:
+ ;; 
+ ;; - An Arithmetic Logic Unit (ALU) pipeline.
+ ;;
+ ;;   The ALU pipeline has fetch, issue, decode, execute, memory, and
+ ;;   write stages.
+ ;;
+ ;; - A Load-Store Unit (LSU) pipeline.
+ ;;
+ ;;   The LSU pipeline has decode, execute, memory, and write stages.
+ 
+ (define_cpu_unit "a_f,a_i,a_d,a_e,a_m,a_w" "arm1026ejs")
+ (define_cpu_unit "l_d,l_e,l_m,l_w" "arm1026ejs")
+ 
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+ ;; ALU Instructions
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+ 
+ ;; ALU instructions require six cycles to execute, and use the ALU
+ ;; pipeline in each of the six stages.  The results are available
+ ;; after the execute stage has finished.
+ ;;
+ ;; If the destination register is the PC, the pipelines are stalled
+ ;; for several cycles.  That case is not modeled here.
+ 
+ (define_insn_reservation "alu_op" 4 
+  (and (eq_attr "tune" "arm1026ejs")
+       (eq_attr "type" "normal"))
+  "a_f,a_i,a_d,a_e,a_m,a_w")
+ 
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+ ;; Multiplication Instructions
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+ 
+ ;; Multiplication instructions loop in the execute stage until the
+ ;; instruction has been passed through the multiplier array enough
+ ;; times.
+ 
+ ;; The result of the "smul" and "smulw" instructions is not available
+ ;; until after the memory stage.
+ (define_insn_reservation "mult1" 5
+  (and (eq_attr "tune" "arm1026ejs")
+       (eq_attr "insn" "smulxy,smulwy"))
+  "a_f,a_i,a_d,a_e,a_m,a_w")
+ 
+ ;; The "smlaxy" and "smlawx" instructions require two iterations through
+ ;; the execute stage; the result is available immediately following
+ ;; the execute stage.  ("smlalxy" is covered by "mult3" below.)
+ (define_insn_reservation "mult2" 5
+  (and (eq_attr "tune" "arm1026ejs")
+       (eq_attr "insn" "smlaxy,smlawx"))
+  "a_f,a_i,a_d,a_e*2,a_m,a_w")
+ 
+ ;; The "smlalxy", "mul", and "mla" instructions require two iterations
+ ;; through the execute stage; the result is not available until after
+ ;; the memory stage.
+ (define_insn_reservation "mult3" 6
+  (and (eq_attr "tune" "arm1026ejs")
+       (eq_attr "insn" "smlalxy,mul,mla"))
+  "a_f,a_i,a_d,a_e*2,a_m,a_w")
+ 
+ ;; The "muls" and "mlas" instructions loop in the execute stage for
+ ;; four iterations in order to set the flags.  The value result is
+ ;; available after three iterations.
+ (define_insn_reservation "mult4" 6
+  (and (eq_attr "tune" "arm1026ejs")
+       (eq_attr "insn" "muls,mlas"))
+  "a_f,a_i,a_d,a_e*4,a_m,a_w")
+ 
+ ;; Long multiply instructions that produce two registers of
+ ;; output (such as umull) make their results available in two cycles;
+ ;; the least significant word is available before the most significant
+ ;; word.  That fact is not modeled; instead, the instructions are
+ ;; described as if the entire result was available at the end of the
+ ;; cycle in which both words are available.
+ 
+ ;; The "umull", "umlal", "smull", and "smlal" instructions all take
+ ;; three iterations through the execute stage, and make their results
+ ;; available after the memory stage.
+ (define_insn_reservation "mult5" 7
+  (and (eq_attr "tune" "arm1026ejs")
+       (eq_attr "insn" "umull,umlal,smull,smlal"))
+  "a_f,a_i,a_d,a_e*3,a_m,a_w")
+ 
+ ;; The "umulls", "umlals", "smulls", and "smlals" instructions loop in
+ ;; the execute stage for five iterations in order to set the flags.
+ ;; The value result is available after four iterations.
+ (define_insn_reservation "mult6" 7
+  (and (eq_attr "tune" "arm1026ejs")
+       (eq_attr "insn" "umulls,umlals,smulls,smlals"))
+  "a_f,a_i,a_d,a_e*5,a_m,a_w")
+ 
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+ ;; Load/Store Instructions
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+ 
+ ;; The models for load/store instructions do not accurately describe
+ ;; the difference between operations with a base register writeback
+ ;; (such as "ldm!") and those without.  These models assume that all
+ ;; memory references hit in dcache.
+ 
+ ;; LSU instructions of type "load" or "store1" require six cycles to
+ ;; execute.  They use the ALU pipeline in all but the 5th cycle, and the
+ ;; LSU pipeline in cycles three through six.
+ 
+ (define_insn_reservation "lsu_op" 5
+  (and (eq_attr "tune" "arm1026ejs")
+       (eq_attr "type" "load,store1"))
+  "a_f,a_i,a_d+l_d,a_e+l_e,l_m,a_w+l_w")
+ 
+ ;; On a LDM/STM operation, the LSU pipeline iterates until all of the
+ ;; registers have been processed.  The time it takes to load the data
+ ;; depends on whether or not the base address is 64-bit aligned; if it
+ ;; is not, an additional cycle is required.  This model assumes that the
+ ;; address is always 64-bit aligned.  Because the processor can load two
+ ;; registers per cycle, that assumption means that we use the same
+ ;; instruction reservations for loading 2k and 2k - 1 registers.
+ ;;
+ ;; The ALU pipeline is stalled until the completion of the last memory
+ ;; stage in the LSU pipeline.  That is modeled by keeping the ALU
+ ;; execute stage busy until that point.
+ ;;
+ ;; As with ALU operations, if one of the destination registers is the
+ ;; PC, there are additional stalls; that is not modeled.
+ 
+ ;; Types "load2" and "store2": each LSU stage is reserved once.
+ (define_insn_reservation "lsm2_op" 5
+  (and (eq_attr "tune" "arm1026ejs")
+       (eq_attr "type" "load2,store2"))
+  "a_f,a_i,a_d+l_d,a_e+l_e,l_m,a_w+l_w")
+ 
+ ;; Three- and four-register types: l_d, l_e and l_m are each reserved twice.
+ (define_insn_reservation "lsm3_op" 6
+  (and (eq_attr "tune" "arm1026ejs")
+       (eq_attr "type" "load3,store3,load4,store4"))
+  "a_f,a_i,a_d+l_d,a_e+l_d+l_e,a_e+l_e+l_m,a_e+l_m,a_w+l_w")
+ 
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+ ;; Branch and Call Instructions
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+ 
+ ;; Branch instructions are difficult to model accurately.  The ARM
+ ;; core can predict most branches.  If the branch is predicted
+ ;; correctly, and predicted early enough, the branch can be completely
+ ;; eliminated from the instruction stream.  Some branches can
+ ;; therefore appear to require zero cycles to execute.  We assume that
+ ;; all branches are predicted correctly, and that the latency is
+ ;; therefore the minimum value (zero); no pipeline units are reserved.
+ 
+ (define_insn_reservation "branch_op" 0
+  (and (eq_attr "tune" "arm1026ejs")
+       (eq_attr "type" "branch"))
+  "nothing")
+ 
+ ;; The latency for a call is not predictable.  Therefore, we use 32 as
+ ;; roughly equivalent to positive infinity.
+ 
+ (define_insn_reservation "call_op" 32
+  (and (eq_attr "tune" "arm1026ejs")
+       (eq_attr "type" "call"))
+  "nothing")


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]