This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: [PATCH 2/2, AArch64] Pipeline model for APM XGene-1.


Hi Philipp,

Some comments inline

On 19/11/14 17:32, Philipp Tomsich wrote:
---
  gcc/ChangeLog                 |   6 +
  gcc/config/aarch64/aarch64.md |   4 +-
  gcc/config/arm/xgene1.md      | 739 ++++++++++++++++++++++++++++++++++++++++++
  3 files changed, 748 insertions(+), 1 deletion(-)
  create mode 100644 gcc/config/arm/xgene1.md

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 5b389c5..9cc3b5a 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,5 +1,11 @@
  2014-11-19  Philipp Tomsich  <philipp.tomsich@theobroma-systems.com>

+       * config/aarch64/aarch64.md: Include xgene1.md.
+       (generic_sched): Set to no for xgene1.
+       * config/arm/xgene1.md: New file.
+
+2014-11-19  Philipp Tomsich  <philipp.tomsich@theobroma-systems.com>
+
         * config/aarch64/aarch64-cores.def (xgene1): Update/add the
         xgene1 (APM XGene-1) core definition.
         * gcc/config/aarch64/aarch64.c: Add cost tables for APM XGene-1
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 597ff8c..5d92051 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -191,7 +191,7 @@

  (define_attr "generic_sched" "yes,no"
    (const (if_then_else
-          (eq_attr "tune" "cortexa53,cortexa15,thunderx")
+          (eq_attr "tune" "cortexa53,cortexa15,thunderx,xgene1")
            (const_string "no")
            (const_string "yes"))))

@@ -4211,3 +4211,5 @@

  ;; Atomic Operations
  (include "atomics.md")
+
+(include "../arm/xgene1.md")
diff --git a/gcc/config/arm/xgene1.md b/gcc/config/arm/xgene1.md
new file mode 100644
index 0000000..3c08b16
--- /dev/null
+++ b/gcc/config/arm/xgene1.md
@@ -0,0 +1,739 @@
+;; Machine description for AppliedMicro xgene1 core.
+;; Copyright (C) 2012 Free Software Foundation, Inc.

This should be "Copyright (C) 2012-2014", I think.

+;; Contributed by Theobroma Systems Design und Consulting GmbH.
+;;                See http://www.theobroma-systems.com for more info.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+;; General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3.  If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; Pipeline description for the xgene1 micro-architecture
+
+(define_automaton "xgene1")
+
+(define_cpu_unit "decode_out_0" "xgene1")
+(define_cpu_unit "decode_out_1" "xgene1")
+(define_cpu_unit "decode_out_2" "xgene1")
+(define_cpu_unit "decode_out_3" "xgene1")
+
+(define_cpu_unit "divide" "xgene1")
+(define_cpu_unit "fp_divide" "xgene1")
+
+(define_reservation "decode1op"
+        "( decode_out_0 )
+        |( decode_out_1 )
+        |( decode_out_2 )
+        |( decode_out_3 )"
+)
+(define_reservation "decode2op"
+        "( decode_out_0 + decode_out_1 )
+        |( decode_out_0 + decode_out_2 )
+        |( decode_out_0 + decode_out_3 )
+        |( decode_out_1 + decode_out_2 )
+        |( decode_out_1 + decode_out_3 )
+        |( decode_out_2 + decode_out_3 )"
+)
+(define_reservation "decodeIsolated"
+        "( decode_out_0 + decode_out_1 + decode_out_2 + decode_out_3 )"
+)
+
+;; (define_insn_reservation "dummy" 1
+;;   (and (eq_attr "tune" "xgene1")
+;;        (eq_attr "type" "neon_minmax"))
+;;   "decodeIsolated")

Please remove that commented-out insn reservation.

+
+;; B: nop.
+;; BR: branch op.
+
+;; RET
+;; CBZ
+;; TBZ
+(define_insn_reservation "branch" 1
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "branch"))
+  "decode1op")
+
+;; NOP
+;; HINT
+(define_insn_reservation "nop" 1
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "no_insn"))
+  "decode1op")
+
+;; See #3565
This "See #3565" reference is not meaningful; it should be removed. The same applies elsewhere in the file.

+;; BLR: arithmetic op & branch op.
+;; BL: arithmetic op.
+(define_insn_reservation "call" 1
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "call"))
+  "decode2op")
+
+;; LDR: FP load op & arithmetic op.
+(define_insn_reservation "f_load" 10
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "f_loadd,f_loads"))
+  "decode2op,nothing*9")

You can just write "decode2op". The trailing nothing*9 is implicit if you don't specify it. Using 'nothing' is only useful if you want to model the reservation of units with intermediate 'empty stages', like:
"unit0, nothing*2, unit1". The same applies throughout the file.

+
+;; STR: FP store op & arithmetic op.
+(define_insn_reservation "f_store" 4
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "f_stored,f_stores"))
+  "decode2op,nothing*3")
+
+;; FMOV (immediate): FP move op.
+;; FMOV (register): FP move op.
+(define_insn_reservation "fmov" 2
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "fmov,fconsts,fconstd"))
+  "decode1op,nothing")
+
+;; LDP: FP load op & FP load op.
+;; LDP: FP load op & FP load op & arithmetic op.
+(define_insn_reservation "f_mcr" 10
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "f_mcr"))
+  "decodeIsolated,nothing*9")
+
+;; STP: FP store op & FP store op.
+(define_insn_reservation "f_mrc" 4
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "f_mrc"))
+  "decode2op,nothing*3")
+
+;; The (register offset) instructions with a shift
+;; of #0, #2, or #3 (or no shift) are translated
+;; as shown.
+;; For these instructions, any other shift amount
+;; causes the instruction to be prefixed with an
+;; sbfm/ubfm op (1 cycle latency).
+
+;; Load/store register pair (post-indexed):
+;; LDP: load op & load op & arithmetic op.
+;; Load/store register pair (offset):
+;; LDP: load op & load op.
+;; Load/store register pair (pre-indexed):
+;; LDP: load op & load op & arithmetic op.
+;; 5 + 1
+(define_insn_reservation "load_pair" 6
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "load2"))
+  "decodeIsolated,nothing*5")
+
+;; Load/store register pair (post-indexed):
+;; STP: store op & store op & arithmetic op.
+;; Load/store register pair (offset):
+;; STP: store op & store op.
+;; Load/store register pair (pre-indexed):
+;; STP: store op & store op & arithmetic op.
+;; 1 + 1
+(define_insn_reservation "store_pair" 2
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "store2"))
+  "decodeIsolated,nothing")
+
+;; Load register (literal):
+;; LDR: load op.
+;; Load/store register (immediate post-indexed):
+;; LDRB/LDRH/LDR: load op & arithmetic op.
+;; Load/store register (immediate pre-indexed):
+;; LDRB/LDRH/LDR: load op & arithmetic op.
+;; Load/store register (register offset)
+;; LDRB/LDRH/LDR: load op.
+;; LDRSB/LDRSH/LDRSW: load op + sbfm op (1 cycle latency).
+;; Load/store register (unsigned immediate):
+;; LDRB/LDRH/LDR: load op.
+;; 5 + 1
+;; FIXME This is inaccurate but avoids a crash.
+(define_insn_reservation "load1" 2
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "load1"))
+  "decode2op,nothing")
+
+;; Load/store register (immediate post-indexed):
+;; STRB/STRH/STR: store op & arithmetic op.
+;; Load/store register (immediate pre-indexed):
+;; STRB/STRH/STR: store op & arithmetic op.
+;; Load/store register (register offset)
+;; STRB/STRH/STR: store op.
+;; Load/store register (unsigned immediate):
+;; STRB/STRH/STR: store op.
+;; 1 + 1
+(define_insn_reservation "store1" 2
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "store1"))
+  "decode2op,nothing")
+
+;; MOVI
+;; MOV
+;; Move wide: logical op.
+;; MRS NZCV: logical op (register result).
+(define_insn_reservation "move" 1
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "mov_reg,mov_imm,mrs"))
+  "decode1op")
+
+;; See #3565
+(define_insn_reservation "alu" 1
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "alu_imm,alu_sreg,alu_shift_imm,\
+                        alu_ext,adc_reg,csel,logic_imm,\
+                        logic_reg,logic_shift_imm,clz,\
+                        rbit,shift_reg,adr,mov_reg,\
+                        mov_imm,extend"))
+  "decode1op")
+
+;; REV/REV16/REV32: SIMD op.
+(define_insn_reservation "simd" 1
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "rev"))
+  "decode1op")
+
+;; See #3565
+(define_insn_reservation "alus" 1
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "alus_imm,alu_sreg,alus_shift_imm,\
+                        alus_ext,logics_imm,logics_reg,\
+                        logics_shift_imm"))
+  "decode1op")
+
+;; MADD/SMADDL/UMADDL with Ra=XZR/WZR: multiply op.
+;; MADD/SMADDL/UMADDL with other Ra: multiply op + arithmetic op.
+;; MSUB/SMSUBL/UMSUBL: multiply op + arithmetic op.
+;; SMULH/UMULH: multiply op.
+;; 5 + 1
+(define_insn_reservation "mul" 6
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "mul,mla,smull,umull,smlal,umlal"))
+  "decode2op,nothing*5")
+
+;; UDIV/SDIV: divide op.
+(define_insn_reservation "div" 66
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "sdiv,udiv"))
+  "decode1op,divide*65")

Such large reservations tend to blow up the state space of the automaton without contributing much to codegen quality.
See PR 60743 for an example where this bit us.
Is the automaton size reasonable here?
You can get a feel for how large the automaton becomes by adding the options:
 (automata_option "v")
 (automata_option "time")
 (automata_option "stats")
 (automata_option "progress")

to the .md file; genautomata will then print some statistics while it runs.

+
+;; FCMP/FCMPE: FP compare op.
+(define_insn_reservation "fcmp" 10
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "fcmpd,fcmps"))
+  "decode1op,nothing*11")
+
+;; FCSEL: FP select op
+(define_insn_reservation "fcsel" 3
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "fcsel"))
+  "decode1op,nothing*2")
+
+;; See #3565
+(define_insn_reservation "bfm" 2
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "bfm"))
+  "decode1op,nothing")
+
+;; FRINTN/FRINTP/FRINTM/FRINTZ/FRINTA/FRINTX/FRINTI:
+;; FP convert op
+(define_insn_reservation "f_rint" 5
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "f_rintd,f_rints"))
+  "decode1op,nothing*4")
+
+;; FCVT (single to double or double to single): FP arithmetic op.
+;; FCVT (to or from half precision): FP half cvt op.
+(define_insn_reservation "f_cvt" 3
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "f_cvt"))
+  "decode1op,nothing*2")
+
+;; Floating-point<->integer conversions:
+;; FCVTNS/FCVTAS/FCVTPS/FCVTMS: FP convert op + FP store op (data bypass path) + (integer) load op.
+;; FCVTNU/FCVTAU/FCVTPU/FCVTMU: FP convert op + FP store op (data bypass path) + (integer) load op.
+;; FCVTZS/FCVTZU (integer): FP convert op + FP store op (data bypass path) + (integer) load op.
+;; Floating-point<->fixed-point conversions:
+;; FCVTZS/FCVTZU (fixed-point): FP convert op + FP store op (data bypass path) + (integer) load op.
+;; 5 + 1 + 5
+(define_insn_reservation "f_cvtf2i" 11
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "f_cvtf2i"))
+  "decodeIsolated,nothing*10")
+
+;; SCVTF/UCVTF (integer): (integer) store op (data bypass path) + FP load op + FP arithmetic op.
+;; SCVTF/UCVTF (fixed-point): (integer) store op (data bypass path) + FP load op + FP arithmetic op.
+;; -1 + 10 + 5
+(define_insn_reservation "f_cvti2f" 14
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "f_cvti2f"))
+  "decodeIsolated,nothing*13")
+
+;; FMUL/FADD/FSUB/FNMUL: FP arithmetic op.
+(define_insn_reservation "f_add" 5
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "faddd,fadds,fmuld,fmuls"))
+  "decode1op,nothing*5")
+
+;; FDIV: FP divide op.
+(define_insn_reservation "f_div" 28
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "fdivd,fdivs"))
+  "decode1op,fp_divide*27")
+
+;; FABS/FNEG: FP move op.
+(define_insn_reservation "f_arith" 2
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "ffarithd,ffariths"))
+  "decode1op,nothing")
+
+;; FSQRT: FP sqrt op.
+(define_insn_reservation "f_sqrt" 38
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "fsqrtd,fsqrts"))
+  "decode1op,fp_divide*37")
+
+;; FMAX/FMIN/FMAXNM/FMINNM: FP select op.
+(define_insn_reservation "f_select" 3
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "f_minmaxd,f_minmaxs"))
+  "decode1op,nothing*2")
+
+
+;; SIMD (aka neon)
+
+;; DUP (element) (size=x1000): ASIMD logical op.
+;; DUP (element) (size=other): ASIMD shift op.
+(define_insn_reservation "neon_dup" 3
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_dup,neon_dup_q"))
+  "decode1op,nothing*2")
+
+;; LDR: FP load op & arithmetic op.
+;; LD1 (one register, 1D): FP load op.
+;; LD1 (one register, 2D): FP load op*2.
+;; LD1 (one register, 2S/4H/8B): FP complex load op.
+;; LD1 (one register, 4S/8H/16B): FP complex load op*2.
+(define_insn_reservation "neon_load1" 11
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_load1_1reg, neon_load1_1reg_q"))
+  "decode2op,nothing*10")
+
+;; STR: FP store op & arithmetic op.
+;; ST1 (one register, 1D): FP store op
+;; ST1 (one register, 2D): FP store op*2
+;; ST1 (one register, 2S/4H/8B): FP complex store op
+;; ST1 (one register, 4S/8H/16B): FP complex store op*2
+;; 4 + 1
+(define_insn_reservation "neon_store1" 5
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_store1_1reg, neon_store1_1reg_q"))
+  "decode2op,nothing*4")
+
+;; MOVI/MVNI/ORR/BIC/FMOV: ASIMD logical op^Q.
+;; AND/BIC/ORR/ORN/EOR/BSL/BIT/BIF: ASIMD logical op^Q.
+(define_insn_reservation "neon_logic" 2
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_logic,\
+                        neon_logic_q,\
+                        neon_bsl,\
+                        neon_bsl_q,\
+                        neon_move,\
+                        neon_move_q,\
+                       "))
+  "decode1op,nothing")
+;; N.B. ^Q means that it only uses one decode slot.
+
+;; UMOV (imm5=xxx00): FP store op (data bypass path) + (integer) load op.
+;; UMOV (imm5=other): FP store op (data bypass path) + (integer) load op + ubfm op (1 cycle latency).
+;; 1 + 5 + 1
+(define_insn_reservation "neon_umov" 7
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_to_gp, neon_to_gp_q"))
+  "decodeIsolated,nothing*6")
+
+;; INS (element) (imm5=01000): FP move op.
+;; INS (element) (imm5=other): ASIMD shift op + ASIMD insert op.
+;; INS (general register) (imm5=01000): (integer) store op + FP load op.
+;; INS (general register) (imm5=other): (integer) store op + FP load op + ASIMD insert op.
+;; 1 + 10 + 3
+(define_insn_reservation "neon_ins" 14
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_from_gp,\
+                        neon_from_gp_q,\
+                        neon_ins,\
+                        neon_ins_q,\
+                       "))
+  "decodeIsolated,nothing*13")
+
+;; USHR/URSHR: ASIMD shift op.
+;; USHR/URSHR: ASIMD shift op^Q.
What are these ^Q markers? Are they an editor artifact?

+;; SSHR/SRSHR: ASIMD shift op.
+;; SSHR/SRSHR: ASIMD shift op^Q.
+;; SHL/SQSHL/SQSHLU: ASIMD shift op.
+;; SHL/SQSHL/SQSHLU: ASIMD shift op^Q.
+;; SSHL/SQSHL/SRSHL/SQRSHL: ASIMD shift op.
+;; SSHL/SQSHL/SRSHL/SQRSHL: ASIMD shift op^Q.
+;; USHL/UQSHL/URSHL/UQRSHL: ASIMD shift op.
+;; USHL/UQSHL/URSHL/UQRSHL: ASIMD shift op^Q.
+;; XTN/SQXTN/UQXTN/SQXTUN/SHLL: ASIMD shift op.
+;; SSHLL/USHLL: ASIMD shift op*2.
+(define_insn_reservation "neon_shift" 3
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_shift_imm,\
+                        neon_shift_imm_q,\
+                        neon_shift_reg,\
+                        neon_shift_reg_q,\
+                        neon_shift_imm_long,\
+                        neon_sat_shift_imm,\
+                        neon_sat_shift_imm_q,\
+                        neon_sat_shift_imm_narrow_q,\
+                        neon_sat_shift_reg,\
+                        neon_sat_shift_reg_q,\
+                        neon_shift_imm_narrow_q,\
+                       "))
+  "decode1op,nothing*2")
+
+;; ADD/SUB: ASIMD arithmetic op.
+;; ADD/SUB/ADDP: ASIMD arithmetic op^Q.
+;; SMAX/SMIN/SABD/SMAXP/SMINP: ASIMD arithmetic op^Q.
+;; UMAX/UMIN/UABD/UMAXP/UMINP: ASIMD arithmetic op^Q.
+;; USQADD/ABS/NEG: ASIMD arithmetic op.
+;; UADDLP/USQADD/ABS/NEG: ASIMD arithmetic op^Q.
+;; SHADD/SQADD/SRHADD/SHSUB/SQSUB: ASIMD arithmetic op^Q.
+;; UHADD/UQADD/URHADD/UHSUB/UQSUB: ASIMD arithmetic op^Q.
+
+;; SHADD/SQADD/SRHADD/SHSUB/SQSUB: ASIMD arithmetic op^Q.
+;; UHADD/UQADD/URHADD/UHSUB/UQSUB: ASIMD arithmetic op^Q.
+;; SADDLP/SUQADD/SQABS/SQNEG: ASIMD arithmetic op^Q.
+;; UADDLP/USQADD/ABS/NEG: ASIMD arithmetic op^Q.
+
+;; CMGT/CMGE/CMTST/CMHI/CMHS/CMEQ (register): ASIMD arithmetic op.
+;; CMGT/CMEQ/CMLT/CMGE/CMLE (zero): ASIMD arithmetic op.
+;; CMGT/CMGE/CMTST/CMHI/CMHS/CMEQ (register): ASIMD arithmetic op^Q.
+;; CMGT/CMEQ/CMLT/CMGE/CMLE (zero): ASIMD arithmetic op^Q.
+(define_insn_reservation "neon_arith" 3
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_add,\
+                        neon_add_q,\
+                        neon_sub,\
+                        neon_sub_q,\
+                        neon_neg,\
+                        neon_neg_q,\
+                        neon_abs,\
+                        neon_abs_q,\
+                        neon_abd_q,\
+                        neon_arith_acc,\
+                        neon_arith_acc_q,\
+                        neon_reduc_add,\
+                        neon_reduc_add_q,\
+                        neon_add_halve,\
+                        neon_add_halve_q,\
+                        neon_sub_halve,\
+                        neon_sub_halve_q,\
+                        neon_qadd,\
+                        neon_qadd_q,\
+                        neon_compare,\
+                        neon_compare_q,\
+                        neon_compare_zero,\
+                        neon_compare_zero_q,\
+                        neon_tst,\
+                        neon_tst_q,\
+                       "))
+  "decode1op,nothing*2")
+
+;; SABA/UABA: (ASIMD arithmetic op + ASIMD arithmetic op)^Q.
+;; 3*3
+(define_insn_reservation "neon_abs_diff" 6
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_arith_acc,neon_arith_acc_q"))
+  "decode2op,nothing*5")
+
+;; MUL/MLA/MLS/SQDMULH/SQRDMULH: ASIMD multiply op^Q.
+;; MUL/SQDMULH/SQRDMULH/MLA/MLS (by element): ASIMD multiply op^Q.
+;; SMLAL/SMLSL/SMULL/SQDMLAL/SQDMLSL/SQDMULL: ASIMD multiply op*2.
+;; SMULL/SMLAL/SMLSL (by element): ASIMD multiply op*2.
+;; UMULL/UMLAL/UMLSL (by element): ASIMD multiply op*2.
+;; FMUL/FMULX/FMLA/FMLS (by element): ASIMD multiply op.
+;; FMUL/FMULX/FMLA/FMLS (by element): ASIMD multiply op^Q.
+
+;; SQDMULH/SQRDMULH: ASIMD multiply op.
+;; SQDMULH/SQRDMULH (by element): ASIMD multiply op.
+;; MUL/MLA/MLS/SQDMULH/SQRDMULH: ASIMD multiply op^Q.
+;; MUL/SQDMULH/SQRDMULH/MLA/MLS (by element): ASIMD multiply op^Q.
+
+;; SQDMULL/SQDMLAL/SQDMLSL (by element): ASIMD multiply op*2.
+;; SQDMULL/SQDMLAL/SQDMLSL: ASIMD multiply op.
+(define_insn_reservation "neon_mul" 5
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_mul_b,\
+                        neon_mul_b_q,\
+                        neon_mul_h,\
+                        neon_mul_h_q,\
+                        neon_mul_s,\
+                        neon_mul_s_q,\
+                        neon_fp_mul_s_scalar,\
+                        neon_fp_mul_s_scalar_q,\
+                        neon_fp_mul_d_scalar_q,\
+                        neon_mla_b,neon_mla_b_q,\
+                        neon_mla_h,neon_mla_h_q,\
+                        neon_mla_s,neon_mla_s_q,\
+                        neon_mla_h_scalar,\
+                        neon_mla_h_scalar_q,\
+                        neon_mla_s_scalar,\
+                        neon_mla_s_scalar_q,\
+                        neon_mla_b_long,\
+                        neon_mla_h_long,\
+                        neon_mla_s_long,\
+                        neon_fp_mul_s,\
+                        neon_fp_mul_s_q,\
+                        neon_fp_mul_d,\
+                        neon_fp_mul_d_q,\
+                        neon_fp_mla_s,\
+                        neon_fp_mla_s_q,\
+                        neon_fp_mla_d,\
+                        neon_fp_mla_d_q,\
+                        neon_fp_mla_s_scalar,\
+                        neon_fp_mla_s_scalar_q,\
+                        neon_fp_mla_d_scalar_q,\
+                        neon_sat_mul_b,\
+                        neon_sat_mul_b_q,\
+                        neon_sat_mul_h,\
+                        neon_sat_mul_h_q,\
+                        neon_sat_mul_s,\
+                        neon_sat_mul_s_q,\
+                        neon_sat_mul_h_scalar,\
+                        neon_sat_mul_h_scalar_q,\
+                        neon_sat_mul_s_scalar,\
+                        neon_sat_mul_s_scalar_q,\
+                        neon_sat_mul_h_scalar_long,\
+                        neon_sat_mul_s_scalar_long,\
+                        neon_sat_mla_b_long,\
+                        neon_sat_mla_h_long,\
+                        neon_sat_mla_s_long,\
+                        neon_sat_mla_h_scalar_long,\
+                        neon_sat_mla_s_scalar_long,\
+                       "))
+  "decode2op,nothing*4")
+
+;; FMULX/FRECPS/FRSQRTS/FABD: FP arithmetic op.
+;; FABD: FP arithmetic op^Q.
+(define_insn_reservation "fp_abd_diff" 5
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_fp_abd_s,\
+                        neon_fp_abd_s_q,\
+                        neon_fp_abd_d,\
+                        neon_fp_abd_d_q,\
+                       "))
+  "decode1op,nothing*4")
+
+;; See #3565
+;; FMUL/FADD/FSUB/FNMUL: FP arithmetic op.
+(define_insn_reservation "neon_f_add" 5
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_fp_addsub_s,\
+                        neon_fp_addsub_s_q,\
+                        neon_fp_addsub_d,\
+                        neon_fp_addsub_d_q,\
+                       "))
+  "decode1op,nothing*5")
+
+;; FDIV: FP divide op^Q.
+(define_insn_reservation "neon_f_div" 28
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_fp_div_s,\
+                        neon_fp_div_s_q,\
+                        neon_fp_div_d,\
+                        neon_fp_div_d_q,\
+                       "))
+  "decode1op,fp_divide*27")
+
+;; FABS/FNEG: FP move op^Q.
+(define_insn_reservation "neon_f_neg" 2
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_fp_neg_s,\
+                        neon_fp_neg_s_q,\
+                        neon_fp_neg_d,\
+                        neon_fp_neg_d_q,\
+                        neon_fp_abs_s,\
+                        neon_fp_abs_s_q,\
+                        neon_fp_abs_d,\
+                        neon_fp_abs_d_q,\
+                       "))
+  "decode1op,nothing")
+
+;; FRINTN/FRINTM/FRINTA/FRINTP/FRINTZ/FRINTX/FRINTI: FP convert op^Q.
+(define_insn_reservation "neon_f_round" 5
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_fp_round_s,\
+                        neon_fp_round_s_q,\
+                        neon_fp_round_d,\
+                        neon_fp_round_d_q,\
+                       "))
+  "decode1op,nothing*4")
+
+;; FCVTNS/FCVTMS/FCVTAS/FCVTPS: FP convert op^Q.
+;; FCVTNU/FCVTMU/FCVTAU/FCVTPU: FP convert op^Q.
+;; FCVTZS/FCVTZU (integer): FP convert op^Q.
+;; FCVTN/FCVTL (size=0): FP half cvt op.
+(define_insn_reservation "neon_f_cvt" 5
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type"  "neon_int_to_fp_s,\
+                         neon_int_to_fp_s_q,\
+                         neon_int_to_fp_d,\
+                         neon_int_to_fp_d_q,\
+                         neon_fp_cvt_widen_s,\
+                         neon_fp_cvt_narrow_s_q,\
+                         neon_fp_cvt_narrow_d_q,\
+                        "))
+  "decode1op,nothing*4")
+
+;; FADD/FSUB/FMULX/FMLA/FMLS/FADDP: FP arithmetic op^Q.
+(define_insn_reservation "neon_f_reduc" 5
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_fp_reduc_add_s,\
+                        neon_fp_reduc_add_s_q,\
+                        neon_fp_reduc_add_d,\
+                        neon_fp_reduc_add_d_q,\
+                       "))
+  "decode1op,nothing*4")
+
+;; CLS/CLZ/CNT/NOT/RBIT: ASIMD logical op^Q.
+;; PMUL: ASIMD logical op^Q.
+(define_insn_reservation "neon_cls" 2
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_cls,neon_cls_q"))
+  "decode1op,nothing")
+
+;; ST1 (one register, 1D): FP store op.
+(define_insn_reservation "neon_st1" 4
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_store1_one_lane,\
+                        neon_store1_one_lane_q,\
+                       "))
+  "decode1op,nothing*3")
+
+;; ADDHN/SUBHN/RADDHN/RSUBHN: ASIMD arithmetic op*2 + ASIMD shift op.
+;; 3 + 3
+(define_insn_reservation "neon_halve_narrow" 6
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_sub_halve_narrow_q,\
+                        neon_add_halve_narrow_q,\
+                       "))
+  "decodeIsolated,nothing*5")
+
+;; SSRA/SRSRA: (ASIMD shift op + ASIMD arithmetic op).
+;; USRA/URSRA: (ASIMD shift op + ASIMD arithmetic op).
+;; SSRA/SRSRA: (ASIMD shift op + ASIMD arithmetic op)^Q.
+;; USRA/URSRA: (ASIMD shift op + ASIMD arithmetic op)^Q.
+;; 3 + 3
+(define_insn_reservation "neon_shift_acc" 6
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_shift_acc,\
+                        neon_shift_acc_q,\
+                       "))
+  "decode2op,nothing*5")
+
+;; FCMEQ/FCMGE/FCMGT/FACGE/FACGT: FP select op.
+;; FCMGT/FCMEQ/FCMLT/FCMGE/FCMLE (zero): FP select op.
+;; FCMEQ/FCMGE/FCMGT/FACGE/FACGT: FP select op^Q.
+;; FCMGT/FCMEQ/FCMLT/FCMGE/FCMLE (zero): FP select op^Q.
+(define_insn_reservation "neon_fp_compare" 3
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_fp_compare_s,\
+                        neon_fp_compare_s_q,\
+                        neon_fp_compare_d,\
+                        neon_fp_compare_d_q,\
+                       "))
+  "decode1op,nothing*2")
+
+;; FSQRT: FP sqrt op.
+(define_insn_reservation "neon_fp_sqrt" 38
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_fp_sqrt_s,\
+                        neon_fp_sqrt_s_q,\
+                        neon_fp_sqrt_d,\
+                        neon_fp_sqrt_d_q,\
+                       "))
+  "decode1op,fp_divide*37")

Similar concern to the integer divide comment above.

+
+;; See #3566
+;; TBL/TBX (single register table): (ASIMD logical op + ASIMD logical op)^Q.
+;; 2 + 2
+(define_insn_reservation "neon_tbl1" 4
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_tbl1,\
+                        neon_tbl1_q,\
+                       "))
+  "decode2op,nothing*2")
+
+;; TBL/TBX (two register table): (ASIMD logical op + ASIMD logical op + ASIMD logical op + ASIMD logical op)^Q.
+;; 2 + 2 + 2 + 2
+(define_insn_reservation "neon_tbl2" 8
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_tbl2,\
+                        neon_tbl2_q,\
+                       "))
+  "decodeIsolated,nothing*7")
+
+;; See #3565
+;; ZIP1/ZIP2/UZP1/UZP2 (Q=0): ASIMD shift op.
+;; ZIP1/ZIP2/UZP1/UZP2 (Q=1, size=11): ASIMD logical op*2.
+;; ZIP1/ZIP2/UZP1/UZP2 (Q=1, size=other): ASIMD shift op*2.
+;; TRN1/TRN2 (size=11): ASIMD logical op*2.
+;; TRN1/TRN2 (size=other): ASIMD shift op^Q.
+(define_insn_reservation "neon_permute" 3
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_permute,\
+                        neon_permute_q,\
+                       "))
+  "decode2op,nothing*2")
+
+;; LD1R: FP load op.
+(define_insn_reservation "neon_ld1r" 10
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_load1_all_lanes,\
+                       "))
+  "decode1op,nothing*9")
+
+;; FRECPE/FRECPX: ASIMD dre op.
+;; FRECPE/FRECPX: ASIMD dre op.
+(define_insn_reservation "neon_fp_recp" 3
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_fp_recpe_s,\
+                        neon_fp_recpe_s_q,\
+                        neon_fp_recpe_d,\
+                        neon_fp_recpe_d_q,\
+                        neon_fp_recpx_s,\
+                        neon_fp_recpx_s_q,\
+                        neon_fp_recpx_d,\
+                        neon_fp_recpx_d_q,\
+                       "))
+  "decode1op,nothing*2")
+
+
+;; FMULX/FRECPS/FRSQRTS/FABD: FP arithmetic op.
+;; FRECPS/FRSQRTS: FP arithmetic op^Q.
+(define_insn_reservation "neon_fp_recp_s" 5
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_fp_recps_s,\
+                        neon_fp_recps_s_q,\
+                        neon_fp_recps_d,\
+                        neon_fp_recps_d_q,\
+                       "))
+  "decode1op,nothing*4")
+
+;; See #3566
+;; PMULL: ASIMD polymul op*2.
+(define_insn_reservation "neon_pmull" 5
+  (and (eq_attr "tune" "xgene1")
+       (eq_attr "type" "neon_mul_d_long,\
+                       "))
+  "decode2op,nothing*4")
--
1.9.1





Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]