This is the mail archive of the
gcc-patches@gcc.gnu.org
mailing list for the GCC project.
[PATCH V2 4/7] sparc: reworked M7 DFA based on instruction subtypes
- From: "Jose E. Marchesi" <jose dot marchesi at oracle dot com>
- To: gcc-patches at gcc dot gnu dot org
- Date: Wed, 5 Jul 2017 16:15:23 +0200
- Subject: [PATCH V2 4/7] sparc: reworked M7 DFA based on instruction subtypes
- Authentication-results: sourceware.org; auth=none
- References: <1499264126-31635-1-git-send-email-jose.marchesi@oracle.com>
This patch reworks the M7 DFA scheduler to use instruction subtypes. It
also removes the v3pipe insn attribute from sparc.md, as it is no longer
needed.
gcc/ChangeLog:
* config/sparc/niagara7.md: Rework the DFA scheduler to use insn
subtypes.
* config/sparc/sparc.md: Remove the `v3pipe' insn attribute.
("*movdi_insn_sp32"): Likewise.
("*movsi_insn"): Likewise.
("*movdi_insn_sp64"): Likewise.
("*movsf_insn"): Likewise.
("*movdf_insn_sp32"): Likewise.
("*movdf_insn_sp64"): Likewise.
("*zero_extendsidi2_insn_sp64"): Likewise.
("*sign_extendsidi2_insn"): Likewise.
("*mov<VM32:mode>_insn"): Likewise.
("*mov<VM64:mode>_insn_sp64"): Likewise.
("*mov<VM64:mode>_insn_sp32"): Likewise.
("<plusminus_insn><VADDSUB:mode>3"): Likewise.
("<vlop:code><VL:mode>3"): Likewise.
("*not_<vlop:code><VL:mode>3"): Likewise.
("*nand<VL:mode>_vis"): Likewise.
("*<vlnotop:code>_not1<VL:mode>_vis"): Likewise.
("*<vlnotop:code>_not2<VL:mode>_vis"): Likewise.
("one_cmpl<VL:mode>2"): Likewise.
("faligndata<VM64:mode>_vis"): Likewise.
("alignaddrsi_vis"): Likewise.
("alignaddrdi_vis"): Likewise.
("alignaddrlsi_vis"): Likewise.
("alignaddrldi_vis"): Likewise.
("fcmp<gcond:code><GCM:gcm_name><P:mode>_vis"): Likewise.
("bmaskdi_vis"): Likewise.
("bmasksi_vis"): Likewise.
("bshuffle<VM64:mode>_vis"): Likewise.
("cmask8<P:mode>_vis"): Likewise.
("cmask16<P:mode>_vis"): Likewise.
("cmask32<P:mode>_vis"): Likewise.
("pdistn<P:mode>_vis"): Likewise.
("<vis3_addsub_ss_patname><VASS:mode>3"): Likewise.
---
gcc/ChangeLog | 38 +++++++++
gcc/config/sparc/niagara7.md | 181 ++++++++++++++++++++++++++++++-------------
gcc/config/sparc/sparc.md | 93 +++++++---------------
3 files changed, 192 insertions(+), 120 deletions(-)
diff --git a/gcc/config/sparc/niagara7.md b/gcc/config/sparc/niagara7.md
index 3f46198..23b6707 100644
--- a/gcc/config/sparc/niagara7.md
+++ b/gcc/config/sparc/niagara7.md
@@ -19,64 +19,120 @@
(define_automaton "niagara7_0")
-(define_cpu_unit "n7_slot0,n7_slot1,n7_slot2" "niagara7_0")
-(define_reservation "n7_single_issue" "n7_slot0 + n7_slot1 + n7_slot2")
+;; The S4 core has a dual-issue queue. This queue is divided into two
+;; slots. One instruction can be issued each cycle to each slot, and
+;; up to 2 instructions are committed each cycle. Each slot serves
+;; several execution units, as depicted below:
+;;
+;;
+;; n7_slot0 - Integer unit.
+;; - Load/Store unit.
+;; === QUEUE ==>
+;;
+;; n7_slot1 - Integer unit.
+;; - Branch unit.
+;; - Floating-point and graphics unit.
+;; - 3-cycle crypto unit.
-(define_cpu_unit "n7_load_store" "niagara7_0")
+(define_cpu_unit "n7_slot0,n7_slot1" "niagara7_0")
+
+;; Some instructions stall the pipeline and prevent any other
+;; instruction from being issued in the same cycle. We assume the
+;; same for multi-instruction insns.
+
+(define_reservation "n7_single_issue" "n7_slot0 + n7_slot1")
(define_insn_reservation "n7_single" 1
(and (eq_attr "cpu" "niagara7")
(eq_attr "type" "multi,savew,flushw,trap"))
"n7_single_issue")
-(define_insn_reservation "n7_iflush" 27
- (and (eq_attr "cpu" "niagara7")
- (eq_attr "type" "iflush"))
- "(n7_slot0 | n7_slot1), nothing*26")
+;; Most of the instructions executing in the integer unit have a
+;; latency of 1.
(define_insn_reservation "n7_integer" 1
(and (eq_attr "cpu" "niagara7")
(eq_attr "type" "ialu,ialuX,shift,cmove,compare"))
"(n7_slot0 | n7_slot1)")
+;; Flushing the instruction memory takes 27 cycles.
+
+(define_insn_reservation "n7_iflush" 27
+ (and (eq_attr "cpu" "niagara7")
+ (eq_attr "type" "iflush"))
+ "(n7_slot0 | n7_slot1), nothing*26")
+
+;; The integer multiplication instructions have a latency of 12 cycles
+;; and execute in the integer unit.
+;;
+;; Likewise for array*, edge* and pdistn instructions.
+
(define_insn_reservation "n7_imul" 12
(and (eq_attr "cpu" "niagara7")
- (eq_attr "type" "imul"))
- "n7_slot1, nothing*11")
+ (eq_attr "type" "imul,array,edge,edgen,pdistn"))
+ "(n7_slot0 | n7_slot1), nothing*11")
+
+;; The integer division instructions have a latency of 35 cycles and
+;; execute in the integer unit.
(define_insn_reservation "n7_idiv" 35
(and (eq_attr "cpu" "niagara7")
(eq_attr "type" "idiv"))
- "n7_slot1, nothing*34")
+ "(n7_slot0 | n7_slot1), nothing*34")
+
+;; Both integer and floating-point load instructions have a latency of
+;; 5 cycles, and execute in the slot0.
+;;
+;; The prefetch instruction also executes in the load/store unit, but
+;; its latency is only 1 cycle.
(define_insn_reservation "n7_load" 5
(and (eq_attr "cpu" "niagara7")
- (eq_attr "type" "load,fpload,sload"))
- "(n7_slot0 + n7_load_store), nothing*4")
+ (ior (eq_attr "type" "fpload,sload")
+ (and (eq_attr "type" "load")
+ (eq_attr "subtype" "regular"))))
+ "n7_slot0, nothing*4")
+
+(define_insn_reservation "n7_prefetch" 1
+ (and (eq_attr "cpu" "niagara7")
+ (eq_attr "type" "load")
+ (eq_attr "subtype" "prefetch"))
+ "n7_slot0")
+
+;; Both integer and floating-point store instructions have a latency
+;; of 1 cycle, and execute in the load/store unit in slot0.
(define_insn_reservation "n7_store" 1
(and (eq_attr "cpu" "niagara7")
(eq_attr "type" "store,fpstore"))
- "(n7_slot0 | n7_slot2) + n7_load_store")
+ "n7_slot0")
+
+;; Control-transfer instructions execute in the Branch Unit in the
+;; slot1.
(define_insn_reservation "n7_cti" 1
(and (eq_attr "cpu" "niagara7")
(eq_attr "type" "cbcond,uncond_cbcond,branch,call,sibcall,call_no_delay_slot,uncond_branch,return"))
"n7_slot1")
+;; Many instructions executing in the Floating-point and Graphics unit
+;; in the slot1 feature a latency of 11 cycles.
+
(define_insn_reservation "n7_fp" 11
(and (eq_attr "cpu" "niagara7")
- (eq_attr "type" "fpmove,fpcmove,fpcrmove,fp,fpcmp,fpmul"))
+ (ior (eq_attr "type" "fpmove,fpcmove,fpcrmove,fp,fpcmp,fpmul,fgm_pack,fgm_mul,pdist")
+ (and (eq_attr "type" "fga")
+ (eq_attr "subtype" "fpu,maxmin"))))
"n7_slot1, nothing*10")
-(define_insn_reservation "n7_array" 12
- (and (eq_attr "cpu" "niagara7")
- (eq_attr "type" "array,bmask,edge,edgen"))
- "n7_slot1, nothing*11")
+;; Floating-point division and floating-point square-root instructions
+;; have high latencies. They execute in the floating-point and
+;; graphics unit in the slot1.
+
(define_insn_reservation "n7_fpdivs" 24
(and (eq_attr "cpu" "niagara7")
- (eq_attr "type" "fpdivs,fpsqrts"))
+ (eq_attr "type" "fpdivs,fpsqrts"))
"n7_slot1, nothing*23")
(define_insn_reservation "n7_fpdivd" 37
@@ -84,53 +140,66 @@
(eq_attr "type" "fpdivd,fpsqrtd"))
"n7_slot1, nothing*36")
-(define_insn_reservation "n7_lzd" 12
- (and (eq_attr "cpu" "niagara7")
- (eq_attr "type" "lzd"))
- "(n7_slot0 | n7_slot1), nothing*11")
-
-;; There is an internal unit called the "V3 pipe", that was originally
-;; intended to process some of the short cryptographic instructions.
-;; However, as soon as in the T4 several of the VIS instructions
-;; (notably non-FP instructions) have been moved to the V3 pipe.
-;; Consequently, these instructions feature a latency of 3 instead of
-;; 11 or 12 cycles, provided their consumers also execute in the V3
-;; pipe.
+;; SIMD VIS instructions executing in the Floating-point and graphics
+;; unit (FPG) in slot1 usually have a latency of either 11 or 12
+;; cycles.
;;
-;; This is modelled here with a bypass.
+;; However, the latency for many instructions is only 3 cycles if the
+;; consumer can also be executed in 3 cycles. We model this with a
+;; bypass. In these cases the instructions are executed in the
+;; 3-cycle crypto unit which also serves slot1.
+
+(define_insn_reservation "n7_vis_11cycles" 11
+ (and (eq_attr "cpu" "niagara7")
+ (ior (and (eq_attr "type" "fga")
+ (eq_attr "subtype" "addsub64,other"))
+ (and (eq_attr "type" "vismv")
+ (eq_attr "subtype" "double,single"))
+ (and (eq_attr "type" "visl")
+ (eq_attr "subtype" "double,single"))))
+ "n7_slot1, nothing*10")
-(define_insn_reservation "n7_vis_fga" 11
+(define_insn_reservation "n7_vis_12cycles" 12
(and (eq_attr "cpu" "niagara7")
- (eq_attr "type" "fga,gsr"))
- "n7_slot1, nothing*10")
+ (ior (eq_attr "type" "bmask,viscmp")
+ (and (eq_attr "type" "fga")
+ (eq_attr "subtype" "cmask"))
+ (and (eq_attr "type" "vismv")
+ (eq_attr "subtype" "movstouw"))))
+ "n7_slot1, nothing*11")
+
+(define_bypass 3 "n7_vis_*" "n7_vis_*")
+
+;; lzd and the alignaddr* instructions have a latency of 12 cycles and
+;; won't be executed in the 3-cycle crypto pipe (no n7_vis_* bypass).
-(define_insn_reservation "n7_vis_fgm" 11
+(define_insn_reservation "n7_lzd" 12
(and (eq_attr "cpu" "niagara7")
- (eq_attr "type" "fgm_pack,fgm_mul,pdist"))
- "n7_slot1, nothing*10")
+ (ior (eq_attr "type" "lzd")
+ (and (eq_attr "type" "gsr")
+ (eq_attr "subtype" "alignaddr"))))
+ "n7_slot1, nothing*11")
-(define_insn_reservation "n7_vis_move_v3pipe" 11
+;; A couple of VIS instructions feature very low latencies in the M7.
+
+(define_insn_reservation "n7_single_vis" 1
(and (eq_attr "cpu" "niagara7")
- (and (eq_attr "type" "vismv")
- (eq_attr "v3pipe" "true")))
+ (eq_attr "type" "vismv")
+ (eq_attr "subtype" "movxtod"))
"n7_slot1")
-(define_insn_reservation "n7_vis_move_11cycle" 11
+(define_insn_reservation "n7_double_vis" 2
(and (eq_attr "cpu" "niagara7")
- (and (eq_attr "type" "vismv")
- (eq_attr "v3pipe" "false")))
- "n7_slot1, nothing*10")
+ (eq_attr "type" "vismv")
+ (eq_attr "subtype" "movdtox"))
+ "n7_slot1, nothing")
-(define_insn_reservation "n7_vis_logical_v3pipe" 11
- (and (eq_attr "cpu" "niagara7")
- (and (eq_attr "type" "visl,viscmp,pdistn")
- (eq_attr "v3pipe" "true")))
- "n7_slot1, nothing*2")
+;; Reading and writing to the gsr register takes a high number of
+;; cycles that is not documented in the PRM. Let's use the same value
+;; as the M8.
-(define_insn_reservation "n7_vis_logical_11cycle" 11
+(define_insn_reservation "n7_gsr_reg" 70
(and (eq_attr "cpu" "niagara7")
- (and (eq_attr "type" "visl,viscmp")
- (eq_attr "v3pipe" "false")))
- "n7_slot1, nothing*10")
-
-(define_bypass 3 "n7*_v3pipe" "n7_*_v3pipe")
+ (eq_attr "type" "gsr")
+ (eq_attr "subtype" "reg"))
+ "n7_slot1, nothing*69")
diff --git a/gcc/config/sparc/sparc.md b/gcc/config/sparc/sparc.md
index d1bf6a7..b550f037 100644
--- a/gcc/config/sparc/sparc.md
+++ b/gcc/config/sparc/sparc.md
@@ -574,9 +574,6 @@
(const_string "true")
] (const_string "false")))
-;; True if the instruction executes in the V3 pipeline, in M7 and later processors.
-(define_attr "v3pipe" "false,true" (const_string "false"))
-
(define_delay (eq_attr "type" "call")
[(eq_attr "in_call_delay" "true") (nil) (nil)])
@@ -1656,8 +1653,7 @@
fones\t%0"
[(set_attr "type" "*,*,load,store,vismv,vismv,fpmove,fpload,fpstore,visl,visl")
(set_attr "subtype" "*,*,regular,*,movstouw,single,*,*,*,single,single")
- (set_attr "cpu_feature" "*,*,*,*,vis3,vis3,*,*,*,vis,vis")
- (set_attr "v3pipe" "*,*,*,*,true,true,*,*,*,true,true")])
+ (set_attr "cpu_feature" "*,*,*,*,vis3,vis3,*,*,*,vis,vis")])
(define_insn "*movsi_lo_sum"
[(set (match_operand:SI 0 "register_operand" "=r")
@@ -1830,7 +1826,6 @@ visl")
(set_attr "length" "*,2,*,*,*,*,2,2,*,*,2,2,*,2,2,2,*,*,*,*")
(set_attr "fptype" "*,*,*,*,*,*,*,*,*,*,*,*,double,*,*,*,*,*,double,double")
(set_attr "cpu_feature" "v9,*,*,*,*,*,*,*,fpu,fpu,fpu,fpu,v9,fpunotv9,vis3,vis3,fpu,fpu,vis,vis")
- (set_attr "v3pipe" "*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,true,true")
(set_attr "lra" "*,*,disabled,disabled,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*")])
(define_insn "*movdi_insn_sp64"
@@ -1854,8 +1849,7 @@ visl")
[(set_attr "type" "*,*,load,store,vismv,vismv,fpmove,fpload,fpstore,visl,visl")
(set_attr "subtype" "*,*,regular,*,movdtox,movxtod,*,*,*,double,double")
(set_attr "fptype" "*,*,*,*,*,*,double,*,*,double,double")
- (set_attr "cpu_feature" "*,*,*,*,vis3,vis3,*,*,*,vis,vis")
- (set_attr "v3pipe" "*,*,*,*,*,*,*,*,*,true,true")])
+ (set_attr "cpu_feature" "*,*,*,*,vis3,vis3,*,*,*,vis,vis")])
(define_expand "movdi_pic_label_ref"
[(set (match_dup 3) (high:DI
@@ -2385,8 +2379,7 @@ visl")
}
[(set_attr "type" "visl,visl,fpmove,*,*,*,vismv,vismv,fpload,load,fpstore,store")
(set_attr "subtype" "single,single,*,*,*,*,movstouw,single,*,regular,*,*")
- (set_attr "cpu_feature" "vis,vis,fpu,*,*,*,vis3,vis3,fpu,*,fpu,*")
- (set_attr "v3pipe" "true,true,*,*,*,*,true,true,*,*,*,*")])
+ (set_attr "cpu_feature" "vis,vis,fpu,*,*,*,vis3,vis3,fpu,*,fpu,*")])
;; The following 3 patterns build SFmode constants in integer registers.
@@ -2462,7 +2455,6 @@ visl")
(set_attr "length" "*,2,*,*,*,2,2,2,*,*,*,*,2,2,2,*,*")
(set_attr "fptype" "*,*,double,double,double,*,*,*,*,*,*,*,*,*,*,*,*")
(set_attr "cpu_feature" "v9,*,vis,vis,v9,fpunotv9,vis3,vis3,fpu,fpu,*,*,fpu,fpu,*,*,*")
- (set_attr "v3pipe" "*,*,true,true,*,*,*,*,*,*,*,*,*,*,*,*,*")
(set_attr "lra" "*,*,*,*,*,*,*,*,*,*,disabled,disabled,*,*,*,*,*")])
(define_insn "*movdf_insn_sp64"
@@ -2487,8 +2479,7 @@ visl")
(set_attr "subtype" "double,double,*,movdtox,movxtod,regular,*,*,regular,*,*")
(set_attr "length" "*,*,*,*,*,*,*,*,*,*,2")
(set_attr "fptype" "double,double,double,double,double,*,*,*,*,*,*")
- (set_attr "cpu_feature" "vis,vis,fpu,vis3,vis3,fpu,fpu,*,*,*,*")
- (set_attr "v3pipe" "true,true,*,*,*,*,*,*,*,*,*")])
+ (set_attr "cpu_feature" "vis,vis,fpu,vis3,vis3,fpu,fpu,*,*,*,*")])
;; This pattern builds DFmode constants in integer registers.
(define_split
@@ -3119,8 +3110,7 @@ visl")
movstouw\t%1, %0"
[(set_attr "type" "shift,load,vismv")
(set_attr "subtype" "*,regular,movstouw")
- (set_attr "cpu_feature" "*,*,vis3")
- (set_attr "v3pipe" "*,*,true")])
+ (set_attr "cpu_feature" "*,*,vis3")])
(define_insn_and_split "*zero_extendsidi2_insn_sp32"
[(set (match_operand:DI 0 "register_operand" "=r")
@@ -3435,8 +3425,7 @@ visl")
movstosw\t%1, %0"
[(set_attr "type" "shift,sload,vismv")
(set_attr "us3load_type" "*,3cycle,*")
- (set_attr "cpu_feature" "*,*,vis3")
- (set_attr "v3pipe" "*,*,true")])
+ (set_attr "cpu_feature" "*,*,vis3")])
;; Special pattern for optimizing bit-field compares. This is needed
@@ -8645,8 +8634,7 @@ visl")
movwtos\t%1, %0"
[(set_attr "type" "visl,visl,vismv,fpload,fpstore,store,load,store,*,vismv,vismv")
(set_attr "subtype" "single,single,single,*,*,*,regular,*,*,movstouw,single")
- (set_attr "cpu_feature" "vis,vis,vis,*,*,*,*,*,*,vis3,vis3")
- (set_attr "v3pipe" "true,true,true,*,*,*,*,*,*,true,true")])
+ (set_attr "cpu_feature" "vis,vis,vis,*,*,*,*,*,*,vis3,vis3")])
(define_insn "*mov<VM64:mode>_insn_sp64"
[(set (match_operand:VM64 0 "nonimmediate_operand" "=e,e,e,e,W,m,*r, m,*r, e,*r")
@@ -8669,8 +8657,7 @@ visl")
mov\t%1, %0"
[(set_attr "type" "visl,visl,vismv,fpload,fpstore,store,load,store,vismv,vismv,*")
(set_attr "subtype" "double,double,double,*,*,*,regular,*,movdtox,movxtod,*")
- (set_attr "cpu_feature" "vis,vis,vis,*,*,*,*,*,vis3,vis3,*")
- (set_attr "v3pipe" "true,true,true,*,*,*,*,*,*,*,*")])
+ (set_attr "cpu_feature" "vis,vis,vis,*,*,*,*,*,vis3,vis3,*")])
(define_insn "*mov<VM64:mode>_insn_sp32"
[(set (match_operand:VM64 0 "nonimmediate_operand"
@@ -8702,7 +8689,6 @@ visl")
(set_attr "subtype" "*,*,double,double,double,*,*,*,*,regular,*,*,*,*,regular,*")
(set_attr "length" "*,2,*,*,*,2,2,*,*,*,*,2,2,2,*,*")
(set_attr "cpu_feature" "*,*,vis,vis,vis,vis3,vis3,*,*,*,*,*,*,*,*,*")
- (set_attr "v3pipe" "*,*,true,true,true,*,*,*,*,*,*,*,*,*,*,*")
(set_attr "lra" "*,*,*,*,*,*,*,*,*,disabled,disabled,*,*,*,*,*")])
(define_split
@@ -8781,8 +8767,7 @@ visl")
"fp<plusminus_insn><vbits>\t%1, %2, %0"
[(set_attr "type" "fga")
(set_attr "subtype" "other")
- (set_attr "fptype" "<vfptype>")
- (set_attr "v3pipe" "true")])
+ (set_attr "fptype" "<vfptype>")])
(define_mode_iterator VL [V1SI V2HI V4QI V1DI V2SI V4HI V8QI])
(define_mode_attr vlsuf [(V1SI "s") (V2HI "s") (V4QI "s")
@@ -8798,8 +8783,7 @@ visl")
"TARGET_VIS"
"f<vlinsn><vlsuf>\t%1, %2, %0"
[(set_attr "type" "visl")
- (set_attr "fptype" "<vfptype>")
- (set_attr "v3pipe" "true")])
+ (set_attr "fptype" "<vfptype>")])
(define_insn "*not_<vlop:code><VL:mode>3"
[(set (match_operand:VL 0 "register_operand" "=<vconstr>")
@@ -8808,8 +8792,7 @@ visl")
"TARGET_VIS"
"f<vlninsn><vlsuf>\t%1, %2, %0"
[(set_attr "type" "visl")
- (set_attr "fptype" "<vfptype>")
- (set_attr "v3pipe" "true")])
+ (set_attr "fptype" "<vfptype>")])
;; (ior (not (op1)) (not (op2))) is the canonical form of NAND.
(define_insn "*nand<VL:mode>_vis"
@@ -8819,8 +8802,7 @@ visl")
"TARGET_VIS"
"fnand<vlsuf>\t%1, %2, %0"
[(set_attr "type" "visl")
- (set_attr "fptype" "<vfptype>")
- (set_attr "v3pipe" "true")])
+ (set_attr "fptype" "<vfptype>")])
(define_code_iterator vlnotop [ior and])
@@ -8831,8 +8813,7 @@ visl")
"TARGET_VIS"
"f<vlinsn>not1<vlsuf>\t%1, %2, %0"
[(set_attr "type" "visl")
- (set_attr "fptype" "<vfptype>")
- (set_attr "v3pipe" "true")])
+ (set_attr "fptype" "<vfptype>")])
(define_insn "*<vlnotop:code>_not2<VL:mode>_vis"
[(set (match_operand:VL 0 "register_operand" "=<vconstr>")
@@ -8841,8 +8822,7 @@ visl")
"TARGET_VIS"
"f<vlinsn>not2<vlsuf>\t%1, %2, %0"
[(set_attr "type" "visl")
- (set_attr "fptype" "<vfptype>")
- (set_attr "v3pipe" "true")])
+ (set_attr "fptype" "<vfptype>")])
(define_insn "one_cmpl<VL:mode>2"
[(set (match_operand:VL 0 "register_operand" "=<vconstr>")
@@ -8850,8 +8830,7 @@ visl")
"TARGET_VIS"
"fnot1<vlsuf>\t%1, %0"
[(set_attr "type" "visl")
- (set_attr "fptype" "<vfptype>")
- (set_attr "v3pipe" "true")])
+ (set_attr "fptype" "<vfptype>")])
;; Hard to generate VIS instructions. We have builtins for these.
@@ -9054,8 +9033,7 @@ visl")
"faligndata\t%1, %2, %0"
[(set_attr "type" "fga")
(set_attr "subtype" "other")
- (set_attr "fptype" "double")
- (set_attr "v3pipe" "true")])
+ (set_attr "fptype" "double")])
(define_insn "alignaddrsi_vis"
[(set (match_operand:SI 0 "register_operand" "=r")
@@ -9066,8 +9044,7 @@ visl")
"TARGET_VIS"
"alignaddr\t%r1, %r2, %0"
[(set_attr "type" "gsr")
- (set_attr "subtype" "alignaddr")
- (set_attr "v3pipe" "true")])
+ (set_attr "subtype" "alignaddr")])
(define_insn "alignaddrdi_vis"
[(set (match_operand:DI 0 "register_operand" "=r")
@@ -9078,8 +9055,7 @@ visl")
"TARGET_VIS"
"alignaddr\t%r1, %r2, %0"
[(set_attr "type" "gsr")
- (set_attr "subtype" "alignaddr")
- (set_attr "v3pipe" "true")])
+ (set_attr "subtype" "alignaddr")])
(define_insn "alignaddrlsi_vis"
[(set (match_operand:SI 0 "register_operand" "=r")
@@ -9091,8 +9067,7 @@ visl")
"TARGET_VIS"
"alignaddrl\t%r1, %r2, %0"
[(set_attr "type" "gsr")
- (set_attr "subtype" "alignaddr")
- (set_attr "v3pipe" "true")])
+ (set_attr "subtype" "alignaddr")])
(define_insn "alignaddrldi_vis"
[(set (match_operand:DI 0 "register_operand" "=r")
@@ -9104,8 +9079,7 @@ visl")
"TARGET_VIS"
"alignaddrl\t%r1, %r2, %0"
[(set_attr "type" "gsr")
- (set_attr "subtype" "alignaddr")
- (set_attr "v3pipe" "true")])
+ (set_attr "subtype" "alignaddr")])
(define_insn "pdist_vis"
[(set (match_operand:DI 0 "register_operand" "=e")
@@ -9197,8 +9171,7 @@ visl")
UNSPEC_FCMP))]
"TARGET_VIS"
"fcmp<gcond:code><GCM:gcm_name>\t%1, %2, %0"
- [(set_attr "type" "viscmp")
- (set_attr "v3pipe" "true")])
+ [(set_attr "type" "viscmp")])
(define_insn "fpcmp<gcond:code>8<P:mode>_vis"
[(set (match_operand:P 0 "register_operand" "=r")
@@ -9270,8 +9243,7 @@ visl")
(plus:DI (match_dup 1) (match_dup 2)))]
"TARGET_VIS2 && TARGET_ARCH64"
"bmask\t%r1, %r2, %0"
- [(set_attr "type" "bmask")
- (set_attr "v3pipe" "true")])
+ [(set_attr "type" "bmask")])
(define_insn "bmasksi_vis"
[(set (match_operand:SI 0 "register_operand" "=r")
@@ -9281,8 +9253,7 @@ visl")
(zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))]
"TARGET_VIS2"
"bmask\t%r1, %r2, %0"
- [(set_attr "type" "bmask")
- (set_attr "v3pipe" "true")])
+ [(set_attr "type" "bmask")])
(define_insn "bshuffle<VM64:mode>_vis"
[(set (match_operand:VM64 0 "register_operand" "=e")
@@ -9294,8 +9265,7 @@ visl")
"bshuffle\t%1, %2, %0"
[(set_attr "type" "fga")
(set_attr "subtype" "other")
- (set_attr "fptype" "double")
- (set_attr "v3pipe" "true")])
+ (set_attr "fptype" "double")])
;; The rtl expanders will happily convert constant permutations on other
;; modes down to V8QI. Rely on this to avoid the complexity of the byte
@@ -9398,8 +9368,7 @@ visl")
"TARGET_VIS3"
"cmask8\t%r0"
[(set_attr "type" "fga")
- (set_attr "subtype" "cmask")
- (set_attr "v3pipe" "true")])
+ (set_attr "subtype" "cmask")])
(define_insn "cmask16<P:mode>_vis"
[(set (reg:DI GSR_REG)
@@ -9409,8 +9378,7 @@ visl")
"TARGET_VIS3"
"cmask16\t%r0"
[(set_attr "type" "fga")
- (set_attr "subtype" "cmask")
- (set_attr "v3pipe" "true")])
+ (set_attr "subtype" "cmask")])
(define_insn "cmask32<P:mode>_vis"
[(set (reg:DI GSR_REG)
@@ -9420,8 +9388,7 @@ visl")
"TARGET_VIS3"
"cmask32\t%r0"
[(set_attr "type" "fga")
- (set_attr "subtype" "cmask")
- (set_attr "v3pipe" "true")])
+ (set_attr "subtype" "cmask")])
(define_insn "fchksm16_vis"
[(set (match_operand:V4HI 0 "register_operand" "=e")
@@ -9456,8 +9423,7 @@ visl")
"TARGET_VIS3"
"pdistn\t%1, %2, %0"
[(set_attr "type" "pdistn")
- (set_attr "fptype" "double")
- (set_attr "v3pipe" "true")])
+ (set_attr "fptype" "double")])
(define_insn "fmean16_vis"
[(set (match_operand:V4HI 0 "register_operand" "=e")
@@ -9509,8 +9475,7 @@ visl")
"TARGET_VIS3"
"<vis3_addsub_ss_insn><vbits>\t%1, %2, %0"
[(set_attr "type" "fga")
- (set_attr "subtype" "other")
- (set_attr "v3pipe" "true")])
+ (set_attr "subtype" "other")])
(define_mode_iterator VMMAX [V8QI V4HI V2SI])
(define_code_iterator vis4_minmax [smin smax])
--
2.3.4