This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.



Re: [PATCH]: Machine independent patch, was: Update SSE5 vector multiplication, shift, rotate



> > To some extent, it would be cleaner if we changed all of the backends so
> > that SHL/SHR, etc., of vector types is always a vector/vector shift, and
> > then created new standard names for vector/scalar shifts.  We could then
> > extend this to other vector/scalar combinations, such as vector
> > multiply/add by a scalar (I can imagine some optimizations if we had the
> > vector/scalar ops).  However, it is a flag-day operation where you need
> > to change several ports at the same time.

> Sure, but IMHO there's no reason to add this (IMHO confusing) code when
> there are not many targets to convert and converting them is easy.  See
> the follow-ups from Paolo, with which I agree.

I largely agree with this (Michael's point).  OTOH, if you want to go the
other way, I'm attaching a patch that converts the SPU port (mostly
untested); it goes with the patch I already posted for AltiVec.
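
Roughly, the distinction under discussion is this: the vashl<mode>3 standard
name is the vector/vector form, where each element of operand 1 is shifted by
the corresponding element of a vector of counts, while a vector/scalar form
shifts every element by one scalar count.  A minimal sketch of the two shapes
on a hypothetical target (made-up constraints and a placeholder asm template,
not taken from any real port):

;; Vector/vector: per-element shift, counts supplied as a vector.
(define_insn "vashlv4si3"
  [(set (match_operand:V4SI 0 "register_operand" "=r")
        (ashift:V4SI (match_operand:V4SI 1 "register_operand" "r")
                     (match_operand:V4SI 2 "register_operand" "r")))]
  ""
  "...")

;; Vector/scalar: every element shifted by the same SImode count
;; (what a separate vector/scalar standard name would describe).
(define_insn "ashlv4si3"
  [(set (match_operand:V4SI 0 "register_operand" "=r")
        (ashift:V4SI (match_operand:V4SI 1 "register_operand" "r")
                     (match_operand:SI 2 "register_operand" "r")))]
  ""
  "...")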


Paolo
2008-04-22  Paolo Bonzini  <bonzini@gnu.org>

	* config/spu/spu.md: Use the <v> mode attribute for shifts and rotates;
	adjust callers of the renamed patterns.
	* config/spu/spu-builtins.def: Use CODE_FOR_vashl* instead of
	CODE_FOR_ashl* for vector modes.
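
For clarity, the <v> attribute works roughly like this (a cut-down
illustration with a made-up iterator, not the actual spu.md definitions):
the attribute maps vector modes to "v" and scalar modes to "", so a single
pattern named "<v>ashl<mode>3" comes out as vashlv4si3 for V4SI but plain
ashlsi3 for SI, and the vector variants pick up the new standard names
without duplicating any patterns:

(define_mode_iterator VECSCAL [V4SI SI])   ; made-up iterator name
(define_mode_attr v [(V4SI "v") (SI "")])

(define_insn "<v>ashl<mode>3"              ; yields vashlv4si3 and ashlsi3
  [(set (match_operand:VECSCAL 0 "register_operand" "=r")
        (ashift:VECSCAL (match_operand:VECSCAL 1 "register_operand" "r")
                        (match_operand:VECSCAL 2 "register_operand" "r")))]
  ""
  "...")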

Index: spu-builtins.def
===================================================================
--- spu-builtins.def	(revision 134435)
+++ spu-builtins.def	(working copy)
@@ -107,10 +107,10 @@ DEF_BUILTIN (SI_NOR,         CODE_FOR_no
 DEF_BUILTIN (SI_EQV,         CODE_FOR_eqv_v16qi,     "si_eqv",         B_INSN,   _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
 DEF_BUILTIN (SI_SELB,        CODE_FOR_selb,          "si_selb",        B_INSN,   _A4(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
 DEF_BUILTIN (SI_SHUFB,       CODE_FOR_shufb,         "si_shufb",       B_INSN,   _A4(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
-DEF_BUILTIN (SI_SHLH,        CODE_FOR_ashlv8hi3,     "si_shlh",        B_INSN,   _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
-DEF_BUILTIN (SI_SHLHI,       CODE_FOR_ashlv8hi3,     "si_shlhi",       B_INSN,   _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_7))
-DEF_BUILTIN (SI_SHL,         CODE_FOR_ashlv4si3,     "si_shl",         B_INSN,   _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
-DEF_BUILTIN (SI_SHLI,        CODE_FOR_ashlv4si3,     "si_shli",        B_INSN,   _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_7))
+DEF_BUILTIN (SI_SHLH,        CODE_FOR_vashlv8hi3,     "si_shlh",        B_INSN,   _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_SHLHI,       CODE_FOR_vashlv8hi3,     "si_shlhi",       B_INSN,   _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_7))
+DEF_BUILTIN (SI_SHL,         CODE_FOR_vashlv4si3,     "si_shl",         B_INSN,   _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_SHLI,        CODE_FOR_vashlv4si3,     "si_shli",        B_INSN,   _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_7))
 DEF_BUILTIN (SI_SHLQBI,      CODE_FOR_shlqbi_ti,     "si_shlqbi",      B_INSN,   _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
 DEF_BUILTIN (SI_SHLQBII,     CODE_FOR_shlqbi_ti,     "si_shlqbii",     B_INSN,   _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_7))
 DEF_BUILTIN (SI_SHLQBY,      CODE_FOR_shlqby_ti,     "si_shlqby",      B_INSN,   _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
@@ -629,14 +629,14 @@ DEF_BUILTIN (SPU_RLMASKQWBYTEBC_7, CODE_
 DEF_BUILTIN (SPU_RLMASKQWBYTEBC_8, CODE_FOR_rotqmbybi_ti,  "spu_rlmaskqwbytebc_8", B_INTERNAL, _A3(SPU_BTI_V4SF,   SPU_BTI_V4SF,   SPU_BTI_INTSI))
 DEF_BUILTIN (SPU_RLMASKQWBYTEBC_9, CODE_FOR_rotqmbybi_ti,  "spu_rlmaskqwbytebc_9", B_INTERNAL, _A3(SPU_BTI_V2DF,   SPU_BTI_V2DF,   SPU_BTI_INTSI))
 DEF_BUILTIN (SPU_SL,               CODE_FOR_nothing,       "spu_sl",               B_OVERLOAD, _A1(SPU_BTI_VOID))
-DEF_BUILTIN (SPU_SL_0,             CODE_FOR_ashlv8hi3,     "spu_sl_0",             B_INTERNAL, _A3(SPU_BTI_UV8HI,  SPU_BTI_UV8HI,  SPU_BTI_UV8HI))
-DEF_BUILTIN (SPU_SL_1,             CODE_FOR_ashlv8hi3,     "spu_sl_1",             B_INTERNAL, _A3(SPU_BTI_V8HI,   SPU_BTI_V8HI,   SPU_BTI_UV8HI))
-DEF_BUILTIN (SPU_SL_2,             CODE_FOR_ashlv4si3,     "spu_sl_2",             B_INTERNAL, _A3(SPU_BTI_UV4SI,  SPU_BTI_UV4SI,  SPU_BTI_UV4SI))
-DEF_BUILTIN (SPU_SL_3,             CODE_FOR_ashlv4si3,     "spu_sl_3",             B_INTERNAL, _A3(SPU_BTI_V4SI,   SPU_BTI_V4SI,   SPU_BTI_UV4SI))
-DEF_BUILTIN (SPU_SL_4,             CODE_FOR_ashlv8hi3,     "spu_sl_4",             B_INTERNAL, _A3(SPU_BTI_UV8HI,  SPU_BTI_UV8HI,  SPU_BTI_UINTSI))
-DEF_BUILTIN (SPU_SL_5,             CODE_FOR_ashlv8hi3,     "spu_sl_5",             B_INTERNAL, _A3(SPU_BTI_V8HI,   SPU_BTI_V8HI,   SPU_BTI_UINTSI))
-DEF_BUILTIN (SPU_SL_6,             CODE_FOR_ashlv4si3,     "spu_sl_6",             B_INTERNAL, _A3(SPU_BTI_UV4SI,  SPU_BTI_UV4SI,  SPU_BTI_UINTSI))
-DEF_BUILTIN (SPU_SL_7,             CODE_FOR_ashlv4si3,     "spu_sl_7",             B_INTERNAL, _A3(SPU_BTI_V4SI,   SPU_BTI_V4SI,   SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_SL_0,             CODE_FOR_vashlv8hi3,     "spu_sl_0",             B_INTERNAL, _A3(SPU_BTI_UV8HI,  SPU_BTI_UV8HI,  SPU_BTI_UV8HI))
+DEF_BUILTIN (SPU_SL_1,             CODE_FOR_vashlv8hi3,     "spu_sl_1",             B_INTERNAL, _A3(SPU_BTI_V8HI,   SPU_BTI_V8HI,   SPU_BTI_UV8HI))
+DEF_BUILTIN (SPU_SL_2,             CODE_FOR_vashlv4si3,     "spu_sl_2",             B_INTERNAL, _A3(SPU_BTI_UV4SI,  SPU_BTI_UV4SI,  SPU_BTI_UV4SI))
+DEF_BUILTIN (SPU_SL_3,             CODE_FOR_vashlv4si3,     "spu_sl_3",             B_INTERNAL, _A3(SPU_BTI_V4SI,   SPU_BTI_V4SI,   SPU_BTI_UV4SI))
+DEF_BUILTIN (SPU_SL_4,             CODE_FOR_vashlv8hi3,     "spu_sl_4",             B_INTERNAL, _A3(SPU_BTI_UV8HI,  SPU_BTI_UV8HI,  SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_SL_5,             CODE_FOR_vashlv8hi3,     "spu_sl_5",             B_INTERNAL, _A3(SPU_BTI_V8HI,   SPU_BTI_V8HI,   SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_SL_6,             CODE_FOR_vashlv4si3,     "spu_sl_6",             B_INTERNAL, _A3(SPU_BTI_UV4SI,  SPU_BTI_UV4SI,  SPU_BTI_UINTSI))
+DEF_BUILTIN (SPU_SL_7,             CODE_FOR_vashlv4si3,     "spu_sl_7",             B_INTERNAL, _A3(SPU_BTI_V4SI,   SPU_BTI_V4SI,   SPU_BTI_UINTSI))
 DEF_BUILTIN (SPU_SLQW,             CODE_FOR_nothing,       "spu_slqw",             B_OVERLOAD, _A1(SPU_BTI_VOID))
 DEF_BUILTIN (SPU_SLQW_0,           CODE_FOR_shlqbi_ti,     "spu_slqw_0",           B_INTERNAL, _A3(SPU_BTI_V2DI,   SPU_BTI_V2DI,   SPU_BTI_UINTSI))
 DEF_BUILTIN (SPU_SLQW_1,           CODE_FOR_shlqbi_ti,     "spu_slqw_1",           B_INTERNAL, _A3(SPU_BTI_UV2DI,  SPU_BTI_UV2DI,  SPU_BTI_UINTSI))
Index: spu.md
===================================================================
--- spu.md	(revision 134435)
+++ spu.md	(working copy)
@@ -211,6 +211,9 @@
 			  V8HI
 			  V4SI])
 
+(define_mode_attr v	 [(V8HI  "v") (V4SI  "v")
+			  (HI    "") (SI    "")])
+
 (define_mode_attr bh  [(QI "b")  (V16QI "b")
 		       (HI "h")  (V8HI "h")
 		       (SI "")   (V4SI "")])
@@ -727,7 +730,7 @@
     rtx op6_ti = gen_rtx_REG (TImode, REGNO (ops[6]));
     emit_insn (gen_clzv4si2 (ops[3],op1_v4si));
     emit_move_insn (ops[6], spu_const (V4SImode, 1023+31));
-    emit_insn (gen_ashlv4si3 (ops[4],op1_v4si,ops[3]));
+    emit_insn (gen_vashlv4si3 (ops[4],op1_v4si,ops[3]));
     emit_insn (gen_ceq_v4si (ops[5],ops[3],spu_const (V4SImode, 32)));
     emit_insn (gen_subv4si3 (ops[6],ops[6],ops[3]));
     emit_insn (gen_addv4si3 (ops[4],ops[4],ops[4]));
@@ -822,7 +825,7 @@
     rtx op4_df = gen_rtx_REG (DFmode, REGNO(ops[4]));
     rtx op5_df = gen_rtx_REG (DFmode, REGNO(ops[5]));
     emit_insn (gen_clzv4si2 (ops[4],op1_v4si));
-    emit_insn (gen_ashlv4si3 (ops[5],op1_v4si,ops[4]));
+    emit_insn (gen_vashlv4si3 (ops[5],op1_v4si,ops[4]));
     emit_insn (gen_ceq_v4si (ops[6],ops[4],spu_const (V4SImode, 32)));
     emit_insn (gen_subv4si3 (ops[4],ops[3],ops[4]));
     emit_insn (gen_addv4si3 (ops[5],ops[5],ops[5]));
@@ -1216,7 +1219,7 @@
     emit_move_insn (mask, spu_const (V4SImode, 0x0000ffff));
     emit_insn (gen_spu_mpyhh (high, operands[1], operands[2]));
     emit_insn (gen_spu_mpy (low, operands[1], operands[2]));
-    emit_insn (gen_ashlv4si3 (shift, high, spu_const(V4SImode, 16)));
+    emit_insn (gen_vashlv4si3 (shift, high, spu_const(V4SImode, 16)));
     emit_insn (gen_selb (result, shift, low, mask));
     DONE;
    }")
@@ -2094,9 +2097,9 @@
   [(set_attr "type" "fxb")])
 
 
-;; ashl
+;; ashl, vashl
 
-(define_insn "ashl<mode>3"
+(define_insn "<v>ashl<mode>3"
   [(set (match_operand:VHSI 0 "spu_reg_operand" "=r,r")
 	(ashift:VHSI (match_operand:VHSI 1 "spu_reg_operand" "r,r")
 		     (match_operand:VHSI 2 "spu_nonmem_operand" "r,W")))]
@@ -2228,9 +2231,9 @@
   [(set_attr "type" "shuf,shuf")])
 
 
-;; lshr
+;; lshr, vlshr
 
-(define_insn_and_split "lshr<mode>3"
+(define_insn_and_split "<v>lshr<mode>3"
   [(set (match_operand:VHSI 0 "spu_reg_operand" "=r,r")
 	(lshiftrt:VHSI (match_operand:VHSI 1 "spu_reg_operand" "r,r")
 		       (match_operand:VHSI 2 "spu_nonmem_operand" "r,W")))
@@ -2357,9 +2360,9 @@
   [(set_attr "type" "shuf")])
 
 
-;; ashr
+;; ashr, vashr
 
-(define_insn_and_split "ashr<mode>3"
+(define_insn_and_split "<v>ashr<mode>3"
   [(set (match_operand:VHSI 0 "spu_reg_operand" "=r,r")
 	(ashiftrt:VHSI (match_operand:VHSI 1 "spu_reg_operand" "r,r")
 		       (match_operand:VHSI 2 "spu_nonmem_operand" "r,W")))
@@ -2424,7 +2427,7 @@
 	emit_insn (gen_lshrti3 (op0, op1, GEN_INT (32)));
 	emit_insn (gen_spu_xswd (op0d, op0v));
         if (val > 32)
-	  emit_insn (gen_ashrv4si3 (op0v, op0v, spu_const (V4SImode, val - 32)));
+	  emit_insn (gen_vashrv4si3 (op0v, op0v, spu_const (V4SImode, val - 32)));
       }
     else
       {
@@ -2473,7 +2476,7 @@
     rtx op1_v4si = spu_gen_subreg (V4SImode, operands[1]);
     rtx t = gen_reg_rtx (TImode);
     emit_insn (gen_subsi3 (sign_shift, GEN_INT (128), force_reg (SImode, operands[2])));
-    emit_insn (gen_ashrv4si3 (sign_mask_v4si, op1_v4si, spu_const (V4SImode, 31)));
+    emit_insn (gen_vashrv4si3 (sign_mask_v4si, op1_v4si, spu_const (V4SImode, 31)));
     emit_insn (gen_fsm_ti (sign_mask, sign_mask));
     emit_insn (gen_ashlti3 (sign_mask, sign_mask, sign_shift));
     emit_insn (gen_lshrti3 (t, operands[1], operands[2]));
@@ -2490,9 +2493,9 @@
   [(set_attr "type" "shuf")])
 
 
-;; rotl
+;; vrotl, rotl
 
-(define_insn "rotl<mode>3"
+(define_insn "<v>rotl<mode>3"
   [(set (match_operand:VHSI 0 "spu_reg_operand" "=r,r")
 	(rotate:VHSI (match_operand:VHSI 1 "spu_reg_operand" "r,r")
 		     (match_operand:VHSI 2 "spu_nonmem_operand" "r,W")))]
@@ -3040,14 +3043,14 @@ selb\t%0,%5,%0,%3"
           emit_insn (gen_iorv4si3 (a_nan, a_nan, b_nan));
 	}
       emit_move_insn (zero, CONST0_RTX (V4SImode));
-      emit_insn (gen_ashrv4si3 (asel, ra, spu_const (V4SImode, 31)));
+      emit_insn (gen_vashrv4si3 (asel, ra, spu_const (V4SImode, 31)));
       emit_insn (gen_shufb (asel, asel, asel, hi_promote));
       emit_insn (gen_bg_v4si (abor, zero, a_abs));
       emit_insn (gen_shufb (abor, abor, abor, borrow_shuffle));
       emit_insn (gen_sfx_v4si (abor, zero, a_abs, abor));
       emit_insn (gen_selb (abor, a_abs, abor, asel));
 
-      emit_insn (gen_ashrv4si3 (bsel, rb, spu_const (V4SImode, 31)));
+      emit_insn (gen_vashrv4si3 (bsel, rb, spu_const (V4SImode, 31)));
       emit_insn (gen_shufb (bsel, bsel, bsel, hi_promote));
       emit_insn (gen_bg_v4si (bbor, zero, b_abs));
       emit_insn (gen_shufb (bbor, bbor, bbor, borrow_shuffle));
@@ -3148,13 +3151,13 @@ selb\t%0,%5,%0,%3"
       emit_insn (gen_shufb (b_nan, b_nan, b_nan, hi_promote));
       emit_insn (gen_iorv4si3 (a_nan, a_nan, b_nan));
       emit_move_insn (zero, CONST0_RTX (V4SImode));
-      emit_insn (gen_ashrv4si3 (asel, ra, spu_const (V4SImode, 31)));
+      emit_insn (gen_vashrv4si3 (asel, ra, spu_const (V4SImode, 31)));
       emit_insn (gen_shufb (asel, asel, asel, hi_promote));
       emit_insn (gen_bg_v4si (abor, zero, a_abs));
       emit_insn (gen_shufb (abor, abor, abor, borrow_shuffle));
       emit_insn (gen_sfx_v4si (abor, zero, a_abs, abor));
       emit_insn (gen_selb (abor, a_abs, abor, asel));
-      emit_insn (gen_ashrv4si3 (bsel, rb, spu_const (V4SImode, 31)));
+      emit_insn (gen_vashrv4si3 (bsel, rb, spu_const (V4SImode, 31)));
       emit_insn (gen_shufb (bsel, bsel, bsel, hi_promote));
       emit_insn (gen_bg_v4si (bbor, zero, b_abs));
       emit_insn (gen_shufb (bbor, bbor, bbor, borrow_shuffle));
@@ -3338,7 +3341,7 @@ selb\t%0,%4,%0,%3"
                                              0x08090A0B, 0x08090A0B);
           emit_move_insn (hi_promote, pat);
 
-          emit_insn (gen_ashrv4si3 (sign, ra, spu_const (V4SImode, 31)));
+          emit_insn (gen_vashrv4si3 (sign, ra, spu_const (V4SImode, 31)));
           emit_insn (gen_shufb (sign, sign, sign, hi_promote));
           emit_insn (gen_andv4si3 (abs, ra, sign_mask));
 
