This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

Re: ia64 mulv8qi3 expander


Here's the patch I checked in to address this.


r~


        * config/ia64/ia64.c (rtx_needs_barrier): Handle CONST_VECTOR
        and VEC_SELECT.
        * config/ia64/vect.md (mulv8qi3): Re-implement with mix1_[rl].
        (mulv4hi3): Set itanium_class mmmul.
        (fpack_sfxf, fpack_xfsf, fpack_xfxf): Remove.
        (fpack): Rename from fpack_sfsf.

Index: config/ia64/ia64.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/ia64/ia64.c,v
retrieving revision 1.339
diff -c -p -d -r1.339 ia64.c
*** config/ia64/ia64.c	3 Jan 2005 19:59:11 -0000	1.339
--- config/ia64/ia64.c	6 Jan 2005 06:14:42 -0000
*************** rtx_needs_barrier (rtx x, struct reg_fla
*** 5198,5214 ****
        for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
  	{
  	  rtx pat = XVECEXP (x, 0, i);
! 	  if (GET_CODE (pat) == SET)
  	    {
  	      update_set_flags (pat, &new_flags, &pred, &cond);
! 	      need_barrier |= set_src_needs_barrier (pat, new_flags, pred, cond);
  	    }
- 	  else if (GET_CODE (pat) == USE
- 		   || GET_CODE (pat) == CALL
- 		   || GET_CODE (pat) == ASM_OPERANDS)
- 	    need_barrier |= rtx_needs_barrier (pat, flags, pred);
- 	  else if (GET_CODE (pat) != CLOBBER && GET_CODE (pat) != RETURN)
- 	    abort ();
  	}
        for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
  	{
--- 5198,5224 ----
        for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
  	{
  	  rtx pat = XVECEXP (x, 0, i);
! 	  switch (GET_CODE (pat))
  	    {
+ 	    case SET:
  	      update_set_flags (pat, &new_flags, &pred, &cond);
! 	      need_barrier |= set_src_needs_barrier (pat, new_flags,
! 						     pred, cond);
! 	      break;
! 
! 	    case USE:
! 	    case CALL:
! 	    case ASM_OPERANDS:
! 	      need_barrier |= rtx_needs_barrier (pat, flags, pred);
! 	      break;
! 
! 	    case CLOBBER:
! 	    case RETURN:
! 	      break;
! 
! 	    default:
! 	      gcc_unreachable ();
  	    }
  	}
        for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
  	{
*************** rtx_needs_barrier (rtx x, struct reg_fla
*** 5246,5252 ****
        need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
        break;
  
!     case CONST_INT:   case CONST_DOUBLE:
      case SYMBOL_REF:  case LABEL_REF:     case CONST:
        break;
  
--- 5256,5262 ----
        need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
        break;
  
!     case CONST_INT:   case CONST_DOUBLE:  case CONST_VECTOR:
      case SYMBOL_REF:  case LABEL_REF:     case CONST:
        break;
  
*************** rtx_needs_barrier (rtx x, struct reg_fla
*** 5290,5295 ****
--- 5300,5313 ----
        need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
        break;
  
+     case VEC_SELECT:
+       /* VEC_SELECT's second argument is a PARALLEL with integers that
+ 	 describe the elements selected.  On ia64, those integers are
+ 	 always constants.  Avoid walking the PARALLEL so that we don't
+ 	 get confused with "normal" parallels and abort.  */
+       need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
+       break;
+ 
      case UNSPEC:
        switch (XINT (x, 1))
  	{
Index: config/ia64/vect.md
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/ia64/vect.md,v
retrieving revision 1.1
diff -c -p -d -r1.1 vect.md
*** config/ia64/vect.md	3 Jan 2005 19:59:11 -0000	1.1
--- config/ia64/vect.md	6 Jan 2005 06:14:43 -0000
***************
*** 172,214 ****
  		   (match_operand:V8QI 2 "gr_register_operand" "r")))]
    ""
  {
!   rtx l1, h1, l2, h2, lm, hm, lz, hz;
  
    l1 = gen_reg_rtx (V4HImode);
!   h1 = gen_reg_rtx (V4HImode);
    l2 = gen_reg_rtx (V4HImode);
-   h2 = gen_reg_rtx (V4HImode);
  
!   /* Zero-extend the QImode elements into two words of HImode elements.  */
!   emit_insn (gen_unpack1_l (gen_lowpart (V8QImode, l1),
! 			    operands[1], CONST0_RTX (V8QImode)));
!   emit_insn (gen_unpack1_l (gen_lowpart (V8QImode, l2),
! 			    operands[2], CONST0_RTX (V8QImode)));
!   emit_insn (gen_unpack1_h (gen_lowpart (V8QImode, h1),
! 			    operands[1], CONST0_RTX (V8QImode)));
!   emit_insn (gen_unpack1_h (gen_lowpart (V8QImode, h2),
! 			    operands[2], CONST0_RTX (V8QImode)));
  
    /* Multiply.  */
    lm = gen_reg_rtx (V4HImode);
!   hm = gen_reg_rtx (V4HImode);
    emit_insn (gen_mulv4hi3 (lm, l1, l2));
-   emit_insn (gen_mulv4hi3 (hm, h1, h2));
  
!   /* Zap the high order bytes of the HImode elements.  There are several
!      ways that this could be done.  On Itanium2, there's 1 cycle latency
!      moving between the ALU units and the PALU units, so using AND would
!      be 3 cycles latency into the eventual pack insn, whereas using MIX
!      is only 2 cycles.  */
!   lz = gen_reg_rtx (V4HImode);
!   hz = gen_reg_rtx (V4HImode);
!   emit_insn (gen_mix1_r (gen_lowpart (V8QImode, lz),
! 			 gen_lowpart (V8QImode, lm), CONST0_RTX (V8QImode)));
!   emit_insn (gen_mix1_r (gen_lowpart (V8QImode, lz),
! 			 gen_lowpart (V8QImode, lm), CONST0_RTX (V8QImode)));
! 
!   /* Repack the HImode elements as QImode elements.  */
!   emit_insn (gen_pack2_sss (operands[0], lz, hz));
    DONE;
  })
  
--- 172,206 ----
  		   (match_operand:V8QI 2 "gr_register_operand" "r")))]
    ""
  {
!   rtx r1, l1, r2, l2, rm, lm;
  
+   r1 = gen_reg_rtx (V4HImode);
    l1 = gen_reg_rtx (V4HImode);
!   r2 = gen_reg_rtx (V4HImode);
    l2 = gen_reg_rtx (V4HImode);
  
!   /* Zero-extend the QImode elements into two words of HImode elements
!      by interleaving them with zero bytes.  */
!   emit_insn (gen_mix1_r (gen_lowpart (V8QImode, r1),
!                          operands[1], CONST0_RTX (V8QImode)));
!   emit_insn (gen_mix1_r (gen_lowpart (V8QImode, r2),
!                          operands[2], CONST0_RTX (V8QImode)));
!   emit_insn (gen_mix1_l (gen_lowpart (V8QImode, l1),
!                          operands[1], CONST0_RTX (V8QImode)));
!   emit_insn (gen_mix1_l (gen_lowpart (V8QImode, l2),
!                          operands[2], CONST0_RTX (V8QImode)));
  
    /* Multiply.  */
+   rm = gen_reg_rtx (V4HImode);
    lm = gen_reg_rtx (V4HImode);
!   emit_insn (gen_mulv4hi3 (rm, r1, r2));
    emit_insn (gen_mulv4hi3 (lm, l1, l2));
  
!   /* Zap the high order bytes of the HImode elements by overwriting those
!      in one part with the low order bytes of the other.  */
!   emit_insn (gen_mix1_r (operands[0],
!                          gen_lowpart (V8QImode, rm),
!                          gen_lowpart (V8QImode, lm)));
    DONE;
  })
  
***************
*** 218,224 ****
  		   (match_operand:V4HI 2 "gr_register_operand" "r")))]
    ""
    "pmpyshr2 %0 = %1, %2, 0"
!   [(set_attr "itanium_class" "mmalua")])
  
  (define_expand "umax<mode>3"
    [(set (match_operand:VECINT 0 "gr_register_operand" "")
--- 210,216 ----
  		   (match_operand:V4HI 2 "gr_register_operand" "r")))]
    ""
    "pmpyshr2 %0 = %1, %2, 0"
!   [(set_attr "itanium_class" "mmmul")])
  
  (define_expand "umax<mode>3"
    [(set (match_operand:VECINT 0 "gr_register_operand" "")
***************
*** 450,456 ****
    "mix1.r %0 = %r2, %r1"
    [(set_attr "itanium_class" "mmshf")])
  
! (define_insn "*mix1_l"
    [(set (match_operand:V8QI 0 "gr_register_operand" "=r")
  	(vec_select:V8QI
  	  (vec_concat:V16QI
--- 442,448 ----
    "mix1.r %0 = %r2, %r1"
    [(set_attr "itanium_class" "mmshf")])
  
! (define_insn "mix1_l"
    [(set (match_operand:V8QI 0 "gr_register_operand" "=r")
  	(vec_select:V8QI
  	  (vec_concat:V16QI
***************
*** 948,954 ****
    DONE;
  })
  
! (define_insn "*fpack_sfsf"
    [(set (match_operand:V2SF 0 "fr_register_operand" "=f")
  	(vec_concat:V2SF
  	  (match_operand:SF 1 "fr_reg_or_fp01_operand" "fG")
--- 940,946 ----
    DONE;
  })
  
! (define_insn "*fpack"
    [(set (match_operand:V2SF 0 "fr_register_operand" "=f")
  	(vec_concat:V2SF
  	  (match_operand:SF 1 "fr_reg_or_fp01_operand" "fG")
***************
*** 957,994 ****
    "fpack %0 = %F2, %F1"
    [(set_attr "itanium_class" "fmisc")])
  
- (define_insn "*fpack_sfxf"
-   [(set (match_operand:V2SF 0 "fr_register_operand" "=f")
- 	(vec_concat:V2SF
- 	  (match_operand:SF 1 "fr_reg_or_fp01_operand" "fG")
- 	  (float_truncate:SF
- 	    (match_operand 2 "fr_register_operand" "f"))))]
-   "GET_MODE (operands[2]) == DFmode || GET_MODE (operands[2]) == XFmode"
-   "fpack %0 = %2, %F1"
-   [(set_attr "itanium_class" "fmisc")])
- 
- (define_insn "*fpack_xfsf"
-   [(set (match_operand:V2SF 0 "fr_register_operand" "=f")
- 	(vec_concat:V2SF
- 	  (float_truncate:SF
- 	    (match_operand 1 "fr_register_operand" "f"))
- 	  (match_operand:SF 2 "fr_reg_or_fp01_operand" "fG")))]
-   "GET_MODE (operands[1]) == DFmode || GET_MODE (operands[1]) == XFmode"
-   "fpack %0 = %F2, %1"
-   [(set_attr "itanium_class" "fmisc")])
- 
- (define_insn "*fpack_xfxf"
-   [(set (match_operand:V2SF 0 "fr_register_operand" "=f")
- 	(vec_concat:V2SF
- 	  (float_truncate:SF
- 	    (match_operand 1 "fr_register_operand" "f"))
- 	  (float_truncate:SF
- 	    (match_operand 2 "fr_register_operand" "f"))))]
-   "(GET_MODE (operands[1]) == DFmode || GET_MODE (operands[1]) == XFmode)
-    && (GET_MODE (operands[2]) == DFmode || GET_MODE (operands[2]) == XFmode)"
-   "fpack %0 = %2, %1"
-   [(set_attr "itanium_class" "fmisc")])
- 
  ;; Missing operations
  ;; fprcpa
  ;; fpsqrta
--- 949,954 ----
Index: testsuite/gcc.c-torture/execute/simd-6.c
===================================================================
RCS file: testsuite/gcc.c-torture/execute/simd-6.c
diff -N testsuite/gcc.c-torture/execute/simd-6.c
*** /dev/null	1 Jan 1970 00:00:00 -0000
--- testsuite/gcc.c-torture/execute/simd-6.c	6 Jan 2005 06:14:43 -0000
***************
*** 0 ****
--- 1,22 ----
+ extern void abort (void);
+ extern int memcmp (const void *, const void *, __SIZE_TYPE__);
+ 
+ typedef unsigned char v8qi __attribute__((vector_size(8)));
+ 
+ v8qi foo(v8qi x, v8qi y)
+ {
+   return x * y;
+ }
+ 
+ int main()
+ {
+   v8qi a = { 1, 2, 3, 4, 5, 6, 7, 8 };
+   v8qi b = { 3, 3, 3, 3, 3, 3, 3, 3 };
+   v8qi c = { 3, 6, 9, 12, 15, 18, 21, 24 };
+   v8qi r;
+ 
+   r = foo (a, b);
+   if (memcmp (&r, &c, 8) != 0)
+     abort ();
+   return 0;
+ }


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]