This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[Patch]: Splitter for Altivec moves


Hello,
      the altivec move insns currently use multi-insn assembler sequences
for moving vectors between memory and general purpose registers. This
implementation has three shortcomings:
      - They might be wrong in case of destructive overlap of the source and
        target register ranges.
      - For the load case, one register of the target range might be used as
        the base register; subsequent loads will then load from a garbage
        location.
      - They are wrong for 64 bit mode.
In order to fix this, I propose the following patch, which uses post-reload
splitters.

Index: altivec.md
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/rs6000/altivec.md,v
retrieving revision 1.7
diff -c -p -r1.7 altivec.md
*** altivec.md    13 Apr 2003 17:51:06 -0000    1.7
--- altivec.md    18 Jun 2003 06:14:10 -0000
***************
*** 101,115 ****
      case 0: return \"stvx %1,%y0\";
      case 1: return \"lvx %0,%y1\";
      case 2: return \"vor %0,%1,%1\";
!     case 3: return \"stw%U0 %1,%0\;stw %L1,%L0\;stw %Y1,%Y0\;stw %Z1,%Z0\";
!     case 4: return \"lwz%U1 %0,%1\;lwz %L0,%L1\;lwz %Y0,%Y1\;lwz %Z0,%Z1\";
!     case 5: return \"mr %0,%1\;mr %L0,%L1\;mr %Y0,%Y1\;mr %Z0,%Z1\";
      case 6: return output_vec_const_move (operands);
      default: abort();
      }
  }"
!   [(set_attr "type" "vecstore,vecload,vecsimple,store,load,*,*")
!    (set_attr "length" "*,*,*,16,16,16,*")])

  (define_split
    [(set (match_operand:V4SI 0 "altivec_register_operand" "")
--- 101,138 ----
      case 0: return \"stvx %1,%y0\";
      case 1: return \"lvx %0,%y1\";
      case 2: return \"vor %0,%1,%1\";
!     case 3: return \"#\";
!     case 4: return \"#\";
!     case 5: return \"#\";
      case 6: return output_vec_const_move (operands);
      default: abort();
      }
  }"
!   [(set_attr "type" "vecstore,vecload,vecsimple,store,load,*,*")])
!
! (define_split ; TARGET_POWERPC64: after reload, split a V4SI move with no altivec register operand into two moves
!   [(set (match_operand:V4SI 0 "nonimmediate_operand" "")
!         (match_operand:V4SI 1 "input_operand" ""))]
!   "TARGET_ALTIVEC && reload_completed && TARGET_POWERPC64
!    && altivecs_in_gpr_p (operands[0], operands[1])"
!   [(set (match_dup 2) (match_dup 4)) ; operands 2-3 are the destination slots, 4-5 the source slots
!    (set (match_dup 3) (match_dup 5))] ; the slots are filled by rs6000_split_altivecs_in_gpr below
! "{
!      rs6000_split_altivecs_in_gpr (&operands[0], 6);
! }")
!
! (define_split ; !TARGET_POWERPC64: after reload, split a V4SI move with no altivec register operand into four moves
!   [(set (match_operand:V4SI 0 "nonimmediate_operand" "")
!         (match_operand:V4SI 1 "input_operand" ""))]
!   "TARGET_ALTIVEC && reload_completed && !TARGET_POWERPC64
!    && altivecs_in_gpr_p (operands[0], operands[1])"
!   [(set (match_dup 2) (match_dup 6)) ; operands 2-5 are the destination slots, 6-9 the source slots
!    (set (match_dup 3) (match_dup 7))
!    (set (match_dup 4) (match_dup 8))
!    (set (match_dup 5) (match_dup 9))] ; the slots are filled by rs6000_split_altivecs_in_gpr below
! "{
!      rs6000_split_altivecs_in_gpr (&operands[0], 10);
! }")

  (define_split
    [(set (match_operand:V4SI 0 "altivec_register_operand" "")
***************
*** 140,154 ****
       case 0: return \"stvx %1,%y0\";
       case 1: return \"lvx %0,%y1\";
       case 2: return \"vor %0,%1,%1\";
!      case 3: return \"stw%U0 %1,%0\;stw %L1,%L0\;stw %Y1,%Y0\;stw %Z1,%Z0\";
!      case 4: return \"lwz%U1 %0,%1\;lwz %L0,%L1\;lwz %Y0,%Y1\;lwz %Z0,%Z1\";
!      case 5: return \"mr %0,%1\;mr %L0,%L1\;mr %Y0,%Y1\;mr %Z0,%Z1\";
       case 6: return output_vec_const_move (operands);
       default: abort ();
       }
  }"
!   [(set_attr "type" "vecstore,vecload,vecsimple,store,load,*,*")
!    (set_attr "length" "*,*,*,16,16,16,*")])

  (define_split
    [(set (match_operand:V8HI 0 "altivec_register_operand" "")
--- 163,200 ----
       case 0: return \"stvx %1,%y0\";
       case 1: return \"lvx %0,%y1\";
       case 2: return \"vor %0,%1,%1\";
!      case 3: return \"#\";
!      case 4: return \"#\";
!      case 5: return \"#\";
       case 6: return output_vec_const_move (operands);
       default: abort ();
       }
  }"
!   [(set_attr "type" "vecstore,vecload,vecsimple,store,load,*,*")])
!
! (define_split ; TARGET_POWERPC64: after reload, split a V8HI move with no altivec register operand into two moves
!   [(set (match_operand:V8HI 0 "nonimmediate_operand" "")
!         (match_operand:V8HI 1 "input_operand" ""))]
!   "TARGET_ALTIVEC && reload_completed && TARGET_POWERPC64
!    && altivecs_in_gpr_p (operands[0], operands[1])"
!   [(set (match_dup 2) (match_dup 4)) ; operands 2-3 are the destination slots, 4-5 the source slots
!    (set (match_dup 3) (match_dup 5))] ; the slots are filled by rs6000_split_altivecs_in_gpr below
! "{
!      rs6000_split_altivecs_in_gpr (&operands[0], 6);
! }")
!
! (define_split ; !TARGET_POWERPC64: after reload, split a V8HI move with no altivec register operand into four moves
!   [(set (match_operand:V8HI 0 "nonimmediate_operand" "")
!         (match_operand:V8HI 1 "input_operand" ""))]
!   "TARGET_ALTIVEC && reload_completed && !TARGET_POWERPC64
!    && altivecs_in_gpr_p (operands[0], operands[1])"
!   [(set (match_dup 2) (match_dup 6)) ; operands 2-5 are the destination slots, 6-9 the source slots
!    (set (match_dup 3) (match_dup 7))
!    (set (match_dup 4) (match_dup 8))
!    (set (match_dup 5) (match_dup 9))] ; the slots are filled by rs6000_split_altivecs_in_gpr below
! "{
!      rs6000_split_altivecs_in_gpr (&operands[0], 10);
! }")

  (define_split
    [(set (match_operand:V8HI 0 "altivec_register_operand" "")
***************
*** 179,193 ****
      case 0: return \"stvx %1,%y0\";
      case 1: return \"lvx %0,%y1\";
      case 2: return \"vor %0,%1,%1\";
!     case 3: return \"stw%U0 %1,%0\;stw %L1,%L0\;stw %Y1,%Y0\;stw %Z1,%Z0\";
!     case 4: return \"lwz%U1 %0,%1\;lwz %L0,%L1\;lwz %Y0,%Y1\;lwz %Z0,%Z1\";
!     case 5: return \"mr %0,%1\;mr %L0,%L1\;mr %Y0,%Y1\;mr %Z0,%Z1\";
      case 6: return output_vec_const_move (operands);
      default: abort ();
      }
  }"
!   [(set_attr "type" "vecstore,vecload,vecsimple,store,load,*,*")
!    (set_attr "length" "*,*,*,16,16,16,*")])

  (define_split
    [(set (match_operand:V16QI 0 "altivec_register_operand" "")
--- 225,262 ----
      case 0: return \"stvx %1,%y0\";
      case 1: return \"lvx %0,%y1\";
      case 2: return \"vor %0,%1,%1\";
!     case 3: return \"#\";
!     case 4: return \"#\";
!     case 5: return \"#\";
      case 6: return output_vec_const_move (operands);
      default: abort ();
      }
  }"
!   [(set_attr "type" "vecstore,vecload,vecsimple,store,load,*,*")])
!
! (define_split ; TARGET_POWERPC64: after reload, split a V16QI move with no altivec register operand into two moves
!   [(set (match_operand:V16QI 0 "nonimmediate_operand" "")
!         (match_operand:V16QI 1 "input_operand" ""))]
!   "TARGET_ALTIVEC && reload_completed && TARGET_POWERPC64
!    && altivecs_in_gpr_p (operands[0], operands[1])"
!   [(set (match_dup 2) (match_dup 4)) ; operands 2-3 are the destination slots, 4-5 the source slots
!    (set (match_dup 3) (match_dup 5))] ; the slots are filled by rs6000_split_altivecs_in_gpr below
! "{
!      rs6000_split_altivecs_in_gpr (&operands[0], 6);
! }")
!
! (define_split ; !TARGET_POWERPC64: after reload, split a V16QI move with no altivec register operand into four moves
!   [(set (match_operand:V16QI 0 "nonimmediate_operand" "")
!         (match_operand:V16QI 1 "input_operand" ""))]
!   "TARGET_ALTIVEC && reload_completed && !TARGET_POWERPC64
!    && altivecs_in_gpr_p (operands[0], operands[1])"
!   [(set (match_dup 2) (match_dup 6)) ; operands 2-5 are the destination slots, 6-9 the source slots
!    (set (match_dup 3) (match_dup 7))
!    (set (match_dup 4) (match_dup 8))
!    (set (match_dup 5) (match_dup 9))] ; the slots are filled by rs6000_split_altivecs_in_gpr below
! "{
!      rs6000_split_altivecs_in_gpr (&operands[0], 10);
! }")

  (define_split
    [(set (match_operand:V16QI 0 "altivec_register_operand" "")
***************
*** 218,232 ****
      case 0: return \"stvx %1,%y0\";
      case 1: return \"lvx %0,%y1\";
      case 2: return \"vor %0,%1,%1\";
!     case 3: return \"stw%U0 %1,%0\;stw %L1,%L0\;stw %Y1,%Y0\;stw %Z1,%Z0\";
!     case 4: return \"lwz%U1 %0,%1\;lwz %L0,%L1\;lwz %Y0,%Y1\;lwz %Z0,%Z1\";
!     case 5: return \"mr %0,%1\;mr %L0,%L1\;mr %Y0,%Y1\;mr %Z0,%Z1\";
      case 6: return output_vec_const_move (operands);
      default: abort ();
      }
  }"
!   [(set_attr "type" "vecstore,vecload,vecsimple,store,load,*,*")
!    (set_attr "length" "*,*,*,16,16,16,*")])

  (define_insn "get_vrsave_internal"
    [(set (match_operand:SI 0 "register_operand" "=r")
--- 287,324 ----
      case 0: return \"stvx %1,%y0\";
      case 1: return \"lvx %0,%y1\";
      case 2: return \"vor %0,%1,%1\";
!     case 3: return \"#\";
!     case 4: return \"#\";
!     case 5: return \"#\";
      case 6: return output_vec_const_move (operands);
      default: abort ();
      }
  }"
!   [(set_attr "type" "vecstore,vecload,vecsimple,store,load,*,*")])
!
! (define_split ; TARGET_POWERPC64: after reload, split a V4SF move with no altivec register operand into two moves
!   [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
!         (match_operand:V4SF 1 "input_operand" ""))]
!   "TARGET_ALTIVEC && reload_completed && TARGET_POWERPC64
!    && altivecs_in_gpr_p (operands[0], operands[1])"
!   [(set (match_dup 2) (match_dup 4)) ; operands 2-3 are the destination slots, 4-5 the source slots
!    (set (match_dup 3) (match_dup 5))] ; the slots are filled by rs6000_split_altivecs_in_gpr below
! "{
!      rs6000_split_altivecs_in_gpr (&operands[0], 6);
! }")
!
! (define_split ; !TARGET_POWERPC64: after reload, split a V4SF move with no altivec register operand into four moves
!   [(set (match_operand:V4SF 0 "nonimmediate_operand" "")
!         (match_operand:V4SF 1 "input_operand" ""))]
!   "TARGET_ALTIVEC && reload_completed && !TARGET_POWERPC64
!    && altivecs_in_gpr_p (operands[0], operands[1])"
!   [(set (match_dup 2) (match_dup 6)) ; operands 2-5 are the destination slots, 6-9 the source slots
!    (set (match_dup 3) (match_dup 7))
!    (set (match_dup 4) (match_dup 8))
!    (set (match_dup 5) (match_dup 9))] ; the slots are filled by rs6000_split_altivecs_in_gpr below
! "{
!      rs6000_split_altivecs_in_gpr (&operands[0], 10);
! }")

  (define_insn "get_vrsave_internal"
    [(set (match_operand:SI 0 "register_operand" "=r")
Index: rs6000-protos.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/rs6000/rs6000-protos.h,v
retrieving revision 1.56
diff -c -p -r1.56 rs6000-protos.h
*** rs6000-protos.h     4 Jun 2003 15:23:42 -0000     1.56
--- rs6000-protos.h     18 Jun 2003 06:14:10 -0000
*************** extern int includes_rshift_p PARAMS ((rt
*** 100,105 ****
--- 100,106 ----
  extern int includes_rldic_lshift_p PARAMS ((rtx, rtx));
  extern int includes_rldicr_lshift_p PARAMS ((rtx, rtx));
  extern int registers_ok_for_quad_peep PARAMS ((rtx, rtx));
+ extern int altivecs_in_gpr_p PARAMS ((rtx, rtx));
  extern int addrs_ok_for_quad_peep PARAMS ((rtx, rtx));
  extern enum reg_class secondary_reload_class PARAMS ((enum reg_class,
                                          enum machine_mode, rtx));
*************** extern int mtcrf_operation PARAMS ((rtx,
*** 125,130 ****
--- 126,132 ----
  extern int lmw_operation PARAMS ((rtx, enum machine_mode));
  extern struct rtx_def *create_TOC_reference PARAMS ((rtx));
  extern void rs6000_emit_eh_toc_restore PARAMS ((rtx));
+ extern void rs6000_split_altivecs_in_gpr PARAMS ((rtx*, int));
  extern void rs6000_emit_move PARAMS ((rtx, rtx, enum machine_mode));
  extern rtx rs6000_legitimize_address PARAMS ((rtx, rtx, enum machine_mode));
  extern rtx rs6000_legitimize_reload_address PARAMS ((rtx, enum machine_mode,
Index: rs6000.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/rs6000/rs6000.c,v
retrieving revision 1.493
diff -c -p -r1.493 rs6000.c
*** rs6000.c      15 Jun 2003 18:01:55 -0000    1.493
--- rs6000.c      18 Jun 2003 06:14:11 -0000
*************** small_data_operand (op, mode)
*** 2293,2298 ****
--- 2293,2314 ----
    return 0;
  #endif
  }
+
+ /* Return 1 if neither OP0 nor OP1 is an altivec register, i.e. for
+    the move insn operand combinations that involve altivec vectors
+    in gprs; return 0 otherwise.  */
+
+ int
+ altivecs_in_gpr_p (op0, op1)
+      rtx op0, op1;
+ {
+   int dest_is_vr = REG_P (op0) && ALTIVEC_REGNO_P (REGNO (op0));
+   int src_is_vr = REG_P (op1) && ALTIVEC_REGNO_P (REGNO (op1));
+
+   /* Any altivec register operand disqualifies the move.  */
+   return !(dest_is_vr || src_is_vr);
+ }
+

  /* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address.  */

*************** rs6000_emit_minmax (dest, code, op0, op1
*** 9622,9627 ****
--- 9638,9710 ----
    if (target != dest)
      emit_move_insn (dest, target);
  }
+
+ /* Called by the altivec move splitters to break a vector move that
+    does not involve an altivec register into word-sized moves.
+    Input:
+       operands[0] : Destination of move
+       operands[1] : Source of move
+       noperands   : Size of operands vector (10 in 32 bit, 6 in 64 bit)
+    Output:
+       operands[2-5] ([2-3] in 64 bit) : Destination slots
+       operands[6-9] ([4-5] in 64 bit) : Source slots
+    The splitter emits slot K of each group as its Kth move, so the
+    order in which the slots are filled here is the order of the moves.  */
+
+ void
+ rs6000_split_altivecs_in_gpr (operands, noperands)
+      rtx *operands;
+      int noperands;
+ {
+   int nregs, i, j;
+   enum machine_mode mode;
+
+   /* Number of words to move (4 in 32 bit, 2 in 64 bit).  */
+   nregs = (noperands - 2) / 2;
+   mode = GET_MODE (operands[0]);
+
+   if (REG_P (operands[1])
+       && REG_P (operands[0])
+       && REGNO (operands[1]) < REGNO (operands[0]))
+     {
+       /* The destination range may overlap the upper part of the source
+          range, so move the words starting with the last one.  */
+       for (i = 0; i < nregs; i++)
+         {
+           j = nregs - 1 - i;
+           operands[i + 2] = operand_subword (operands[0], j, 0, mode);
+           operands[i + 2 + nregs]
+             = operand_subword (operands[1], j, 0, mode);
+         }
+       return;
+     }
+
+   /* Default order: first word first.  */
+   j = 0;
+
+   if (GET_CODE (operands[1]) == MEM && REG_P (operands[0]))
+     {
+       /* We have offsettable addresses only.  If one of the destination
+          registers is used to address memory, that register has to be
+          loaded last, so start with the word that follows it.  */
+       rtx breg = XEXP (operands[1], 0);
+
+       if (GET_CODE (breg) == PLUS)
+         breg = XEXP (breg, 0);
+
+       if (GET_CODE (breg) == REG
+           && REGNO (breg) >= REGNO (operands[0])
+           && REGNO (breg) < REGNO (operands[0]) + nregs)
+         j = (REGNO (breg) - REGNO (operands[0]) + 1) % nregs;
+     }
+
+   for (i = 0; i < nregs; i++, j = (j + 1) % nregs)
+     {
+       operands[i + 2] = operand_subword (operands[0], j, 0, mode);
+       operands[i + 2 + nregs] = operand_subword (operands[1], j, 0, mode);
+     }
+ }
+

  /* This page contains routines that are used to determine what the
     function prologue and epilogue code will do and write them out.  */


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]