This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

[PATCH] Complex move by parts (PR rtl-optimization/20306)


	The patch for PR rtl-optimization/15289 introduced a performance
regression for floating-point complex moves.  Previously, GCC would move
complex modes by parts; after the change, GCC tries to use blockmove and
move via integer registers.  Operands to the blockmove machinery are
explicitly converted to BLKmode, so blockmove does not know the
original mode or, in the case of complex, the inner mode, and cannot
choose to move floating-point values through FPRs.

	On PowerPC, this change diminishes the performance of
floating-point complex operations, apparent in BLAS routines, e.g., ZGEMM.

	The new algorithm looks for optabs corresponding to complex modes
first, but defining those optabs creates more problems by changing other
behaviors of GCC, so defining complex move optabs appears to be a bad
choice.

	After discussions on IRC, the best choice appears to be a new
target hook to prefer the original complex move by parts scalarization
instead of the blockmove and integer move options.  The appended patch
implements that option.

Okay for mainline, gcc-4.0, and gcc-3.4?

Bootstrapped and regression tested on powerpc-ibm-aix5.2.0.0.

Thanks, David


	PR rtl-optimization/20306
	* target-def.h (TARGET_COMPLEX_MOVE_BY_PARTS): Define.
	* target.h (struct gcc_target): Add complex_move_by_parts.
	* expr.c (emit_move_complex): Avoid blockmove and integer move if
	both operands prefer complex_move_by_parts.

	* config/rs6000/rs6000.c (rs6000_complex_move_by_parts): Define.

Index: target-def.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/target-def.h,v
retrieving revision 1.113
diff -c -p -r1.113 target-def.h
*** target-def.h	8 Mar 2005 21:01:41 -0000	1.113
--- target-def.h	10 Mar 2005 14:58:52 -0000
*************** Foundation, 59 Temple Place - Suite 330,
*** 327,332 ****
--- 327,336 ----
  #define TARGET_VECTOR_OPAQUE_P hook_bool_tree_false
  #endif
  
+ #ifndef TARGET_COMPLEX_MOVE_BY_PARTS
+ #define TARGET_COMPLEX_MOVE_BY_PARTS hook_bool_mode_false
+ #endif
+ 
  /* In hooks.c.  */
  #define TARGET_CANNOT_MODIFY_JUMPS_P hook_bool_void_false
  #define TARGET_BRANCH_TARGET_REGISTER_CLASS hook_int_void_no_regs
*************** Foundation, 59 Temple Place - Suite 330,
*** 517,522 ****
--- 521,527 ----
    TARGET_SCALAR_MODE_SUPPORTED_P,		\
    TARGET_VECTOR_MODE_SUPPORTED_P,               \
    TARGET_VECTOR_OPAQUE_P,			\
+   TARGET_COMPLEX_MOVE_BY_PARTS,			\
    TARGET_RTX_COSTS,				\
    TARGET_ADDRESS_COST,				\
    TARGET_DWARF_REGISTER_SPAN,                   \
Index: target.h
===================================================================
RCS file: /cvs/gcc/gcc/gcc/target.h,v
retrieving revision 1.125
diff -c -p -r1.125 target.h
*** target.h	8 Mar 2005 21:01:41 -0000	1.125
--- target.h	10 Mar 2005 14:58:52 -0000
*************** struct gcc_target
*** 411,416 ****
--- 411,420 ----
    /* True if a vector is opaque.  */
    bool (* vector_opaque_p) (tree);
  
+   /* Given a complex mode, return true if a complex operand always
+      should move by parts.  */
+   bool (* complex_move_by_parts) (enum machine_mode);
+ 
    /* Compute a (partial) cost for rtx X.  Return true if the complete
       cost has been computed, and false if subexpressions should be
       scanned.  In either case, *TOTAL contains the cost result.  */
Index: expr.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/expr.c,v
retrieving revision 1.780
diff -c -p -r1.780 expr.c
*** expr.c	9 Mar 2005 07:07:37 -0000	1.780
--- expr.c	10 Mar 2005 14:58:53 -0000
*************** emit_move_complex (enum machine_mode mod
*** 2877,2915 ****
    if (push_operand (x, mode))
      return emit_move_complex_push (mode, x, y);
  
!   /* For memory to memory moves, optimal behavior can be had with the
!      existing block move logic.  */
!   if (MEM_P (x) && MEM_P (y))
!     {
!       emit_block_move (x, y, GEN_INT (GET_MODE_SIZE (mode)),
! 		       BLOCK_OP_NO_LIBCALL);
!       return get_last_insn ();
!     }
! 
!   /* See if we can coerce the target into moving both values at once.  */
! 
!   /* Not possible if the values are inherently not adjacent.  */
!   if (GET_CODE (x) == CONCAT || GET_CODE (y) == CONCAT)
!     try_int = false;
!   /* Is possible if both are registers (or subregs of registers).  */
!   else if (register_operand (x, mode) && register_operand (y, mode))
!     try_int = true;
!   /* If one of the operands is a memory, and alignment constraints
!      are friendly enough, we may be able to do combined memory operations.
!      We do not attempt this if Y is a constant because that combination is
!      usually better with the by-parts thing below.  */
!   else if ((MEM_P (x) ? !CONSTANT_P (y) : MEM_P (y))
! 	   && (!STRICT_ALIGNMENT
! 	       || get_mode_alignment (mode) == BIGGEST_ALIGNMENT))
!     try_int = true;
!   else
!     try_int = false;
! 
!   if (try_int)
      {
!       rtx ret = emit_move_via_integer (mode, x, y);
!       if (ret)
! 	return ret;
      }
  
    /* Show the output dies here.  This is necessary for SUBREGs
--- 2877,2919 ----
    if (push_operand (x, mode))
      return emit_move_complex_push (mode, x, y);
  
!   if (!targetm.complex_move_by_parts (GET_MODE (x))
!       || !targetm.complex_move_by_parts (GET_MODE (y)))
      {
!       /* For memory to memory moves, optimal behavior can be had with the
! 	 existing block move logic.  */
!       if (MEM_P (x) && MEM_P (y))
! 	{
! 	  emit_block_move (x, y, GEN_INT (GET_MODE_SIZE (mode)),
! 			   BLOCK_OP_NO_LIBCALL);
! 	  return get_last_insn ();
! 	}
! 
!       /* See if we can coerce the target into moving both values at once.  */
! 
!       /* Not possible if the values are inherently not adjacent.  */
!       if (GET_CODE (x) == CONCAT || GET_CODE (y) == CONCAT)
! 	try_int = false;
!       /* Is possible if both are registers (or subregs of registers).  */
!       else if (register_operand (x, mode) && register_operand (y, mode))
! 	try_int = true;
!       /* If one of the operands is a memory, and alignment constraints
! 	 are friendly enough, we may be able to do combined memory operations.
! 	 We do not attempt this if Y is a constant because that combination is
! 	 usually better with the by-parts thing below.  */
!       else if ((MEM_P (x) ? !CONSTANT_P (y) : MEM_P (y))
! 	       && (!STRICT_ALIGNMENT
! 		   || get_mode_alignment (mode) == BIGGEST_ALIGNMENT))
! 	try_int = true;
!       else
! 	try_int = false;
! 
!       if (try_int)
! 	{
! 	  rtx ret = emit_move_via_integer (mode, x, y);
! 	  if (ret)
! 	    return ret;
! 	}
      }
  
    /* Show the output dies here.  This is necessary for SUBREGs
Index: config/rs6000/rs6000.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/config/rs6000/rs6000.c,v
retrieving revision 1.793
diff -c -p -r1.793 rs6000.c
*** config/rs6000/rs6000.c	8 Mar 2005 21:01:42 -0000	1.793
--- config/rs6000/rs6000.c	10 Mar 2005 14:58:53 -0000
*************** static void rs6000_darwin_file_start (vo
*** 770,775 ****
--- 770,776 ----
  static tree rs6000_build_builtin_va_list (void);
  static tree rs6000_gimplify_va_arg (tree, tree, tree *, tree *);
  static bool rs6000_must_pass_in_stack (enum machine_mode, tree);
+ static bool rs6000_complex_move_by_parts (enum machine_mode);
  static bool rs6000_vector_mode_supported_p (enum machine_mode);
  static int get_vec_cmp_insn (enum rtx_code, enum machine_mode,
  			     enum machine_mode);
*************** static const char alt_reg_names[][8] =
*** 986,991 ****
--- 987,994 ----
  #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
  #undef TARGET_SPLIT_COMPLEX_ARG
  #define TARGET_SPLIT_COMPLEX_ARG hook_bool_tree_true
+ #undef TARGET_COMPLEX_MOVE_BY_PARTS
+ #define TARGET_COMPLEX_MOVE_BY_PARTS rs6000_complex_move_by_parts
  #undef TARGET_MUST_PASS_IN_STACK
  #define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
  #undef TARGET_PASS_BY_REFERENCE
*************** rs6000_vector_mode_supported_p (enum mac
*** 17509,17514 ****
--- 17512,17524 ----
      return false;
  }
  
+ /* Target hook for complex_move_by_parts.  */
+ static bool
+ rs6000_complex_move_by_parts (enum machine_mode mode)
+ {
+   return GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT;
+ }
+ 
  /* Target hook for invalid_arg_for_unprototyped_fn. */ 
  static const char * 
  invalid_arg_for_unprototyped_fn (tree typelist, tree funcdecl, tree val)


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]