inlining BLKmode functions

Gavin Romig-Koch gavin@cygnus.com
Fri Sep 25 14:44:00 GMT 1998


This is my shot at getting inlining to work for functions 
which return BLKmode structs in registers (like on the PA
and under EABI).  

OK to commit?

                            -gavin...



	* calls.c (expand_call) : Encapsulate code into 
	copy_blkmode_from_reg.
	* expr.c (copy_blkmode_from_reg): New function.
	* expr.h (copy_blkmode_from_reg): New prototype.
	* integrate.c (function_cannot_inline_p): We can inline
	these now.
	(expand_inline_function): Use copy_blkmode_from_reg
	if needed.  Avoid creating BLKmode REGs.
	(copy_rtx_and_substitute): Don't try to SUBREG a BLKmode
	object.

Index: calls.c
===================================================================
RCS file: /egcs/carton/cvsfiles/egcs/gcc/calls.c,v
retrieving revision 1.27
diff -c -p -r1.27 calls.c
*** calls.c	1998/08/26 17:06:03	1.27
--- calls.c	1998/09/25 21:27:31
*************** expand_call (exp, target, ignore)
*** 2105,2179 ****
         when function inlining is being done.  */
      emit_move_insn (target, valreg);
    else if (TYPE_MODE (TREE_TYPE (exp)) == BLKmode)
!     {
!       /* Some machines (the PA for example) want to return all small
! 	 structures in registers regardless of the structure's alignment.
! 	 
! 	 Deal with them explicitly by copying from the return registers
! 	 into the target MEM locations.  */
!       int bytes = int_size_in_bytes (TREE_TYPE (exp));
!       rtx src = NULL, dst = NULL;
!       int bitsize = MIN (TYPE_ALIGN (TREE_TYPE (exp)), BITS_PER_WORD);
!       int bitpos, xbitpos, big_endian_correction = 0;
!       
!       if (target == 0)
! 	{
! 	  target = assign_stack_temp (BLKmode, bytes, 0);
! 	  MEM_IN_STRUCT_P (target) = AGGREGATE_TYPE_P (TREE_TYPE (exp));
! 	  preserve_temp_slots (target);
! 	}
! 
!       /* This code assumes valreg is at least a full word.  If it isn't,
! 	 copy it into a new pseudo which is a full word.  */
!       if (GET_MODE (valreg) != BLKmode
! 	  && GET_MODE_SIZE (GET_MODE (valreg)) < UNITS_PER_WORD)
! 	valreg = convert_to_mode (word_mode, valreg,
! 				  TREE_UNSIGNED (TREE_TYPE (exp)));
! 
!       /* Structures whose size is not a multiple of a word are aligned
! 	 to the least significant byte (to the right).  On a BYTES_BIG_ENDIAN
! 	 machine, this means we must skip the empty high order bytes when
! 	 calculating the bit offset.  */
!       if (BYTES_BIG_ENDIAN && bytes % UNITS_PER_WORD)
! 	big_endian_correction = (BITS_PER_WORD - ((bytes % UNITS_PER_WORD)
! 						  * BITS_PER_UNIT));
! 
!       /* Copy the structure BITSIZE bites at a time.
! 
! 	 We could probably emit more efficient code for machines
! 	 which do not use strict alignment, but it doesn't seem
! 	 worth the effort at the current time.  */
!       for (bitpos = 0, xbitpos = big_endian_correction;
! 	   bitpos < bytes * BITS_PER_UNIT;
! 	   bitpos += bitsize, xbitpos += bitsize)
! 	{
! 
! 	  /* We need a new source operand each time xbitpos is on a 
! 	     word boundary and when xbitpos == big_endian_correction
! 	     (the first time through).  */
! 	  if (xbitpos % BITS_PER_WORD == 0
! 	      || xbitpos == big_endian_correction)
! 	    src = operand_subword_force (valreg,
! 					 xbitpos / BITS_PER_WORD, 
! 					 BLKmode);
! 
! 	  /* We need a new destination operand each time bitpos is on
! 	     a word boundary.  */
! 	  if (bitpos % BITS_PER_WORD == 0)
! 	    dst = operand_subword (target, bitpos / BITS_PER_WORD, 1, BLKmode);
! 	      
! 	  /* Use xbitpos for the source extraction (right justified) and
! 	     xbitpos for the destination store (left justified).  */
! 	  store_bit_field (dst, bitsize, bitpos % BITS_PER_WORD, word_mode,
! 			   extract_bit_field (src, bitsize,
! 					      xbitpos % BITS_PER_WORD, 1,
! 					      NULL_RTX, word_mode,
! 					      word_mode,
! 					      bitsize / BITS_PER_UNIT,
! 					      BITS_PER_WORD),
! 			   bitsize / BITS_PER_UNIT, BITS_PER_WORD);
! 	}
!     }
    else
      target = copy_to_reg (valreg);
  
--- 2105,2111 ----
         when function inlining is being done.  */
      emit_move_insn (target, valreg);
    else if (TYPE_MODE (TREE_TYPE (exp)) == BLKmode)
!     target = copy_blkmode_from_reg (target, valreg, TREE_TYPE (exp));
    else
      target = copy_to_reg (valreg);
  
Index: expr.c
===================================================================
RCS file: /egcs/carton/cvsfiles/egcs/gcc/expr.c,v
retrieving revision 1.95
diff -c -p -r1.95 expr.c
*** expr.c	1998/09/08 22:47:54	1.95
--- expr.c	1998/09/25 21:27:43
*************** emit_group_store (orig_dst, src, ssize, 
*** 2063,2068 ****
--- 2063,2150 ----
      emit_move_insn (orig_dst, dst);
  }
  
+ /* Generate code to copy a BLKmode object of TYPE out of a
+    set of registers starting with SRCREG into TGTBLK.  If TGTBLK
+    is null, a stack temporary is created.  TGTBLK is returned.
+ 
+    The primary purpose of this routine is to handle functions
+    that return BLKmode structures in registers.  Some machines
+    (the PA for example) want to return all small structures
+    in registers regardless of the structure's alignment.
+   */
+ 
+ rtx
+ copy_blkmode_from_reg(tgtblk,srcreg,type)
+      rtx tgtblk;
+      rtx srcreg;
+      tree type;
+ {
+       int bytes = int_size_in_bytes (type);
+       rtx src = NULL, dst = NULL;
+       int bitsize = MIN (TYPE_ALIGN (type), BITS_PER_WORD);
+       int bitpos, xbitpos, big_endian_correction = 0;
+       
+       if (tgtblk == 0)
+ 	{
+ 	  tgtblk = assign_stack_temp (BLKmode, bytes, 0);
+ 	  MEM_IN_STRUCT_P (tgtblk) = AGGREGATE_TYPE_P (type);
+ 	  preserve_temp_slots (tgtblk);
+ 	}
+       
+       /* This code assumes srcreg is at least a full word.  If it isn't,
+ 	 copy it into a new pseudo which is a full word.  */
+       if (GET_MODE (srcreg) != BLKmode
+ 	  && GET_MODE_SIZE (GET_MODE (srcreg)) < UNITS_PER_WORD)
+ 	srcreg = convert_to_mode (word_mode, srcreg,
+ 				  TREE_UNSIGNED (type));
+ 
+       /* Structures whose size is not a multiple of a word are aligned
+ 	 to the least significant byte (to the right).  On a BYTES_BIG_ENDIAN
+ 	 machine, this means we must skip the empty high order bytes when
+ 	 calculating the bit offset.  */
+       if (BYTES_BIG_ENDIAN && bytes % UNITS_PER_WORD)
+ 	big_endian_correction = (BITS_PER_WORD - ((bytes % UNITS_PER_WORD)
+ 						  * BITS_PER_UNIT));
+ 
+       /* Copy the structure BITSIZE bits at a time.
+ 
+ 	 We could probably emit more efficient code for machines
+ 	 which do not use strict alignment, but it doesn't seem
+ 	 worth the effort at the current time.  */
+       for (bitpos = 0, xbitpos = big_endian_correction;
+ 	   bitpos < bytes * BITS_PER_UNIT;
+ 	   bitpos += bitsize, xbitpos += bitsize)
+ 	{
+ 
+ 	  /* We need a new source operand each time xbitpos is on a 
+ 	     word boundary and when xbitpos == big_endian_correction
+ 	     (the first time through).  */
+ 	  if (xbitpos % BITS_PER_WORD == 0
+ 	      || xbitpos == big_endian_correction)
+ 	    src = operand_subword_force (srcreg,
+ 					 xbitpos / BITS_PER_WORD, 
+ 					 BLKmode);
+ 
+ 	  /* We need a new destination operand each time bitpos is on
+ 	     a word boundary.  */
+ 	  if (bitpos % BITS_PER_WORD == 0)
+ 	    dst = operand_subword (tgtblk, bitpos / BITS_PER_WORD, 1, BLKmode);
+ 	      
+ 	  /* Use xbitpos for the source extraction (right justified) and
+ 	     bitpos for the destination store (left justified).  */
+ 	  store_bit_field (dst, bitsize, bitpos % BITS_PER_WORD, word_mode,
+ 			   extract_bit_field (src, bitsize,
+ 					      xbitpos % BITS_PER_WORD, 1,
+ 					      NULL_RTX, word_mode,
+ 					      word_mode,
+ 					      bitsize / BITS_PER_UNIT,
+ 					      BITS_PER_WORD),
+ 			   bitsize / BITS_PER_UNIT, BITS_PER_WORD);
+ 	}
+       return tgtblk;
+ }
+ 
+ 
  /* Add a USE expression for REG to the (possibly empty) list pointed
     to by CALL_FUSAGE.  REG must denote a hard register.  */
  
Index: expr.h
===================================================================
RCS file: /egcs/carton/cvsfiles/egcs/gcc/expr.h,v
retrieving revision 1.22
diff -c -p -r1.22 expr.h
*** expr.h	1998/07/30 10:38:10	1.22
--- expr.h	1998/09/25 21:27:43
*************** extern void emit_group_load PROTO((rtx, 
*** 731,736 ****
--- 731,741 ----
     PARALLEL.  */
  extern void emit_group_store PROTO((rtx, rtx, int, int));
  
+ #ifdef TREE_CODE
+ /* Copy BLKmode object from a set of registers. */
+ extern rtx copy_blkmode_from_reg PROTO((rtx,rtx,tree));
+ #endif
+ 
  /* Mark REG as holding a parameter for the next CALL_INSN.  */
  extern void use_reg PROTO((rtx *, rtx));
  /* Mark NREGS consecutive regs, starting at REGNO, as holding parameters
Index: integrate.c
===================================================================
RCS file: /egcs/carton/cvsfiles/egcs/gcc/integrate.c,v
retrieving revision 1.35
diff -c -p -r1.35 integrate.c
*** integrate.c	1998/09/10 10:45:56	1.35
--- integrate.c	1998/09/25 21:27:46
*************** function_cannot_inline_p (fndecl)
*** 154,164 ****
    if (current_function_returns_pcc_struct)
      return "inline functions not supported for this return value type";
  
-   /* We can't inline functions that return BLKmode structures in registers.  */
-   if (TYPE_MODE (TREE_TYPE (TREE_TYPE (fndecl))) == BLKmode
-       && ! aggregate_value_p (TREE_TYPE (TREE_TYPE (fndecl))))
-     return "inline functions not supported for this return value type";
- 
    /* We can't inline functions that return structures of varying size.  */
    if (int_size_in_bytes (TREE_TYPE (TREE_TYPE (fndecl))) < 0)
      return "function with varying-size return value cannot be inline";
--- 154,159 ----
*************** expand_inline_function (fndecl, parms, t
*** 1807,1813 ****
--- 1802,1824 ----
  	 Let the combiner substitute the MEM if that is valid.  */
        if (target == 0 || GET_CODE (target) != REG
  	  || GET_MODE (target) != departing_mode)
+ 	{
+ 	  /* Don't make BLKmode registers.  If this looks like
+ 	     a BLKmode object being returned in a register, get
+ 	     the mode from that, otherwise abort. */
+ 	  if (departing_mode == BLKmode)
+ 	    {
+ 	      if (REG == GET_CODE (DECL_RTL (DECL_RESULT (fndecl))))
+ 		{
+ 		  departing_mode = GET_MODE (DECL_RTL (DECL_RESULT (fndecl)));
+ 		  arriving_mode = departing_mode;
+ 		}
+ 	      else
+ 		abort();
+ 	    }
+ 	      
  	target = gen_reg_rtx (departing_mode);
+ 	}
  
        /* If function's value was promoted before return,
  	 avoid machine mode mismatch when we substitute INLINE_TARGET.
*************** expand_inline_function (fndecl, parms, t
*** 2167,2172 ****
--- 2178,2189 ----
  
    emit_line_note (input_filename, lineno);
  
+   /* If the function returns a BLKmode object in a register, copy it
+      out of the temp register into a BLKmode memory object. */
+   if (TYPE_MODE (TREE_TYPE (TREE_TYPE (fndecl))) == BLKmode
+       && ! aggregate_value_p (TREE_TYPE (TREE_TYPE (fndecl))))
+     target = copy_blkmode_from_reg (0, target, TREE_TYPE (TREE_TYPE (fndecl)));
+   
    if (structure_value_addr)
      {
        target = gen_rtx_MEM (TYPE_MODE (type),
*************** copy_rtx_and_substitute (orig, map)
*** 2431,2442 ****
  	    {
  	      /* This is a reference to the function return value.  If
  		 the function doesn't have a return value, error.  If the
! 		 mode doesn't agree, make a SUBREG.  */
  	      if (map->inline_target == 0)
  		/* Must be unrolling loops or replicating code if we
  		   reach here, so return the register unchanged.  */
  		return orig;
! 	      else if (mode != GET_MODE (map->inline_target))
  		return gen_lowpart (mode, map->inline_target);
  	      else
  		return map->inline_target;
--- 2448,2460 ----
  	    {
  	      /* This is a reference to the function return value.  If
  		 the function doesn't have a return value, error.  If the
! 		 modes differ and the target isn't BLKmode, make a SUBREG.  */
  	      if (map->inline_target == 0)
  		/* Must be unrolling loops or replicating code if we
  		   reach here, so return the register unchanged.  */
  		return orig;
! 	      else if (GET_MODE (map->inline_target) != BLKmode
! 		       && mode != GET_MODE (map->inline_target))
  		return gen_lowpart (mode, map->inline_target);
  	      else
  		return map->inline_target;





More information about the Gcc-patches mailing list