This is the mail archive of the gcc-patches@gcc.gnu.org mailing list for the GCC project.


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]
Other format: [Raw text]

improve unaligned loads with no special insns


I wrote this after looking at some bit field extraction code generated
for ia64, and seeing how truly abysmal it was.  And then recognizing
that the same held true for many other ports.

The object here is to forcibly align a memory address (with AND), load
two words, and shift them into place.  The mechanism that we currently use
is loading N aligned units (usually bytes) and shifting them into place
one at a time.  The new scheme is more efficient for more than two
loads, and is *significantly* better for 8 byte unaligned loads.

For instance,

	struct S { char c; long x __attribute__((packed)); };
	long f(struct S *s) { return s->x; }

.text size/	old	new
ia64		208	96
sparc64		92	48

It also triggers for some edge conditions of bitfields for x86; thus
the STRICT_ALIGNMENT checking there.  I can't find that one again,
but it does appear in the testsuite.

Tested on ia64, alpha and x86 linux.


r~


        * expmed.c (extract_force_align_mem_bit_field): New.
        (extract_split_bit_field): Call it.

Index: expmed.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/expmed.c,v
retrieving revision 1.200
diff -c -p -d -r1.200 expmed.c
*** expmed.c	21 Oct 2004 10:51:00 -0000	1.200
--- expmed.c	5 Nov 2004 23:56:15 -0000
*************** lshift_value (enum machine_mode mode, rt
*** 1853,1858 ****
--- 1869,2012 ----
    return immed_double_const (low, high, mode);
  }
  
+ /* Extract a bit field from a memory by forcing the alignment of the
+    memory.  This is efficient only if the field spans at least 4 boundaries.
+    Return the resulting rtx, or NULL if this approach cannot be used.
+    OP0 is the MEM.
+    BITSIZE is the field width; BITPOS is the position of the first bit.
+    UNSIGNEDP is true if the result should be zero-extended.  */
+ 
+ static rtx
+ extract_force_align_mem_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
+ 				   unsigned HOST_WIDE_INT bitpos,
+ 				   int unsignedp)
+ {
+   enum machine_mode mode, dmode;
+   unsigned int m_bitsize, m_size;
+   unsigned int sign_shift_up, sign_shift_dn;
+   rtx base, a1, a2, v1, v2, comb, shift, result, start;
+ 
+   /* Choose the smallest integer mode that will fit BITSIZE.  */
+   mode = smallest_mode_for_size (bitsize, MODE_INT);
+   m_size = GET_MODE_SIZE (mode);
+   m_bitsize = GET_MODE_BITSIZE (mode);
+ 
+   /* Choose a mode twice as wide.  Fail if no such mode exists.  */
+   dmode = mode_for_size (m_bitsize * 2, MODE_INT, false);
+   if (dmode == BLKmode)
+     return NULL;
+ 
+   do_pending_stack_adjust ();
+   start = get_last_insn ();
+ 
+   /* At the end, we'll need an additional shift to deal with sign/zero
+      extension.  By default this will be a left+right shift of the
+      appropriate size.  But we may be able to eliminate one of them.  */
+   sign_shift_up = sign_shift_dn = m_bitsize - bitsize;
+ 
+   if (STRICT_ALIGNMENT)
+     {
+       base = plus_constant (XEXP (op0, 0), bitpos / BITS_PER_UNIT);
+       base = force_operand (base, NULL);
+       bitpos %= BITS_PER_UNIT;
+ 
+       /* Force alignment of the address; load two sequential values.  */
+       a1 = expand_simple_binop (Pmode, AND, base,
+ 				GEN_INT (-(HOST_WIDE_INT)m_size),
+ 				NULL, true, OPTAB_LIB_WIDEN);
+       mark_reg_pointer (a1, m_bitsize);
+       v1 = gen_rtx_MEM (mode, a1);
+       set_mem_align (v1, m_bitsize);
+       v1 = force_reg (mode, validize_mem (v1));
+ 
+       a2 = plus_constant (a1, GET_MODE_SIZE (mode));
+       v2 = gen_rtx_MEM (mode, a2);
+       set_mem_align (v2, m_bitsize);
+       v2 = force_reg (mode, validize_mem (v2));
+ 
+       /* Combine these two values into a double-word value.  */
+       if (m_bitsize == BITS_PER_WORD)
+ 	{
+ 	  comb = gen_reg_rtx (dmode);
+ 	  emit_insn (gen_rtx_CLOBBER (VOIDmode, comb));
+ 	  emit_move_insn (gen_rtx_SUBREG (mode, comb, 0), v1);
+ 	  emit_move_insn (gen_rtx_SUBREG (mode, comb, m_size), v2);
+ 	}
+       else
+ 	{
+ 	  if (BYTES_BIG_ENDIAN)
+ 	    comb = v1, v1 = v2, v2 = comb;
+ 	  v1 = convert_modes (dmode, mode, v1, true);
+ 	  if (v1 == NULL)
+ 	    goto fail;
+ 	  v2 = convert_modes (dmode, mode, v2, true);
+ 	  v2 = expand_simple_binop (dmode, ASHIFT, v2, GEN_INT (m_bitsize),
+ 				    NULL, true, OPTAB_LIB_WIDEN);
+ 	  if (v2 == NULL)
+ 	    goto fail;
+ 	  comb = expand_simple_binop (dmode, IOR, v1, v2, NULL,
+ 				      true, OPTAB_LIB_WIDEN);
+ 	  if (comb == NULL)
+ 	    goto fail;
+ 	}
+ 
+       shift = expand_simple_binop (Pmode, AND, base, GEN_INT (m_size - 1),
+ 				   NULL, true, OPTAB_LIB_WIDEN);
+       shift = expand_mult (Pmode, shift, GEN_INT (BITS_PER_UNIT), NULL, 1);
+ 
+       if (bitpos != 0)
+ 	{
+ 	  if (sign_shift_up <= bitpos)
+ 	    bitpos -= sign_shift_up, sign_shift_up = 0;
+ 	  shift = expand_simple_binop (Pmode, PLUS, shift, GEN_INT (bitpos),
+ 				       NULL, true, OPTAB_LIB_WIDEN);
+ 	}
+     }
+   else
+     {
+       unsigned HOST_WIDE_INT offset = bitpos / BITS_PER_UNIT;
+       bitpos %= BITS_PER_UNIT;
+ 
+       /* When strict alignment is not required, we can just load directly
+ 	 from memory without masking.  If the remaining BITPOS offset is
+ 	 small enough, we may be able to do all operations in MODE as 
+ 	 opposed to DMODE.  */
+       if (bitpos + bitsize <= m_bitsize)
+ 	dmode = mode;
+       comb = adjust_address (op0, dmode, offset);
+ 
+       if (sign_shift_up <= bitpos)
+ 	bitpos -= sign_shift_up, sign_shift_up = 0;
+       shift = GEN_INT (bitpos);
+     }
+ 
+   /* Shift down the double-word such that the requested value is at bit 0.  */
+   if (shift != const0_rtx)
+     comb = expand_simple_binop (dmode, unsignedp ? LSHIFTRT : ASHIFTRT,
+ 				comb, shift, NULL, unsignedp, OPTAB_LIB_WIDEN);
+   if (comb == NULL)
+     goto fail;
+ 
+   /* If the field exactly matches MODE, then all we need to do is return the
+      lowpart.  Otherwise, shift to get the sign bits set properly.  */
+   result = force_reg (mode, gen_lowpart (mode, comb));
+ 
+   if (sign_shift_up)
+     result = expand_simple_binop (mode, ASHIFT, result,
+ 				  GEN_INT (sign_shift_up),
+ 				  NULL_RTX, 0, OPTAB_LIB_WIDEN);
+   if (sign_shift_dn)
+     result = expand_simple_binop (mode, unsignedp ? LSHIFTRT : ASHIFTRT,
+ 				  result, GEN_INT (sign_shift_dn),
+ 				  NULL_RTX, 0, OPTAB_LIB_WIDEN);
+ 
+   return result;
+ 
+  fail:
+   delete_insns_since (start);
+   return NULL;
+ }
+ 
  /* Extract a bit field that is split across two words
     and return an RTX for the result.
  
*************** extract_split_bit_field (rtx op0, unsign
*** 1874,1880 ****
    if (REG_P (op0) || GET_CODE (op0) == SUBREG)
      unit = BITS_PER_WORD;
    else
!     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
  
    while (bitsdone < bitsize)
      {
--- 2028,2043 ----
    if (REG_P (op0) || GET_CODE (op0) == SUBREG)
      unit = BITS_PER_WORD;
    else
!     {
!       unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
!       if (bitsize / unit > 2)
! 	{
! 	  rtx tmp = extract_force_align_mem_bit_field (op0, bitsize, bitpos,
! 						       unsignedp);
! 	  if (tmp)
! 	    return tmp;
! 	}
!     }
  
    while (bitsdone < bitsize)
      {


Index Nav: [Date Index] [Subject Index] [Author Index] [Thread Index]
Message Nav: [Date Prev] [Date Next] [Thread Prev] [Thread Next]