[cft] fix big-endian bitfield problems

Richard Henderson rth@redhat.com
Fri Nov 12 03:22:00 GMT 2004


Finding a working, properly installed, big-endian machine on which
to do more than superficial testing on this patch has defeated me.

Will someone please test this on powerpc, sparc, and hppa and see
if it does the job?


r~



Index: expmed.c
===================================================================
RCS file: /cvs/gcc/gcc/gcc/expmed.c,v
retrieving revision 1.203
diff -c -p -d -r1.203 expmed.c
*** expmed.c	8 Nov 2004 19:18:16 -0000	1.203
--- expmed.c	12 Nov 2004 03:08:00 -0000
*************** Software Foundation, 59 Temple Place - S
*** 36,41 ****
--- 36,43 ----
  #include "real.h"
  #include "recog.h"
  #include "langhooks.h"
+ #include "target.h"
+ 
  
  static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
  				   unsigned HOST_WIDE_INT,
*************** static bool smod_pow2_cheap[NUM_MACHINE_
*** 90,95 ****
--- 92,101 ----
  #define CODE_FOR_extzv	CODE_FOR_nothing
  #define gen_extzv(a,b,c,d) NULL_RTX
  #endif
+ #ifndef HAVE_conditional_move
+ #define HAVE_conditional_move 0
+ #define emit_conditional_move(a,b,c,d,e,f,g,h,i) NULL_RTX
+ #endif
  
  /* Cost of various pieces of RTL.  Note that some of these are indexed by
     shift count and some by mode.  */
*************** static int div_cost[NUM_MACHINE_MODES];
*** 104,115 ****
--- 110,130 ----
  static int mul_widen_cost[NUM_MACHINE_MODES];
  static int mul_highpart_cost[NUM_MACHINE_MODES];
  
+ /* Element [N][M] is true if (zero_extend:N (mem:M (reg))) is valid.  */
+ static bool can_ze_mem[NUM_MACHINE_MODES][NUM_MACHINE_MODES];
+ static int zext_cost[NUM_MACHINE_MODES][NUM_MACHINE_MODES];
+ 
+ /* Element M is true if (mem:M (and (reg) (const_int -SIZE))) is a valid
+    addressing mode.  */
+ static bool can_force_align_addr[NUM_MACHINE_MODES];
+ 
  void
  init_expmed (void)
  {
    struct
    {
      struct rtx_def reg;		rtunion reg_fld[2];
+     struct rtx_def reg2;	rtunion reg2_fld[2];
      struct rtx_def plus;	rtunion plus_fld1;
      struct rtx_def neg;
      struct rtx_def udiv;	rtunion udiv_fld1;
*************** init_expmed (void)
*** 124,129 ****
--- 139,149 ----
      struct rtx_def shift_mult;	rtunion shift_mult_fld1;
      struct rtx_def shift_add;	rtunion shift_add_fld1;
      struct rtx_def shift_sub;	rtunion shift_sub_fld1;
+     struct rtx_def zext2;
+     struct rtx_def mem;		rtunion mem_fld1;
+     struct rtx_def and;		rtunion and_fld1;
+     struct rtx_def set;		rtunion set_fld1;
+     struct rtx_def insn;	rtunion insn_fld[8];
    } all;
  
    rtx pow2[MAX_BITS_PER_WORD];
*************** init_expmed (void)
*** 194,200 ****
    XEXP (&all.shift_sub, 0) = &all.shift_mult;
    XEXP (&all.shift_sub, 1) = &all.reg;
  
!   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
         mode != VOIDmode;
         mode = GET_MODE_WIDER_MODE (mode))
      {
--- 214,220 ----
    XEXP (&all.shift_sub, 0) = &all.shift_mult;
    XEXP (&all.shift_sub, 1) = &all.reg;
  
!   for (mode = MIN_MODE_INT;
         mode != VOIDmode;
         mode = GET_MODE_WIDER_MODE (mode))
      {
*************** init_expmed (void)
*** 245,250 ****
--- 265,320 ----
  	  shiftsub_cost[mode][m] = rtx_cost (&all.shift_sub, SET);
  	}
      }
+ 
+   PUT_CODE (&all.reg2, REG);
+   REGNO (&all.reg2) = 10001;
+   PUT_MODE (&all.reg2, Pmode);
+ 
+   PUT_CODE (&all.mem, MEM);
+   XEXP (&all.mem, 0) = &all.reg2;
+ 
+   PUT_CODE (&all.zext2, ZERO_EXTEND);
+   XEXP (&all.zext2, 0) = &all.mem;
+ 
+   PUT_CODE (&all.set, SET);
+   XEXP (&all.set, 0) = &all.reg;
+   XEXP (&all.set, 1) = &all.zext2;
+ 
+   PUT_CODE (&all.insn, INSN);
+   PATTERN (&all.insn) = &all.set;
+ 
+   for (mode = MIN_MODE_INT;
+        mode != VOIDmode;
+        mode = GET_MODE_WIDER_MODE (mode))
+     {
+       PUT_MODE (&all.mem, mode);
+ 
+       for (wider_mode = GET_MODE_WIDER_MODE (mode);
+ 	   wider_mode != VOIDmode;
+ 	   wider_mode = GET_MODE_WIDER_MODE (wider_mode))
+ 	{
+ 	  PUT_MODE (&all.zext, wider_mode);
+ 	  PUT_MODE (&all.reg, mode);
+ 	  zext_cost[wider_mode][mode] = rtx_cost (&all.zext, SET);
+ 
+ 	  PUT_MODE (&all.reg, wider_mode);
+ 	  PUT_MODE (&all.zext2, wider_mode);
+ 	  can_ze_mem[wider_mode][mode] = recog (&all.set, &all.insn, 0) >= 0;
+ 	}
+     }
+ 
+   PUT_CODE (&all.and, AND);
+   XEXP (&all.and, 0) = &all.reg2;
+ 
+   for (mode = 0; mode < NUM_MACHINE_MODES; mode++)
+     {
+       if (GET_MODE_CLASS (mode) == MODE_RANDOM
+ 	  || GET_MODE_CLASS (mode) == MODE_CC)
+ 	continue;
+ 
+       XEXP (&all.and, 1) = GEN_INT (-(int)GET_MODE_SIZE (mode));
+       can_force_align_addr[mode] = memory_address_p (mode, &all.and);
+     }
  }
  
  /* Return an rtx representing minus the value of X.
*************** lshift_value (enum machine_mode mode, rt
*** 1853,1858 ****
--- 1923,1996 ----
    return immed_double_const (low, high, mode);
  }
  
+ /* Approximate the cost of the rtl that would be generated by
+    extract_force_align_mem_bit_field.  The better these functions
+    stay in sync, the more accurate results we'll get, but the more
+    time it'll take.  */
+ 
+ static unsigned int
+ cost_for_force_align_mem_bit_field (void)
+ {
+   unsigned cost = 0;
+ 
+   /* ??? Not taking known alignment into account properly.  */
+ 
+   if (!can_force_align_addr[word_mode])
+     {
+       /* When forced alignment is not supported, we'll need to compute
+ 	 ((X + C) & -SIZE) and ((X + C + SIZE-1) & -SIZE).  Assume the
+ 	 AND is the same cost as a PLUS.  So two PLUS and two AND make
+ 	 for 4 arithmetic operations accounted for here.  */
+       cost += 4 * add_cost[Pmode];
+     }
+ 
+   /* We'll need to compute the dynamic shift required by the misalignment.
+      This is (X & SIZE-1) << log2(BITS_PER_UNIT).  */
+   cost += add_cost[word_mode];
+   cost += shift_cost[word_mode][exact_log2 (BITS_PER_UNIT)];
+ 
+   /* We'll need to re-pack the bits back into a single word.  This can be
+      either one double-word shift or two single-word shifts plus an ior.
+      This will be a variable sized shift; for the purposes here, assume
+      maximum costs.  */
+   /* ??? Most ports don't properly cost double-word shifts, so we can't
+      tell if, like i386 and ia64, it's supported in hardware.  */
+   cost += shift_cost[word_mode][BITS_PER_WORD] * 2 + add_cost[word_mode];
+ 
+   /* ??? Not computed here is the cost of getting the value out of the
+      shifted word.  This could be two more word-mode shifts, one mask,
+      or nothing at all.  */
+ 
+   return cost;
+ }
+ 
+ /* Similarly, except for extract_part_bit_field as applied to a MEM.  */
+ 
+ static unsigned int
+ cost_for_part_mem_bit_field (unsigned HOST_WIDE_INT bitsize)
+ {
+   unsigned cost = 0, n;
+ 
+   /* ??? Not taking known alignment into account properly.  */
+ 
+   n = (bitsize + BITS_PER_UNIT - 1) / BITS_PER_UNIT;
+ 
+   /* Approximate the cost of loading and shifting one byte into place,
+      multiplied by the number of times we'd have to do this.  This is
+      not exact when shift cost varies with the shift count.  Note that
+      the first element is neither shifted nor ior'ed, thus N-1.  */
+ 
+   if (!can_ze_mem[word_mode][QImode])
+     cost += zext_cost[word_mode][QImode] * n;
+   cost += shift_cost[word_mode][BITS_PER_UNIT] * (n-1);
+   cost += add_cost[word_mode] * (n-1);
+ 
+   /* ??? Not computed here is the cost of extracting the (possibly sign
+      extended) value afterward.  */
+ 
+   return cost;
+ }
+ 
  /* Extract a bit field from a memory by forcing the alignment of the
     memory.  This efficient only if the field spans at least 4 boundaries.
  
*************** extract_force_align_mem_bit_field (rtx o
*** 1865,1874 ****
  				   unsigned HOST_WIDE_INT bitpos,
  				   int unsignedp)
  {
    enum machine_mode mode, dmode;
    unsigned int m_bitsize, m_size;
!   unsigned int sign_shift_up, sign_shift_dn;
!   rtx base, a1, a2, v1, v2, comb, shift, result, start;
  
    /* Choose a mode that will fit BITSIZE.    */
    mode = smallest_mode_for_size (bitsize, MODE_INT);
--- 2003,2017 ----
  				   unsigned HOST_WIDE_INT bitpos,
  				   int unsignedp)
  {
+   unsigned HOST_WIDE_INT offset;
    enum machine_mode mode, dmode;
    unsigned int m_bitsize, m_size;
!   rtx base, a1, a2, v1, v2, comb, start;
!   enum rtx_code shift_type;
!   rtx shift;
! 
!   offset = bitpos / BITS_PER_UNIT;
!   bitpos %= BITS_PER_UNIT;
  
    /* Choose a mode that will fit BITSIZE.    */
    mode = smallest_mode_for_size (bitsize, MODE_INT);
*************** extract_force_align_mem_bit_field (rtx o
*** 1876,1998 ****
    m_bitsize = GET_MODE_BITSIZE (mode);
  
    /* Choose a mode twice as wide.  Fail if no such mode exists.  */
!   dmode = mode_for_size (m_bitsize * 2, MODE_INT, false);
    if (dmode == BLKmode)
      return NULL;
  
    do_pending_stack_adjust ();
    start = get_last_insn ();
  
!   /* At the end, we'll need an additional shift to deal with sign/zero
!      extension.  By default this will be a left+right shift of the
!      appropriate size.  But we may be able to eliminate one of them.  */
!   sign_shift_up = sign_shift_dn = m_bitsize - bitsize;
! 
!   if (STRICT_ALIGNMENT)
      {
!       base = plus_constant (XEXP (op0, 0), bitpos / BITS_PER_UNIT);
!       bitpos %= BITS_PER_UNIT;
  
!       /* We load two values to be concatenate.  There's an edge condition
! 	 that bears notice -- an aligned value at the end of a page can
! 	 only load one value lest we segfault.  So the two values we load
! 	 are at "base & -size" and "(base + size - 1) & -size".  If base
! 	 is unaligned, the addresses will be aligned and sequential; if
! 	 base is aligned, the addresses will both be equal to base.  */
  
!       a1 = expand_simple_binop (Pmode, AND, force_operand (base, NULL),
! 				GEN_INT (-(HOST_WIDE_INT)m_size),
! 				NULL, true, OPTAB_LIB_WIDEN);
!       mark_reg_pointer (a1, m_bitsize);
!       v1 = gen_rtx_MEM (mode, a1);
!       set_mem_align (v1, m_bitsize);
!       v1 = force_reg (mode, validize_mem (v1));
  
        a2 = plus_constant (base, GET_MODE_SIZE (mode) - 1);
        a2 = expand_simple_binop (Pmode, AND, force_operand (a2, NULL),
  				GEN_INT (-(HOST_WIDE_INT)m_size),
  				NULL, true, OPTAB_LIB_WIDEN);
!       v2 = gen_rtx_MEM (mode, a2);
!       set_mem_align (v2, m_bitsize);
!       v2 = force_reg (mode, validize_mem (v2));
! 
!       /* Combine these two values into a double-word value.  */
!       if (m_bitsize == BITS_PER_WORD)
! 	{
! 	  comb = gen_reg_rtx (dmode);
! 	  emit_insn (gen_rtx_CLOBBER (VOIDmode, comb));
! 	  emit_move_insn (gen_rtx_SUBREG (mode, comb, 0), v1);
! 	  emit_move_insn (gen_rtx_SUBREG (mode, comb, m_size), v2);
! 	}
!       else
! 	{
! 	  if (BYTES_BIG_ENDIAN)
! 	    comb = v1, v1 = v2, v2 = comb;
! 	  v1 = convert_modes (dmode, mode, v1, true);
! 	  if (v1 == NULL)
! 	    goto fail;
! 	  v2 = convert_modes (dmode, mode, v2, true);
! 	  v2 = expand_simple_binop (dmode, ASHIFT, v2, GEN_INT (m_bitsize),
! 				    NULL, true, OPTAB_LIB_WIDEN);
! 	  if (v2 == NULL)
! 	    goto fail;
! 	  comb = expand_simple_binop (dmode, IOR, v1, v2, NULL,
! 				      true, OPTAB_LIB_WIDEN);
! 	  if (comb == NULL)
! 	    goto fail;
! 	}
  
!       shift = expand_simple_binop (Pmode, AND, base, GEN_INT (m_size - 1),
! 				   NULL, true, OPTAB_LIB_WIDEN);
!       shift = expand_mult (Pmode, shift, GEN_INT (BITS_PER_UNIT), NULL, 1);
  
!       if (bitpos != 0)
! 	{
! 	  if (sign_shift_up <= bitpos)
! 	    bitpos -= sign_shift_up, sign_shift_up = 0;
! 	  shift = expand_simple_binop (Pmode, PLUS, shift, GEN_INT (bitpos),
! 				       NULL, true, OPTAB_LIB_WIDEN);
! 	}
!     }
    else
!     {
!       unsigned HOST_WIDE_INT offset = bitpos / BITS_PER_UNIT;
!       bitpos %= BITS_PER_UNIT;
  
!       /* When strict alignment is not required, we can just load directly
! 	 from memory without masking.  If the remaining BITPOS offset is
! 	 small enough, we may be able to do all operations in MODE as 
! 	 opposed to DMODE.  */
!       if (bitpos + bitsize <= m_bitsize)
! 	dmode = mode;
!       comb = adjust_address (op0, dmode, offset);
  
!       if (sign_shift_up <= bitpos)
! 	bitpos -= sign_shift_up, sign_shift_up = 0;
!       shift = GEN_INT (bitpos);
!     }
  
!   /* Shift down the double-word such that the requested value is at bit 0.  */
!   if (shift != const0_rtx)
!     comb = expand_simple_binop (dmode, unsignedp ? LSHIFTRT : ASHIFTRT,
! 				comb, shift, NULL, unsignedp, OPTAB_LIB_WIDEN);
!   if (comb == NULL)
!     goto fail;
  
!   /* If the field exactly matches MODE, then all we need to do is return the
!      lowpart.  Otherwise, shift to get the sign bits set properly.  */
!   result = force_reg (mode, gen_lowpart (mode, comb));
  
!   if (sign_shift_up)
!     result = expand_simple_binop (mode, ASHIFT, result,
! 				  GEN_INT (sign_shift_up),
! 				  NULL_RTX, 0, OPTAB_LIB_WIDEN);
!   if (sign_shift_dn)
!     result = expand_simple_binop (mode, unsignedp ? LSHIFTRT : ASHIFTRT,
! 				  result, GEN_INT (sign_shift_dn),
! 				  NULL_RTX, 0, OPTAB_LIB_WIDEN);
  
!   return result;
  
   fail:
    delete_insns_since (start);
--- 2019,2149 ----
    m_bitsize = GET_MODE_BITSIZE (mode);
  
    /* Choose a mode twice as wide.  Fail if no such mode exists.  */
!   dmode = mode_for_size (2 * m_bitsize, MODE_INT, false);
    if (dmode == BLKmode)
      return NULL;
  
    do_pending_stack_adjust ();
    start = get_last_insn ();
  
!   /* When strict alignment is not required, we may be able to load from
!      memory without masking.  This requires that we be loading exactly
!      the same number of bytes as we would have otherwise, lest we segv
!      at the end of a page.  */
!   if (!STRICT_ALIGNMENT)
      {
!       if (bitpos + bitsize <= m_bitsize
! 	  && bitpos + bitsize > m_bitsize - BITS_PER_UNIT)
! 	{
! 	  comb = force_reg (mode, adjust_address (op0, mode, offset));
! 	  goto done;
! 	}
!       else if (bitpos + bitsize > m_bitsize)
! 	{
! 	  v1 = adjust_address (op0, mode, offset);
! 	  v2 = adjust_address (op0, QImode, offset + m_size);
! 	  v2 = gen_lowpart (mode, force_reg (QImode, v2));
! 	  if (BYTES_BIG_ENDIAN)
! 	    v2 = expand_simple_binop (mode, ASHIFT, v2,
! 				      GEN_INT (m_bitsize - BITS_PER_UNIT),
! 				      NULL, true, OPTAB_LIB_WIDEN);
! 	  shift = const0_rtx;
! 	  goto combine;
! 	}
!     }
  
!   base = plus_constant (XEXP (op0, 0), offset);
  
!   /* We load two values to be concatenated.  There's an edge condition
!      that bears notice -- an aligned value at the end of a page can
!      only load one value lest we segfault.  So the two values we load
!      are at "base & -size" and "(base + size - 1) & -size".  If base
!      is unaligned, the addresses will be aligned and sequential; if
!      base is aligned, the addresses will both be equal to base.  */
  
+   a1 = expand_simple_binop (Pmode, AND, force_operand (base, NULL),
+ 			    GEN_INT (-(HOST_WIDE_INT)m_size),
+ 			    NULL, true, OPTAB_LIB_WIDEN);
+   mark_reg_pointer (a1, m_bitsize);
+   v1 = gen_rtx_MEM (mode, a1);
+   set_mem_align (v1, m_bitsize);
+   v1 = force_reg (mode, validize_mem (v1));
+ 
+   if (bitpos + bitsize <= m_bitsize)
+     {
        a2 = plus_constant (base, GET_MODE_SIZE (mode) - 1);
        a2 = expand_simple_binop (Pmode, AND, force_operand (a2, NULL),
  				GEN_INT (-(HOST_WIDE_INT)m_size),
  				NULL, true, OPTAB_LIB_WIDEN);
!       mark_reg_pointer (a2, m_bitsize);
!     }
!   else
!     a2 = plus_constant (a1, GET_MODE_SIZE (mode));
!   v2 = gen_rtx_MEM (mode, a2);
!   set_mem_align (v2, m_bitsize);
!   v2 = force_reg (mode, validize_mem (v2));
  
!   /* For little-endian, the displacement computed into SHIFT is the
!      amount we should shift down such that the result is in the low
!      word.  For big-endian, it's the amount we should shift up so that
!      the result is in the high word.  */
!   shift = expand_simple_binop (Pmode, AND, base, GEN_INT (m_size - 1),
! 			       NULL, true, OPTAB_LIB_WIDEN);
!   shift = expand_mult (Pmode, shift, GEN_INT (BITS_PER_UNIT), NULL, 1);
  
!  combine:
!   if (BYTES_BIG_ENDIAN)
!     shift_type = ASHIFT;
    else
!     shift_type = unsignedp ? LSHIFTRT : ASHIFTRT;
  
!   if (m_bitsize >= BITS_PER_WORD)
!     {
!       /* ??? Don't unnecessarily exceed WORD_MODE.  If MODE==WORD_MODE, the
! 	 shift is no larger than BITS_PER_WORD-BITS_PER_UNIT, and for many
! 	 targets we're unable to get rid of the code that attempts to adjust
! 	 for shifts larger than BITS_PER_WORD.
  
! 	 However, MODE may already be larger than WORD_MODE, at which point
! 	 we do not have the option of a quad-word shift.
  
! 	 Duplicating much of the code from expand_subword_shift doesn't
! 	 seem like the best of ideas, so punt for now and simply emit the
! 	 shift as-is, and be prepared to fail the entire operation.  */
  
!       comb = gen_reg_rtx (dmode);
!       emit_insn (gen_rtx_CLOBBER (dmode, comb));
!       emit_move_insn (gen_rtx_SUBREG (mode, comb, 0), v1);
!       emit_move_insn (gen_rtx_SUBREG (mode, comb, m_size), v2);
  
!       if (shift != const0_rtx)
! 	comb = expand_simple_binop (dmode, shift_type, comb, shift, NULL,
! 				    unsignedp, OPTAB_LIB_WIDEN);
!       if (comb == NULL)
! 	goto fail;
!     }
!   else
!     {
!       if (BYTES_BIG_ENDIAN)
! 	comb = v1, v1 = v2, v2 = comb;
!       v1 = convert_modes (dmode, mode, v1, true);
!       v2 = gen_lowpart (dmode, v2);
!       v2 = expand_simple_binop (dmode, ASHIFT, v2, GEN_INT (m_bitsize),
! 				NULL, true, OPTAB_LIB_WIDEN);
!       comb = expand_simple_binop (dmode, IOR, v1, v2, NULL,
! 				  true, OPTAB_LIB_WIDEN);
!       if (shift != const0_rtx)
!         comb = expand_simple_binop (dmode, shift_type, comb, shift, NULL,
! 				    unsignedp, OPTAB_LIB_WIDEN);
!     }
  
!   /* We've now simplified the problem.  Recurse to extract the proper
!      bits out of the given word.  */
!   /* ??? While extract_fixed_bit_field ought to be more natural, it
!      frobs bitpos in ways that we don't want.  */
!  done:
!   return extract_bit_field (comb, bitsize, bitpos, unsignedp,
! 			    NULL, mode, mode);
  
   fail:
    delete_insns_since (start);
*************** extract_split_bit_field (rtx op0, unsign
*** 2021,2034 ****
      unit = BITS_PER_WORD;
    else
      {
!       unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
!       if (bitsize / unit > 2)
  	{
  	  rtx tmp = extract_force_align_mem_bit_field (op0, bitsize, bitpos,
  						       unsignedp);
  	  if (tmp)
  	    return tmp;
  	}
      }
  
    while (bitsdone < bitsize)
--- 2172,2191 ----
      unit = BITS_PER_WORD;
    else
      {
!       /* Choosing the correct alternative here is difficult.  It depends
! 	 very much on the addressing modes available, and whether we have
! 	 a zero-extending byte load instruction.  */
! 
!       if (cost_for_force_align_mem_bit_field ()
! 	  < cost_for_part_mem_bit_field (bitsize))
  	{
  	  rtx tmp = extract_force_align_mem_bit_field (op0, bitsize, bitpos,
  						       unsignedp);
  	  if (tmp)
  	    return tmp;
  	}
+ 
+       unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
      }
  
    while (bitsdone < bitsize)



More information about the Gcc-patches mailing list