/* Medium-level subroutines: convert bit-field store and extract
   and shifts, multiplies and divides to rtl instructions.
   Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
   1999, 2000, 2001 Free Software Foundation, Inc.

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */


#include "config.h"
#include "system.h"
#include "toplev.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "flags.h"
#include "insn-flags.h"
#include "insn-codes.h"
#include "insn-config.h"
#include "expr.h"
#include "real.h"
#include "recog.h"

static void store_fixed_bit_field	PARAMS ((rtx, unsigned HOST_WIDE_INT,
						 unsigned HOST_WIDE_INT,
						 unsigned HOST_WIDE_INT, rtx,
						 unsigned int));
static void store_split_bit_field	PARAMS ((rtx, unsigned HOST_WIDE_INT,
						 unsigned HOST_WIDE_INT, rtx,
						 unsigned int));
static rtx extract_fixed_bit_field	PARAMS ((enum machine_mode, rtx,
						 unsigned HOST_WIDE_INT,
						 unsigned HOST_WIDE_INT,
						 unsigned HOST_WIDE_INT,
						 rtx, int, unsigned int));
static rtx mask_rtx			PARAMS ((enum machine_mode, int,
						 int, int));
static rtx lshift_value		PARAMS ((enum machine_mode, rtx,
						 int, int));
static rtx extract_split_bit_field	PARAMS ((rtx, unsigned HOST_WIDE_INT,
						 unsigned HOST_WIDE_INT, int,
						 unsigned int));
static void do_cmp_and_jump		PARAMS ((rtx, rtx, enum rtx_code,
						 enum machine_mode, rtx));
/* Non-zero means divide or modulus operations are relatively cheap for
   powers of two, so don't use branches; emit the operation instead.
   Usually, this will mean that the MD file will emit non-branch
   sequences.  */

static int sdiv_pow2_cheap, smod_pow2_cheap;

#ifndef SLOW_UNALIGNED_ACCESS
#define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
#endif

/* For compilers that support multiple targets with different word sizes,
   MAX_BITS_PER_WORD contains the biggest value of BITS_PER_WORD.  An example
   is the H8/300(H) compiler.  */

#ifndef MAX_BITS_PER_WORD
#define MAX_BITS_PER_WORD BITS_PER_WORD
#endif

/* Cost of various pieces of RTL.  Note that some of these are indexed by
   shift count and some by mode.  */
static int add_cost, negate_cost, zero_cost;
static int shift_cost[MAX_BITS_PER_WORD];
static int shiftadd_cost[MAX_BITS_PER_WORD];
static int shiftsub_cost[MAX_BITS_PER_WORD];
static int mul_cost[NUM_MACHINE_MODES];
static int div_cost[NUM_MACHINE_MODES];
static int mul_widen_cost[NUM_MACHINE_MODES];
static int mul_highpart_cost[NUM_MACHINE_MODES];

void
init_expmed ()
{
  /* This is "some random pseudo register" for purposes of calling recog
     to see what insns exist.  */
  rtx reg = gen_rtx_REG (word_mode, 10000);
  rtx shift_insn, shiftadd_insn, shiftsub_insn;
  int dummy;
  int m;
  enum machine_mode mode, wider_mode;

  start_sequence ();

  reg = gen_rtx_REG (word_mode, 10000);

  zero_cost = rtx_cost (const0_rtx, 0);
  add_cost = rtx_cost (gen_rtx_PLUS (word_mode, reg, reg), SET);

  shift_insn = emit_insn (gen_rtx_SET (VOIDmode, reg,
				       gen_rtx_ASHIFT (word_mode, reg,
						       const0_rtx)));

  shiftadd_insn
    = emit_insn (gen_rtx_SET (VOIDmode, reg,
			      gen_rtx_PLUS (word_mode,
					    gen_rtx_MULT (word_mode,
							  reg, const0_rtx),
					    reg)));

  shiftsub_insn
    = emit_insn (gen_rtx_SET (VOIDmode, reg,
			      gen_rtx_MINUS (word_mode,
					     gen_rtx_MULT (word_mode,
							   reg, const0_rtx),
					     reg)));

  init_recog ();

  shift_cost[0] = 0;
  shiftadd_cost[0] = shiftsub_cost[0] = add_cost;

  for (m = 1; m < MAX_BITS_PER_WORD; m++)
    {
      shift_cost[m] = shiftadd_cost[m] = shiftsub_cost[m] = 32000;

      XEXP (SET_SRC (PATTERN (shift_insn)), 1) = GEN_INT (m);
      if (recog (PATTERN (shift_insn), shift_insn, &dummy) >= 0)
	shift_cost[m] = rtx_cost (SET_SRC (PATTERN (shift_insn)), SET);

      XEXP (XEXP (SET_SRC (PATTERN (shiftadd_insn)), 0), 1)
	= GEN_INT ((HOST_WIDE_INT) 1 << m);
      if (recog (PATTERN (shiftadd_insn), shiftadd_insn, &dummy) >= 0)
	shiftadd_cost[m] = rtx_cost (SET_SRC (PATTERN (shiftadd_insn)), SET);

      XEXP (XEXP (SET_SRC (PATTERN (shiftsub_insn)), 0), 1)
	= GEN_INT ((HOST_WIDE_INT) 1 << m);
      if (recog (PATTERN (shiftsub_insn), shiftsub_insn, &dummy) >= 0)
	shiftsub_cost[m] = rtx_cost (SET_SRC (PATTERN (shiftsub_insn)), SET);
    }

  negate_cost = rtx_cost (gen_rtx_NEG (word_mode, reg), SET);

  sdiv_pow2_cheap
    = (rtx_cost (gen_rtx_DIV (word_mode, reg, GEN_INT (32)), SET)
       <= 2 * add_cost);
  smod_pow2_cheap
    = (rtx_cost (gen_rtx_MOD (word_mode, reg, GEN_INT (32)), SET)
       <= 2 * add_cost);

  for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
       mode != VOIDmode;
       mode = GET_MODE_WIDER_MODE (mode))
    {
      reg = gen_rtx_REG (mode, 10000);
      div_cost[(int) mode] = rtx_cost (gen_rtx_UDIV (mode, reg, reg), SET);
      mul_cost[(int) mode] = rtx_cost (gen_rtx_MULT (mode, reg, reg), SET);
      wider_mode = GET_MODE_WIDER_MODE (mode);
      if (wider_mode != VOIDmode)
	{
	  mul_widen_cost[(int) wider_mode]
	    = rtx_cost (gen_rtx_MULT (wider_mode,
				      gen_rtx_ZERO_EXTEND (wider_mode, reg),
				      gen_rtx_ZERO_EXTEND (wider_mode, reg)),
			SET);
	  mul_highpart_cost[(int) mode]
	    = rtx_cost (gen_rtx_TRUNCATE
			(mode,
			 gen_rtx_LSHIFTRT (wider_mode,
					   gen_rtx_MULT (wider_mode,
							 gen_rtx_ZERO_EXTEND
							 (wider_mode, reg),
							 gen_rtx_ZERO_EXTEND
							 (wider_mode, reg)),
					   GEN_INT (GET_MODE_BITSIZE (mode)))),
			SET);
	}
    }

  end_sequence ();
}
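
/* Illustrative note (not part of the original file): each probe above
   works by rewriting one operand of a prebuilt insn and asking recog
   whether the target recognizes the result.  For m == 2 the shift-add
   probe effectively asks whether the backend has an insn matching

       (set (reg) (plus (mult (reg) (const_int 4)) (reg)))

   (e.g. an x86 "lea").  A single probe iteration, written out by hand,
   might look like this sketch; register number 10000 is the same
   arbitrary pseudo used above.  */
#if 0
{
  int dummy;
  rtx reg = gen_rtx_REG (word_mode, 10000);
  rtx probe
    = emit_insn (gen_rtx_SET (VOIDmode, reg,
			      gen_rtx_PLUS (word_mode,
					    gen_rtx_MULT (word_mode, reg,
							  GEN_INT (4)),
					    reg)));
  if (recog (PATTERN (probe), probe, &dummy) >= 0)
    /* Recognized: rtx_cost of its SET_SRC is the shift-add cost.  */
    shiftadd_cost[2] = rtx_cost (SET_SRC (PATTERN (probe)), SET);
}
#endif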

/* Return an rtx representing minus the value of X.
   MODE is the intended mode of the result,
   useful if X is a CONST_INT.  */

rtx
negate_rtx (mode, x)
     enum machine_mode mode;
     rtx x;
{
  rtx result = simplify_unary_operation (NEG, mode, x, mode);

  if (result == 0)
    result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);

  return result;
}
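
/* Usage sketch (illustrative, not from the original source): for a
   CONST_INT operand the negation folds at compile time via
   simplify_unary_operation; for anything else expand_unop emits a
   neg insn into the current sequence.  */
#if 0
  rtx c = negate_rtx (SImode, GEN_INT (5));		  /* (const_int -5) */
  rtx r = negate_rtx (SImode, gen_rtx_REG (SImode, 10000)); /* emits a neg */
#endif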
\f
/* Generate code to store value from rtx VALUE
   into a bit-field within structure STR_RTX
   containing BITSIZE bits starting at bit BITNUM.
   FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
   ALIGN is the alignment that STR_RTX is known to have.
   TOTAL_SIZE is the size of the structure in bytes, or -1 if varying.  */

/* ??? Note that there are two different ideas here for how
   to determine the size to count bits within, for a register.
   One is BITS_PER_WORD, and the other is the size of operand 3
   of the insv pattern.

   If operand 3 of the insv pattern is VOIDmode, then we will use
   BITS_PER_WORD; otherwise, we use the mode of operand 3.  */

rtx
store_bit_field (str_rtx, bitsize, bitnum, fieldmode, value, align, total_size)
     rtx str_rtx;
     unsigned HOST_WIDE_INT bitsize;
     unsigned HOST_WIDE_INT bitnum;
     enum machine_mode fieldmode;
     rtx value;
     unsigned int align;
     HOST_WIDE_INT total_size;
{
  unsigned int unit
    = (GET_CODE (str_rtx) == MEM) ? BITS_PER_UNIT : BITS_PER_WORD;
  unsigned HOST_WIDE_INT offset = bitnum / unit;
  unsigned HOST_WIDE_INT bitpos = bitnum % unit;
  register rtx op0 = str_rtx;
#ifdef HAVE_insv
  unsigned HOST_WIDE_INT insv_bitsize;
  enum machine_mode op_mode;

  op_mode = insn_data[(int) CODE_FOR_insv].operand[3].mode;
  if (op_mode == VOIDmode)
    op_mode = word_mode;
  insv_bitsize = GET_MODE_BITSIZE (op_mode);
#endif

  /* It is wrong to have align==0, since every object is aligned at
     least at a bit boundary.  This usually means a bug elsewhere.  */
  if (align == 0)
    abort ();

  /* Discount the part of the structure before the desired byte.
     We need to know how many bytes are safe to reference after it.  */
  if (total_size >= 0)
    total_size -= (bitpos / BIGGEST_ALIGNMENT
		   * (BIGGEST_ALIGNMENT / BITS_PER_UNIT));

  while (GET_CODE (op0) == SUBREG)
    {
      /* The following line once was done only if WORDS_BIG_ENDIAN,
	 but I think that is a mistake.  WORDS_BIG_ENDIAN is
	 meaningful at a much higher level; when structures are copied
	 between memory and regs, the higher-numbered regs
	 always get higher addresses.  */
      offset += SUBREG_WORD (op0);
      /* We used to adjust BITPOS here, but now we do the whole adjustment
	 right after the loop.  */
      op0 = SUBREG_REG (op0);
    }

  /* If OP0 is a register, BITPOS must count within a word.
     But as we have it, it counts within whatever size OP0 now has.
     On a bigendian machine, these are not the same, so convert.  */
  if (BYTES_BIG_ENDIAN
      && GET_CODE (op0) != MEM
      && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
    bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));

  value = protect_from_queue (value, 0);

  if (flag_force_mem)
    value = force_not_mem (value);

  /* If the target is a register, overwriting the entire object, or storing
     a full-word or multi-word field can be done with just a SUBREG.

     If the target is memory, storing any naturally aligned field can be
     done with a simple store.  For targets that support fast unaligned
     memory, any naturally sized, unit aligned field can be done directly.  */

  if (bitsize == GET_MODE_BITSIZE (fieldmode)
      && (GET_CODE (op0) != MEM
	  ? (GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
	     || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
	  : (! SLOW_UNALIGNED_ACCESS (fieldmode, align)
	     || (offset * BITS_PER_UNIT % bitsize == 0
		 && align % GET_MODE_BITSIZE (fieldmode) == 0)))
      && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0))
    {
      if (GET_MODE (op0) != fieldmode)
	{
	  if (GET_CODE (op0) == SUBREG)
	    {
	      if (GET_MODE (SUBREG_REG (op0)) == fieldmode
		  || GET_MODE_CLASS (fieldmode) == MODE_INT
		  || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT)
		op0 = SUBREG_REG (op0);
	      else
		/* Else we've got some float mode source being extracted into
		   a different float mode destination -- this combination of
		   subregs results in Severe Tire Damage.  */
		abort ();
	    }
	  if (GET_CODE (op0) == REG)
	    op0 = gen_rtx_SUBREG (fieldmode, op0, offset);
	  else
	    op0 = change_address (op0, fieldmode,
				  plus_constant (XEXP (op0, 0), offset));
	}
      emit_move_insn (op0, value);
      return value;
    }

  /* Make sure we are playing with integral modes.  Pun with subregs
     if we aren't.  This must come after the entire register case above,
     since that case is valid for any mode.  The following cases are only
     valid for integral modes.  */
  {
    enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
    if (imode != GET_MODE (op0))
      {
	if (GET_CODE (op0) == MEM)
	  op0 = change_address (op0, imode, NULL_RTX);
	else if (imode != BLKmode)
	  op0 = gen_lowpart (imode, op0);
	else
	  abort ();
      }
  }

  /* Storing an lsb-aligned field in a register
     can be done with a movestrict instruction.  */

  if (GET_CODE (op0) != MEM
      && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
      && bitsize == GET_MODE_BITSIZE (fieldmode)
      && (movstrict_optab->handlers[(int) fieldmode].insn_code
	  != CODE_FOR_nothing))
    {
      int icode = movstrict_optab->handlers[(int) fieldmode].insn_code;

      /* Get appropriate low part of the value being stored.  */
      if (GET_CODE (value) == CONST_INT || GET_CODE (value) == REG)
	value = gen_lowpart (fieldmode, value);
      else if (!(GET_CODE (value) == SYMBOL_REF
		 || GET_CODE (value) == LABEL_REF
		 || GET_CODE (value) == CONST))
	value = convert_to_mode (fieldmode, value, 0);

      if (! (*insn_data[icode].operand[1].predicate) (value, fieldmode))
	value = copy_to_mode_reg (fieldmode, value);

      if (GET_CODE (op0) == SUBREG)
	{
	  if (GET_MODE (SUBREG_REG (op0)) == fieldmode
	      || GET_MODE_CLASS (fieldmode) == MODE_INT
	      || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT)
	    op0 = SUBREG_REG (op0);
	  else
	    /* Else we've got some float mode source being extracted into
	       a different float mode destination -- this combination of
	       subregs results in Severe Tire Damage.  */
	    abort ();
	}

      emit_insn (GEN_FCN (icode)
		 (gen_rtx_SUBREG (fieldmode, op0, offset), value));

      return value;
    }

  /* Handle fields bigger than a word.  */

  if (bitsize > BITS_PER_WORD)
    {
      /* Here we transfer the words of the field
	 in the order least significant first.
	 This is because the most significant word is the one which may
	 be less than full.
	 However, only do that if the value is not BLKmode.  */

      unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
      unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
      unsigned int i;

      /* This is the mode we must force value to, so that there will be enough
	 subwords to extract.  Note that fieldmode will often (always?) be
	 VOIDmode, because that is what store_field uses to indicate that this
	 is a bit field, but passing VOIDmode to operand_subword_force will
	 result in an abort.  */
      fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);

      for (i = 0; i < nwords; i++)
	{
	  /* If I is 0, use the low-order word in both field and target;
	     if I is 1, use the next to lowest word; and so on.  */
	  unsigned int wordnum = (backwards ? nwords - i - 1 : i);
	  unsigned int bit_offset = (backwards
				     ? MAX ((int) bitsize - ((int) i + 1)
					    * BITS_PER_WORD,
					    0)
				     : (int) i * BITS_PER_WORD);

	  store_bit_field (op0, MIN (BITS_PER_WORD,
				     bitsize - i * BITS_PER_WORD),
			   bitnum + bit_offset, word_mode,
			   operand_subword_force (value, wordnum,
						  (GET_MODE (value) == VOIDmode
						   ? fieldmode
						   : GET_MODE (value))),
			   align, total_size);
	}
      return value;
    }

  /* From here on we can assume that the field to be stored fits within
     a single word, since anything wider was handled above.  */

  /* OFFSET is the number of words or bytes (UNIT says which)
     from STR_RTX to the first word or byte containing part of the field.  */

  if (GET_CODE (op0) != MEM)
    {
      if (offset != 0
	  || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
	{
	  if (GET_CODE (op0) != REG)
	    {
	      /* Since this is a destination (lvalue), we can't copy it to a
		 pseudo.  We can trivially remove a SUBREG that does not
		 change the size of the operand.  Such a SUBREG may have been
		 added above.  Otherwise, abort.  */
	      if (GET_CODE (op0) == SUBREG
		  && (GET_MODE_SIZE (GET_MODE (op0))
		      == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))))
		op0 = SUBREG_REG (op0);
	      else
		abort ();
	    }
	  op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
				op0, offset);
	}
      offset = 0;
    }
  else
    {
      op0 = protect_from_queue (op0, 1);
    }

  /* If VALUE is a floating-point mode, access it as an integer of the
     corresponding size.  This can occur on a machine with 64 bit registers
     that uses SFmode for float.  This can also occur for unaligned float
     structure fields.  */
  if (GET_MODE_CLASS (GET_MODE (value)) == MODE_FLOAT)
    {
      if (GET_CODE (value) != REG)
	value = copy_to_reg (value);
      value = gen_rtx_SUBREG (word_mode, value, 0);
    }

  /* Now OFFSET is nonzero only if OP0 is memory
     and is therefore always measured in bytes.  */

#ifdef HAVE_insv
  if (HAVE_insv
      && GET_MODE (value) != BLKmode
      && !(bitsize == 1 && GET_CODE (value) == CONST_INT)
      /* Ensure insv's size is wide enough for this field.  */
      && (insv_bitsize >= bitsize)
      && ! ((GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
	    && (bitsize + bitpos > insv_bitsize)))
    {
      int xbitpos = bitpos;
      rtx value1;
      rtx xop0 = op0;
      rtx last = get_last_insn ();
      rtx pat;
      enum machine_mode maxmode;
      int save_volatile_ok = volatile_ok;

      maxmode = insn_data[(int) CODE_FOR_insv].operand[3].mode;
      if (maxmode == VOIDmode)
	maxmode = word_mode;

      volatile_ok = 1;

      /* If this machine's insv can only insert into a register, copy OP0
	 into a register and save it back later.  */
      /* This used to check flag_force_mem, but that was a serious
	 de-optimization now that flag_force_mem is enabled by -O2.  */
      if (GET_CODE (op0) == MEM
	  && ! ((*insn_data[(int) CODE_FOR_insv].operand[0].predicate)
		(op0, VOIDmode)))
	{
	  rtx tempreg;
	  enum machine_mode bestmode;

	  /* Get the mode to use for inserting into this field.  If OP0 is
	     BLKmode, get the smallest mode consistent with the alignment.  If
	     OP0 is a non-BLKmode object that is no wider than MAXMODE, use its
	     mode.  Otherwise, use the smallest mode containing the field.  */

	  if (GET_MODE (op0) == BLKmode
	      || GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (maxmode))
	    bestmode
	      = get_best_mode (bitsize, bitnum, align, maxmode,
			       MEM_VOLATILE_P (op0));
	  else
	    bestmode = GET_MODE (op0);

	  if (bestmode == VOIDmode
	      || (SLOW_UNALIGNED_ACCESS (bestmode, align)
		  && GET_MODE_BITSIZE (bestmode) > align))
	    goto insv_loses;

	  /* Adjust address to point to the containing unit of that mode.  */
	  unit = GET_MODE_BITSIZE (bestmode);
	  /* Compute offset as multiple of this unit, counting in bytes.  */
	  offset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
	  bitpos = bitnum % unit;
	  op0 = change_address (op0, bestmode,
				plus_constant (XEXP (op0, 0), offset));

	  /* Fetch that unit, store the bitfield in it, then store
	     the unit.  */
	  tempreg = copy_to_reg (op0);
	  store_bit_field (tempreg, bitsize, bitpos, fieldmode, value,
			   align, total_size);
	  emit_move_insn (op0, tempreg);
	  return value;
	}
      volatile_ok = save_volatile_ok;

      /* Add OFFSET into OP0's address.  */
      if (GET_CODE (xop0) == MEM)
	xop0 = change_address (xop0, byte_mode,
			       plus_constant (XEXP (xop0, 0), offset));

      /* If xop0 is a register, we need it in MAXMODE
	 to make it acceptable to the format of insv.  */
      if (GET_CODE (xop0) == SUBREG)
	/* We can't just change the mode, because this might clobber op0,
	   and we will need the original value of op0 if insv fails.  */
	xop0 = gen_rtx_SUBREG (maxmode, SUBREG_REG (xop0), SUBREG_WORD (xop0));
      if (GET_CODE (xop0) == REG && GET_MODE (xop0) != maxmode)
	xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);

      /* On big-endian machines, we count bits from the most significant.
	 If the bit field insn does not, we must invert.  */

      if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
	xbitpos = unit - bitsize - xbitpos;

      /* We have been counting XBITPOS within UNIT.
	 Count instead within the size of the register.  */
      if (BITS_BIG_ENDIAN && GET_CODE (xop0) != MEM)
	xbitpos += GET_MODE_BITSIZE (maxmode) - unit;

      unit = GET_MODE_BITSIZE (maxmode);

      /* Convert VALUE to maxmode (which insv insn wants) in VALUE1.  */
      value1 = value;
      if (GET_MODE (value) != maxmode)
	{
	  if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
	    {
	      /* Optimization: Don't bother really extending VALUE
		 if it has all the bits we will actually use.  However,
		 if we must narrow it, be sure we do it correctly.  */

	      if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (maxmode))
		{
		  /* Avoid making subreg of a subreg, or of a mem.  */
		  if (GET_CODE (value1) != REG)
		    value1 = copy_to_reg (value1);
		  value1 = gen_rtx_SUBREG (maxmode, value1, 0);
		}
	      else
		value1 = gen_lowpart (maxmode, value1);
	    }
	  else if (!CONSTANT_P (value))
	    /* Parse phase is supposed to make VALUE's data type
	       match that of the component reference, which is a type
	       at least as wide as the field; so VALUE should have
	       a mode that corresponds to that type.  */
	    abort ();
	}

      /* If this machine's insv insists on a register,
	 get VALUE1 into a register.  */
      if (! ((*insn_data[(int) CODE_FOR_insv].operand[3].predicate)
	     (value1, maxmode)))
	value1 = force_reg (maxmode, value1);

      pat = gen_insv (xop0, GEN_INT (bitsize), GEN_INT (xbitpos), value1);
      if (pat)
	emit_insn (pat);
      else
	{
	  delete_insns_since (last);
	  store_fixed_bit_field (op0, offset, bitsize, bitpos, value, align);
	}
    }
  else
  insv_loses:
#endif
    /* Insv is not available; store using shifts and boolean ops.  */
    store_fixed_bit_field (op0, offset, bitsize, bitpos, value, align);
  return value;
}
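
/* Usage sketch (illustrative; STRUCT_RTX is a hypothetical MEM for the
   containing object): store the constant 7 into a 5-bit field starting
   3 bits into the structure.  Passing VOIDmode as FIELDMODE marks a true
   bit field, as the store_field comment above notes; ALIGN is in bits,
   as the SLOW_UNALIGNED_ACCESS comparisons above imply.  */
#if 0
  store_bit_field (struct_rtx, 5 /* bitsize */, 3 /* bitnum */,
		   VOIDmode, GEN_INT (7), 8 /* align */, -1 /* varying */);
#endif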
\f
/* Use shifts and boolean operations to store VALUE
   into a bit field of width BITSIZE
   in a memory location specified by OP0, offset by OFFSET bytes.
   (OFFSET must be 0 if OP0 is a register.)
   The field starts at position BITPOS within the byte.
   (If OP0 is a register, it may be a full word or a narrower mode,
   but BITPOS still counts within a full word,
   which is significant on bigendian machines.)
   STRUCT_ALIGN is the alignment the structure is known to have.

   Note that protect_from_queue has already been done on OP0 and VALUE.  */

static void
store_fixed_bit_field (op0, offset, bitsize, bitpos, value, struct_align)
     register rtx op0;
     unsigned HOST_WIDE_INT offset, bitsize, bitpos;
     register rtx value;
     unsigned int struct_align;
{
  register enum machine_mode mode;
  unsigned int total_bits = BITS_PER_WORD;
  rtx subtarget, temp;
  int all_zero = 0;
  int all_one = 0;

  if (! SLOW_UNALIGNED_ACCESS (word_mode, struct_align))
    struct_align = BIGGEST_ALIGNMENT;

  /* There is a case not handled here:
     a structure with a known alignment of just a halfword
     and a field split across two aligned halfwords within the structure.
     Or likewise a structure with a known alignment of just a byte
     and a field split across two bytes.
     Such cases are not supposed to be able to occur.  */

  if (GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
    {
      if (offset != 0)
	abort ();
      /* Special treatment for a bit field split across two registers.  */
      if (bitsize + bitpos > BITS_PER_WORD)
	{
	  store_split_bit_field (op0, bitsize, bitpos,
				 value, BITS_PER_WORD);
	  return;
	}
    }
  else
    {
      /* Get the proper mode to use for this field.  We want a mode that
	 includes the entire field.  If such a mode would be larger than
	 a word, we won't be doing the extraction the normal way.  */

      mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
			    struct_align, word_mode,
			    GET_CODE (op0) == MEM && MEM_VOLATILE_P (op0));

      if (mode == VOIDmode)
	{
	  /* The only way this should occur is if the field spans word
	     boundaries.  */
	  store_split_bit_field (op0,
				 bitsize, bitpos + offset * BITS_PER_UNIT,
				 value, struct_align);
	  return;
	}

      total_bits = GET_MODE_BITSIZE (mode);

      /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
	 be in the range 0 to total_bits-1, and put any excess bytes in
	 OFFSET.  */
      if (bitpos >= total_bits)
	{
	  offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
	  bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
		     * BITS_PER_UNIT);
	}

      /* Get ref to an aligned byte, halfword, or word containing the field.
	 Adjust BITPOS to be position within a word,
	 and OFFSET to be the offset of that word.
	 Then alter OP0 to refer to that word.  */
      bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
      offset -= (offset % (total_bits / BITS_PER_UNIT));
      op0 = change_address (op0, mode,
			    plus_constant (XEXP (op0, 0), offset));
    }

  mode = GET_MODE (op0);

  /* Now MODE is either some integral mode for a MEM as OP0,
     or is a full-word for a REG as OP0.  TOTAL_BITS corresponds.
     The bit field is contained entirely within OP0.
     BITPOS is the starting bit number within OP0.
     (OP0's mode may actually be narrower than MODE.)  */

  if (BYTES_BIG_ENDIAN)
    /* BITPOS is the distance between our msb
       and that of the containing datum.
       Convert it to the distance from the lsb.  */
    bitpos = total_bits - bitsize - bitpos;

  /* Now BITPOS is always the distance between our lsb
     and that of OP0.  */

  /* Shift VALUE left by BITPOS bits.  If VALUE is not constant,
     we must first convert its mode to MODE.  */

  if (GET_CODE (value) == CONST_INT)
    {
      register HOST_WIDE_INT v = INTVAL (value);

      if (bitsize < HOST_BITS_PER_WIDE_INT)
	v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;

      if (v == 0)
	all_zero = 1;
      else if ((bitsize < HOST_BITS_PER_WIDE_INT
		&& v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
	       || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
	all_one = 1;

      value = lshift_value (mode, value, bitpos, bitsize);
    }
  else
    {
      int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
		      && bitpos + bitsize != GET_MODE_BITSIZE (mode));

      if (GET_MODE (value) != mode)
	{
	  if ((GET_CODE (value) == REG || GET_CODE (value) == SUBREG)
	      && GET_MODE_SIZE (mode) < GET_MODE_SIZE (GET_MODE (value)))
	    value = gen_lowpart (mode, value);
	  else
	    value = convert_to_mode (mode, value, 1);
	}

      if (must_and)
	value = expand_binop (mode, and_optab, value,
			      mask_rtx (mode, 0, bitsize, 0),
			      NULL_RTX, 1, OPTAB_LIB_WIDEN);
      if (bitpos > 0)
	value = expand_shift (LSHIFT_EXPR, mode, value,
			      build_int_2 (bitpos, 0), NULL_RTX, 1);
    }

  /* Now clear the chosen bits in OP0,
     except that if VALUE is -1 we need not bother.  */

  subtarget = (GET_CODE (op0) == REG || ! flag_force_mem) ? op0 : 0;

  if (! all_one)
    {
      temp = expand_binop (mode, and_optab, op0,
			   mask_rtx (mode, bitpos, bitsize, 1),
			   subtarget, 1, OPTAB_LIB_WIDEN);
      subtarget = temp;
    }
  else
    temp = op0;

  /* Now logical-or VALUE into OP0, unless it is zero.  */

  if (! all_zero)
    temp = expand_binop (mode, ior_optab, temp, value,
			 subtarget, 1, OPTAB_LIB_WIDEN);
  if (op0 != temp)
    emit_move_insn (op0, temp);
}
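
/* Worked example (illustrative, not from the original source): storing
   the value 5 into a 3-bit field at BITPOS 2 of a little-endian SImode
   datum reduces to the mask/shift sequence built above.  In plain C on
   a 32-bit word:  */
#if 0
{
  unsigned int word = 0xdeadbeef;		/* existing contents */
  unsigned int mask = ((1u << 3) - 1) << 2;	/* mask_rtx (SImode, 2, 3, 0): 0x1c */
  unsigned int val  = (5u & 7u) << 2;		/* lshift_value (SImode, 5, 2, 3): 0x14 */
  word = (word & ~mask) | val;			/* the and_optab / ior_optab steps */
}
#endif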
\f
/* Store a bit field that is split across multiple accessible memory objects.

   OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
   BITSIZE is the field width; BITPOS the position of its first bit
   (within the word).
   VALUE is the value to store.
   ALIGN is the known alignment of OP0.
   This is also the size of the memory objects to be used.

   This does not yet handle fields wider than BITS_PER_WORD.  */

static void
store_split_bit_field (op0, bitsize, bitpos, value, align)
     rtx op0;
     unsigned HOST_WIDE_INT bitsize, bitpos;
     rtx value;
     unsigned int align;
{
  unsigned int unit;
  unsigned int bitsdone = 0;

  /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle
     that much at a time.  */
  if (GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
    unit = BITS_PER_WORD;
  else
    unit = MIN (align, BITS_PER_WORD);

  /* If VALUE is a constant other than a CONST_INT, get it into a register in
     WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
     that VALUE might be a floating-point constant.  */
  if (CONSTANT_P (value) && GET_CODE (value) != CONST_INT)
    {
      rtx word = gen_lowpart_common (word_mode, value);

      if (word && (value != word))
	value = word;
      else
	value = gen_lowpart_common (word_mode,
				    force_reg (GET_MODE (value) != VOIDmode
					       ? GET_MODE (value)
					       : word_mode, value));
    }
  else if (GET_CODE (value) == ADDRESSOF)
    value = copy_to_reg (value);

  while (bitsdone < bitsize)
    {
      unsigned HOST_WIDE_INT thissize;
      rtx part, word;
      unsigned HOST_WIDE_INT thispos;
      unsigned HOST_WIDE_INT offset;

      offset = (bitpos + bitsdone) / unit;
      thispos = (bitpos + bitsdone) % unit;

      /* THISSIZE must not overrun a word boundary.  Otherwise,
	 store_fixed_bit_field will call us again, and we will mutually
	 recurse forever.  */
      thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
      thissize = MIN (thissize, unit - thispos);

      if (BYTES_BIG_ENDIAN)
	{
	  int total_bits;

	  /* We must do an endian conversion exactly the same way as it is
	     done in extract_bit_field, so that the two calls to
	     extract_fixed_bit_field will have comparable arguments.  */
	  if (GET_CODE (value) != MEM || GET_MODE (value) == BLKmode)
	    total_bits = BITS_PER_WORD;
	  else
	    total_bits = GET_MODE_BITSIZE (GET_MODE (value));

	  /* Fetch successively less significant portions.  */
	  if (GET_CODE (value) == CONST_INT)
	    part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
			     >> (bitsize - bitsdone - thissize))
			    & (((HOST_WIDE_INT) 1 << thissize) - 1));
	  else
	    /* The args are chosen so that the last part includes the
	       lsb.  Give extract_bit_field the value it needs (with
	       endianness compensation) to fetch the piece we want.

	       ??? We have no idea what the alignment of VALUE is, so
	       we have to use a guess.  */
	    part
	      = extract_fixed_bit_field
		(word_mode, value, 0, thissize,
		 total_bits - bitsize + bitsdone, NULL_RTX, 1,
		 GET_MODE (value) == VOIDmode
		 ? UNITS_PER_WORD
		 : (GET_MODE (value) == BLKmode
		    ? 1 : GET_MODE_ALIGNMENT (GET_MODE (value))));
	}
      else
	{
	  /* Fetch successively more significant portions.  */
	  if (GET_CODE (value) == CONST_INT)
	    part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
			     >> bitsdone)
			    & (((HOST_WIDE_INT) 1 << thissize) - 1));
	  else
	    part
	      = extract_fixed_bit_field
		(word_mode, value, 0, thissize, bitsdone, NULL_RTX, 1,
		 GET_MODE (value) == VOIDmode
		 ? UNITS_PER_WORD
		 : (GET_MODE (value) == BLKmode
		    ? 1 : GET_MODE_ALIGNMENT (GET_MODE (value))));
	}

      /* If OP0 is a register, then handle OFFSET here.

	 When handling multiword bitfields, extract_bit_field may pass
	 down a word_mode SUBREG of a larger REG for a bitfield that actually
	 crosses a word boundary.  Thus, for a SUBREG, we must find
	 the current word starting from the base register.  */
      if (GET_CODE (op0) == SUBREG)
	{
	  word = operand_subword_force (SUBREG_REG (op0),
					SUBREG_WORD (op0) + offset,
					GET_MODE (SUBREG_REG (op0)));
	  offset = 0;
	}
      else if (GET_CODE (op0) == REG)
	{
	  word = operand_subword_force (op0, offset, GET_MODE (op0));
	  offset = 0;
	}
      else
	word = op0;

      /* OFFSET is in UNITs, and UNIT is in bits.
	 store_fixed_bit_field wants offset in bytes.  */
      store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT,
			     thissize, thispos, part, align);
      bitsdone += thissize;
    }
}
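
/* Worked example (illustrative): a 10-bit field at BITPOS 28 in a
   little-endian register pair, with UNIT == BITS_PER_WORD == 32, takes
   two passes through the loop above:

       pass 1: offset 0, thispos 28, thissize 4   (bits 0..3 of VALUE)
       pass 2: offset 1, thispos 0,  thissize 6   (bits 4..9 of VALUE)

   Each pass hands one word-sized piece to store_fixed_bit_field.  */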
\f
/* Generate code to extract a bit-field from STR_RTX
   containing BITSIZE bits, starting at BITNUM,
   and put it in TARGET if possible (if TARGET is nonzero).
   Regardless of TARGET, we return the rtx for where the value is placed.
   It may be a QUEUED.

   STR_RTX is the structure containing the byte (a REG or MEM).
   UNSIGNEDP is nonzero if this is an unsigned bit field.
   MODE is the natural mode of the field value once extracted.
   TMODE is the mode the caller would like the value to have;
   but the value may be returned with type MODE instead.

   ALIGN is the alignment that STR_RTX is known to have.
   TOTAL_SIZE is the size in bytes of the containing structure,
   or -1 if varying.

   If a TARGET is specified and we can store in it at no extra cost,
   we do so, and return TARGET.
   Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
   if they are equally easy.  */

rtx
extract_bit_field (str_rtx, bitsize, bitnum, unsignedp,
		   target, mode, tmode, align, total_size)
     rtx str_rtx;
     unsigned HOST_WIDE_INT bitsize;
     unsigned HOST_WIDE_INT bitnum;
     int unsignedp;
     rtx target;
     enum machine_mode mode, tmode;
     unsigned int align;
     HOST_WIDE_INT total_size;
{
  unsigned int unit
    = (GET_CODE (str_rtx) == MEM) ? BITS_PER_UNIT : BITS_PER_WORD;
  unsigned HOST_WIDE_INT offset = bitnum / unit;
  unsigned HOST_WIDE_INT bitpos = bitnum % unit;
  register rtx op0 = str_rtx;
  rtx spec_target = target;
  rtx spec_target_subreg = 0;
  enum machine_mode int_mode;
#ifdef HAVE_extv
  unsigned HOST_WIDE_INT extv_bitsize;
  enum machine_mode extv_mode;
#endif
#ifdef HAVE_extzv
  unsigned HOST_WIDE_INT extzv_bitsize;
  enum machine_mode extzv_mode;
#endif

#ifdef HAVE_extv
  extv_mode = insn_data[(int) CODE_FOR_extv].operand[0].mode;
  if (extv_mode == VOIDmode)
    extv_mode = word_mode;
  extv_bitsize = GET_MODE_BITSIZE (extv_mode);
#endif

#ifdef HAVE_extzv
  extzv_mode = insn_data[(int) CODE_FOR_extzv].operand[0].mode;
  if (extzv_mode == VOIDmode)
    extzv_mode = word_mode;
  extzv_bitsize = GET_MODE_BITSIZE (extzv_mode);
#endif

  /* Discount the part of the structure before the desired byte.
     We need to know how many bytes are safe to reference after it.  */
  if (total_size >= 0)
    total_size -= (bitpos / BIGGEST_ALIGNMENT
		   * (BIGGEST_ALIGNMENT / BITS_PER_UNIT));

  if (tmode == VOIDmode)
    tmode = mode;
  while (GET_CODE (op0) == SUBREG)
    {
      int outer_size = GET_MODE_BITSIZE (GET_MODE (op0));
      int inner_size = GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op0)));

      offset += SUBREG_WORD (op0);

      inner_size = MIN (inner_size, BITS_PER_WORD);

      if (BYTES_BIG_ENDIAN && (outer_size < inner_size))
	{
	  bitpos += inner_size - outer_size;
	  if (bitpos > unit)
	    {
	      offset += (bitpos / unit);
	      bitpos %= unit;
	    }
	}

      op0 = SUBREG_REG (op0);
    }

  if (GET_CODE (op0) == REG
      && mode == GET_MODE (op0)
      && bitnum == 0
      && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
    {
      /* We're trying to extract a full register from itself.  */
      return op0;
    }

  /* Make sure we are playing with integral modes.  Pun with subregs
     if we aren't.  */
  {
    enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
    if (imode != GET_MODE (op0))
      {
	if (GET_CODE (op0) == MEM)
	  op0 = change_address (op0, imode, NULL_RTX);
	else if (imode != BLKmode)
	  op0 = gen_lowpart (imode, op0);
	else
	  abort ();
      }
  }

  /* ??? We currently assume TARGET is at least as big as BITSIZE.
     If that's wrong, the solution is to test for it and set TARGET to 0
     if needed.  */

  /* If OP0 is a register, BITPOS must count within a word.
     But as we have it, it counts within whatever size OP0 now has.
     On a bigendian machine, these are not the same, so convert.  */
  if (BYTES_BIG_ENDIAN
      && GET_CODE (op0) != MEM
      && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
    bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));

  /* Extracting a full-word or multi-word value
     from a structure in a register or aligned memory.
     This can be done with just SUBREG.
     So too extracting a subword value in
     the least significant part of the register.  */

  if (((GET_CODE (op0) != MEM
	&& TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode),
				  GET_MODE_BITSIZE (GET_MODE (op0))))
       || (GET_CODE (op0) == MEM
	   && (! SLOW_UNALIGNED_ACCESS (mode, align)
	       || (offset * BITS_PER_UNIT % bitsize == 0
		   && align % bitsize == 0))))
      && ((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
	   && bitpos % BITS_PER_WORD == 0)
	  || (mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0) != BLKmode
	      /* ??? The big endian test here is wrong.  This is correct
		 if the value is in a register, and if mode_for_size is not
		 the same mode as op0.  This causes us to get unnecessarily
		 inefficient code from the Thumb port when -mbig-endian.  */
	      && (BYTES_BIG_ENDIAN
		  ? bitpos + bitsize == BITS_PER_WORD
		  : bitpos == 0))))
    {
      enum machine_mode mode1
	= (VECTOR_MODE_P (tmode) ? mode
	   : mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0));

      if (mode1 != GET_MODE (op0))
	{
	  if (GET_CODE (op0) == SUBREG)
	    {
	      if (GET_MODE (SUBREG_REG (op0)) == mode1
		  || GET_MODE_CLASS (mode1) == MODE_INT
		  || GET_MODE_CLASS (mode1) == MODE_PARTIAL_INT)
		op0 = SUBREG_REG (op0);
	      else
		/* Else we've got some float mode source being extracted into
		   a different float mode destination -- this combination of
		   subregs results in Severe Tire Damage.  */
		abort ();
	    }
	  if (GET_CODE (op0) == REG)
	    op0 = gen_rtx_SUBREG (mode1, op0, offset);
	  else
	    op0 = change_address (op0, mode1,
				  plus_constant (XEXP (op0, 0), offset));
	}
      if (mode1 != mode)
	return convert_to_mode (tmode, op0, unsignedp);
      return op0;
    }

  /* Handle fields bigger than a word.  */

  if (bitsize > BITS_PER_WORD)
    {
      /* Here we transfer the words of the field
	 in the order least significant first.
	 This is because the most significant word is the one which may
	 be less than full.  */

      unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
      unsigned int i;

      if (target == 0 || GET_CODE (target) != REG)
	target = gen_reg_rtx (mode);

      /* Indicate for flow that the entire target reg is being set.  */
      emit_insn (gen_rtx_CLOBBER (VOIDmode, target));

      for (i = 0; i < nwords; i++)
	{
	  /* If I is 0, use the low-order word in both field and target;
	     if I is 1, use the next to lowest word; and so on.  */
	  /* Word number in TARGET to use.  */
	  unsigned int wordnum
	    = (WORDS_BIG_ENDIAN
	       ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
	       : i);
	  /* Offset from start of field in OP0.  */
	  unsigned int bit_offset = (WORDS_BIG_ENDIAN
				     ? MAX (0, ((int) bitsize - ((int) i + 1)
						* (int) BITS_PER_WORD))
				     : (int) i * BITS_PER_WORD);
	  rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
	  rtx result_part
	    = extract_bit_field (op0, MIN (BITS_PER_WORD,
					   bitsize - i * BITS_PER_WORD),
				 bitnum + bit_offset, 1, target_part, mode,
				 word_mode, align, total_size);

	  if (target_part == 0)
	    abort ();

	  if (result_part != target_part)
	    emit_move_insn (target_part, result_part);
	}

      if (unsignedp)
	{
	  /* Unless we've filled TARGET, the upper regs in a multi-reg value
	     need to be zero'd out.  */
	  if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
	    {
	      unsigned int i, total_words;

	      total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
	      for (i = nwords; i < total_words; i++)
		{
		  int wordnum = WORDS_BIG_ENDIAN ? total_words - i - 1 : i;
		  rtx target_part = operand_subword (target, wordnum, 1,
						     VOIDmode);
		  emit_move_insn (target_part, const0_rtx);
		}
	    }
	  return target;
	}

      /* Signed bit field: sign-extend with two arithmetic shifts.  */
      target = expand_shift (LSHIFT_EXPR, mode, target,
			     build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
			     NULL_RTX, 0);
      return expand_shift (RSHIFT_EXPR, mode, target,
			   build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
			   NULL_RTX, 0);
    }

  /* From here on we know the desired field is smaller than a word.  */

  /* Check if there is a correspondingly-sized integer field, so we can
     safely extract it as one size of integer, if necessary; then
     truncate or extend to the size that is wanted; then use SUBREGs or
     convert_to_mode to get one of the modes we really wanted.  */

  int_mode = int_mode_for_mode (tmode);
  if (int_mode == BLKmode)
    int_mode = int_mode_for_mode (mode);
  if (int_mode == BLKmode)
    abort ();	/* Should probably push op0 out to memory and then
		   do a load.  */

  /* OFFSET is the number of words or bytes (UNIT says which)
     from STR_RTX to the first word or byte containing part of the field.  */

  if (GET_CODE (op0) != MEM)
    {
      if (offset != 0
	  || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
	{
	  if (GET_CODE (op0) != REG)
	    op0 = copy_to_reg (op0);
	  op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
				op0, offset);
	}
      offset = 0;
    }
  else
    {
      op0 = protect_from_queue (str_rtx, 1);
    }

  /* Now OFFSET is nonzero only for memory operands.  */

  if (unsignedp)
    {
#ifdef HAVE_extzv
      if (HAVE_extzv
	  && (extzv_bitsize >= bitsize)
	  && ! ((GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
		&& (bitsize + bitpos > extzv_bitsize)))
	{
	  unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
	  rtx bitsize_rtx, bitpos_rtx;
	  rtx last = get_last_insn ();
	  rtx xop0 = op0;
	  rtx xtarget = target;
	  rtx xspec_target = spec_target;
	  rtx xspec_target_subreg = spec_target_subreg;
	  rtx pat;
	  enum machine_mode maxmode;

	  maxmode = insn_data[(int) CODE_FOR_extzv].operand[0].mode;
	  if (maxmode == VOIDmode)
	    maxmode = word_mode;

	  if (GET_CODE (xop0) == MEM)
	    {
	      int save_volatile_ok = volatile_ok;
	      volatile_ok = 1;

	      /* Is the memory operand acceptable?  */
	      if (! ((*insn_data[(int) CODE_FOR_extzv].operand[1].predicate)
		     (xop0, GET_MODE (xop0))))
		{
		  /* No, load into a reg and extract from there.  */
		  enum machine_mode bestmode;

		  /* Get the mode to use for inserting into this field.  If
		     OP0 is BLKmode, get the smallest mode consistent with the
		     alignment.  If OP0 is a non-BLKmode object that is no
		     wider than MAXMODE, use its mode.  Otherwise, use the
		     smallest mode containing the field.  */

		  if (GET_MODE (xop0) == BLKmode
		      || (GET_MODE_SIZE (GET_MODE (op0))
			  > GET_MODE_SIZE (maxmode)))
		    bestmode = get_best_mode (bitsize, bitnum, align, maxmode,
					      MEM_VOLATILE_P (xop0));
		  else
		    bestmode = GET_MODE (xop0);

		  if (bestmode == VOIDmode
		      || (SLOW_UNALIGNED_ACCESS (bestmode, align)
			  && GET_MODE_BITSIZE (bestmode) > align))
		    goto extzv_loses;

		  /* Compute offset as multiple of this unit,
		     counting in bytes.  */
		  unit = GET_MODE_BITSIZE (bestmode);
		  xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
		  xbitpos = bitnum % unit;
		  xop0 = change_address (xop0, bestmode,
					 plus_constant (XEXP (xop0, 0),
							xoffset));
		  /* Fetch it to a register in that size.  */
		  xop0 = force_reg (bestmode, xop0);

		  /* XBITPOS counts within UNIT, which is what is expected.  */
		}
	      else
		/* Get ref to first byte containing part of the field.  */
		xop0 = change_address (xop0, byte_mode,
				       plus_constant (XEXP (xop0, 0), xoffset));

	      volatile_ok = save_volatile_ok;
	    }

	  /* If op0 is a register, we need it in MAXMODE (which is usually
	     SImode) to make it acceptable to the format of extzv.  */
	  if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
	    goto extzv_loses;
	  if (GET_CODE (xop0) == REG && GET_MODE (xop0) != maxmode)
	    xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);

	  /* On big-endian machines, we count bits from the most significant.
	     If the bit field insn does not, we must invert.  */
	  if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
	    xbitpos = unit - bitsize - xbitpos;

	  /* Now convert from counting within UNIT to counting in MAXMODE.  */
	  if (BITS_BIG_ENDIAN && GET_CODE (xop0) != MEM)
	    xbitpos += GET_MODE_BITSIZE (maxmode) - unit;

	  unit = GET_MODE_BITSIZE (maxmode);

	  if (xtarget == 0
	      || (flag_force_mem && GET_CODE (xtarget) == MEM))
	    xtarget = xspec_target = gen_reg_rtx (tmode);

	  if (GET_MODE (xtarget) != maxmode)
	    {
	      if (GET_CODE (xtarget) == REG)
		{
		  int wider = (GET_MODE_SIZE (maxmode)
			       > GET_MODE_SIZE (GET_MODE (xtarget)));
		  xtarget = gen_lowpart (maxmode, xtarget);
		  if (wider)
		    xspec_target_subreg = xtarget;
		}
	      else
		xtarget = gen_reg_rtx (maxmode);
	    }

	  /* If this machine's extzv insists on a register target,
	     make sure we have one.  */
	  if (! ((*insn_data[(int) CODE_FOR_extzv].operand[0].predicate)
		 (xtarget, maxmode)))
	    xtarget = gen_reg_rtx (maxmode);

	  bitsize_rtx = GEN_INT (bitsize);
	  bitpos_rtx = GEN_INT (xbitpos);

	  pat = gen_extzv (protect_from_queue (xtarget, 1),
			   xop0, bitsize_rtx, bitpos_rtx);
	  if (pat)
	    {
	      emit_insn (pat);
	      target = xtarget;
	      spec_target = xspec_target;
	      spec_target_subreg = xspec_target_subreg;
	    }
	  else
	    {
	      delete_insns_since (last);
	      target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
						bitpos, target, 1, align);
	    }
	}
      else
      extzv_loses:
#endif
	target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
					  bitpos, target, 1, align);
    }
  else
    {
#ifdef HAVE_extv
      if (HAVE_extv
	  && (extv_bitsize >= bitsize)
	  && ! ((GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
		&& (bitsize + bitpos > extv_bitsize)))
	{
	  int xbitpos = bitpos, xoffset = offset;
	  rtx bitsize_rtx, bitpos_rtx;
	  rtx last = get_last_insn ();
	  rtx xop0 = op0, xtarget = target;
	  rtx xspec_target = spec_target;
	  rtx xspec_target_subreg = spec_target_subreg;
	  rtx pat;
	  enum machine_mode maxmode;

	  maxmode = insn_data[(int) CODE_FOR_extv].operand[0].mode;
	  if (maxmode == VOIDmode)
	    maxmode = word_mode;

	  if (GET_CODE (xop0) == MEM)
	    {
	      /* Is the memory operand acceptable?  */
	      if (! ((*insn_data[(int) CODE_FOR_extv].operand[1].predicate)
		     (xop0, GET_MODE (xop0))))
		{
		  /* No, load into a reg and extract from there.  */
		  enum machine_mode bestmode;

		  /* Get the mode to use for inserting into this field.  If
		     OP0 is BLKmode, get the smallest mode consistent with the
		     alignment.  If OP0 is a non-BLKmode object that is no
		     wider than MAXMODE, use its mode.  Otherwise, use the
		     smallest mode containing the field.  */

		  if (GET_MODE (xop0) == BLKmode
		      || (GET_MODE_SIZE (GET_MODE (op0))
			  > GET_MODE_SIZE (maxmode)))
		    bestmode = get_best_mode (bitsize, bitnum, align, maxmode,
					      MEM_VOLATILE_P (xop0));
		  else
		    bestmode = GET_MODE (xop0);

		  if (bestmode == VOIDmode
		      || (SLOW_UNALIGNED_ACCESS (bestmode, align)
			  && GET_MODE_BITSIZE (bestmode) > align))
		    goto extv_loses;

		  /* Compute offset as multiple of this unit,
		     counting in bytes.  */
		  unit = GET_MODE_BITSIZE (bestmode);
		  xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
		  xbitpos = bitnum % unit;
		  xop0 = change_address (xop0, bestmode,
					 plus_constant (XEXP (xop0, 0),
							xoffset));
		  /* Fetch it to a register in that size.  */
		  xop0 = force_reg (bestmode, xop0);

		  /* XBITPOS counts within UNIT, which is what is expected.  */
		}
	      else
		/* Get ref to first byte containing part of the field.  */
		xop0 = change_address (xop0, byte_mode,
				       plus_constant (XEXP (xop0, 0), xoffset));
	    }

	  /* If op0 is a register, we need it in MAXMODE (which is usually
	     SImode) to make it acceptable to the format of extv.  */
	  if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
	    goto extv_loses;
	  if (GET_CODE (xop0) == REG && GET_MODE (xop0) != maxmode)
	    xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);

	  /* On big-endian machines, we count bits from the most significant.
	     If the bit field insn does not, we must invert.  */
	  if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
	    xbitpos = unit - bitsize - xbitpos;

	  /* XBITPOS counts within a size of UNIT.
	     Adjust to count within a size of MAXMODE.  */
	  if (BITS_BIG_ENDIAN && GET_CODE (xop0) != MEM)
	    xbitpos += (GET_MODE_BITSIZE (maxmode) - unit);

	  unit = GET_MODE_BITSIZE (maxmode);

	  if (xtarget == 0
	      || (flag_force_mem && GET_CODE (xtarget) == MEM))
	    xtarget = xspec_target = gen_reg_rtx (tmode);

	  if (GET_MODE (xtarget) != maxmode)
	    {
	      if (GET_CODE (xtarget) == REG)
		{
		  int wider = (GET_MODE_SIZE (maxmode)
			       > GET_MODE_SIZE (GET_MODE (xtarget)));
		  xtarget = gen_lowpart (maxmode, xtarget);
		  if (wider)
		    xspec_target_subreg = xtarget;
		}
	      else
		xtarget = gen_reg_rtx (maxmode);
	    }

	  /* If this machine's extv insists on a register target,
	     make sure we have one.  */
	  if (! ((*insn_data[(int) CODE_FOR_extv].operand[0].predicate)
		 (xtarget, maxmode)))
	    xtarget = gen_reg_rtx (maxmode);

	  bitsize_rtx = GEN_INT (bitsize);
	  bitpos_rtx = GEN_INT (xbitpos);

	  pat = gen_extv (protect_from_queue (xtarget, 1),
			  xop0, bitsize_rtx, bitpos_rtx);
	  if (pat)
	    {
	      emit_insn (pat);
	      target = xtarget;
	      spec_target = xspec_target;
	      spec_target_subreg = xspec_target_subreg;
	    }
	  else
	    {
	      delete_insns_since (last);
	      target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
						bitpos, target, 0, align);
	    }
	}
      else
      extv_loses:
#endif
	target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
					  bitpos, target, 0, align);
    }
  if (target == spec_target)
    return target;
  if (target == spec_target_subreg)
    return spec_target;
  if (GET_MODE (target) != tmode && GET_MODE (target) != mode)
    {
      /* If the target mode is floating-point, first convert to the
	 integer mode of that size and then access it as a floating-point
	 value via a SUBREG.  */
      if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
	{
	  target = convert_to_mode (mode_for_size (GET_MODE_BITSIZE (tmode),
						   MODE_INT, 0),
				    target, unsignedp);
	  if (GET_CODE (target) != REG)
	    target = copy_to_reg (target);
	  return gen_rtx_SUBREG (tmode, target, 0);
	}
      else
	return convert_to_mode (tmode, target, unsignedp);
    }
  return target;
}
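
/* Usage sketch (illustrative; STRUCT_RTX is a hypothetical MEM):
   extract an unsigned 5-bit field starting 3 bits into the structure,
   widened to SImode.  A zero TARGET lets the expander choose a pseudo.  */
#if 0
  rtx x = extract_bit_field (struct_rtx, 5 /* bitsize */, 3 /* bitnum */,
			     1 /* unsignedp */, NULL_RTX, SImode, SImode,
			     8 /* align, in bits */, -1 /* varying size */);
#endif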
\f
/* Extract a bit field using shifts and boolean operations.
   Returns an rtx to represent the value.
   OP0 addresses a register (word) or memory (byte).
   BITPOS says which bit within the word or byte the bit field starts in.
   OFFSET says how many bytes farther the bit field starts;
   it is 0 if OP0 is a register.
   BITSIZE says how many bits long the bit field is.
   (If OP0 is a register, it may be narrower than a full word,
   but BITPOS still counts within a full word,
   which is significant on bigendian machines.)

   UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
   If TARGET is nonzero, attempts to store the value there
   and return TARGET, but this is not guaranteed.
   If TARGET is not used, create a pseudo-reg of mode TMODE for the value.

   ALIGN is the alignment that STR_RTX is known to have.  */

static rtx
extract_fixed_bit_field (tmode, op0, offset, bitsize, bitpos,
			 target, unsignedp, align)
     enum machine_mode tmode;
     register rtx op0, target;
     unsigned HOST_WIDE_INT offset, bitsize, bitpos;
     int unsignedp;
     unsigned int align;
{
  unsigned int total_bits = BITS_PER_WORD;
  enum machine_mode mode;

  if (GET_CODE (op0) == SUBREG || GET_CODE (op0) == REG)
    {
      /* Special treatment for a bit field split across two registers.  */
      if (bitsize + bitpos > BITS_PER_WORD)
	return extract_split_bit_field (op0, bitsize, bitpos,
					unsignedp, align);
    }
  else
    {
      /* Get the proper mode to use for this field.  We want a mode that
	 includes the entire field.  If such a mode would be larger than
	 a word, we won't be doing the extraction the normal way.  */

      mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT, align,
			    word_mode,
			    GET_CODE (op0) == MEM && MEM_VOLATILE_P (op0));

      if (mode == VOIDmode)
	/* The only way this should occur is if the field spans word
	   boundaries.  */
	return extract_split_bit_field (op0, bitsize,
					bitpos + offset * BITS_PER_UNIT,
					unsignedp, align);

      total_bits = GET_MODE_BITSIZE (mode);

      /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
	 be in the range 0 to total_bits-1, and put any excess bytes in
	 OFFSET.  */
      if (bitpos >= total_bits)
	{
	  offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
	  bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
		     * BITS_PER_UNIT);
	}

      /* Get ref to an aligned byte, halfword, or word containing the field.
	 Adjust BITPOS to be position within a word,
	 and OFFSET to be the offset of that word.
	 Then alter OP0 to refer to that word.  */
      bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
      offset -= (offset % (total_bits / BITS_PER_UNIT));
      op0 = change_address (op0, mode,
			    plus_constant (XEXP (op0, 0), offset));
    }

  mode = GET_MODE (op0);

  if (BYTES_BIG_ENDIAN)
    {
      /* BITPOS is the distance between our msb and that of OP0.
	 Convert it to the distance from the lsb.  */

      bitpos = total_bits - bitsize - bitpos;
    }

  /* Now BITPOS is always the distance between the field's lsb and that of OP0.
     We have reduced the big-endian case to the little-endian case.  */

  if (unsignedp)
    {
      if (bitpos)
	{
	  /* If the field does not already start at the lsb,
	     shift it so it does.  */
	  tree amount = build_int_2 (bitpos, 0);
	  /* Maybe propagate the target for the shift.  */
	  /* But not if we will return it--could confuse integrate.c.  */
	  rtx subtarget = (target != 0 && GET_CODE (target) == REG
			   && !REG_FUNCTION_VALUE_P (target)
			   ? target : 0);
	  if (tmode != mode) subtarget = 0;
	  op0 = expand_shift (RSHIFT_EXPR, mode, op0, amount, subtarget, 1);
	}
      /* Convert the value to the desired mode.  */
      if (mode != tmode)
	op0 = convert_to_mode (tmode, op0, 1);

      /* Unless the msb of the field used to be the msb when we shifted,
	 mask out the upper bits.  */

      if (GET_MODE_BITSIZE (mode) != bitpos + bitsize
#if 0
#ifdef SLOW_ZERO_EXTEND
	  /* Always generate an `and' if
	     we just zero-extended op0 and SLOW_ZERO_EXTEND, since it
	     will combine fruitfully with the zero-extend.  */
	  || tmode != mode
#endif
#endif
	  )
	return expand_binop (GET_MODE (op0), and_optab, op0,
			     mask_rtx (GET_MODE (op0), 0, bitsize, 0),
			     target, 1, OPTAB_LIB_WIDEN);
      return op0;
    }

  /* To extract a signed bit-field, first shift its msb to the msb of the word,
     then arithmetic-shift its lsb to the lsb of the word.  */
  op0 = force_reg (mode, op0);
  if (mode != tmode)
    target = 0;

  /* Find the narrowest integer mode that contains the field.  */

  for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
       mode = GET_MODE_WIDER_MODE (mode))
    if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
      {
	op0 = convert_to_mode (mode, op0, 0);
	break;
      }

  if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
    {
      tree amount
	= build_int_2 (GET_MODE_BITSIZE (mode) - (bitsize + bitpos), 0);
      /* Maybe propagate the target for the shift.  */
      /* But not if we will return the result--could confuse integrate.c.  */
      rtx subtarget = (target != 0 && GET_CODE (target) == REG
		       && ! REG_FUNCTION_VALUE_P (target)
		       ? target : 0);
      op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
    }

  return expand_shift (RSHIFT_EXPR, mode, op0,
		       build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
		       target, 0);
}
1688 \f
1689 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1690 of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1691 complement of that if COMPLEMENT. The mask is truncated if
1692 necessary to the width of mode MODE. The mask is zero-extended if
1693 BITSIZE+BITPOS is too small for MODE. */
1694
1695 static rtx
1696 mask_rtx (mode, bitpos, bitsize, complement)
1697 enum machine_mode mode;
1698 int bitpos, bitsize, complement;
1699 {
1700 HOST_WIDE_INT masklow, maskhigh;
1701
1702 if (bitpos < HOST_BITS_PER_WIDE_INT)
1703 masklow = (HOST_WIDE_INT) -1 << bitpos;
1704 else
1705 masklow = 0;
1706
1707 if (bitpos + bitsize < HOST_BITS_PER_WIDE_INT)
1708 masklow &= ((unsigned HOST_WIDE_INT) -1
1709 >> (HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1710
1711 if (bitpos <= HOST_BITS_PER_WIDE_INT)
1712 maskhigh = -1;
1713 else
1714 maskhigh = (HOST_WIDE_INT) -1 << (bitpos - HOST_BITS_PER_WIDE_INT);
1715
1716 if (bitpos + bitsize > HOST_BITS_PER_WIDE_INT)
1717 maskhigh &= ((unsigned HOST_WIDE_INT) -1
1718 >> (2 * HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1719 else
1720 maskhigh = 0;
1721
1722 if (complement)
1723 {
1724 maskhigh = ~maskhigh;
1725 masklow = ~masklow;
1726 }
1727
1728 return immed_double_const (masklow, maskhigh, mode);
1729 }
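/* For illustration (example added; not in the original): on a host with
64-bit HOST_WIDE_INT, mask_rtx (SImode, 4, 8, 0) yields 0x00000ff0 --
eight ones above four zeros -- and with COMPLEMENT nonzero it yields
the bitwise inverse, 0xfffff00f after truncation to SImode. */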
1730
1731 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1732 VALUE truncated to BITSIZE bits and then shifted left BITPOS bits. */
1733
1734 static rtx
1735 lshift_value (mode, value, bitpos, bitsize)
1736 enum machine_mode mode;
1737 rtx value;
1738 int bitpos, bitsize;
1739 {
1740 unsigned HOST_WIDE_INT v = INTVAL (value);
1741 HOST_WIDE_INT low, high;
1742
1743 if (bitsize < HOST_BITS_PER_WIDE_INT)
1744 v &= ~((HOST_WIDE_INT) -1 << bitsize);
1745
1746 if (bitpos < HOST_BITS_PER_WIDE_INT)
1747 {
1748 low = v << bitpos;
1749 high = (bitpos > 0 ? (v >> (HOST_BITS_PER_WIDE_INT - bitpos)) : 0);
1750 }
1751 else
1752 {
1753 low = 0;
1754 high = v << (bitpos - HOST_BITS_PER_WIDE_INT);
1755 }
1756
1757 return immed_double_const (low, high, mode);
1758 }
1759 \f
1760 /* Extract a bit field that is split across two words
1761 and return an RTX for the result.
1762
1763 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1764 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1765 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
1766
1767 ALIGN is the known alignment of OP0. This is also the size of the
1768 memory objects to be used. */
1769
1770 static rtx
1771 extract_split_bit_field (op0, bitsize, bitpos, unsignedp, align)
1772 rtx op0;
1773 unsigned HOST_WIDE_INT bitsize, bitpos;
1774 int unsignedp;
1775 unsigned int align;
1776 {
1777 unsigned int unit;
1778 unsigned int bitsdone = 0;
1779 rtx result = NULL_RTX;
1780 int first = 1;
1781
1782 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
1783 much at a time. */
1784 if (GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
1785 unit = BITS_PER_WORD;
1786 else
1787 unit = MIN (align, BITS_PER_WORD);
1788
1789 while (bitsdone < bitsize)
1790 {
1791 unsigned HOST_WIDE_INT thissize;
1792 rtx part, word;
1793 unsigned HOST_WIDE_INT thispos;
1794 unsigned HOST_WIDE_INT offset;
1795
1796 offset = (bitpos + bitsdone) / unit;
1797 thispos = (bitpos + bitsdone) % unit;
1798
1799 /* THISSIZE must not overrun a word boundary. Otherwise,
1800 extract_fixed_bit_field will call us again, and we will mutually
1801 recurse forever. */
1802 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1803 thissize = MIN (thissize, unit - thispos);
1804
1805 /* If OP0 is a register, then handle OFFSET here.
1806
1807 When handling multiword bitfields, extract_bit_field may pass
1808 down a word_mode SUBREG of a larger REG for a bitfield that actually
1809 crosses a word boundary. Thus, for a SUBREG, we must find
1810 the current word starting from the base register. */
1811 if (GET_CODE (op0) == SUBREG)
1812 {
1813 word = operand_subword_force (SUBREG_REG (op0),
1814 SUBREG_WORD (op0) + offset,
1815 GET_MODE (SUBREG_REG (op0)));
1816 offset = 0;
1817 }
1818 else if (GET_CODE (op0) == REG)
1819 {
1820 word = operand_subword_force (op0, offset, GET_MODE (op0));
1821 offset = 0;
1822 }
1823 else
1824 word = op0;
1825
1826 /* Extract the parts in bit-counting order,
1827 whose meaning is determined by BYTES_PER_UNIT.
1828 OFFSET is in UNITs, and UNIT is in bits.
1829 extract_fixed_bit_field wants offset in bytes. */
1830 part = extract_fixed_bit_field (word_mode, word,
1831 offset * unit / BITS_PER_UNIT,
1832 thissize, thispos, 0, 1, align);
1833 bitsdone += thissize;
1834
1835 /* Shift this part into place for the result. */
1836 if (BYTES_BIG_ENDIAN)
1837 {
1838 if (bitsize != bitsdone)
1839 part = expand_shift (LSHIFT_EXPR, word_mode, part,
1840 build_int_2 (bitsize - bitsdone, 0), 0, 1);
1841 }
1842 else
1843 {
1844 if (bitsdone != thissize)
1845 part = expand_shift (LSHIFT_EXPR, word_mode, part,
1846 build_int_2 (bitsdone - thissize, 0), 0, 1);
1847 }
1848
1849 if (first)
1850 result = part;
1851 else
1852 /* Combine the parts with bitwise or. This works
1853 because we extracted each part as an unsigned bit field. */
1854 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
1855 OPTAB_LIB_WIDEN);
1856
1857 first = 0;
1858 }
1859
1860 /* Unsigned bit field: we are done. */
1861 if (unsignedp)
1862 return result;
1863 /* Signed bit field: sign-extend with two arithmetic shifts. */
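/* For illustration (example added; not in the original): with a 32-bit
word and BITSIZE == 5, the field value 0b10110 (-10) is shifted left
by 27 into the sign position and back down arithmetically, giving
0xfffffff6. */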
1864 result = expand_shift (LSHIFT_EXPR, word_mode, result,
1865 build_int_2 (BITS_PER_WORD - bitsize, 0),
1866 NULL_RTX, 0);
1867 return expand_shift (RSHIFT_EXPR, word_mode, result,
1868 build_int_2 (BITS_PER_WORD - bitsize, 0), NULL_RTX, 0);
1869 }
1870 \f
1871 /* Add INC into TARGET. */
1872
1873 void
1874 expand_inc (target, inc)
1875 rtx target, inc;
1876 {
1877 rtx value = expand_binop (GET_MODE (target), add_optab,
1878 target, inc,
1879 target, 0, OPTAB_LIB_WIDEN);
1880 if (value != target)
1881 emit_move_insn (target, value);
1882 }
1883
1884 /* Subtract DEC from TARGET. */
1885
1886 void
1887 expand_dec (target, dec)
1888 rtx target, dec;
1889 {
1890 rtx value = expand_binop (GET_MODE (target), sub_optab,
1891 target, dec,
1892 target, 0, OPTAB_LIB_WIDEN);
1893 if (value != target)
1894 emit_move_insn (target, value);
1895 }
1896 \f
1897 /* Output a shift instruction for expression code CODE,
1898 with SHIFTED being the rtx for the value to shift,
1899 and AMOUNT the tree for the amount to shift by.
1900 Store the result in the rtx TARGET, if that is convenient.
1901 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
1902 Return the rtx for where the value is. */
1903
1904 rtx
1905 expand_shift (code, mode, shifted, amount, target, unsignedp)
1906 enum tree_code code;
1907 register enum machine_mode mode;
1908 rtx shifted;
1909 tree amount;
1910 register rtx target;
1911 int unsignedp;
1912 {
1913 register rtx op1, temp = 0;
1914 register int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
1915 register int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
1916 int try;
1917
1918 /* Previously detected shift-counts computed by NEGATE_EXPR
1919 and shifted in the other direction; but that does not work
1920 on all machines. */
1921
1922 op1 = expand_expr (amount, NULL_RTX, VOIDmode, 0);
1923
1924 #ifdef SHIFT_COUNT_TRUNCATED
1925 if (SHIFT_COUNT_TRUNCATED)
1926 {
1927 if (GET_CODE (op1) == CONST_INT
1928 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
1929 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
1930 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
1931 % GET_MODE_BITSIZE (mode));
1932 else if (GET_CODE (op1) == SUBREG
1933 && SUBREG_WORD (op1) == 0)
1934 op1 = SUBREG_REG (op1);
1935 }
1936 #endif
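/* For illustration (example added; not in the original): on a
SHIFT_COUNT_TRUNCATED target, a constant shift of a 32-bit value by 37
is reduced here to a shift by 37 % 32 == 5. */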
1937
1938 if (op1 == const0_rtx)
1939 return shifted;
1940
1941 for (try = 0; temp == 0 && try < 3; try++)
1942 {
1943 enum optab_methods methods;
1944
1945 if (try == 0)
1946 methods = OPTAB_DIRECT;
1947 else if (try == 1)
1948 methods = OPTAB_WIDEN;
1949 else
1950 methods = OPTAB_LIB_WIDEN;
1951
1952 if (rotate)
1953 {
1954 /* Widening does not work for rotation. */
1955 if (methods == OPTAB_WIDEN)
1956 continue;
1957 else if (methods == OPTAB_LIB_WIDEN)
1958 {
1959 /* If we have been unable to open-code this by a rotation,
1960 do it as the IOR of two shifts. I.e., to rotate A
1961 by N bits, compute (A << N) | ((unsigned) A >> (C - N))
1962 where C is the bitsize of A.
1963
1964 It is theoretically possible that the target machine might
1965 not be able to perform either shift and hence we would
1966 be making two libcalls rather than just the one for the
1967 shift (similarly if IOR could not be done). We will allow
1968 this extremely unlikely lossage to avoid complicating the
1969 code below. */
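/* For illustration (example added; not in the original): a 32-bit
left rotate by 3 is emitted as (A << 3) | ((unsigned) A >> 29). */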
1970
1971 rtx subtarget = target == shifted ? 0 : target;
1972 rtx temp1;
1973 tree type = TREE_TYPE (amount);
1974 tree new_amount = make_tree (type, op1);
1975 tree other_amount
1976 = fold (build (MINUS_EXPR, type,
1977 convert (type,
1978 build_int_2 (GET_MODE_BITSIZE (mode),
1979 0)),
1980 amount));
1981
1982 shifted = force_reg (mode, shifted);
1983
1984 temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
1985 mode, shifted, new_amount, subtarget, 1);
1986 temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
1987 mode, shifted, other_amount, 0, 1);
1988 return expand_binop (mode, ior_optab, temp, temp1, target,
1989 unsignedp, methods);
1990 }
1991
1992 temp = expand_binop (mode,
1993 left ? rotl_optab : rotr_optab,
1994 shifted, op1, target, unsignedp, methods);
1995
1996 /* If we don't have the rotate, but we are rotating by a constant
1997 that is in range, try a rotate in the opposite direction. */
1998
1999 if (temp == 0 && GET_CODE (op1) == CONST_INT
2000 && INTVAL (op1) > 0 && INTVAL (op1) < GET_MODE_BITSIZE (mode))
2001 temp = expand_binop (mode,
2002 left ? rotr_optab : rotl_optab,
2003 shifted,
2004 GEN_INT (GET_MODE_BITSIZE (mode)
2005 - INTVAL (op1)),
2006 target, unsignedp, methods);
2007 }
2008 else if (unsignedp)
2009 temp = expand_binop (mode,
2010 left ? ashl_optab : lshr_optab,
2011 shifted, op1, target, unsignedp, methods);
2012
2013 /* Do arithmetic shifts.
2014 Also, if we are going to widen the operand, we can just as well
2015 use an arithmetic right-shift instead of a logical one. */
2016 if (temp == 0 && ! rotate
2017 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2018 {
2019 enum optab_methods methods1 = methods;
2020
2021 /* If trying to widen a log shift to an arithmetic shift,
2022 don't accept an arithmetic shift of the same size. */
2023 if (unsignedp)
2024 methods1 = OPTAB_MUST_WIDEN;
2025
2026 /* Arithmetic shift */
2027
2028 temp = expand_binop (mode,
2029 left ? ashl_optab : ashr_optab,
2030 shifted, op1, target, unsignedp, methods1);
2031 }
2032
2033 /* We used to try extzv here for logical right shifts, but that was
2034 only useful for one machine, the VAX, and caused poor code
2035 generation there for lshrdi3, so the code was deleted and a
2036 define_expand for lshrsi3 was added to vax.md. */
2037 }
2038
2039 if (temp == 0)
2040 abort ();
2041 return temp;
2042 }
2043 \f
2044 enum alg_code { alg_zero, alg_m, alg_shift,
2045 alg_add_t_m2, alg_sub_t_m2,
2046 alg_add_factor, alg_sub_factor,
2047 alg_add_t2_m, alg_sub_t2_m,
2048 alg_add, alg_subtract, alg_factor, alg_shiftop };
2049
2050 /* This structure records a sequence of operations.
2051 `ops' is the number of operations recorded.
2052 `cost' is their total cost.
2053 The operations are stored in `op' and the corresponding
2054 logarithms of the integer coefficients in `log'.
2055
2056 These are the operations:
2057 alg_zero total := 0;
2058 alg_m total := multiplicand;
2059 alg_shift total := total * coeff;
2060 alg_add_t_m2 total := total + multiplicand * coeff;
2061 alg_sub_t_m2 total := total - multiplicand * coeff;
2062 alg_add_factor total := total * coeff + total;
2063 alg_sub_factor total := total * coeff - total;
2064 alg_add_t2_m total := total * coeff + multiplicand;
2065 alg_sub_t2_m total := total * coeff - multiplicand;
2066
2067 The first operand must be either alg_zero or alg_m. */
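/* For illustration (example added; not in the original): t == 10 can be
recorded as alg_m, then alg_add_t2_m with log 2, then alg_shift with
log 1:
alg_m total := multiplicand;
alg_add_t2_m total := (total << 2) + multiplicand; -- 5 * multiplicand
alg_shift total := total << 1; -- 10 * multiplicand */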
2068
2069 struct algorithm
2070 {
2071 short cost;
2072 short ops;
2073 /* The sizes of the OP and LOG fields are not directly related to the
2074 word size, but the worst-case algorithms arise when we have few
2075 consecutive ones or zeros, i.e., a multiplicand like 10101010101...
2076 In that case we will generate shift-by-2, add, shift-by-2, add, ...,
2077 in total wordsize operations. */
2078 enum alg_code op[MAX_BITS_PER_WORD];
2079 char log[MAX_BITS_PER_WORD];
2080 };
2081
2082 static void synth_mult PARAMS ((struct algorithm *,
2083 unsigned HOST_WIDE_INT,
2084 int));
2085 static unsigned HOST_WIDE_INT choose_multiplier PARAMS ((unsigned HOST_WIDE_INT,
2086 int, int,
2087 unsigned HOST_WIDE_INT *,
2088 int *, int *));
2089 static unsigned HOST_WIDE_INT invert_mod2n PARAMS ((unsigned HOST_WIDE_INT,
2090 int));
2091 /* Compute and return the best algorithm for multiplying by T.
2092 The algorithm must cost less than COST_LIMIT.
2093 If retval.cost >= COST_LIMIT, no algorithm was found and all
2094 other fields of the returned struct are undefined. */
2095
2096 static void
2097 synth_mult (alg_out, t, cost_limit)
2098 struct algorithm *alg_out;
2099 unsigned HOST_WIDE_INT t;
2100 int cost_limit;
2101 {
2102 int m;
2103 struct algorithm *alg_in, *best_alg;
2104 int cost;
2105 unsigned HOST_WIDE_INT q;
2106
2107 /* Indicate that no algorithm has been found yet. If none is
2108 found, this value will be returned and will indicate failure. */
2109 alg_out->cost = cost_limit;
2110
2111 if (cost_limit <= 0)
2112 return;
2113
2114 /* t == 1 can be done at zero cost. */
2115 if (t == 1)
2116 {
2117 alg_out->ops = 1;
2118 alg_out->cost = 0;
2119 alg_out->op[0] = alg_m;
2120 return;
2121 }
2122
2123 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2124 fail now. */
2125 if (t == 0)
2126 {
2127 if (zero_cost >= cost_limit)
2128 return;
2129 else
2130 {
2131 alg_out->ops = 1;
2132 alg_out->cost = zero_cost;
2133 alg_out->op[0] = alg_zero;
2134 return;
2135 }
2136 }
2137
2138 /* We'll be needing a couple extra algorithm structures now. */
2139
2140 alg_in = (struct algorithm *)alloca (sizeof (struct algorithm));
2141 best_alg = (struct algorithm *)alloca (sizeof (struct algorithm));
2142
2143 /* If we have a group of zero bits at the low-order part of T, try
2144 multiplying by the remaining bits and then doing a shift. */
2145
2146 if ((t & 1) == 0)
2147 {
2148 m = floor_log2 (t & -t); /* m = number of low zero bits */
2149 if (m < BITS_PER_WORD)
2150 {
2151 q = t >> m;
2152 cost = shift_cost[m];
2153 synth_mult (alg_in, q, cost_limit - cost);
2154
2155 cost += alg_in->cost;
2156 if (cost < cost_limit)
2157 {
2158 struct algorithm *x;
2159 x = alg_in, alg_in = best_alg, best_alg = x;
2160 best_alg->log[best_alg->ops] = m;
2161 best_alg->op[best_alg->ops] = alg_shift;
2162 cost_limit = cost;
2163 }
2164 }
2165 }
2166
2167 /* If we have an odd number, add or subtract one. */
2168 if ((t & 1) != 0)
2169 {
2170 unsigned HOST_WIDE_INT w;
2171
2172 for (w = 1; (w & t) != 0; w <<= 1)
2173 ;
2174 /* If T was -1, then W will be zero after the loop. This is another
2175 case where T ends with ...111. Handling it by multiplying by (T + 1)
2176 and subtracting 1 produces slightly better code and selects an
2177 algorithm much faster than treating it like the ...0111 case
2178 below. */
2179 if (w == 0
2180 || (w > 2
2181 /* Reject the case where t is 3.
2182 Thus we prefer addition in that case. */
2183 && t != 3))
2184 {
2185 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
2186
2187 cost = add_cost;
2188 synth_mult (alg_in, t + 1, cost_limit - cost);
2189
2190 cost += alg_in->cost;
2191 if (cost < cost_limit)
2192 {
2193 struct algorithm *x;
2194 x = alg_in, alg_in = best_alg, best_alg = x;
2195 best_alg->log[best_alg->ops] = 0;
2196 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2197 cost_limit = cost;
2198 }
2199 }
2200 else
2201 {
2202 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
2203
2204 cost = add_cost;
2205 synth_mult (alg_in, t - 1, cost_limit - cost);
2206
2207 cost += alg_in->cost;
2208 if (cost < cost_limit)
2209 {
2210 struct algorithm *x;
2211 x = alg_in, alg_in = best_alg, best_alg = x;
2212 best_alg->log[best_alg->ops] = 0;
2213 best_alg->op[best_alg->ops] = alg_add_t_m2;
2214 cost_limit = cost;
2215 }
2216 }
2217 }
2218
2219 /* Look for factors of t of the form
2220 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2221 If we find such a factor, we can multiply by t using an algorithm that
2222 multiplies by q, shift the result by m and add/subtract it to itself.
2223
2224 We search for large factors first and loop down, even if large factors
2225 are less probable than small; if we find a large factor we will find a
2226 good sequence quickly, and therefore be able to prune the search
2227 (by decreasing COST_LIMIT). */
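/* For illustration (example added; not in the original): t == 15
factors as (2**2 + 1) * 3, so the search can record alg_add_factor
with log 2 (total := total * 4 + total) on top of a multiply-by-3
sub-sequence, computing x * 15 in two shift-and-add steps. */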
2228
2229 for (m = floor_log2 (t - 1); m >= 2; m--)
2230 {
2231 unsigned HOST_WIDE_INT d;
2232
2233 d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2234 if (t % d == 0 && t > d && m < BITS_PER_WORD)
2235 {
2236 cost = MIN (shiftadd_cost[m], add_cost + shift_cost[m]);
2237 synth_mult (alg_in, t / d, cost_limit - cost);
2238
2239 cost += alg_in->cost;
2240 if (cost < cost_limit)
2241 {
2242 struct algorithm *x;
2243 x = alg_in, alg_in = best_alg, best_alg = x;
2244 best_alg->log[best_alg->ops] = m;
2245 best_alg->op[best_alg->ops] = alg_add_factor;
2246 cost_limit = cost;
2247 }
2248 /* Other factors will have been taken care of in the recursion. */
2249 break;
2250 }
2251
2252 d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2253 if (t % d == 0 && t > d && m < BITS_PER_WORD)
2254 {
2255 cost = MIN (shiftsub_cost[m], add_cost + shift_cost[m]);
2256 synth_mult (alg_in, t / d, cost_limit - cost);
2257
2258 cost += alg_in->cost;
2259 if (cost < cost_limit)
2260 {
2261 struct algorithm *x;
2262 x = alg_in, alg_in = best_alg, best_alg = x;
2263 best_alg->log[best_alg->ops] = m;
2264 best_alg->op[best_alg->ops] = alg_sub_factor;
2265 cost_limit = cost;
2266 }
2267 break;
2268 }
2269 }
2270
2271 /* Try shift-and-add (load effective address) instructions,
2272 i.e. do a*3, a*5, a*9. */
2273 if ((t & 1) != 0)
2274 {
2275 q = t - 1;
2276 q = q & -q;
2277 m = exact_log2 (q);
2278 if (m >= 0 && m < BITS_PER_WORD)
2279 {
2280 cost = shiftadd_cost[m];
2281 synth_mult (alg_in, (t - 1) >> m, cost_limit - cost);
2282
2283 cost += alg_in->cost;
2284 if (cost < cost_limit)
2285 {
2286 struct algorithm *x;
2287 x = alg_in, alg_in = best_alg, best_alg = x;
2288 best_alg->log[best_alg->ops] = m;
2289 best_alg->op[best_alg->ops] = alg_add_t2_m;
2290 cost_limit = cost;
2291 }
2292 }
2293
2294 q = t + 1;
2295 q = q & -q;
2296 m = exact_log2 (q);
2297 if (m >= 0 && m < BITS_PER_WORD)
2298 {
2299 cost = shiftsub_cost[m];
2300 synth_mult (alg_in, (t + 1) >> m, cost_limit - cost);
2301
2302 cost += alg_in->cost;
2303 if (cost < cost_limit)
2304 {
2305 struct algorithm *x;
2306 x = alg_in, alg_in = best_alg, best_alg = x;
2307 best_alg->log[best_alg->ops] = m;
2308 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2309 cost_limit = cost;
2310 }
2311 }
2312 }
2313
2314 /* If cost_limit has not decreased since we stored it in alg_out->cost,
2315 we have not found any algorithm. */
2316 if (cost_limit == alg_out->cost)
2317 return;
2318
2319 /* If the sequence is too long for `struct algorithm'
2320 to record, make this search fail. */
2321 if (best_alg->ops == MAX_BITS_PER_WORD)
2322 return;
2323
2324 /* Copy the algorithm from temporary space to the space at alg_out.
2325 We avoid using structure assignment because the majority of
2326 best_alg is normally undefined, and this is a critical function. */
2327 alg_out->ops = best_alg->ops + 1;
2328 alg_out->cost = cost_limit;
2329 memcpy (alg_out->op, best_alg->op,
2330 alg_out->ops * sizeof *alg_out->op);
2331 memcpy (alg_out->log, best_alg->log,
2332 alg_out->ops * sizeof *alg_out->log);
2333 }
2334 \f
2335 /* Perform a multiplication and return an rtx for the result.
2336 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
2337 TARGET is a suggestion for where to store the result (an rtx).
2338
2339 We check specially for a constant integer as OP1.
2340 If you want this check for OP0 as well, then before calling
2341 you should swap the two operands if OP0 would be constant. */
2342
2343 rtx
2344 expand_mult (mode, op0, op1, target, unsignedp)
2345 enum machine_mode mode;
2346 register rtx op0, op1, target;
2347 int unsignedp;
2348 {
2349 rtx const_op1 = op1;
2350
2351 /* synth_mult does an `unsigned int' multiply. As long as the mode is
2352 less than or equal in size to `unsigned int' this doesn't matter.
2353 If the mode is larger than `unsigned int', then synth_mult works only
2354 if the constant value exactly fits in an `unsigned int' without any
2355 truncation. This means that multiplying by negative values does
2356 not work; results are off by 2^32 on a 32 bit machine. */
2357
2358 /* If we are multiplying in DImode, it may still be a win
2359 to try to work with shifts and adds. */
2360 if (GET_CODE (op1) == CONST_DOUBLE
2361 && GET_MODE_CLASS (GET_MODE (op1)) == MODE_INT
2362 && HOST_BITS_PER_INT >= BITS_PER_WORD
2363 && CONST_DOUBLE_HIGH (op1) == 0)
2364 const_op1 = GEN_INT (CONST_DOUBLE_LOW (op1));
2365 else if (HOST_BITS_PER_INT < GET_MODE_BITSIZE (mode)
2366 && GET_CODE (op1) == CONST_INT
2367 && INTVAL (op1) < 0)
2368 const_op1 = 0;
2369
2370 /* We used to test optimize here, on the grounds that it's better to
2371 produce a smaller program when -O is not used.
2372 But this causes such a terrible slowdown sometimes
2373 that it seems better to use synth_mult always. */
2374
2375 if (const_op1 && GET_CODE (const_op1) == CONST_INT
2376 && (unsignedp || ! flag_trapv))
2377 {
2378 struct algorithm alg;
2379 struct algorithm alg2;
2380 HOST_WIDE_INT val = INTVAL (op1);
2381 HOST_WIDE_INT val_so_far;
2382 rtx insn;
2383 int mult_cost;
2384 enum {basic_variant, negate_variant, add_variant} variant = basic_variant;
2385
2386 /* Try to do the computation three ways: multiply by the negative of OP1
2387 and then negate, do the multiplication directly, or do multiplication
2388 by OP1 - 1. */
2389
2390 mult_cost = rtx_cost (gen_rtx_MULT (mode, op0, op1), SET);
2391 mult_cost = MIN (12 * add_cost, mult_cost);
2392
2393 synth_mult (&alg, val, mult_cost);
2394
2395 /* This works only if the inverted value actually fits in an
2396 `unsigned int'. */
2397 if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2398 {
2399 synth_mult (&alg2, - val,
2400 (alg.cost < mult_cost ? alg.cost : mult_cost) - negate_cost);
2401 if (alg2.cost + negate_cost < alg.cost)
2402 alg = alg2, variant = negate_variant;
2403 }
2404
2405 /* This proves very useful for division-by-constant. */
2406 synth_mult (&alg2, val - 1,
2407 (alg.cost < mult_cost ? alg.cost : mult_cost) - add_cost);
2408 if (alg2.cost + add_cost < alg.cost)
2409 alg = alg2, variant = add_variant;
2410
2411 if (alg.cost < mult_cost)
2412 {
2413 /* We found something cheaper than a multiply insn. */
2414 int opno;
2415 rtx accum, tem;
2416 enum machine_mode nmode;
2417
2418 op0 = protect_from_queue (op0, 0);
2419
2420 /* Avoid referencing memory over and over.
2421 For speed, but also for correctness when mem is volatile. */
2422 if (GET_CODE (op0) == MEM)
2423 op0 = force_reg (mode, op0);
2424
2425 /* ACCUM starts out either as OP0 or as a zero, depending on
2426 the first operation. */
2427
2428 if (alg.op[0] == alg_zero)
2429 {
2430 accum = copy_to_mode_reg (mode, const0_rtx);
2431 val_so_far = 0;
2432 }
2433 else if (alg.op[0] == alg_m)
2434 {
2435 accum = copy_to_mode_reg (mode, op0);
2436 val_so_far = 1;
2437 }
2438 else
2439 abort ();
2440
2441 for (opno = 1; opno < alg.ops; opno++)
2442 {
2443 int log = alg.log[opno];
2444 int preserve = preserve_subexpressions_p ();
2445 rtx shift_subtarget = preserve ? 0 : accum;
2446 rtx add_target
2447 = (opno == alg.ops - 1 && target != 0 && variant != add_variant
2448 && ! preserve)
2449 ? target : 0;
2450 rtx accum_target = preserve ? 0 : accum;
2451
2452 switch (alg.op[opno])
2453 {
2454 case alg_shift:
2455 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2456 build_int_2 (log, 0), NULL_RTX, 0);
2457 val_so_far <<= log;
2458 break;
2459
2460 case alg_add_t_m2:
2461 tem = expand_shift (LSHIFT_EXPR, mode, op0,
2462 build_int_2 (log, 0), NULL_RTX, 0);
2463 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2464 add_target
2465 ? add_target : accum_target);
2466 val_so_far += (HOST_WIDE_INT) 1 << log;
2467 break;
2468
2469 case alg_sub_t_m2:
2470 tem = expand_shift (LSHIFT_EXPR, mode, op0,
2471 build_int_2 (log, 0), NULL_RTX, 0);
2472 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2473 add_target
2474 ? add_target : accum_target);
2475 val_so_far -= (HOST_WIDE_INT) 1 << log;
2476 break;
2477
2478 case alg_add_t2_m:
2479 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2480 build_int_2 (log, 0), shift_subtarget,
2481 0);
2482 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2483 add_target
2484 ? add_target : accum_target);
2485 val_so_far = (val_so_far << log) + 1;
2486 break;
2487
2488 case alg_sub_t2_m:
2489 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2490 build_int_2 (log, 0), shift_subtarget,
2491 0);
2492 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2493 add_target
2494 ? add_target : accum_target);
2495 val_so_far = (val_so_far << log) - 1;
2496 break;
2497
2498 case alg_add_factor:
2499 tem = expand_shift (LSHIFT_EXPR, mode, accum,
2500 build_int_2 (log, 0), NULL_RTX, 0);
2501 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2502 add_target
2503 ? add_target : accum_target);
2504 val_so_far += val_so_far << log;
2505 break;
2506
2507 case alg_sub_factor:
2508 tem = expand_shift (LSHIFT_EXPR, mode, accum,
2509 build_int_2 (log, 0), NULL_RTX, 0);
2510 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
2511 (add_target ? add_target
2512 : preserve ? 0 : tem));
2513 val_so_far = (val_so_far << log) - val_so_far;
2514 break;
2515
2516 default:
2517 abort ();
2518 }
2519
2520 /* Write a REG_EQUAL note on the last insn so that we can cse
2521 multiplication sequences. Note that if ACCUM is a SUBREG,
2522 we've set the inner register and must properly indicate
2523 that. */
2524
2525 tem = op0, nmode = mode;
2526 if (GET_CODE (accum) == SUBREG)
2527 {
2528 nmode = GET_MODE (SUBREG_REG (accum));
2529 tem = gen_lowpart (nmode, op0);
2530 }
2531
2532 insn = get_last_insn ();
2533 set_unique_reg_note (insn,
2534 REG_EQUAL,
2535 gen_rtx_MULT (nmode, tem,
2536 GEN_INT (val_so_far)));
2537 }
2538
2539 if (variant == negate_variant)
2540 {
2541 val_so_far = - val_so_far;
2542 accum = expand_unop (mode, neg_optab, accum, target, 0);
2543 }
2544 else if (variant == add_variant)
2545 {
2546 val_so_far = val_so_far + 1;
2547 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
2548 }
2549
2550 if (val != val_so_far)
2551 abort ();
2552
2553 return accum;
2554 }
2555 }
2556
2557 /* This used to use umul_optab if unsigned, but for non-widening multiply
2558 there is no difference between signed and unsigned. */
2559 op0 = expand_binop (mode,
2560 ! unsignedp
2561 && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
2562 ? smulv_optab : smul_optab,
2563 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
2564 if (op0 == 0)
2565 abort ();
2566 return op0;
2567 }
2568 \f
2569 /* Return the smallest n such that 2**n >= X. */
2570
2571 int
2572 ceil_log2 (x)
2573 unsigned HOST_WIDE_INT x;
2574 {
2575 return floor_log2 (x - 1) + 1;
2576 }
2577
2578 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
2579 replace division by D, and put the least significant N bits of the result
2580 in *MULTIPLIER_PTR and return the most significant bit.
2581
2582 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), and
2583 the needed precision is in PRECISION (should be <= N).
2584
2585 PRECISION should be as small as possible so this function can choose
2586 the multiplier more freely.
2587
2588 The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that
2589 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
2590
2591 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
2592 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
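/* For illustration (example added; not in the original): for D == 7 with
N == PRECISION == 32, lgup == 3 and the chosen N + 1 bit multiplier is
2**32 + 0x24924925, so the function returns 1 (the extra high bit) with
*MULTIPLIER_PTR == 0x24924925 and *POST_SHIFT_PTR == 3. */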
2593
2594 static
2595 unsigned HOST_WIDE_INT
2596 choose_multiplier (d, n, precision, multiplier_ptr, post_shift_ptr, lgup_ptr)
2597 unsigned HOST_WIDE_INT d;
2598 int n;
2599 int precision;
2600 unsigned HOST_WIDE_INT *multiplier_ptr;
2601 int *post_shift_ptr;
2602 int *lgup_ptr;
2603 {
2604 HOST_WIDE_INT mhigh_hi, mlow_hi;
2605 unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
2606 int lgup, post_shift;
2607 int pow, pow2;
2608 unsigned HOST_WIDE_INT nl, dummy1;
2609 HOST_WIDE_INT nh, dummy2;
2610
2611 /* lgup = ceil(log2(divisor)); */
2612 lgup = ceil_log2 (d);
2613
2614 if (lgup > n)
2615 abort ();
2616
2617 pow = n + lgup;
2618 pow2 = n + lgup - precision;
2619
2620 if (pow == 2 * HOST_BITS_PER_WIDE_INT)
2621 {
2622 /* We could handle this with some effort, but this case is much better
2623 handled directly with a scc insn, so rely on caller using that. */
2624 abort ();
2625 }
2626
2627 /* mlow = 2^(N + lgup)/d */
2628 if (pow >= HOST_BITS_PER_WIDE_INT)
2629 {
2630 nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
2631 nl = 0;
2632 }
2633 else
2634 {
2635 nh = 0;
2636 nl = (unsigned HOST_WIDE_INT) 1 << pow;
2637 }
2638 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
2639 &mlow_lo, &mlow_hi, &dummy1, &dummy2);
2640
2641 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
2642 if (pow2 >= HOST_BITS_PER_WIDE_INT)
2643 nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
2644 else
2645 nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
2646 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
2647 &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
2648
2649 if (mhigh_hi && nh - d >= d)
2650 abort ();
2651 if (mhigh_hi > 1 || mlow_hi > 1)
2652 abort ();
2653 /* assert that mlow < mhigh. */
2654 if (! (mlow_hi < mhigh_hi || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo)))
2655 abort ();
2656
2657 /* If precision == N, then mlow, mhigh exceed 2^N
2658 (but they do not exceed 2^(N+1)). */
2659
2660 /* Reduce to lowest terms. */
2661 for (post_shift = lgup; post_shift > 0; post_shift--)
2662 {
2663 unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
2664 unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
2665 if (ml_lo >= mh_lo)
2666 break;
2667
2668 mlow_hi = 0;
2669 mlow_lo = ml_lo;
2670 mhigh_hi = 0;
2671 mhigh_lo = mh_lo;
2672 }
2673
2674 *post_shift_ptr = post_shift;
2675 *lgup_ptr = lgup;
2676 if (n < HOST_BITS_PER_WIDE_INT)
2677 {
2678 unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
2679 *multiplier_ptr = mhigh_lo & mask;
2680 return mhigh_lo >= mask;
2681 }
2682 else
2683 {
2684 *multiplier_ptr = mhigh_lo;
2685 return mhigh_hi;
2686 }
2687 }
2688
2689 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
2690 congruent to 1 (mod 2**N). */
2691
2692 static unsigned HOST_WIDE_INT
2693 invert_mod2n (x, n)
2694 unsigned HOST_WIDE_INT x;
2695 int n;
2696 {
2697 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
2698
2699 /* The algorithm notes that the choice y = x satisfies
2700 x*y == 1 mod 2^3, since x is assumed odd.
2701 Each iteration doubles the number of bits of significance in y. */
2702
2703 unsigned HOST_WIDE_INT mask;
2704 unsigned HOST_WIDE_INT y = x;
2705 int nbit = 3;
2706
2707 mask = (n == HOST_BITS_PER_WIDE_INT
2708 ? ~(unsigned HOST_WIDE_INT) 0
2709 : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
2710
2711 while (nbit < n)
2712 {
2713 y = y * (2 - x*y) & mask; /* Modulo 2^N */
2714 nbit *= 2;
2715 }
2716 return y;
2717 }
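/* For illustration (sketch added; not part of the original): the same
doubling iteration specialized to a 32-bit word. The name is
hypothetical; assumes X is odd and `unsigned int' is exactly 32 bits. */
#if 0
static unsigned int
example_invert_mod2_32 (x)
unsigned int x;
{
unsigned int y = x; /* x*y == 1 (mod 2^3), since x is odd */
int nbit;

for (nbit = 3; nbit < 32; nbit *= 2)
y = y * (2 - x * y); /* bits of significance double each pass */

return y; /* e.g. x == 3 yields y == 0xaaaaaaab */
}
#endif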
2718
2719 /* Emit code to adjust ADJ_OPERAND after a multiplication of OP0 and OP1
2720 performed with the wrong signedness. ADJ_OPERAND is already the high
2721 half of the product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the
2722 signed product to become unsigned; if UNSIGNEDP is zero, adjust the
2723 unsigned product to become signed.
2724
2725 The result is put in TARGET if that is convenient.
2726
2727 MODE is the mode of operation. */
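/* For illustration (note added; not in the original): with N-bit
operands the two high parts differ, mod 2^N, by
high_u (op0, op1) == high_s (op0, op1)
+ (op0 < 0 ? op1 : 0) + (op1 < 0 ? op0 : 0),
and the two masked arithmetic right shifts below compute exactly those
conditional addends, applied with PLUS or MINUS per UNSIGNEDP. */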
2728
2729 rtx
2730 expand_mult_highpart_adjust (mode, adj_operand, op0, op1, target, unsignedp)
2731 enum machine_mode mode;
2732 register rtx adj_operand, op0, op1, target;
2733 int unsignedp;
2734 {
2735 rtx tem;
2736 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
2737
2738 tem = expand_shift (RSHIFT_EXPR, mode, op0,
2739 build_int_2 (GET_MODE_BITSIZE (mode) - 1, 0),
2740 NULL_RTX, 0);
2741 tem = expand_and (tem, op1, NULL_RTX);
2742 adj_operand
2743 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
2744 adj_operand);
2745
2746 tem = expand_shift (RSHIFT_EXPR, mode, op1,
2747 build_int_2 (GET_MODE_BITSIZE (mode) - 1, 0),
2748 NULL_RTX, 0);
2749 tem = expand_and (tem, op0, NULL_RTX);
2750 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
2751 target);
2752
2753 return target;
2754 }
2755
2756 /* Emit code to multiply OP0 and CNST1, putting the high half of the result
2757 in TARGET if that is convenient, and return where the result is. If the
2758 operation cannot be performed, 0 is returned.
2759
2760 MODE is the mode of operation and result.
2761
2762 UNSIGNEDP nonzero means unsigned multiply.
2763
2764 MAX_COST is the total allowed cost for the expanded RTL. */
2765
2766 rtx
2767 expand_mult_highpart (mode, op0, cnst1, target, unsignedp, max_cost)
2768 enum machine_mode mode;
2769 register rtx op0, target;
2770 unsigned HOST_WIDE_INT cnst1;
2771 int unsignedp;
2772 int max_cost;
2773 {
2774 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
2775 optab mul_highpart_optab;
2776 optab moptab;
2777 rtx tem;
2778 int size = GET_MODE_BITSIZE (mode);
2779 rtx op1, wide_op1;
2780
2781 /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */
2782 if (size > HOST_BITS_PER_WIDE_INT)
2783 abort ();
2784
2785 op1 = GEN_INT (cnst1);
2786
2787 if (GET_MODE_BITSIZE (wider_mode) <= HOST_BITS_PER_INT)
2788 wide_op1 = op1;
2789 else
2790 wide_op1
2791 = immed_double_const (cnst1,
2792 (unsignedp
2793 ? (HOST_WIDE_INT) 0
2794 : -(cnst1 >> (HOST_BITS_PER_WIDE_INT - 1))),
2795 wider_mode);
2796
2797 /* expand_mult handles constant multiplication of word_mode
2798 or narrower. It does a poor job for large modes. */
2799 if (size < BITS_PER_WORD
2800 && mul_cost[(int) wider_mode] + shift_cost[size-1] < max_cost)
2801 {
2802 /* We have to do this, since expand_binop doesn't do conversion for
2803 multiply. Maybe change expand_binop to handle widening multiply? */
2804 op0 = convert_to_mode (wider_mode, op0, unsignedp);
2805
2806 /* We know that this can't have signed overflow, so pretend this is
2807 an unsigned multiply. */
2808 tem = expand_mult (wider_mode, op0, wide_op1, NULL_RTX, 0);
2809 tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
2810 build_int_2 (size, 0), NULL_RTX, 1);
2811 return convert_modes (mode, wider_mode, tem, unsignedp);
2812 }
2813
2814 if (target == 0)
2815 target = gen_reg_rtx (mode);
2816
2817 /* First, try using a multiplication insn that generates only the needed
2818 high part of the product, with the signedness given by UNSIGNEDP. */
2819 if (mul_highpart_cost[(int) mode] < max_cost)
2820 {
2821 mul_highpart_optab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
2822 target = expand_binop (mode, mul_highpart_optab,
2823 op0, op1, target, unsignedp, OPTAB_DIRECT);
2824 if (target)
2825 return target;
2826 }
2827
2828 /* Second, try the same, but with the opposite signedness.
2829 We then need to adjust the result after the multiplication. */
2830 if (size - 1 < BITS_PER_WORD
2831 && (mul_highpart_cost[(int) mode] + 2 * shift_cost[size-1] + 4 * add_cost
2832 < max_cost))
2833 {
2834 mul_highpart_optab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
2835 target = expand_binop (mode, mul_highpart_optab,
2836 op0, op1, target, unsignedp, OPTAB_DIRECT);
2837 if (target)
2838 /* We used the wrong signedness. Adjust the result. */
2839 return expand_mult_highpart_adjust (mode, target, op0,
2840 op1, target, unsignedp);
2841 }
2842
2843 /* Try widening multiplication. */
2844 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
2845 if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
2846 && mul_widen_cost[(int) wider_mode] < max_cost)
2847 {
2848 op1 = force_reg (mode, op1);
2849 goto try;
2850 }
2851
2852 /* Try widening the mode and performing a non-widening multiplication. */
2853 moptab = smul_optab;
2854 if (smul_optab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
2855 && size - 1 < BITS_PER_WORD
2856 && mul_cost[(int) wider_mode] + shift_cost[size-1] < max_cost)
2857 {
2858 op1 = wide_op1;
2859 goto try;
2860 }
2861
2862 /* Try widening multiplication of opposite signedness, and adjust. */
2863 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
2864 if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
2865 && size - 1 < BITS_PER_WORD
2866 && (mul_widen_cost[(int) wider_mode]
2867 + 2 * shift_cost[size-1] + 4 * add_cost < max_cost))
2868 {
2869 rtx regop1 = force_reg (mode, op1);
2870 tem = expand_binop (wider_mode, moptab, op0, regop1,
2871 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
2872 if (tem != 0)
2873 {
2874 /* Extract the high half of the just generated product. */
2875 tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
2876 build_int_2 (size, 0), NULL_RTX, 1);
2877 tem = convert_modes (mode, wider_mode, tem, unsignedp);
2878 /* We used the wrong signedness. Adjust the result. */
2879 return expand_mult_highpart_adjust (mode, tem, op0, op1,
2880 target, unsignedp);
2881 }
2882 }
2883
2884 return 0;
2885
2886 try:
2887 /* Pass NULL_RTX as target since TARGET has wrong mode. */
2888 tem = expand_binop (wider_mode, moptab, op0, op1,
2889 NULL_RTX, unsignedp, OPTAB_WIDEN);
2890 if (tem == 0)
2891 return 0;
2892
2893 /* Extract the high half of the just generated product. */
2894 if (mode == word_mode)
2895 {
2896 return gen_highpart (mode, tem);
2897 }
2898 else
2899 {
2900 tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
2901 build_int_2 (size, 0), NULL_RTX, 1);
2902 return convert_modes (mode, wider_mode, tem, unsignedp);
2903 }
2904 }
2905 \f
2906 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
2907 if that is convenient, and returning where the result is.
2908 You may request either the quotient or the remainder as the result;
2909 specify REM_FLAG nonzero to get the remainder.
2910
2911 CODE is the expression code for which kind of division this is;
2912 it controls how rounding is done. MODE is the machine mode to use.
2913 UNSIGNEDP nonzero means do unsigned division. */
2914
2915 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
2916 and then correct it by or'ing in missing high bits
2917 if result of ANDI is nonzero.
2918 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
2919 This could optimize to a bfexts instruction.
2920 But C doesn't use these operations, so their optimizations are
2921 left for later. */
2922 /* ??? For modulo, we don't actually need the highpart of the first product,
2923 the low part will do nicely. And for small divisors, the second multiply
2924 can also be a low-part only multiply or even be completely left out.
2925 E.g. to calculate the remainder of a division by 3 with a 32 bit
2926 multiply, multiply with 0x55555556 and extract the upper two bits;
2927 the result is exact for inputs up to 0x1fffffff.
2928 The input range can be reduced by using cross-sum rules.
2929 For odd divisors >= 3, the following table gives right shift counts
2930 so that if a number is shifted by an integer multiple of the given
2931 amount, the remainder stays the same:
2932 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
2933 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
2934 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
2935 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
2936 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
2937
2938 Cross-sum rules for even numbers can be derived by leaving as many bits
2939 to the right alone as the divisor has zeros to the right.
2940 E.g. if x is an unsigned 32 bit number:
2941 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
2942 */
2943
2944 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
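/* For illustration (note added; not in the original): X & (X - 1) clears
the lowest set bit, so it is zero exactly for powers of two, e.g.
8 & 7 == 0 while 12 & 11 == 8; zero itself also passes, hence the
_OR_ZERO in the name. */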
2945
2946 rtx
2947 expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp)
2948 int rem_flag;
2949 enum tree_code code;
2950 enum machine_mode mode;
2951 register rtx op0, op1, target;
2952 int unsignedp;
2953 {
2954 enum machine_mode compute_mode;
2955 register rtx tquotient;
2956 rtx quotient = 0, remainder = 0;
2957 rtx last;
2958 int size;
2959 rtx insn, set;
2960 optab optab1, optab2;
2961 int op1_is_constant, op1_is_pow2;
2962 int max_cost, extra_cost;
2963 static HOST_WIDE_INT last_div_const = 0;
2964
2965 op1_is_constant = GET_CODE (op1) == CONST_INT;
2966 op1_is_pow2 = (op1_is_constant
2967 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
2968 || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1))))));
2969
2970 /*
2971 This is the structure of expand_divmod:
2972
2973 First comes code to fix up the operands so we can perform the operations
2974 correctly and efficiently.
2975
2976 Second comes a switch statement with code specific for each rounding mode.
2977 For some special operands this code emits all RTL for the desired
2978 operation, for other cases, it generates only a quotient and stores it in
2979 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
2980 to indicate that it has not done anything.
2981
2982 Last comes code that finishes the operation. If QUOTIENT is set and
2983 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
2984 QUOTIENT is not set, it is computed using trunc rounding.
2985
2986 We try to generate special code for division and remainder when OP1 is a
2987 constant. If |OP1| = 2**n we can use shifts and some other fast
2988 operations. For other values of OP1, we compute a carefully selected
2989 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
2990 by m.
2991
2992 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
2993 half of the product. Different strategies for generating the product are
2994 implemented in expand_mult_highpart.
2995
2996 If what we actually want is the remainder, we generate that by another
2997 by-constant multiplication and a subtraction. */
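/* For illustration (sketch added; not part of the original): the shape
of the code emitted below for unsigned division by 7 in 32 bits, using
the multiplier/post-shift pair discussed at choose_multiplier. The
name is hypothetical; assumes a 64-bit `unsigned long long' for the
widening multiply. */
#if 0
static unsigned int
example_udiv7 (x)
unsigned int x;
{
unsigned int t1 = ((unsigned long long) x * 0x24924925) >> 32;
unsigned int t2 = (x - t1) >> 1; /* fixup for the 33rd multiplier bit */

return (t1 + t2) >> 2; /* post_shift - 1 == 2 */
}
#endif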
2998
2999 /* We shouldn't be called with OP1 == const1_rtx, but some of the
3000 code below will malfunction if we are, so check here and handle
3001 the special case if so. */
3002 if (op1 == const1_rtx)
3003 return rem_flag ? const0_rtx : op0;
3004
3005 /* When dividing by -1, we could get an overflow.
3006 negv_optab can handle overflows. */
3007 if (! unsignedp && op1 == constm1_rtx)
3008 {
3009 if (rem_flag)
3010 return const0_rtx;
3011 return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3012 ? negv_optab : neg_optab, op0, target, 0);
3013 }
3014
3015 if (target
3016 /* Don't use the function value register as a target
3017 since we have to read it as well as write it,
3018 and function-inlining gets confused by this. */
3019 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3020 /* Don't clobber an operand while doing a multi-step calculation. */
3021 || ((rem_flag || op1_is_constant)
3022 && (reg_mentioned_p (target, op0)
3023 || (GET_CODE (op0) == MEM && GET_CODE (target) == MEM)))
3024 || reg_mentioned_p (target, op1)
3025 || (GET_CODE (op1) == MEM && GET_CODE (target) == MEM)))
3026 target = 0;
3027
3028 /* Get the mode in which to perform this computation. Normally it will
3029 be MODE, but sometimes we can't do the desired operation in MODE.
3030 If so, pick a wider mode in which we can do the operation. Convert
3031 to that mode at the start to avoid repeated conversions.
3032
3033 First see what operations we need. These depend on the expression
3034 we are evaluating. (We assume that divxx3 insns exist under the
3035 same conditions that modxx3 insns do and that these insns don't normally
3036 fail. If these assumptions are not correct, we may generate less
3037 efficient code in some cases.)
3038
3039 Then see if we find a mode in which we can open-code that operation
3040 (either a division, modulus, or shift). Finally, check for the smallest
3041 mode for which we can do the operation with a library call. */
3042
3043 /* We might want to refine this now that we have division-by-constant
3044 optimization. Since expand_mult_highpart tries so many variants, it is
3045 not straightforward to generalize this. Maybe we should make an array
3046 of possible modes in init_expmed? Save this for GCC 2.7. */
3047
3048 optab1 = (op1_is_pow2 ? (unsignedp ? lshr_optab : ashr_optab)
3049 : (unsignedp ? udiv_optab : sdiv_optab));
3050 optab2 = (op1_is_pow2 ? optab1 : (unsignedp ? udivmod_optab : sdivmod_optab));
3051
3052 for (compute_mode = mode; compute_mode != VOIDmode;
3053 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3054 if (optab1->handlers[(int) compute_mode].insn_code != CODE_FOR_nothing
3055 || optab2->handlers[(int) compute_mode].insn_code != CODE_FOR_nothing)
3056 break;
3057
3058 if (compute_mode == VOIDmode)
3059 for (compute_mode = mode; compute_mode != VOIDmode;
3060 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3061 if (optab1->handlers[(int) compute_mode].libfunc
3062 || optab2->handlers[(int) compute_mode].libfunc)
3063 break;
3064
3065 /* If we still couldn't find a mode, use MODE, but we'll probably abort
3066 in expand_binop. */
3067 if (compute_mode == VOIDmode)
3068 compute_mode = mode;
3069
3070 if (target && GET_MODE (target) == compute_mode)
3071 tquotient = target;
3072 else
3073 tquotient = gen_reg_rtx (compute_mode);
3074
3075 size = GET_MODE_BITSIZE (compute_mode);
3076 #if 0
3077 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3078 (mode), and thereby get better code when OP1 is a constant. Do that
3079 later. It will require going over all usages of SIZE below. */
3080 size = GET_MODE_BITSIZE (mode);
3081 #endif
3082
3083 /* Only deduct something for a REM if the last divide done was
3084 for a different constant. Then set the constant of the last
3085 divide. */
3086 max_cost = div_cost[(int) compute_mode]
3087 - (rem_flag && ! (last_div_const != 0 && op1_is_constant
3088 && INTVAL (op1) == last_div_const)
3089 ? mul_cost[(int) compute_mode] + add_cost : 0);
3090
3091 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3092
3093 /* Now convert to the best mode to use. */
3094 if (compute_mode != mode)
3095 {
3096 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3097 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3098
3099 /* convert_modes may have placed op1 into a register, so we
3100 must recompute the following. */
3101 op1_is_constant = GET_CODE (op1) == CONST_INT;
3102 op1_is_pow2 = (op1_is_constant
3103 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3104 || (! unsignedp
3105 && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1))))));
3106 }
3107
3108 /* If one of the operands is a volatile MEM, copy it into a register. */
3109
3110 if (GET_CODE (op0) == MEM && MEM_VOLATILE_P (op0))
3111 op0 = force_reg (compute_mode, op0);
3112 if (GET_CODE (op1) == MEM && MEM_VOLATILE_P (op1))
3113 op1 = force_reg (compute_mode, op1);
3114
3115 /* If we need the remainder or if OP1 is constant, we need to
3116 put OP0 in a register in case it has any queued subexpressions. */
3117 if (rem_flag || op1_is_constant)
3118 op0 = force_reg (compute_mode, op0);
3119
3120 last = get_last_insn ();
3121
3122 /* Promote floor rounding to trunc rounding for unsigned operations. */
3123 if (unsignedp)
3124 {
3125 if (code == FLOOR_DIV_EXPR)
3126 code = TRUNC_DIV_EXPR;
3127 if (code == FLOOR_MOD_EXPR)
3128 code = TRUNC_MOD_EXPR;
3129 if (code == EXACT_DIV_EXPR && op1_is_pow2)
3130 code = TRUNC_DIV_EXPR;
3131 }
3132
3133 if (op1 != const0_rtx)
3134 switch (code)
3135 {
3136 case TRUNC_MOD_EXPR:
3137 case TRUNC_DIV_EXPR:
3138 if (op1_is_constant)
3139 {
3140 if (unsignedp)
3141 {
3142 unsigned HOST_WIDE_INT mh, ml;
3143 int pre_shift, post_shift;
3144 int dummy;
3145 unsigned HOST_WIDE_INT d = INTVAL (op1);
3146
3147 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3148 {
3149 pre_shift = floor_log2 (d);
3150 if (rem_flag)
3151 {
3152 remainder
3153 = expand_binop (compute_mode, and_optab, op0,
3154 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3155 remainder, 1,
3156 OPTAB_LIB_WIDEN);
3157 if (remainder)
3158 return gen_lowpart (mode, remainder);
3159 }
3160 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3161 build_int_2 (pre_shift, 0),
3162 tquotient, 1);
3163 }
3164 else if (size <= HOST_BITS_PER_WIDE_INT)
3165 {
3166 if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
3167 {
3168 /* Most significant bit of divisor is set; emit an scc
3169 insn. */
3170 quotient = emit_store_flag (tquotient, GEU, op0, op1,
3171 compute_mode, 1, 1);
3172 if (quotient == 0)
3173 goto fail1;
3174 }
3175 else
3176 {
3177 /* Find a suitable multiplier and right shift count
3178 instead of multiplying with D. */
3179
3180 mh = choose_multiplier (d, size, size,
3181 &ml, &post_shift, &dummy);
3182
3183 /* If the suggested multiplier is more than SIZE bits,
3184 we can do better for even divisors, using an
3185 initial right shift. */
3186 if (mh != 0 && (d & 1) == 0)
3187 {
3188 pre_shift = floor_log2 (d & -d);
3189 mh = choose_multiplier (d >> pre_shift, size,
3190 size - pre_shift,
3191 &ml, &post_shift, &dummy);
3192 if (mh)
3193 abort ();
3194 }
3195 else
3196 pre_shift = 0;
3197
3198 if (mh != 0)
3199 {
3200 rtx t1, t2, t3, t4;
3201
3202 if (post_shift - 1 >= BITS_PER_WORD)
3203 goto fail1;
3204
3205 extra_cost = (shift_cost[post_shift - 1]
3206 + shift_cost[1] + 2 * add_cost);
3207 t1 = expand_mult_highpart (compute_mode, op0, ml,
3208 NULL_RTX, 1,
3209 max_cost - extra_cost);
3210 if (t1 == 0)
3211 goto fail1;
3212 t2 = force_operand (gen_rtx_MINUS (compute_mode,
3213 op0, t1),
3214 NULL_RTX);
3215 t3 = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3216 build_int_2 (1, 0), NULL_RTX, 1);
3217 t4 = force_operand (gen_rtx_PLUS (compute_mode,
3218 t1, t3),
3219 NULL_RTX);
3220 quotient
3221 = expand_shift (RSHIFT_EXPR, compute_mode, t4,
3222 build_int_2 (post_shift - 1, 0),
3223 tquotient, 1);
3224 }
3225 else
3226 {
3227 rtx t1, t2;
3228
3229 if (pre_shift >= BITS_PER_WORD
3230 || post_shift >= BITS_PER_WORD)
3231 goto fail1;
3232
3233 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3234 build_int_2 (pre_shift, 0),
3235 NULL_RTX, 1);
3236 extra_cost = (shift_cost[pre_shift]
3237 + shift_cost[post_shift]);
3238 t2 = expand_mult_highpart (compute_mode, t1, ml,
3239 NULL_RTX, 1,
3240 max_cost - extra_cost);
3241 if (t2 == 0)
3242 goto fail1;
3243 quotient
3244 = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3245 build_int_2 (post_shift, 0),
3246 tquotient, 1);
3247 }
3248 }
3249 }
3250 else /* Mode is too wide to use the tricky code. */
3251 break;
3252
3253 insn = get_last_insn ();
3254 if (insn != last
3255 && (set = single_set (insn)) != 0
3256 && SET_DEST (set) == quotient)
3257 set_unique_reg_note (insn,
3258 REG_EQUAL,
3259 gen_rtx_UDIV (compute_mode, op0, op1));
3260 }
3261 else /* TRUNC_DIV, signed */
3262 {
3263 unsigned HOST_WIDE_INT ml;
3264 int lgup, post_shift;
3265 HOST_WIDE_INT d = INTVAL (op1);
3266 unsigned HOST_WIDE_INT abs_d = d >= 0 ? d : -d;
3267
3268 /* n rem d = n rem -d */
3269 if (rem_flag && d < 0)
3270 {
3271 d = abs_d;
3272 op1 = GEN_INT (abs_d);
3273 }
3274
3275 if (d == 1)
3276 quotient = op0;
3277 else if (d == -1)
3278 quotient = expand_unop (compute_mode, neg_optab, op0,
3279 tquotient, 0);
3280 else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
3281 {
3282 /* This case is not handled correctly below. */
3283 quotient = emit_store_flag (tquotient, EQ, op0, op1,
3284 compute_mode, 1, 1);
3285 if (quotient == 0)
3286 goto fail1;
3287 }
3288 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
3289 && (rem_flag ? smod_pow2_cheap : sdiv_pow2_cheap))
3290 ;
3291 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
3292 {
3293 lgup = floor_log2 (abs_d);
3294 if (BRANCH_COST < 1 || (abs_d != 2 && BRANCH_COST < 3))
3295 {
3296 rtx label = gen_label_rtx ();
3297 rtx t1;
3298
3299 t1 = copy_to_mode_reg (compute_mode, op0);
3300 do_cmp_and_jump (t1, const0_rtx, GE,
3301 compute_mode, label);
3302 expand_inc (t1, GEN_INT (abs_d - 1));
3303 emit_label (label);
3304 quotient = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3305 build_int_2 (lgup, 0),
3306 tquotient, 0);
3307 }
3308 else
3309 {
3310 rtx t1, t2, t3;
3311 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3312 build_int_2 (size - 1, 0),
3313 NULL_RTX, 0);
3314 t2 = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3315 build_int_2 (size - lgup, 0),
3316 NULL_RTX, 1);
3317 t3 = force_operand (gen_rtx_PLUS (compute_mode,
3318 op0, t2),
3319 NULL_RTX);
3320 quotient = expand_shift (RSHIFT_EXPR, compute_mode, t3,
3321 build_int_2 (lgup, 0),
3322 tquotient, 0);
3323 }
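/* For illustration (example added; not in the original): at 32 bits
with abs_d == 4 the branch-free form above computes t2 as 3 when OP0
is negative and 0 otherwise, so the quotient is (op0 + t2) >> 2,
matching truncation toward zero. */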
3324
3325 /* We have computed OP0 / abs(OP1). If OP1 is negative, negate
3326 the quotient. */
3327 if (d < 0)
3328 {
3329 insn = get_last_insn ();
3330 if (insn != last
3331 && (set = single_set (insn)) != 0
3332 && SET_DEST (set) == quotient
3333 && abs_d < ((unsigned HOST_WIDE_INT) 1
3334 << (HOST_BITS_PER_WIDE_INT - 1)))
3335 set_unique_reg_note (insn,
3336 REG_EQUAL,
3337 gen_rtx_DIV (compute_mode,
3338 op0,
3339 GEN_INT (abs_d)));
3340
3341 quotient = expand_unop (compute_mode, neg_optab,
3342 quotient, quotient, 0);
3343 }
3344 }
3345 else if (size <= HOST_BITS_PER_WIDE_INT)
3346 {
3347 choose_multiplier (abs_d, size, size - 1,
3348 &ml, &post_shift, &lgup);
3349 if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
3350 {
3351 rtx t1, t2, t3;
3352
3353 if (post_shift >= BITS_PER_WORD
3354 || size - 1 >= BITS_PER_WORD)
3355 goto fail1;
3356
3357 extra_cost = (shift_cost[post_shift]
3358 + shift_cost[size - 1] + add_cost);
3359 t1 = expand_mult_highpart (compute_mode, op0, ml,
3360 NULL_RTX, 0,
3361 max_cost - extra_cost);
3362 if (t1 == 0)
3363 goto fail1;
3364 t2 = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3365 build_int_2 (post_shift, 0), NULL_RTX, 0);
3366 t3 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3367 build_int_2 (size - 1, 0), NULL_RTX, 0);
3368 if (d < 0)
3369 quotient
3370 = force_operand (gen_rtx_MINUS (compute_mode,
3371 t3, t2),
3372 tquotient);
3373 else
3374 quotient
3375 = force_operand (gen_rtx_MINUS (compute_mode,
3376 t2, t3),
3377 tquotient);
3378 }
3379 else
3380 {
3381 rtx t1, t2, t3, t4;
3382
3383 if (post_shift >= BITS_PER_WORD
3384 || size - 1 >= BITS_PER_WORD)
3385 goto fail1;
3386
3387 ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
3388 extra_cost = (shift_cost[post_shift]
3389 + shift_cost[size - 1] + 2 * add_cost);
3390 t1 = expand_mult_highpart (compute_mode, op0, ml,
3391 NULL_RTX, 0,
3392 max_cost - extra_cost);
3393 if (t1 == 0)
3394 goto fail1;
3395 t2 = force_operand (gen_rtx_PLUS (compute_mode,
3396 t1, op0),
3397 NULL_RTX);
3398 t3 = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3399 build_int_2 (post_shift, 0),
3400 NULL_RTX, 0);
3401 t4 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3402 build_int_2 (size - 1, 0),
3403 NULL_RTX, 0);
3404 if (d < 0)
3405 quotient
3406 = force_operand (gen_rtx_MINUS (compute_mode,
3407 t4, t3),
3408 tquotient);
3409 else
3410 quotient
3411 = force_operand (gen_rtx_MINUS (compute_mode,
3412 t3, t4),
3413 tquotient);
3414 }
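/* For illustration (example added; not in the original): for d == 7 at
32 bits this second form computes
q = ((op0 + mulhi (op0, 0x92492493)) >> 2) - (op0 >> 31),
where 0x92492493 is ML after the sign-extending `|=' above and mulhi
stands for a signed high-part multiply. */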
3415 }
3416 else /* Mode is too wide to use the tricky code. */
3417 break;
3418
3419 insn = get_last_insn ();
3420 if (insn != last
3421 && (set = single_set (insn)) != 0
3422 && SET_DEST (set) == quotient)
3423 set_unique_reg_note (insn,
3424 REG_EQUAL,
3425 gen_rtx_DIV (compute_mode, op0, op1));
3426 }
3427 break;
3428 }
3429 fail1:
3430 delete_insns_since (last);
3431 break;
3432
3433 case FLOOR_DIV_EXPR:
3434 case FLOOR_MOD_EXPR:
3435 /* We will come here only for signed operations. */
3436 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
3437 {
3438 unsigned HOST_WIDE_INT mh, ml;
3439 int pre_shift, lgup, post_shift;
3440 HOST_WIDE_INT d = INTVAL (op1);
3441
3442 if (d > 0)
3443 {
3444 /* We could just as easily deal with negative constants here,
3445 but it does not seem worth the trouble for GCC 2.6. */
3446 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3447 {
3448 pre_shift = floor_log2 (d);
3449 if (rem_flag)
3450 {
3451 remainder = expand_binop (compute_mode, and_optab, op0,
3452 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3453 remainder, 0, OPTAB_LIB_WIDEN);
3454 if (remainder)
3455 return gen_lowpart (mode, remainder);
3456 }
3457 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3458 build_int_2 (pre_shift, 0),
3459 tquotient, 0);
3460 }
3461 else
3462 {
3463 rtx t1, t2, t3, t4;
3464
3465 mh = choose_multiplier (d, size, size - 1,
3466 &ml, &post_shift, &lgup);
3467 if (mh)
3468 abort ();
3469
3470 if (post_shift < BITS_PER_WORD
3471 && size - 1 < BITS_PER_WORD)
3472 {
3473 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3474 build_int_2 (size - 1, 0),
3475 NULL_RTX, 0);
3476 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
3477 NULL_RTX, 0, OPTAB_WIDEN);
3478 extra_cost = (shift_cost[post_shift]
3479 + shift_cost[size - 1] + 2 * add_cost);
3480 t3 = expand_mult_highpart (compute_mode, t2, ml,
3481 NULL_RTX, 1,
3482 max_cost - extra_cost);
3483 if (t3 != 0)
3484 {
3485 t4 = expand_shift (RSHIFT_EXPR, compute_mode, t3,
3486 build_int_2 (post_shift, 0),
3487 NULL_RTX, 1);
3488 quotient = expand_binop (compute_mode, xor_optab,
3489 t4, t1, tquotient, 0,
3490 OPTAB_WIDEN);
3491 }
3492 }
3493 }
3494 }
3495 else
3496 {
3497 rtx nsign, t1, t2, t3, t4;
3498 t1 = force_operand (gen_rtx_PLUS (compute_mode,
3499 op0, constm1_rtx), NULL_RTX);
3500 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
3501 0, OPTAB_WIDEN);
3502 nsign = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3503 build_int_2 (size - 1, 0), NULL_RTX, 0);
3504 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
3505 NULL_RTX);
3506 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
3507 NULL_RTX, 0);
3508 if (t4)
3509 {
3510 rtx t5;
3511 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
3512 NULL_RTX, 0);
3513 quotient = force_operand (gen_rtx_PLUS (compute_mode,
3514 t4, t5),
3515 tquotient);
3516 }
3517 }
3518 }
3519
3520 if (quotient != 0)
3521 break;
3522 delete_insns_since (last);
3523
3524 /* Try using an instruction that produces both the quotient and
3525 remainder, using truncation. We can easily compensate the quotient
3526 or remainder to get floor rounding, once we have the remainder.
3527 Notice that we also compute the final remainder value here
3528 and return the result right away. */
3529 if (target == 0 || GET_MODE (target) != compute_mode)
3530 target = gen_reg_rtx (compute_mode);
3531
3532 if (rem_flag)
3533 {
3534 remainder
3535 = GET_CODE (target) == REG ? target : gen_reg_rtx (compute_mode);
3536 quotient = gen_reg_rtx (compute_mode);
3537 }
3538 else
3539 {
3540 quotient
3541 = GET_CODE (target) == REG ? target : gen_reg_rtx (compute_mode);
3542 remainder = gen_reg_rtx (compute_mode);
3543 }
3544
3545 if (expand_twoval_binop (sdivmod_optab, op0, op1,
3546 quotient, remainder, 0))
3547 {
3548 /* This could be computed with a branch-less sequence.
3549 Save that for later. */
3550 rtx tem;
3551 rtx label = gen_label_rtx ();
3552 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
3553 tem = expand_binop (compute_mode, xor_optab, op0, op1,
3554 NULL_RTX, 0, OPTAB_WIDEN);
3555 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
3556 expand_dec (quotient, const1_rtx);
3557 expand_inc (remainder, op1);
3558 emit_label (label);
3559 return gen_lowpart (mode, rem_flag ? remainder : quotient);
3560 }
3561
3562 /* No luck with division elimination or divmod. Have to do it
3563 by conditionally adjusting op0 *and* the result. */
3564 {
3565 rtx label1, label2, label3, label4, label5;
3566 rtx adjusted_op0;
3567 rtx tem;
3568
3569 quotient = gen_reg_rtx (compute_mode);
3570 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
3571 label1 = gen_label_rtx ();
3572 label2 = gen_label_rtx ();
3573 label3 = gen_label_rtx ();
3574 label4 = gen_label_rtx ();
3575 label5 = gen_label_rtx ();
3576 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
3577 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
3578 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3579 quotient, 0, OPTAB_LIB_WIDEN);
3580 if (tem != quotient)
3581 emit_move_insn (quotient, tem);
3582 emit_jump_insn (gen_jump (label5));
3583 emit_barrier ();
3584 emit_label (label1);
3585 expand_inc (adjusted_op0, const1_rtx);
3586 emit_jump_insn (gen_jump (label4));
3587 emit_barrier ();
3588 emit_label (label2);
3589 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
3590 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3591 quotient, 0, OPTAB_LIB_WIDEN);
3592 if (tem != quotient)
3593 emit_move_insn (quotient, tem);
3594 emit_jump_insn (gen_jump (label5));
3595 emit_barrier ();
3596 emit_label (label3);
3597 expand_dec (adjusted_op0, const1_rtx);
3598 emit_label (label4);
3599 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3600 quotient, 0, OPTAB_LIB_WIDEN);
3601 if (tem != quotient)
3602 emit_move_insn (quotient, tem);
3603 expand_dec (quotient, const1_rtx);
3604 emit_label (label5);
3605 }
3606 break;
3607
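/* Editor's sketch (hypothetical helper name): the floor-rounding
   compensation emitted above, in plain C.  Start from a truncating
   divide; when the remainder is non-zero and the operand signs differ
   (op0 ^ op1 negative), decrement the quotient and add the divisor back
   into the remainder.  */

static void
floor_divmod (long x, long y, long *q, long *r)
{
  *q = x / y;                     /* C division truncates toward zero.  */
  *r = x % y;
  if (*r != 0 && (x ^ y) < 0)     /* inexact and signs differ: adjust.  */
    {
      *q -= 1;                    /* expand_dec (quotient, const1_rtx)  */
      *r += y;                    /* expand_inc (remainder, op1)  */
    }
}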
3608 case CEIL_DIV_EXPR:
3609 case CEIL_MOD_EXPR:
3610 if (unsignedp)
3611 {
3612 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
3613 {
3614 rtx t1, t2, t3;
3615 unsigned HOST_WIDE_INT d = INTVAL (op1);
3616 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3617 build_int_2 (floor_log2 (d), 0),
3618 tquotient, 1);
3619 t2 = expand_binop (compute_mode, and_optab, op0,
3620 GEN_INT (d - 1),
3621 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3622 t3 = gen_reg_rtx (compute_mode);
3623 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
3624 compute_mode, 1, 1);
3625 if (t3 == 0)
3626 {
3627 rtx lab;
3628 lab = gen_label_rtx ();
3629 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
3630 expand_inc (t1, const1_rtx);
3631 emit_label (lab);
3632 quotient = t1;
3633 }
3634 else
3635 quotient = force_operand (gen_rtx_PLUS (compute_mode,
3636 t1, t3),
3637 tquotient);
3638 break;
3639 }
3640
3641 /* Try using an instruction that produces both the quotient and
3642 remainder, using truncation. We can easily compensate the
3643 quotient or remainder to get ceiling rounding, once we have the
3644 remainder. Notice that we also compute the final remainder
3645 value here and return the result right away. */
3646 if (target == 0 || GET_MODE (target) != compute_mode)
3647 target = gen_reg_rtx (compute_mode);
3648
3649 if (rem_flag)
3650 {
3651 remainder = (GET_CODE (target) == REG
3652 ? target : gen_reg_rtx (compute_mode));
3653 quotient = gen_reg_rtx (compute_mode);
3654 }
3655 else
3656 {
3657 quotient = (GET_CODE (target) == REG
3658 ? target : gen_reg_rtx (compute_mode));
3659 remainder = gen_reg_rtx (compute_mode);
3660 }
3661
3662 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
3663 remainder, 1))
3664 {
3665 /* This could be computed with a branch-less sequence.
3666 Save that for later. */
3667 rtx label = gen_label_rtx ();
3668 do_cmp_and_jump (remainder, const0_rtx, EQ,
3669 compute_mode, label);
3670 expand_inc (quotient, const1_rtx);
3671 expand_dec (remainder, op1);
3672 emit_label (label);
3673 return gen_lowpart (mode, rem_flag ? remainder : quotient);
3674 }
3675
3676 /* No luck with division elimination or divmod. Have to do it
3677 by conditionally adjusting op0 *and* the result. */
3678 {
3679 rtx label1, label2;
3680 rtx adjusted_op0, tem;
3681
3682 quotient = gen_reg_rtx (compute_mode);
3683 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
3684 label1 = gen_label_rtx ();
3685 label2 = gen_label_rtx ();
3686 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
3687 compute_mode, label1);
3688 emit_move_insn (quotient, const0_rtx);
3689 emit_jump_insn (gen_jump (label2));
3690 emit_barrier ();
3691 emit_label (label1);
3692 expand_dec (adjusted_op0, const1_rtx);
3693 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
3694 quotient, 1, OPTAB_LIB_WIDEN);
3695 if (tem != quotient)
3696 emit_move_insn (quotient, tem);
3697 expand_inc (quotient, const1_rtx);
3698 emit_label (label2);
3699 }
3700 }
3701 else /* signed */
3702 {
3703 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3704 && INTVAL (op1) >= 0)
3705 {
3706 /* This is extremely similar to the code for the unsigned case
3707 above. For 2.7 we should merge these variants, but for
3708 2.6.1 I don't want to touch the code for unsigned since that
3709 gets used in C. The signed case will only be used by other
3710 languages (Ada). */
3711
3712 rtx t1, t2, t3;
3713 unsigned HOST_WIDE_INT d = INTVAL (op1);
3714 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3715 build_int_2 (floor_log2 (d), 0),
3716 tquotient, 0);
3717 t2 = expand_binop (compute_mode, and_optab, op0,
3718 GEN_INT (d - 1),
3719 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3720 t3 = gen_reg_rtx (compute_mode);
3721 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
3722 compute_mode, 1, 1);
3723 if (t3 == 0)
3724 {
3725 rtx lab;
3726 lab = gen_label_rtx ();
3727 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
3728 expand_inc (t1, const1_rtx);
3729 emit_label (lab);
3730 quotient = t1;
3731 }
3732 else
3733 quotient = force_operand (gen_rtx_PLUS (compute_mode,
3734 t1, t3),
3735 tquotient);
3736 break;
3737 }
3738
3739 /* Try using an instruction that produces both the quotient and
3740 remainder, using truncation. We can easily compensate the
3741 quotient or remainder to get ceiling rounding, once we have the
3742 remainder. Notice that we also compute the final remainder
3743 value here and return the result right away. */
3744 if (target == 0 || GET_MODE (target) != compute_mode)
3745 target = gen_reg_rtx (compute_mode);
3746 if (rem_flag)
3747 {
3748 remainder = (GET_CODE (target) == REG
3749 ? target : gen_reg_rtx (compute_mode));
3750 quotient = gen_reg_rtx (compute_mode);
3751 }
3752 else
3753 {
3754 quotient = (GET_CODE (target) == REG
3755 ? target : gen_reg_rtx (compute_mode));
3756 remainder = gen_reg_rtx (compute_mode);
3757 }
3758
3759 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
3760 remainder, 0))
3761 {
3762 /* This could be computed with a branch-less sequence.
3763 Save that for later. */
3764 rtx tem;
3765 rtx label = gen_label_rtx ();
3766 do_cmp_and_jump (remainder, const0_rtx, EQ,
3767 compute_mode, label);
3768 tem = expand_binop (compute_mode, xor_optab, op0, op1,
3769 NULL_RTX, 0, OPTAB_WIDEN);
3770 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
3771 expand_inc (quotient, const1_rtx);
3772 expand_dec (remainder, op1);
3773 emit_label (label);
3774 return gen_lowpart (mode, rem_flag ? remainder : quotient);
3775 }
3776
3777 /* No luck with division elimination or divmod. Have to do it
3778 by conditionally adjusting op0 *and* the result. */
3779 {
3780 rtx label1, label2, label3, label4, label5;
3781 rtx adjusted_op0;
3782 rtx tem;
3783
3784 quotient = gen_reg_rtx (compute_mode);
3785 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
3786 label1 = gen_label_rtx ();
3787 label2 = gen_label_rtx ();
3788 label3 = gen_label_rtx ();
3789 label4 = gen_label_rtx ();
3790 label5 = gen_label_rtx ();
3791 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
3792 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
3793 compute_mode, label1);
3794 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3795 quotient, 0, OPTAB_LIB_WIDEN);
3796 if (tem != quotient)
3797 emit_move_insn (quotient, tem);
3798 emit_jump_insn (gen_jump (label5));
3799 emit_barrier ();
3800 emit_label (label1);
3801 expand_dec (adjusted_op0, const1_rtx);
3802 emit_jump_insn (gen_jump (label4));
3803 emit_barrier ();
3804 emit_label (label2);
3805 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
3806 compute_mode, label3);
3807 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3808 quotient, 0, OPTAB_LIB_WIDEN);
3809 if (tem != quotient)
3810 emit_move_insn (quotient, tem);
3811 emit_jump_insn (gen_jump (label5));
3812 emit_barrier ();
3813 emit_label (label3);
3814 expand_inc (adjusted_op0, const1_rtx);
3815 emit_label (label4);
3816 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3817 quotient, 0, OPTAB_LIB_WIDEN);
3818 if (tem != quotient)
3819 emit_move_insn (quotient, tem);
3820 expand_inc (quotient, const1_rtx);
3821 emit_label (label5);
3822 }
3823 }
3824 break;
3825
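/* Editor's sketch (hypothetical helper name): the unsigned power-of-two
   path above as one line of C.  Shift for the truncated quotient, then
   add the emit_store_flag result, which is 1 exactly when some low bits
   were shifted out.  The signed power-of-two variant has the same shape.  */

static unsigned int
ceil_div_pow2 (unsigned int x, unsigned int k)    /* divisor d = 1u << k */
{
  return (x >> k) + ((x & ((1u << k) - 1)) != 0);
}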
3826 case EXACT_DIV_EXPR:
3827 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
3828 {
3829 HOST_WIDE_INT d = INTVAL (op1);
3830 unsigned HOST_WIDE_INT ml;
3831 int pre_shift;
3832 rtx t1;
3833
3834 pre_shift = floor_log2 (d & -d);
3835 ml = invert_mod2n (d >> pre_shift, size);
3836 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3837 build_int_2 (pre_shift, 0), NULL_RTX, unsignedp);
3838 quotient = expand_mult (compute_mode, t1, GEN_INT (ml), NULL_RTX,
3839 0);
3840
3841 insn = get_last_insn ();
3842 set_unique_reg_note (insn,
3843 REG_EQUAL,
3844 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
3845 compute_mode,
3846 op0, op1));
3847 }
3848 break;
3849
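/* Editor's sketch (hypothetical helper name): EXACT_DIV_EXPR promises a
   zero remainder, so after shifting out the divisor's trailing zero bits
   a single multiply by the inverse of its odd part modulo 2^N recovers
   the quotient.  For d = 3 on 32 bits, 3 * 0xAAAAAAAB == 1 (mod 2^32).  */

#include <stdint.h>

static uint32_t
exact_div3 (uint32_t x)
{
  return x * 0xAAAAAAABu;         /* correct only when x % 3 == 0.  */
}

/* e.g. exact_div3 (9) == 3 and exact_div3 (3000000000u) == 1000000000u.  */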
3850 case ROUND_DIV_EXPR:
3851 case ROUND_MOD_EXPR:
3852 if (unsignedp)
3853 {
3854 rtx tem;
3855 rtx label;
3856 label = gen_label_rtx ();
3857 quotient = gen_reg_rtx (compute_mode);
3858 remainder = gen_reg_rtx (compute_mode);
3859 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
3860 {
3861 rtx tem;
3862 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
3863 quotient, 1, OPTAB_LIB_WIDEN);
3864 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
3865 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
3866 remainder, 1, OPTAB_LIB_WIDEN);
3867 }
3868 tem = plus_constant (op1, -1);
3869 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem,
3870 build_int_2 (1, 0), NULL_RTX, 1);
3871 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
3872 expand_inc (quotient, const1_rtx);
3873 expand_dec (remainder, op1);
3874 emit_label (label);
3875 }
3876 else
3877 {
3878 rtx abs_rem, abs_op1, tem, mask;
3879 rtx label;
3880 label = gen_label_rtx ();
3881 quotient = gen_reg_rtx (compute_mode);
3882 remainder = gen_reg_rtx (compute_mode);
3883 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
3884 {
3885 rtx tem;
3886 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
3887 quotient, 0, OPTAB_LIB_WIDEN);
3888 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
3889 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
3890 remainder, 0, OPTAB_LIB_WIDEN);
3891 }
3892 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
3893 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
3894 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
3895 build_int_2 (1, 0), NULL_RTX, 1);
3896 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
3897 tem = expand_binop (compute_mode, xor_optab, op0, op1,
3898 NULL_RTX, 0, OPTAB_WIDEN);
3899 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
3900 build_int_2 (size - 1, 0), NULL_RTX, 0);
3901 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
3902 NULL_RTX, 0, OPTAB_WIDEN);
3903 tem = expand_binop (compute_mode, sub_optab, tem, mask,
3904 NULL_RTX, 0, OPTAB_WIDEN);
3905 expand_inc (quotient, tem);
3906 tem = expand_binop (compute_mode, xor_optab, mask, op1,
3907 NULL_RTX, 0, OPTAB_WIDEN);
3908 tem = expand_binop (compute_mode, sub_optab, tem, mask,
3909 NULL_RTX, 0, OPTAB_WIDEN);
3910 expand_dec (remainder, tem);
3911 emit_label (label);
3912 }
3913 return gen_lowpart (mode, rem_flag ? remainder : quotient);
3914
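/* Editor's sketch (hypothetical helper name): the signed round-to-nearest
   adjustment above, in plain C.  After a truncating divmod, adjust when
   twice the remainder's magnitude reaches the divisor's; the mask trick
   (op0 ^ op1) >> 31 then supplies +/-1 and op1-with-the-quotient's-sign
   without branches.  Ties round away from zero.  */

#include <stdint.h>

static void
round_divmod (int32_t x, int32_t y, int32_t *q, int32_t *r)
{
  uint32_t abs_rem, abs_y;

  *q = x / y;
  *r = x % y;
  abs_rem = *r < 0 ? - (uint32_t) *r : (uint32_t) *r;
  abs_y = y < 0 ? - (uint32_t) y : (uint32_t) y;
  if (2 * abs_rem >= abs_y)
    {
      int32_t mask = (x ^ y) >> 31;   /* 0 or -1: the quotient's sign.  */
      *q += (1 ^ mask) - mask;        /* +1 if signs agree, else -1.  */
      *r -= (y ^ mask) - mask;        /* op1 carrying the quotient's sign. */
    }
}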
3915 default:
3916 abort ();
3917 }
3918
3919 if (quotient == 0)
3920 {
3921 if (target && GET_MODE (target) != compute_mode)
3922 target = 0;
3923
3924 if (rem_flag)
3925 {
3926 /* Try to produce the remainder without producing the quotient.
3927 If we seem to have a divmod pattern that does not require widening,
3928 don't try widening here. We should really have a WIDEN argument
3929 to expand_twoval_binop, since what we'd really like to do here is
3930 1) try a mod insn in compute_mode
3931 2) try a divmod insn in compute_mode
3932 3) try a div insn in compute_mode and multiply-subtract to get
3933 remainder
3934 4) try the same things with widening allowed. */
3935 remainder
3936 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
3937 op0, op1, target,
3938 unsignedp,
3939 ((optab2->handlers[(int) compute_mode].insn_code
3940 != CODE_FOR_nothing)
3941 ? OPTAB_DIRECT : OPTAB_WIDEN));
3942 if (remainder == 0)
3943 {
3944 /* No luck there. Can we do remainder and divide at once
3945 without a library call? */
3946 remainder = gen_reg_rtx (compute_mode);
3947 if (! expand_twoval_binop ((unsignedp
3948 ? udivmod_optab
3949 : sdivmod_optab),
3950 op0, op1,
3951 NULL_RTX, remainder, unsignedp))
3952 remainder = 0;
3953 }
3954
3955 if (remainder)
3956 return gen_lowpart (mode, remainder);
3957 }
3958
3959 /* Produce the quotient. Try a quotient insn, but not a library call.
3960 If we have a divmod in this mode, use it in preference to widening
3961 the div (for this test we assume it will not fail). Note that optab2
3962 is set to the one of the two optabs that the call below will use. */
3963 quotient
3964 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
3965 op0, op1, rem_flag ? NULL_RTX : target,
3966 unsignedp,
3967 ((optab2->handlers[(int) compute_mode].insn_code
3968 != CODE_FOR_nothing)
3969 ? OPTAB_DIRECT : OPTAB_WIDEN));
3970
3971 if (quotient == 0)
3972 {
3973 /* No luck there. Try a quotient-and-remainder insn,
3974 keeping the quotient alone. */
3975 quotient = gen_reg_rtx (compute_mode);
3976 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
3977 op0, op1,
3978 quotient, NULL_RTX, unsignedp))
3979 {
3980 quotient = 0;
3981 if (! rem_flag)
3982 /* Still no luck. If we are not computing the remainder,
3983 use a library call for the quotient. */
3984 quotient = sign_expand_binop (compute_mode,
3985 udiv_optab, sdiv_optab,
3986 op0, op1, target,
3987 unsignedp, OPTAB_LIB_WIDEN);
3988 }
3989 }
3990 }
3991
3992 if (rem_flag)
3993 {
3994 if (target && GET_MODE (target) != compute_mode)
3995 target = 0;
3996
3997 if (quotient == 0)
3998 /* No divide instruction either. Use library for remainder. */
3999 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4000 op0, op1, target,
4001 unsignedp, OPTAB_LIB_WIDEN);
4002 else
4003 {
4004 /* We divided. Now finish doing X - Y * (X / Y). */
4005 remainder = expand_mult (compute_mode, quotient, op1,
4006 NULL_RTX, unsignedp);
4007 remainder = expand_binop (compute_mode, sub_optab, op0,
4008 remainder, target, unsignedp,
4009 OPTAB_LIB_WIDEN);
4010 }
4011 }
4012
4013 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4014 }
4015 \f
4016 /* Return a tree node with data type TYPE, describing the value of X.
4017 Usually this is an RTL_EXPR, if there is no obvious better choice.
4018 X may be an expression; however, we only support those expressions
4019 generated by loop.c. */
4020
4021 tree
4022 make_tree (type, x)
4023 tree type;
4024 rtx x;
4025 {
4026 tree t;
4027
4028 switch (GET_CODE (x))
4029 {
4030 case CONST_INT:
4031 t = build_int_2 (INTVAL (x),
4032 (TREE_UNSIGNED (type)
4033 && (GET_MODE_BITSIZE (TYPE_MODE (type)) < HOST_BITS_PER_WIDE_INT))
4034 || INTVAL (x) >= 0 ? 0 : -1);
4035 TREE_TYPE (t) = type;
4036 return t;
4037
4038 case CONST_DOUBLE:
4039 if (GET_MODE (x) == VOIDmode)
4040 {
4041 t = build_int_2 (CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4042 TREE_TYPE (t) = type;
4043 }
4044 else
4045 {
4046 REAL_VALUE_TYPE d;
4047
4048 REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4049 t = build_real (type, d);
4050 }
4051
4052 return t;
4053
4054 case PLUS:
4055 return fold (build (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4056 make_tree (type, XEXP (x, 1))));
4057
4058 case MINUS:
4059 return fold (build (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4060 make_tree (type, XEXP (x, 1))));
4061
4062 case NEG:
4063 return fold (build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0))));
4064
4065 case MULT:
4066 return fold (build (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
4067 make_tree (type, XEXP (x, 1))));
4068
4069 case ASHIFT:
4070 return fold (build (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
4071 make_tree (type, XEXP (x, 1))));
4072
4073 case LSHIFTRT:
4074 return fold (convert (type,
4075 build (RSHIFT_EXPR, unsigned_type (type),
4076 make_tree (unsigned_type (type),
4077 XEXP (x, 0)),
4078 make_tree (type, XEXP (x, 1)))));
4079
4080 case ASHIFTRT:
4081 return fold (convert (type,
4082 build (RSHIFT_EXPR, signed_type (type),
4083 make_tree (signed_type (type), XEXP (x, 0)),
4084 make_tree (type, XEXP (x, 1)))));
4085
4086 case DIV:
4087 if (TREE_CODE (type) != REAL_TYPE)
4088 t = signed_type (type);
4089 else
4090 t = type;
4091
4092 return fold (convert (type,
4093 build (TRUNC_DIV_EXPR, t,
4094 make_tree (t, XEXP (x, 0)),
4095 make_tree (t, XEXP (x, 1)))));
4096 case UDIV:
4097 t = unsigned_type (type);
4098 return fold (convert (type,
4099 build (TRUNC_DIV_EXPR, t,
4100 make_tree (t, XEXP (x, 0)),
4101 make_tree (t, XEXP (x, 1)))));
4102 default:
4103 t = make_node (RTL_EXPR);
4104 TREE_TYPE (t) = type;
4105
4106 #ifdef POINTERS_EXTEND_UNSIGNED
4107 /* If TYPE is a POINTER_TYPE, X might be Pmode with TYPE_MODE being
4108 ptr_mode. So convert. */
4109 if (POINTER_TYPE_P (type) && GET_MODE (x) != TYPE_MODE (type))
4110 x = convert_memory_address (TYPE_MODE (type), x);
4111 #endif
4112
4113 RTL_EXPR_RTL (t) = x;
4114 /* There are no insns to be output
4115 when this rtl_expr is used. */
4116 RTL_EXPR_SEQUENCE (t) = 0;
4117 return t;
4118 }
4119 }
4120
4121 /* Return an rtx representing the value of X * MULT + ADD.
4122 TARGET is a suggestion for where to store the result (an rtx).
4123 MODE is the machine mode for the computation.
4124 MULT must have mode MODE. ADD may have a different mode;
4125 so may X (each defaults to MODE).
4126 UNSIGNEDP is non-zero to do unsigned multiplication.
4127 This may emit insns. */
4128
4129 rtx
4130 expand_mult_add (x, target, mult, add, mode, unsignedp)
4131 rtx x, target, mult, add;
4132 enum machine_mode mode;
4133 int unsignedp;
4134 {
4135 tree type = type_for_mode (mode, unsignedp);
4136 tree add_type = (GET_MODE (add) == VOIDmode
4137 ? type : type_for_mode (GET_MODE (add), unsignedp));
4138 tree result = fold (build (PLUS_EXPR, type,
4139 fold (build (MULT_EXPR, type,
4140 make_tree (type, x),
4141 make_tree (type, mult))),
4142 make_tree (add_type, add)));
4143
4144 return expand_expr (result, target, VOIDmode, 0);
4145 }
4146 \f
4147 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
4148 and returning TARGET.
4149
4150 If TARGET is 0, a pseudo-register or constant is returned. */
4151
4152 rtx
4153 expand_and (op0, op1, target)
4154 rtx op0, op1, target;
4155 {
4156 enum machine_mode mode = VOIDmode;
4157 rtx tem;
4158
4159 if (GET_MODE (op0) != VOIDmode)
4160 mode = GET_MODE (op0);
4161 else if (GET_MODE (op1) != VOIDmode)
4162 mode = GET_MODE (op1);
4163
4164 if (mode != VOIDmode)
4165 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
4166 else if (GET_CODE (op0) == CONST_INT && GET_CODE (op1) == CONST_INT)
4167 tem = GEN_INT (INTVAL (op0) & INTVAL (op1));
4168 else
4169 abort ();
4170
4171 if (target == 0)
4172 target = tem;
4173 else if (tem != target)
4174 emit_move_insn (target, tem);
4175 return target;
4176 }
4177 \f
4178 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
4179 and storing in TARGET. Normally return TARGET.
4180 Return 0 if that cannot be done.
4181
4182 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
4183 it is VOIDmode, they cannot both be CONST_INT.
4184
4185 UNSIGNEDP is for the case where we have to widen the operands
4186 to perform the operation. It says to use zero-extension.
4187
4188 NORMALIZEP is 1 if we should convert the result to be either zero
4189 or one. NORMALIZEP is -1 if we should convert the result to be
4190 either zero or -1. If NORMALIZEP is zero, the result will be left
4191 "raw" out of the scc insn. */
4192
4193 rtx
4194 emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep)
4195 rtx target;
4196 enum rtx_code code;
4197 rtx op0, op1;
4198 enum machine_mode mode;
4199 int unsignedp;
4200 int normalizep;
4201 {
4202 rtx subtarget;
4203 enum insn_code icode;
4204 enum machine_mode compare_mode;
4205 enum machine_mode target_mode = GET_MODE (target);
4206 rtx tem;
4207 rtx last = get_last_insn ();
4208 rtx pattern, comparison;
4209
4210 if (unsignedp)
4211 code = unsigned_condition (code);
4212
4213 /* If one operand is constant, make it the second one. Only do this
4214 if the other operand is not constant as well. */
4215
4216 if ((CONSTANT_P (op0) && ! CONSTANT_P (op1))
4217 || (GET_CODE (op0) == CONST_INT && GET_CODE (op1) != CONST_INT))
4218 {
4219 tem = op0;
4220 op0 = op1;
4221 op1 = tem;
4222 code = swap_condition (code);
4223 }
4224
4225 if (mode == VOIDmode)
4226 mode = GET_MODE (op0);
4227
4228 /* For some comparisons with 1 and -1, we can convert this to
4229 comparisons with zero. This will often produce more opportunities for
4230 store-flag insns. */
4231
4232 switch (code)
4233 {
4234 case LT:
4235 if (op1 == const1_rtx)
4236 op1 = const0_rtx, code = LE;
4237 break;
4238 case LE:
4239 if (op1 == constm1_rtx)
4240 op1 = const0_rtx, code = LT;
4241 break;
4242 case GE:
4243 if (op1 == const1_rtx)
4244 op1 = const0_rtx, code = GT;
4245 break;
4246 case GT:
4247 if (op1 == constm1_rtx)
4248 op1 = const0_rtx, code = GE;
4249 break;
4250 case GEU:
4251 if (op1 == const1_rtx)
4252 op1 = const0_rtx, code = NE;
4253 break;
4254 case LTU:
4255 if (op1 == const1_rtx)
4256 op1 = const0_rtx, code = EQ;
4257 break;
4258 default:
4259 break;
4260 }
4261
4262 /* If we are comparing a double-word integer with zero, we can convert
4263 the comparison into one involving a single word. */
4264 if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
4265 && GET_MODE_CLASS (mode) == MODE_INT
4266 && op1 == const0_rtx)
4267 {
4268 if (code == EQ || code == NE)
4269 {
4270 /* Do a logical OR of the two words and compare the result. */
4271 rtx op0h = gen_highpart (word_mode, op0);
4272 rtx op0l = gen_lowpart (word_mode, op0);
4273 rtx op0both = expand_binop (word_mode, ior_optab, op0h, op0l,
4274 NULL_RTX, unsignedp, OPTAB_DIRECT);
4275 if (op0both != 0)
4276 return emit_store_flag (target, code, op0both, op1, word_mode,
4277 unsignedp, normalizep);
4278 }
4279 else if (code == LT || code == GE)
4280 /* If testing the sign bit, can just test on high word. */
4281 return emit_store_flag (target, code, gen_highpart (word_mode, op0),
4282 op1, word_mode, unsignedp, normalizep);
4283 }
4284
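/* Editor's sketch (hypothetical helper names): the double-word reduction
   above for a 64-bit value held in two 32-bit words.  Equality against
   zero needs only the OR of the halves; LT/GE needs only the sign of the
   high word.  */

#include <stdint.h>

static int dw_eq0 (uint32_t lo, uint32_t hi) { return (hi | lo) == 0; }
static int dw_lt0 (uint32_t lo, uint32_t hi) { (void) lo; return (int32_t) hi < 0; }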
4285 /* From now on, we won't change CODE, so set ICODE now. */
4286 icode = setcc_gen_code[(int) code];
4287
4288 /* If this is A < 0 or A >= 0, we can do this by taking the ones
4289 complement of A (for GE) and shifting the sign bit to the low bit. */
4290 if (op1 == const0_rtx && (code == LT || code == GE)
4291 && GET_MODE_CLASS (mode) == MODE_INT
4292 && (normalizep || STORE_FLAG_VALUE == 1
4293 || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
4294 && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
4295 == (HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))))
4296 {
4297 subtarget = target;
4298
4299 /* If the result is to be wider than OP0, it is best to convert it
4300 first. If it is to be narrower, it is *incorrect* to convert it
4301 first. */
4302 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
4303 {
4304 op0 = protect_from_queue (op0, 0);
4305 op0 = convert_modes (target_mode, mode, op0, 0);
4306 mode = target_mode;
4307 }
4308
4309 if (target_mode != mode)
4310 subtarget = 0;
4311
4312 if (code == GE)
4313 op0 = expand_unop (mode, one_cmpl_optab, op0,
4314 ((STORE_FLAG_VALUE == 1 || normalizep)
4315 ? 0 : subtarget), 0);
4316
4317 if (STORE_FLAG_VALUE == 1 || normalizep)
4318 /* If we are supposed to produce a 0/1 value, we want to do
4319 a logical shift from the sign bit to the low-order bit; for
4320 a -1/0 value, we do an arithmetic shift. */
4321 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
4322 size_int (GET_MODE_BITSIZE (mode) - 1),
4323 subtarget, normalizep != -1);
4324
4325 if (mode != target_mode)
4326 op0 = convert_modes (target_mode, mode, op0, 0);
4327
4328 return op0;
4329 }
4330
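/* Editor's sketch (hypothetical helper names): the sign-bit path above.
   A < 0 as a 0/1 value is just the sign bit shifted down logically;
   A >= 0 takes the ones' complement first.  An arithmetic shift would
   give the 0/-1 forms for normalizep == -1 instead.  */

#include <stdint.h>

static int lt0 (int32_t a) { return (uint32_t) a >> 31; }
static int ge0 (int32_t a) { return (uint32_t) ~a >> 31; }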
4331 if (icode != CODE_FOR_nothing)
4332 {
4333 insn_operand_predicate_fn pred;
4334
4335 /* We think we may be able to do this with a scc insn. Emit the
4336 comparison and then the scc insn.
4337
4338 compare_from_rtx may call emit_queue, which would be deleted below
4339 if the scc insn fails. So call it ourselves before setting LAST.
4340 Likewise for do_pending_stack_adjust. */
4341
4342 emit_queue ();
4343 do_pending_stack_adjust ();
4344 last = get_last_insn ();
4345
4346 comparison
4347 = compare_from_rtx (op0, op1, code, unsignedp, mode, NULL_RTX, 0);
4348 if (GET_CODE (comparison) == CONST_INT)
4349 return (comparison == const0_rtx ? const0_rtx
4350 : normalizep == 1 ? const1_rtx
4351 : normalizep == -1 ? constm1_rtx
4352 : const_true_rtx);
4353
4354 /* If the code of COMPARISON doesn't match CODE, something is
4355 wrong; we can no longer be sure that we have the operation.
4356 We could handle this case, but it should not happen. */
4357
4358 if (GET_CODE (comparison) != code)
4359 abort ();
4360
4361 /* Get a reference to the target in the proper mode for this insn. */
4362 compare_mode = insn_data[(int) icode].operand[0].mode;
4363 subtarget = target;
4364 pred = insn_data[(int) icode].operand[0].predicate;
4365 if (preserve_subexpressions_p ()
4366 || ! (*pred) (subtarget, compare_mode))
4367 subtarget = gen_reg_rtx (compare_mode);
4368
4369 pattern = GEN_FCN (icode) (subtarget);
4370 if (pattern)
4371 {
4372 emit_insn (pattern);
4373
4374 /* If we are converting to a wider mode, first convert to
4375 TARGET_MODE, then normalize. This produces better combining
4376 opportunities on machines that have a SIGN_EXTRACT when we are
4377 testing a single bit. This mostly benefits the 68k.
4378
4379 If STORE_FLAG_VALUE does not have the sign bit set when
4380 interpreted in COMPARE_MODE, we can do this conversion as
4381 unsigned, which is usually more efficient. */
4382 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (compare_mode))
4383 {
4384 convert_move (target, subtarget,
4385 (GET_MODE_BITSIZE (compare_mode)
4386 <= HOST_BITS_PER_WIDE_INT)
4387 && 0 == (STORE_FLAG_VALUE
4388 & ((HOST_WIDE_INT) 1
4389 << (GET_MODE_BITSIZE (compare_mode) -1))));
4390 op0 = target;
4391 compare_mode = target_mode;
4392 }
4393 else
4394 op0 = subtarget;
4395
4396 /* If we want to keep subexpressions around, don't reuse our
4397 last target. */
4398
4399 if (preserve_subexpressions_p ())
4400 subtarget = 0;
4401
4402 /* Now normalize to the proper value in COMPARE_MODE. Sometimes
4403 we don't have to do anything. */
4404 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
4405 ;
4406 /* STORE_FLAG_VALUE might be the most negative number, so write
4407 the comparison this way to avoid a compile-time warning. */
4408 else if (- normalizep == STORE_FLAG_VALUE)
4409 op0 = expand_unop (compare_mode, neg_optab, op0, subtarget, 0);
4410
4411 /* We don't want to use STORE_FLAG_VALUE < 0 below since this
4412 makes it hard to use a value of just the sign bit due to
4413 ANSI integer constant typing rules. */
4414 else if (GET_MODE_BITSIZE (compare_mode) <= HOST_BITS_PER_WIDE_INT
4415 && (STORE_FLAG_VALUE
4416 & ((HOST_WIDE_INT) 1
4417 << (GET_MODE_BITSIZE (compare_mode) - 1))))
4418 op0 = expand_shift (RSHIFT_EXPR, compare_mode, op0,
4419 size_int (GET_MODE_BITSIZE (compare_mode) - 1),
4420 subtarget, normalizep == 1);
4421 else if (STORE_FLAG_VALUE & 1)
4422 {
4423 op0 = expand_and (op0, const1_rtx, subtarget);
4424 if (normalizep == -1)
4425 op0 = expand_unop (compare_mode, neg_optab, op0, op0, 0);
4426 }
4427 else
4428 abort ();
4429
4430 /* If we were converting to a smaller mode, do the
4431 conversion now. */
4432 if (target_mode != compare_mode)
4433 {
4434 convert_move (target, op0, 0);
4435 return target;
4436 }
4437 else
4438 return op0;
4439 }
4440 }
4441
4442 delete_insns_since (last);
4443
4444 /* If expensive optimizations, use different pseudo registers for each
4445 insn, instead of reusing the same pseudo. This leads to better CSE,
4446 but slows down the compiler, since there are more pseudos. */
4447 subtarget = (!flag_expensive_optimizations
4448 && (target_mode == mode)) ? target : NULL_RTX;
4449
4450 /* If we reached here, we can't do this with a scc insn. However, there
4451 are some comparisons that can be done directly. For example, if
4452 this is an equality comparison of integers, we can try to exclusive-or
4453 (or subtract) the two operands and use a recursive call to try the
4454 comparison with zero. Don't do any of these cases if branches are
4455 very cheap. */
4456
4457 if (BRANCH_COST > 0
4458 && GET_MODE_CLASS (mode) == MODE_INT && (code == EQ || code == NE)
4459 && op1 != const0_rtx)
4460 {
4461 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
4462 OPTAB_WIDEN);
4463
4464 if (tem == 0)
4465 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
4466 OPTAB_WIDEN);
4467 if (tem != 0)
4468 tem = emit_store_flag (target, code, tem, const0_rtx,
4469 mode, unsignedp, normalizep);
4470 if (tem == 0)
4471 delete_insns_since (last);
4472 return tem;
4473 }
4474
4475 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
4476 the constant zero. Reject all other comparisons at this point. Only
4477 do LE and GT if branches are expensive since they are expensive on
4478 2-operand machines. */
4479
4480 if (BRANCH_COST == 0
4481 || GET_MODE_CLASS (mode) != MODE_INT || op1 != const0_rtx
4482 || (code != EQ && code != NE
4483 && (BRANCH_COST <= 1 || (code != LE && code != GT))))
4484 return 0;
4485
4486 /* See what we need to return. We can only return a 1, -1, or the
4487 sign bit. */
4488
4489 if (normalizep == 0)
4490 {
4491 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
4492 normalizep = STORE_FLAG_VALUE;
4493
4494 else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
4495 && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
4496 == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
4497 ;
4498 else
4499 return 0;
4500 }
4501
4502 /* Try to put the result of the comparison in the sign bit. Assume we can't
4503 do the necessary operation below. */
4504
4505 tem = 0;
4506
4507 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
4508 the sign bit set. */
4509
4510 if (code == LE)
4511 {
4512 /* This is destructive, so SUBTARGET can't be OP0. */
4513 if (rtx_equal_p (subtarget, op0))
4514 subtarget = 0;
4515
4516 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
4517 OPTAB_WIDEN);
4518 if (tem)
4519 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
4520 OPTAB_WIDEN);
4521 }
4522
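/* Editor's sketch (hypothetical helper name): the LE trick above.
   A | (A - 1) has the sign bit set exactly when A <= 0: for A == 0 the
   subtraction wraps to -1, and for A < 0 the sign bit is already set.
   Assumes wrapping arithmetic, as the RTL has.  */

#include <stdint.h>

static int le0 (int32_t a) { return (int32_t) (a | (a - 1)) < 0; }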
4523 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
4524 number of bits in the mode of OP0, minus one. */
4525
4526 if (code == GT)
4527 {
4528 if (rtx_equal_p (subtarget, op0))
4529 subtarget = 0;
4530
4531 tem = expand_shift (RSHIFT_EXPR, mode, op0,
4532 size_int (GET_MODE_BITSIZE (mode) - 1),
4533 subtarget, 0);
4534 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
4535 OPTAB_WIDEN);
4536 }
4537
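/* Editor's sketch (hypothetical helper name): the GT trick above.
   (A >> 31) - A is negative exactly when A > 0: the shift yields 0 or
   -1, so the result is -A when A >= 0, and -1 - A (somewhere in
   [0, INT_MAX]) when A < 0.  */

#include <stdint.h>

static int gt0 (int32_t a) { return (int32_t) ((a >> 31) - a) < 0; }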
4538 if (code == EQ || code == NE)
4539 {
4540 /* For EQ or NE, one way to do the comparison is to apply an operation
4541 that converts the operand into a positive number if it is non-zero
4542 or zero if it was originally zero. Then, for EQ, we subtract 1 and
4543 for NE we negate. This puts the result in the sign bit. Then we
4544 normalize with a shift, if needed.
4545
4546 Two operations that can do the above actions are ABS and FFS, so try
4547 them. If that doesn't work, and MODE is smaller than a full word,
4548 we can use zero-extension to the wider mode (an unsigned conversion)
4549 as the operation. */
4550
4551 /* Note that ABS doesn't yield a positive number for INT_MIN, but
4552 that is compensated by the subsequent overflow when subtracting
4553 one / negating. */
4554
4555 if (abs_optab->handlers[(int) mode].insn_code != CODE_FOR_nothing)
4556 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
4557 else if (ffs_optab->handlers[(int) mode].insn_code != CODE_FOR_nothing)
4558 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
4559 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
4560 {
4561 op0 = protect_from_queue (op0, 0);
4562 tem = convert_modes (word_mode, mode, op0, 1);
4563 mode = word_mode;
4564 }
4565
4566 if (tem != 0)
4567 {
4568 if (code == EQ)
4569 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
4570 0, OPTAB_WIDEN);
4571 else
4572 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
4573 }
4574
4575 /* If we couldn't do it that way, for NE we can "or" the two's complement
4576 of the value with itself. For EQ, we take the one's complement of
4577 that "or", which is an extra insn, so we only handle EQ if branches
4578 are expensive. */
4579
4580 if (tem == 0 && (code == NE || BRANCH_COST > 1))
4581 {
4582 if (rtx_equal_p (subtarget, op0))
4583 subtarget = 0;
4584
4585 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
4586 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
4587 OPTAB_WIDEN);
4588
4589 if (tem && code == EQ)
4590 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
4591 }
4592 }
4593
4594 if (tem && normalizep)
4595 tem = expand_shift (RSHIFT_EXPR, mode, tem,
4596 size_int (GET_MODE_BITSIZE (mode) - 1),
4597 subtarget, normalizep == 1);
4598
4599 if (tem)
4600 {
4601 if (GET_MODE (tem) != target_mode)
4602 {
4603 convert_move (target, tem, 0);
4604 tem = target;
4605 }
4606 else if (!subtarget)
4607 {
4608 emit_move_insn (target, tem);
4609 tem = target;
4610 }
4611 }
4612 else
4613 delete_insns_since (last);
4614
4615 return tem;
4616 }
4617
4618 /* Like emit_store_flag, but always succeeds. */
4619
4620 rtx
4621 emit_store_flag_force (target, code, op0, op1, mode, unsignedp, normalizep)
4622 rtx target;
4623 enum rtx_code code;
4624 rtx op0, op1;
4625 enum machine_mode mode;
4626 int unsignedp;
4627 int normalizep;
4628 {
4629 rtx tem, label;
4630
4631 /* First see if emit_store_flag can do the job. */
4632 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
4633 if (tem != 0)
4634 return tem;
4635
4636 if (normalizep == 0)
4637 normalizep = 1;
4638
4639 /* If this failed, we have to do this with set/compare/jump/set code. */
4640
4641 if (GET_CODE (target) != REG
4642 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
4643 target = gen_reg_rtx (GET_MODE (target));
4644
4645 emit_move_insn (target, const1_rtx);
4646 label = gen_label_rtx ();
4647 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, 0,
4648 NULL_RTX, label);
4649
4650 emit_move_insn (target, const0_rtx);
4651 emit_label (label);
4652
4653 return target;
4654 }
4655 \f
4656 /* Perform possibly multi-word comparison and conditional jump to LABEL
4657 if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE
4658
4659 The algorithm is based on the code in expr.c:do_jump.
4660
4661 Note that this does not perform a general comparison. Only variants
4662 generated within expmed.c are correctly handled, others abort (but could
4663 be handled if needed). */
4664
4665 static void
4666 do_cmp_and_jump (arg1, arg2, op, mode, label)
4667 rtx arg1, arg2, label;
4668 enum rtx_code op;
4669 enum machine_mode mode;
4670 {
4671 /* If this mode is an integer too wide to compare properly,
4672 compare word by word. Rely on cse to optimize constant cases. */
4673
4674 if (GET_MODE_CLASS (mode) == MODE_INT
4675 && ! can_compare_p (op, mode, ccp_jump))
4676 {
4677 rtx label2 = gen_label_rtx ();
4678
4679 switch (op)
4680 {
4681 case LTU:
4682 do_jump_by_parts_greater_rtx (mode, 1, arg2, arg1, label2, label);
4683 break;
4684
4685 case LEU:
4686 do_jump_by_parts_greater_rtx (mode, 1, arg1, arg2, label, label2);
4687 break;
4688
4689 case LT:
4690 do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label2, label);
4691 break;
4692
4693 case GT:
4694 do_jump_by_parts_greater_rtx (mode, 0, arg1, arg2, label2, label);
4695 break;
4696
4697 case GE:
4698 do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label, label2);
4699 break;
4700
4701 /* do_jump_by_parts_equality_rtx compares with zero. Luckily
4702 those are the only equality operations we do. */
4703 case EQ:
4704 if (arg2 != const0_rtx || mode != GET_MODE (arg1))
4705 abort ();
4706 do_jump_by_parts_equality_rtx (arg1, label2, label);
4707 break;
4708
4709 case NE:
4710 if (arg2 != const0_rtx || mode != GET_MODE (arg1))
4711 abort ();
4712 do_jump_by_parts_equality_rtx (arg1, label, label2);
4713 break;
4714
4715 default:
4716 abort ();
4717 }
4718
4719 emit_label (label2);
4720 }
4721 else
4722 {
4723 emit_cmp_and_jump_insns (arg1, arg2, op, NULL_RTX, mode, 0, 0, label);
4724 }
4725 }