gcc.gnu.org Git - gcc.git/blob - gcc/expmed.c
blob 4101f613f82ea29bfc919f09d2831974da1fbb91
1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
4 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
5 2011, 2012
6 Free Software Foundation, Inc.
7
8 This file is part of GCC.
9
10 GCC is free software; you can redistribute it and/or modify it under
11 the terms of the GNU General Public License as published by the Free
12 Software Foundation; either version 3, or (at your option) any later
13 version.
14
15 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING3. If not see
22 <http://www.gnu.org/licenses/>. */
23
24
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "tm.h"
29 #include "diagnostic-core.h"
30 #include "rtl.h"
31 #include "tree.h"
32 #include "tm_p.h"
33 #include "flags.h"
34 #include "insn-config.h"
35 #include "expr.h"
36 #include "optabs.h"
37 #include "recog.h"
38 #include "langhooks.h"
39 #include "df.h"
40 #include "target.h"
41 #include "expmed.h"
42
43 struct target_expmed default_target_expmed;
44 #if SWITCHABLE_TARGET
45 struct target_expmed *this_target_expmed = &default_target_expmed;
46 #endif
47
48 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
49 unsigned HOST_WIDE_INT,
50 unsigned HOST_WIDE_INT,
51 unsigned HOST_WIDE_INT,
52 unsigned HOST_WIDE_INT,
53 rtx);
54 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
55 unsigned HOST_WIDE_INT,
56 unsigned HOST_WIDE_INT,
57 unsigned HOST_WIDE_INT,
58 rtx);
59 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
60 unsigned HOST_WIDE_INT,
61 unsigned HOST_WIDE_INT,
62 unsigned HOST_WIDE_INT, rtx, int, bool);
63 static rtx mask_rtx (enum machine_mode, int, int, int);
64 static rtx lshift_value (enum machine_mode, rtx, int, int);
65 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
66 unsigned HOST_WIDE_INT, int);
67 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
68 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
69 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
70
 71 /* Test whether a value is zero or a power of two.  */
72 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
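/* For instance, (8 & 7) == 0 and (0 & -1) == 0, so 8 and 0 satisfy the
   test, while (6 & 5) == 4, so 6 does not.  */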
73
74 #ifndef SLOW_UNALIGNED_ACCESS
75 #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
76 #endif
77
78
79 /* Reduce conditional compilation elsewhere. */
80 #ifndef HAVE_insv
81 #define HAVE_insv 0
82 #define CODE_FOR_insv CODE_FOR_nothing
83 #define gen_insv(a,b,c,d) NULL_RTX
84 #endif
85 #ifndef HAVE_extv
86 #define HAVE_extv 0
87 #define CODE_FOR_extv CODE_FOR_nothing
88 #define gen_extv(a,b,c,d) NULL_RTX
89 #endif
90 #ifndef HAVE_extzv
91 #define HAVE_extzv 0
92 #define CODE_FOR_extzv CODE_FOR_nothing
93 #define gen_extzv(a,b,c,d) NULL_RTX
94 #endif
95
96 struct init_expmed_rtl
97 {
98 struct rtx_def reg; rtunion reg_fld[2];
99 struct rtx_def plus; rtunion plus_fld1;
100 struct rtx_def neg;
101 struct rtx_def mult; rtunion mult_fld1;
102 struct rtx_def sdiv; rtunion sdiv_fld1;
103 struct rtx_def udiv; rtunion udiv_fld1;
104 struct rtx_def zext;
105 struct rtx_def sdiv_32; rtunion sdiv_32_fld1;
106 struct rtx_def smod_32; rtunion smod_32_fld1;
107 struct rtx_def wide_mult; rtunion wide_mult_fld1;
108 struct rtx_def wide_lshr; rtunion wide_lshr_fld1;
109 struct rtx_def wide_trunc;
110 struct rtx_def shift; rtunion shift_fld1;
111 struct rtx_def shift_mult; rtunion shift_mult_fld1;
112 struct rtx_def shift_add; rtunion shift_add_fld1;
113 struct rtx_def shift_sub0; rtunion shift_sub0_fld1;
114 struct rtx_def shift_sub1; rtunion shift_sub1_fld1;
115
116 rtx pow2[MAX_BITS_PER_WORD];
117 rtx cint[MAX_BITS_PER_WORD];
118 };
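/* The rtx fields above are statically laid-out skeletons: init_expmed fills
   in their codes and operands once, and init_expmed_one_mode then only
   re-points their modes (and a few constant operands) before asking for rtx
   costs, so no fresh rtl has to be allocated for each query.  */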
119
120 static void
121 init_expmed_one_mode (struct init_expmed_rtl *all,
122 enum machine_mode mode, int speed)
123 {
124 int m, n, mode_bitsize;
125
126 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
127
128 PUT_MODE (&all->reg, mode);
129 PUT_MODE (&all->plus, mode);
130 PUT_MODE (&all->neg, mode);
131 PUT_MODE (&all->mult, mode);
132 PUT_MODE (&all->sdiv, mode);
133 PUT_MODE (&all->udiv, mode);
134 PUT_MODE (&all->sdiv_32, mode);
135 PUT_MODE (&all->smod_32, mode);
136 PUT_MODE (&all->wide_trunc, mode);
137 PUT_MODE (&all->shift, mode);
138 PUT_MODE (&all->shift_mult, mode);
139 PUT_MODE (&all->shift_add, mode);
140 PUT_MODE (&all->shift_sub0, mode);
141 PUT_MODE (&all->shift_sub1, mode);
142
143 add_cost[speed][mode] = set_src_cost (&all->plus, speed);
144 neg_cost[speed][mode] = set_src_cost (&all->neg, speed);
145 mul_cost[speed][mode] = set_src_cost (&all->mult, speed);
146 sdiv_cost[speed][mode] = set_src_cost (&all->sdiv, speed);
147 udiv_cost[speed][mode] = set_src_cost (&all->udiv, speed);
148
149 sdiv_pow2_cheap[speed][mode] = (set_src_cost (&all->sdiv_32, speed)
150 <= 2 * add_cost[speed][mode]);
151 smod_pow2_cheap[speed][mode] = (set_src_cost (&all->smod_32, speed)
152 <= 4 * add_cost[speed][mode]);
153
154 shift_cost[speed][mode][0] = 0;
155 shiftadd_cost[speed][mode][0] = shiftsub0_cost[speed][mode][0]
156 = shiftsub1_cost[speed][mode][0] = add_cost[speed][mode];
157
158 n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
159 for (m = 1; m < n; m++)
160 {
161 XEXP (&all->shift, 1) = all->cint[m];
162 XEXP (&all->shift_mult, 1) = all->pow2[m];
163
164 shift_cost[speed][mode][m] = set_src_cost (&all->shift, speed);
165 shiftadd_cost[speed][mode][m] = set_src_cost (&all->shift_add, speed);
166 shiftsub0_cost[speed][mode][m] = set_src_cost (&all->shift_sub0, speed);
167 shiftsub1_cost[speed][mode][m] = set_src_cost (&all->shift_sub1, speed);
168 }
169
170 if (SCALAR_INT_MODE_P (mode))
171 {
172 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
173
174 if (wider_mode != VOIDmode)
175 {
176 PUT_MODE (&all->zext, wider_mode);
177 PUT_MODE (&all->wide_mult, wider_mode);
178 PUT_MODE (&all->wide_lshr, wider_mode);
179 XEXP (&all->wide_lshr, 1) = GEN_INT (mode_bitsize);
180
181 mul_widen_cost[speed][wider_mode]
182 = set_src_cost (&all->wide_mult, speed);
183 mul_highpart_cost[speed][mode]
184 = set_src_cost (&all->wide_trunc, speed);
185 }
186 }
187 }
188
189 void
190 init_expmed (void)
191 {
192 struct init_expmed_rtl all;
193 enum machine_mode mode;
194 int m, speed;
195
196 memset (&all, 0, sizeof all);
197 for (m = 1; m < MAX_BITS_PER_WORD; m++)
198 {
199 all.pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
200 all.cint[m] = GEN_INT (m);
201 }
202
203 PUT_CODE (&all.reg, REG);
204 /* Avoid using hard regs in ways which may be unsupported. */
205 SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1);
206
207 PUT_CODE (&all.plus, PLUS);
208 XEXP (&all.plus, 0) = &all.reg;
209 XEXP (&all.plus, 1) = &all.reg;
210
211 PUT_CODE (&all.neg, NEG);
212 XEXP (&all.neg, 0) = &all.reg;
213
214 PUT_CODE (&all.mult, MULT);
215 XEXP (&all.mult, 0) = &all.reg;
216 XEXP (&all.mult, 1) = &all.reg;
217
218 PUT_CODE (&all.sdiv, DIV);
219 XEXP (&all.sdiv, 0) = &all.reg;
220 XEXP (&all.sdiv, 1) = &all.reg;
221
222 PUT_CODE (&all.udiv, UDIV);
223 XEXP (&all.udiv, 0) = &all.reg;
224 XEXP (&all.udiv, 1) = &all.reg;
225
226 PUT_CODE (&all.sdiv_32, DIV);
227 XEXP (&all.sdiv_32, 0) = &all.reg;
228 XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? all.cint[32] : GEN_INT (32);
229
230 PUT_CODE (&all.smod_32, MOD);
231 XEXP (&all.smod_32, 0) = &all.reg;
232 XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);
233
234 PUT_CODE (&all.zext, ZERO_EXTEND);
235 XEXP (&all.zext, 0) = &all.reg;
236
237 PUT_CODE (&all.wide_mult, MULT);
238 XEXP (&all.wide_mult, 0) = &all.zext;
239 XEXP (&all.wide_mult, 1) = &all.zext;
240
241 PUT_CODE (&all.wide_lshr, LSHIFTRT);
242 XEXP (&all.wide_lshr, 0) = &all.wide_mult;
243
244 PUT_CODE (&all.wide_trunc, TRUNCATE);
245 XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
246
247 PUT_CODE (&all.shift, ASHIFT);
248 XEXP (&all.shift, 0) = &all.reg;
249
250 PUT_CODE (&all.shift_mult, MULT);
251 XEXP (&all.shift_mult, 0) = &all.reg;
252
253 PUT_CODE (&all.shift_add, PLUS);
254 XEXP (&all.shift_add, 0) = &all.shift_mult;
255 XEXP (&all.shift_add, 1) = &all.reg;
256
257 PUT_CODE (&all.shift_sub0, MINUS);
258 XEXP (&all.shift_sub0, 0) = &all.shift_mult;
259 XEXP (&all.shift_sub0, 1) = &all.reg;
260
261 PUT_CODE (&all.shift_sub1, MINUS);
262 XEXP (&all.shift_sub1, 0) = &all.reg;
263 XEXP (&all.shift_sub1, 1) = &all.shift_mult;
264
265 for (speed = 0; speed < 2; speed++)
266 {
267 crtl->maybe_hot_insn_p = speed;
268 zero_cost[speed] = set_src_cost (const0_rtx, speed);
269
270 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
271 mode != VOIDmode;
272 mode = GET_MODE_WIDER_MODE (mode))
273 init_expmed_one_mode (&all, mode, speed);
274
275 for (mode = GET_CLASS_NARROWEST_MODE (MODE_VECTOR_INT);
276 mode != VOIDmode;
277 mode = GET_MODE_WIDER_MODE (mode))
278 init_expmed_one_mode (&all, mode, speed);
279 }
280
281 if (alg_hash_used_p)
282 memset (alg_hash, 0, sizeof (alg_hash));
283 else
284 alg_hash_used_p = true;
285 default_rtl_profile ();
286 }
287
288 /* Return an rtx representing minus the value of X.
289 MODE is the intended mode of the result,
290 useful if X is a CONST_INT. */
291
292 rtx
293 negate_rtx (enum machine_mode mode, rtx x)
294 {
295 rtx result = simplify_unary_operation (NEG, mode, x, mode);
296
297 if (result == 0)
298 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
299
300 return result;
301 }
302
303 /* Report on the availability of insv/extv/extzv and the desired mode
304 of each of their operands. Returns MAX_MACHINE_MODE if HAVE_foo
305 is false; else the mode of the specified operand. If OPNO is -1,
306 all the caller cares about is whether the insn is available. */
307 enum machine_mode
308 mode_for_extraction (enum extraction_pattern pattern, int opno)
309 {
310 const struct insn_data_d *data;
311
312 switch (pattern)
313 {
314 case EP_insv:
315 if (HAVE_insv)
316 {
317 data = &insn_data[CODE_FOR_insv];
318 break;
319 }
320 return MAX_MACHINE_MODE;
321
322 case EP_extv:
323 if (HAVE_extv)
324 {
325 data = &insn_data[CODE_FOR_extv];
326 break;
327 }
328 return MAX_MACHINE_MODE;
329
330 case EP_extzv:
331 if (HAVE_extzv)
332 {
333 data = &insn_data[CODE_FOR_extzv];
334 break;
335 }
336 return MAX_MACHINE_MODE;
337
338 default:
339 gcc_unreachable ();
340 }
341
342 if (opno == -1)
343 return VOIDmode;
344
345 /* Everyone who uses this function used to follow it with
346 if (result == VOIDmode) result = word_mode; */
347 if (data->operand[opno].mode == VOIDmode)
348 return word_mode;
349 return data->operand[opno].mode;
350 }
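/* For example, mode_for_extraction (EP_insv, 3) yields the mode the target's
   insv pattern wants for its value operand (operand 3), often word_mode, or
   MAX_MACHINE_MODE when the target provides no insv pattern at all.  */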
351 \f
352 /* A subroutine of store_bit_field, with the same arguments. Return true
353 if the operation could be implemented.
354
355 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
356 no other way of implementing the operation. If FALLBACK_P is false,
357 return false instead. */
358
359 static bool
360 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
361 unsigned HOST_WIDE_INT bitnum,
362 unsigned HOST_WIDE_INT bitregion_start,
363 unsigned HOST_WIDE_INT bitregion_end,
364 enum machine_mode fieldmode,
365 rtx value, bool fallback_p)
366 {
367 unsigned int unit
368 = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
369 unsigned HOST_WIDE_INT offset, bitpos;
370 rtx op0 = str_rtx;
371 int byte_offset;
372 rtx orig_value;
373
374 enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
375
376 while (GET_CODE (op0) == SUBREG)
377 {
378 /* The following line once was done only if WORDS_BIG_ENDIAN,
379 but I think that is a mistake. WORDS_BIG_ENDIAN is
380 meaningful at a much higher level; when structures are copied
381 between memory and regs, the higher-numbered regs
382 always get higher addresses. */
383 int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
384 int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
385
386 byte_offset = 0;
387
388 /* Paradoxical subregs need special handling on big endian machines. */
389 if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
390 {
391 int difference = inner_mode_size - outer_mode_size;
392
393 if (WORDS_BIG_ENDIAN)
394 byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
395 if (BYTES_BIG_ENDIAN)
396 byte_offset += difference % UNITS_PER_WORD;
397 }
398 else
399 byte_offset = SUBREG_BYTE (op0);
400
401 bitnum += byte_offset * BITS_PER_UNIT;
402 op0 = SUBREG_REG (op0);
403 }
404
405 /* No action is needed if the target is a register and if the field
406 lies completely outside that register. This can occur if the source
407 code contains an out-of-bounds access to a small array. */
408 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
409 return true;
410
411 /* Use vec_set patterns for inserting parts of vectors whenever
412 available. */
413 if (VECTOR_MODE_P (GET_MODE (op0))
414 && !MEM_P (op0)
415 && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
416 && fieldmode == GET_MODE_INNER (GET_MODE (op0))
417 && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
418 && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
419 {
420 struct expand_operand ops[3];
421 enum machine_mode outermode = GET_MODE (op0);
422 enum machine_mode innermode = GET_MODE_INNER (outermode);
423 enum insn_code icode = optab_handler (vec_set_optab, outermode);
424 int pos = bitnum / GET_MODE_BITSIZE (innermode);
425
426 create_fixed_operand (&ops[0], op0);
427 create_input_operand (&ops[1], value, innermode);
428 create_integer_operand (&ops[2], pos);
429 if (maybe_expand_insn (icode, 3, ops))
430 return true;
431 }
432
433 /* If the target is a register, overwriting the entire object, or storing
434 a full-word or multi-word field can be done with just a SUBREG.
435
436 If the target is memory, storing any naturally aligned field can be
437 done with a simple store. For targets that support fast unaligned
438 memory, any naturally sized, unit aligned field can be done directly. */
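/* For instance, storing a 32-bit SImode value at bit 0 of an SImode register
   is a plain register move, and storing it at a naturally aligned offset in
   memory is a single SImode store.  */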
439
440 offset = bitnum / unit;
441 bitpos = bitnum % unit;
442 byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
443 + (offset * UNITS_PER_WORD);
444
445 if (bitpos == 0
446 && bitsize == GET_MODE_BITSIZE (fieldmode)
447 && (!MEM_P (op0)
448 ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
449 || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
450 && ((GET_MODE (op0) == fieldmode && byte_offset == 0)
451 || validate_subreg (fieldmode, GET_MODE (op0), op0,
452 byte_offset)))
453 : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
454 || (offset * BITS_PER_UNIT % bitsize == 0
455 && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
456 {
457 if (MEM_P (op0))
458 op0 = adjust_address (op0, fieldmode, offset);
459 else if (GET_MODE (op0) != fieldmode)
460 op0 = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
461 byte_offset);
462 emit_move_insn (op0, value);
463 return true;
464 }
465
466 /* Make sure we are playing with integral modes. Pun with subregs
467 if we aren't. This must come after the entire register case above,
468 since that case is valid for any mode. The following cases are only
469 valid for integral modes. */
470 {
471 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
472 if (imode != GET_MODE (op0))
473 {
474 if (MEM_P (op0))
475 op0 = adjust_address (op0, imode, 0);
476 else
477 {
478 gcc_assert (imode != BLKmode);
479 op0 = gen_lowpart (imode, op0);
480 }
481 }
482 }
483
484 /* We may be accessing data outside the field, which means
485 we can alias adjacent data. */
486 /* ?? not always for C++0x memory model ?? */
487 if (MEM_P (op0))
488 {
489 op0 = shallow_copy_rtx (op0);
490 set_mem_alias_set (op0, 0);
491 set_mem_expr (op0, 0);
492 }
493
494 /* If OP0 is a register, BITPOS must count within a word.
495 But as we have it, it counts within whatever size OP0 now has.
496 On a bigendian machine, these are not the same, so convert. */
497 if (BYTES_BIG_ENDIAN
498 && !MEM_P (op0)
499 && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
500 bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
501
502 /* Storing an lsb-aligned field in a register
503 can be done with a movestrict instruction. */
504
505 if (!MEM_P (op0)
506 && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
507 && bitsize == GET_MODE_BITSIZE (fieldmode)
508 && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
509 {
510 struct expand_operand ops[2];
511 enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
512 rtx arg0 = op0;
513 unsigned HOST_WIDE_INT subreg_off;
514
515 if (GET_CODE (arg0) == SUBREG)
516 {
517 /* Else we've got some float mode source being extracted into
518 a different float mode destination -- this combination of
519 subregs results in Severe Tire Damage. */
520 gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
521 || GET_MODE_CLASS (fieldmode) == MODE_INT
522 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
523 arg0 = SUBREG_REG (arg0);
524 }
525
526 subreg_off = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
527 + (offset * UNITS_PER_WORD);
528 if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
529 {
530 arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
531
532 create_fixed_operand (&ops[0], arg0);
533 /* Shrink the source operand to FIELDMODE. */
534 create_convert_operand_to (&ops[1], value, fieldmode, false);
535 if (maybe_expand_insn (icode, 2, ops))
536 return true;
537 }
538 }
539
540 /* Handle fields bigger than a word. */
541
542 if (bitsize > BITS_PER_WORD)
543 {
544 /* Here we transfer the words of the field
545 in the order least significant first.
546 This is because the most significant word is the one which may
547 be less than full.
548 However, only do that if the value is not BLKmode. */
549
550 unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
551 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
552 unsigned int i;
553 rtx last;
554
555 /* This is the mode we must force value to, so that there will be enough
556 subwords to extract. Note that fieldmode will often (always?) be
557 VOIDmode, because that is what store_field uses to indicate that this
558 is a bit field, but passing VOIDmode to operand_subword_force
559 is not allowed. */
560 fieldmode = GET_MODE (value);
561 if (fieldmode == VOIDmode)
562 fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
563
564 last = get_last_insn ();
565 for (i = 0; i < nwords; i++)
566 {
567 /* If I is 0, use the low-order word in both field and target;
568 if I is 1, use the next to lowest word; and so on. */
569 unsigned int wordnum = (backwards
570 ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
571 - i - 1
572 : i);
573 unsigned int bit_offset = (backwards
574 ? MAX ((int) bitsize - ((int) i + 1)
575 * BITS_PER_WORD,
576 0)
577 : (int) i * BITS_PER_WORD);
578 rtx value_word = operand_subword_force (value, wordnum, fieldmode);
579 unsigned HOST_WIDE_INT new_bitsize =
580 MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
581
582 /* If the remaining chunk doesn't have full wordsize we have
583 to make sure that for big endian machines the higher order
584 bits are used. */
585 if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
586 value_word = simplify_expand_binop (word_mode, lshr_optab,
587 value_word,
588 GEN_INT (BITS_PER_WORD
589 - new_bitsize),
590 NULL_RTX, true,
591 OPTAB_LIB_WIDEN);
592
593 if (!store_bit_field_1 (op0, new_bitsize,
594 bitnum + bit_offset,
595 bitregion_start, bitregion_end,
596 word_mode,
597 value_word, fallback_p))
598 {
599 delete_insns_since (last);
600 return false;
601 }
602 }
603 return true;
604 }
605
 606 /* From here on we can assume that the field to be stored in fits within
 607 a single word (whatever type that is), since it is narrower than a word. */
608
609 /* OFFSET is the number of words or bytes (UNIT says which)
610 from STR_RTX to the first word or byte containing part of the field. */
611
612 if (!MEM_P (op0))
613 {
614 if (offset != 0
615 || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
616 {
617 if (!REG_P (op0))
618 {
619 /* Since this is a destination (lvalue), we can't copy
620 it to a pseudo. We can remove a SUBREG that does not
621 change the size of the operand. Such a SUBREG may
622 have been added above. */
623 gcc_assert (GET_CODE (op0) == SUBREG
624 && (GET_MODE_SIZE (GET_MODE (op0))
625 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))));
626 op0 = SUBREG_REG (op0);
627 }
628 op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
629 op0, (offset * UNITS_PER_WORD));
630 }
631 offset = 0;
632 }
633
634 /* If VALUE has a floating-point or complex mode, access it as an
635 integer of the corresponding size. This can occur on a machine
636 with 64 bit registers that uses SFmode for float. It can also
637 occur for unaligned float or complex fields. */
638 orig_value = value;
639 if (GET_MODE (value) != VOIDmode
640 && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
641 && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
642 {
643 value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
644 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
645 }
646
647 /* Now OFFSET is nonzero only if OP0 is memory
648 and is therefore always measured in bytes. */
649
650 if (HAVE_insv
651 && GET_MODE (value) != BLKmode
652 && bitsize > 0
653 && GET_MODE_BITSIZE (op_mode) >= bitsize
654 /* Do not use insv for volatile bitfields when
655 -fstrict-volatile-bitfields is in effect. */
656 && !(MEM_P (op0) && MEM_VOLATILE_P (op0)
657 && flag_strict_volatile_bitfields > 0)
658 && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
659 && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode)))
660 /* Do not use insv if the bit region is restricted and
661 op_mode integer at offset doesn't fit into the
662 restricted region. */
663 && !(MEM_P (op0) && bitregion_end
664 && bitnum - bitpos + GET_MODE_BITSIZE (op_mode)
665 > bitregion_end + 1))
666 {
667 struct expand_operand ops[4];
668 int xbitpos = bitpos;
669 rtx value1;
670 rtx xop0 = op0;
671 rtx last = get_last_insn ();
672 bool copy_back = false;
673
674 /* Add OFFSET into OP0's address. */
675 if (MEM_P (xop0))
676 xop0 = adjust_address (xop0, byte_mode, offset);
677
678 /* If xop0 is a register, we need it in OP_MODE
679 to make it acceptable to the format of insv. */
680 if (GET_CODE (xop0) == SUBREG)
681 /* We can't just change the mode, because this might clobber op0,
682 and we will need the original value of op0 if insv fails. */
683 xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
684 if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
685 xop0 = gen_lowpart_SUBREG (op_mode, xop0);
686
687 /* If the destination is a paradoxical subreg such that we need a
688 truncate to the inner mode, perform the insertion on a temporary and
689 truncate the result to the original destination. Note that we can't
690 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
691 X) 0)) is (reg:N X). */
692 if (GET_CODE (xop0) == SUBREG
693 && REG_P (SUBREG_REG (xop0))
694 && (!TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
695 op_mode)))
696 {
697 rtx tem = gen_reg_rtx (op_mode);
698 emit_move_insn (tem, xop0);
699 xop0 = tem;
700 copy_back = true;
701 }
702
703 /* We have been counting XBITPOS within UNIT.
704 Count instead within the size of the register. */
705 if (BYTES_BIG_ENDIAN && !MEM_P (xop0))
706 xbitpos += GET_MODE_BITSIZE (op_mode) - unit;
707
708 unit = GET_MODE_BITSIZE (op_mode);
709
710 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
711 "backwards" from the size of the unit we are inserting into.
712 Otherwise, we count bits from the most significant on a
713 BYTES/BITS_BIG_ENDIAN machine. */
714
715 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
716 xbitpos = unit - bitsize - xbitpos;
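/* E.g. with unit == 32, bitsize == 8 and xbitpos == 4, the position handed
   to the pattern becomes 32 - 8 - 4 == 20.  */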
717
718 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
719 value1 = value;
720 if (GET_MODE (value) != op_mode)
721 {
722 if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
723 {
724 /* Optimization: Don't bother really extending VALUE
725 if it has all the bits we will actually use. However,
726 if we must narrow it, be sure we do it correctly. */
727
728 if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
729 {
730 rtx tmp;
731
732 tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
733 if (! tmp)
734 tmp = simplify_gen_subreg (op_mode,
735 force_reg (GET_MODE (value),
736 value1),
737 GET_MODE (value), 0);
738 value1 = tmp;
739 }
740 else
741 value1 = gen_lowpart (op_mode, value1);
742 }
743 else if (CONST_INT_P (value))
744 value1 = gen_int_mode (INTVAL (value), op_mode);
745 else
746 /* Parse phase is supposed to make VALUE's data type
747 match that of the component reference, which is a type
748 at least as wide as the field; so VALUE should have
749 a mode that corresponds to that type. */
750 gcc_assert (CONSTANT_P (value));
751 }
752
753 create_fixed_operand (&ops[0], xop0);
754 create_integer_operand (&ops[1], bitsize);
755 create_integer_operand (&ops[2], xbitpos);
756 create_input_operand (&ops[3], value1, op_mode);
757 if (maybe_expand_insn (CODE_FOR_insv, 4, ops))
758 {
759 if (copy_back)
760 convert_move (op0, xop0, true);
761 return true;
762 }
763 delete_insns_since (last);
764 }
765
766 /* If OP0 is a memory, try copying it to a register and seeing if a
767 cheap register alternative is available. */
768 if (HAVE_insv && MEM_P (op0))
769 {
770 enum machine_mode bestmode;
771 unsigned HOST_WIDE_INT maxbits = MAX_FIXED_MODE_SIZE;
772
773 if (bitregion_end)
774 maxbits = bitregion_end - bitregion_start + 1;
775
776 /* Get the mode to use for inserting into this field. If OP0 is
777 BLKmode, get the smallest mode consistent with the alignment. If
778 OP0 is a non-BLKmode object that is no wider than OP_MODE, use its
779 mode. Otherwise, use the smallest mode containing the field. */
780
781 if (GET_MODE (op0) == BLKmode
782 || GET_MODE_BITSIZE (GET_MODE (op0)) > maxbits
783 || (op_mode != MAX_MACHINE_MODE
784 && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (op_mode)))
785 bestmode = get_best_mode (bitsize, bitnum,
786 bitregion_start, bitregion_end,
787 MEM_ALIGN (op0),
788 (op_mode == MAX_MACHINE_MODE
789 ? VOIDmode : op_mode),
790 MEM_VOLATILE_P (op0));
791 else
792 bestmode = GET_MODE (op0);
793
794 if (bestmode != VOIDmode
795 && GET_MODE_SIZE (bestmode) >= GET_MODE_SIZE (fieldmode)
796 && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
797 && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
798 {
799 rtx last, tempreg, xop0;
800 unsigned HOST_WIDE_INT xoffset, xbitpos;
801
802 last = get_last_insn ();
803
804 /* Adjust address to point to the containing unit of
805 that mode. Compute the offset as a multiple of this unit,
806 counting in bytes. */
807 unit = GET_MODE_BITSIZE (bestmode);
808 xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
809 xbitpos = bitnum % unit;
810 xop0 = adjust_address (op0, bestmode, xoffset);
811
812 /* Fetch that unit, store the bitfield in it, then store
813 the unit. */
814 tempreg = copy_to_reg (xop0);
815 if (store_bit_field_1 (tempreg, bitsize, xbitpos,
816 bitregion_start, bitregion_end,
817 fieldmode, orig_value, false))
818 {
819 emit_move_insn (xop0, tempreg);
820 return true;
821 }
822 delete_insns_since (last);
823 }
824 }
825
826 if (!fallback_p)
827 return false;
828
829 store_fixed_bit_field (op0, offset, bitsize, bitpos,
830 bitregion_start, bitregion_end, value);
831 return true;
832 }
833
834 /* Generate code to store value from rtx VALUE
835 into a bit-field within structure STR_RTX
836 containing BITSIZE bits starting at bit BITNUM.
837
838 BITREGION_START is bitpos of the first bitfield in this region.
839 BITREGION_END is the bitpos of the ending bitfield in this region.
840 These two fields are 0, if the C++ memory model does not apply,
841 or we are not interested in keeping track of bitfield regions.
842
843 FIELDMODE is the machine-mode of the FIELD_DECL node for this field. */
844
845 void
846 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
847 unsigned HOST_WIDE_INT bitnum,
848 unsigned HOST_WIDE_INT bitregion_start,
849 unsigned HOST_WIDE_INT bitregion_end,
850 enum machine_mode fieldmode,
851 rtx value)
852 {
853 /* Under the C++0x memory model, we must not touch bits outside the
854 bit region. Adjust the address to start at the beginning of the
855 bit region. */
856 if (MEM_P (str_rtx) && bitregion_start > 0)
857 {
858 enum machine_mode bestmode;
859 enum machine_mode op_mode;
860 unsigned HOST_WIDE_INT offset;
861
862 op_mode = mode_for_extraction (EP_insv, 3);
863 if (op_mode == MAX_MACHINE_MODE)
864 op_mode = VOIDmode;
865
866 gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
867
868 offset = bitregion_start / BITS_PER_UNIT;
869 bitnum -= bitregion_start;
870 bitregion_end -= bitregion_start;
871 bitregion_start = 0;
872 bestmode = get_best_mode (bitsize, bitnum,
873 bitregion_start, bitregion_end,
874 MEM_ALIGN (str_rtx),
875 op_mode,
876 MEM_VOLATILE_P (str_rtx));
877 str_rtx = adjust_address (str_rtx, bestmode, offset);
878 }
879
880 if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
881 bitregion_start, bitregion_end,
882 fieldmode, value, true))
883 gcc_unreachable ();
884 }
885 \f
886 /* Use shifts and boolean operations to store VALUE
887 into a bit field of width BITSIZE
888 in a memory location specified by OP0 except offset by OFFSET bytes.
889 (OFFSET must be 0 if OP0 is a register.)
890 The field starts at position BITPOS within the byte.
891 (If OP0 is a register, it may be a full word or a narrower mode,
892 but BITPOS still counts within a full word,
893 which is significant on bigendian machines.) */
894
895 static void
896 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
897 unsigned HOST_WIDE_INT bitsize,
898 unsigned HOST_WIDE_INT bitpos,
899 unsigned HOST_WIDE_INT bitregion_start,
900 unsigned HOST_WIDE_INT bitregion_end,
901 rtx value)
902 {
903 enum machine_mode mode;
904 unsigned int total_bits = BITS_PER_WORD;
905 rtx temp;
906 int all_zero = 0;
907 int all_one = 0;
908
909 /* There is a case not handled here:
910 a structure with a known alignment of just a halfword
911 and a field split across two aligned halfwords within the structure.
912 Or likewise a structure with a known alignment of just a byte
913 and a field split across two bytes.
914 Such cases are not supposed to be able to occur. */
915
916 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
917 {
918 gcc_assert (!offset);
919 /* Special treatment for a bit field split across two registers. */
920 if (bitsize + bitpos > BITS_PER_WORD)
921 {
922 store_split_bit_field (op0, bitsize, bitpos,
923 bitregion_start, bitregion_end,
924 value);
925 return;
926 }
927 }
928 else
929 {
930 unsigned HOST_WIDE_INT maxbits = MAX_FIXED_MODE_SIZE;
931
932 if (bitregion_end)
933 maxbits = bitregion_end - bitregion_start + 1;
934
935 /* Get the proper mode to use for this field. We want a mode that
936 includes the entire field. If such a mode would be larger than
937 a word, we won't be doing the extraction the normal way.
938 We don't want a mode bigger than the destination. */
939
940 mode = GET_MODE (op0);
941 if (GET_MODE_BITSIZE (mode) == 0
942 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
943 mode = word_mode;
944
945 if (MEM_VOLATILE_P (op0)
946 && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
947 && GET_MODE_BITSIZE (GET_MODE (op0)) <= maxbits
948 && flag_strict_volatile_bitfields > 0)
949 mode = GET_MODE (op0);
950 else
951 mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
952 bitregion_start, bitregion_end,
953 MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
954
955 if (mode == VOIDmode)
956 {
957 /* The only way this should occur is if the field spans word
958 boundaries. */
959 store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
960 bitregion_start, bitregion_end, value);
961 return;
962 }
963
964 total_bits = GET_MODE_BITSIZE (mode);
965
966 /* Make sure bitpos is valid for the chosen mode. Adjust BITPOS to
967 be in the range 0 to total_bits-1, and put any excess bytes in
968 OFFSET. */
969 if (bitpos >= total_bits)
970 {
971 offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
972 bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
973 * BITS_PER_UNIT);
974 }
975
976 /* Get ref to an aligned byte, halfword, or word containing the field.
977 Adjust BITPOS to be position within a word,
978 and OFFSET to be the offset of that word.
979 Then alter OP0 to refer to that word. */
980 bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
981 offset -= (offset % (total_bits / BITS_PER_UNIT));
982 op0 = adjust_address (op0, mode, offset);
983 }
984
985 mode = GET_MODE (op0);
986
987 /* Now MODE is either some integral mode for a MEM as OP0,
988 or is a full-word for a REG as OP0. TOTAL_BITS corresponds.
989 The bit field is contained entirely within OP0.
990 BITPOS is the starting bit number within OP0.
991 (OP0's mode may actually be narrower than MODE.) */
992
993 if (BYTES_BIG_ENDIAN)
994 /* BITPOS is the distance between our msb
995 and that of the containing datum.
996 Convert it to the distance from the lsb. */
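/* E.g. an 8-bit field whose msb lies 4 bits below that of a 32-bit datum
   ends up with BITPOS == 32 - 8 - 4 == 20, counted from the lsb.  */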
997 bitpos = total_bits - bitsize - bitpos;
998
999 /* Now BITPOS is always the distance between our lsb
1000 and that of OP0. */
1001
1002 /* Shift VALUE left by BITPOS bits. If VALUE is not constant,
1003 we must first convert its mode to MODE. */
1004
1005 if (CONST_INT_P (value))
1006 {
1007 HOST_WIDE_INT v = INTVAL (value);
1008
1009 if (bitsize < HOST_BITS_PER_WIDE_INT)
1010 v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
1011
1012 if (v == 0)
1013 all_zero = 1;
1014 else if ((bitsize < HOST_BITS_PER_WIDE_INT
1015 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
1016 || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
1017 all_one = 1;
1018
1019 value = lshift_value (mode, value, bitpos, bitsize);
1020 }
1021 else
1022 {
1023 int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
1024 && bitpos + bitsize != GET_MODE_BITSIZE (mode));
1025
1026 if (GET_MODE (value) != mode)
1027 value = convert_to_mode (mode, value, 1);
1028
1029 if (must_and)
1030 value = expand_binop (mode, and_optab, value,
1031 mask_rtx (mode, 0, bitsize, 0),
1032 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1033 if (bitpos > 0)
1034 value = expand_shift (LSHIFT_EXPR, mode, value,
1035 bitpos, NULL_RTX, 1);
1036 }
1037
1038 /* Now clear the chosen bits in OP0,
1039 except that if VALUE is -1 we need not bother. */
1040 /* We keep the intermediates in registers to allow CSE to combine
1041 consecutive bitfield assignments. */
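/* In other words, for a BITSIZE-bit field at BITPOS the store amounts to
   op0 = (op0 & ~(mask << bitpos)) | (value << bitpos), where MASK has the
   low BITSIZE bits set; the AND is skipped when VALUE is a constant of all
   ones and the IOR when it is the constant zero.  */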
1042
1043 temp = force_reg (mode, op0);
1044
1045 if (! all_one)
1046 {
1047 temp = expand_binop (mode, and_optab, temp,
1048 mask_rtx (mode, bitpos, bitsize, 1),
1049 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1050 temp = force_reg (mode, temp);
1051 }
1052
1053 /* Now logical-or VALUE into OP0, unless it is zero. */
1054
1055 if (! all_zero)
1056 {
1057 temp = expand_binop (mode, ior_optab, temp, value,
1058 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1059 temp = force_reg (mode, temp);
1060 }
1061
1062 if (op0 != temp)
1063 {
1064 op0 = copy_rtx (op0);
1065 emit_move_insn (op0, temp);
1066 }
1067 }
1068 \f
1069 /* Store a bit field that is split across multiple accessible memory objects.
1070
1071 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1072 BITSIZE is the field width; BITPOS the position of its first bit
1073 (within the word).
1074 VALUE is the value to store.
1075
1076 This does not yet handle fields wider than BITS_PER_WORD. */
1077
1078 static void
1079 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1080 unsigned HOST_WIDE_INT bitpos,
1081 unsigned HOST_WIDE_INT bitregion_start,
1082 unsigned HOST_WIDE_INT bitregion_end,
1083 rtx value)
1084 {
1085 unsigned int unit;
1086 unsigned int bitsdone = 0;
1087
1088 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1089 much at a time. */
1090 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1091 unit = BITS_PER_WORD;
1092 else
1093 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1094
1095 /* If VALUE is a constant other than a CONST_INT, get it into a register in
1096 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1097 that VALUE might be a floating-point constant. */
1098 if (CONSTANT_P (value) && !CONST_INT_P (value))
1099 {
1100 rtx word = gen_lowpart_common (word_mode, value);
1101
1102 if (word && (value != word))
1103 value = word;
1104 else
1105 value = gen_lowpart_common (word_mode,
1106 force_reg (GET_MODE (value) != VOIDmode
1107 ? GET_MODE (value)
1108 : word_mode, value));
1109 }
1110
1111 while (bitsdone < bitsize)
1112 {
1113 unsigned HOST_WIDE_INT thissize;
1114 rtx part, word;
1115 unsigned HOST_WIDE_INT thispos;
1116 unsigned HOST_WIDE_INT offset;
1117
1118 offset = (bitpos + bitsdone) / unit;
1119 thispos = (bitpos + bitsdone) % unit;
1120
1121 /* When region of bytes we can touch is restricted, decrease
1122 UNIT close to the end of the region as needed. */
1123 if (bitregion_end
1124 && unit > BITS_PER_UNIT
1125 && bitpos + bitsdone - thispos + unit > bitregion_end + 1)
1126 {
1127 unit = unit / 2;
1128 continue;
1129 }
1130
1131 /* THISSIZE must not overrun a word boundary. Otherwise,
1132 store_fixed_bit_field will call us again, and we will mutually
1133 recurse forever. */
1134 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1135 thissize = MIN (thissize, unit - thispos);
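/* For example, a 20-bit field starting at bit 6 of a byte-aligned MEM
   (unit == 8) is stored as successive pieces of 2, 8, 8 and 2 bits.  */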
1136
1137 if (BYTES_BIG_ENDIAN)
1138 {
1139 int total_bits;
1140
1141 /* We must do an endian conversion exactly the same way as it is
1142 done in extract_bit_field, so that the two calls to
1143 extract_fixed_bit_field will have comparable arguments. */
1144 if (!MEM_P (value) || GET_MODE (value) == BLKmode)
1145 total_bits = BITS_PER_WORD;
1146 else
1147 total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1148
1149 /* Fetch successively less significant portions. */
1150 if (CONST_INT_P (value))
1151 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1152 >> (bitsize - bitsdone - thissize))
1153 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1154 else
1155 /* The args are chosen so that the last part includes the
1156 lsb. Give extract_bit_field the value it needs (with
1157 endianness compensation) to fetch the piece we want. */
1158 part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1159 total_bits - bitsize + bitsdone,
1160 NULL_RTX, 1, false);
1161 }
1162 else
1163 {
1164 /* Fetch successively more significant portions. */
1165 if (CONST_INT_P (value))
1166 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1167 >> bitsdone)
1168 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1169 else
1170 part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1171 bitsdone, NULL_RTX, 1, false);
1172 }
1173
1174 /* If OP0 is a register, then handle OFFSET here.
1175
1176 When handling multiword bitfields, extract_bit_field may pass
1177 down a word_mode SUBREG of a larger REG for a bitfield that actually
1178 crosses a word boundary. Thus, for a SUBREG, we must find
1179 the current word starting from the base register. */
1180 if (GET_CODE (op0) == SUBREG)
1181 {
1182 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1183 enum machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
1184 if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
1185 word = word_offset ? const0_rtx : op0;
1186 else
1187 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1188 GET_MODE (SUBREG_REG (op0)));
1189 offset = 0;
1190 }
1191 else if (REG_P (op0))
1192 {
1193 enum machine_mode op0_mode = GET_MODE (op0);
1194 if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1195 word = offset ? const0_rtx : op0;
1196 else
1197 word = operand_subword_force (op0, offset, GET_MODE (op0));
1198 offset = 0;
1199 }
1200 else
1201 word = op0;
1202
1203 /* OFFSET is in UNITs, and UNIT is in bits.
1204 store_fixed_bit_field wants offset in bytes. If WORD is const0_rtx,
1205 it is just an out-of-bounds access. Ignore it. */
1206 if (word != const0_rtx)
1207 store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
1208 thispos, bitregion_start, bitregion_end, part);
1209 bitsdone += thissize;
1210 }
1211 }
1212 \f
1213 /* A subroutine of extract_bit_field_1 that converts return value X
1214 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1215 to extract_bit_field. */
1216
1217 static rtx
1218 convert_extracted_bit_field (rtx x, enum machine_mode mode,
1219 enum machine_mode tmode, bool unsignedp)
1220 {
1221 if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1222 return x;
1223
1224 /* If the x mode is not a scalar integral, first convert to the
1225 integer mode of that size and then access it as a floating-point
1226 value via a SUBREG. */
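/* E.g. on a target where SFmode is 32 bits wide, a request for SFmode goes
   via SImode and a lowpart SUBREG of the resulting register.  */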
1227 if (!SCALAR_INT_MODE_P (tmode))
1228 {
1229 enum machine_mode smode;
1230
1231 smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1232 x = convert_to_mode (smode, x, unsignedp);
1233 x = force_reg (smode, x);
1234 return gen_lowpart (tmode, x);
1235 }
1236
1237 return convert_to_mode (tmode, x, unsignedp);
1238 }
1239
1240 /* A subroutine of extract_bit_field, with the same arguments.
1241 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1242 if we can find no other means of implementing the operation.
 1243 If FALLBACK_P is false, return NULL instead. */
1244
1245 static rtx
1246 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1247 unsigned HOST_WIDE_INT bitnum,
1248 int unsignedp, bool packedp, rtx target,
1249 enum machine_mode mode, enum machine_mode tmode,
1250 bool fallback_p)
1251 {
1252 unsigned int unit
1253 = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
1254 unsigned HOST_WIDE_INT offset, bitpos;
1255 rtx op0 = str_rtx;
1256 enum machine_mode int_mode;
1257 enum machine_mode ext_mode;
1258 enum machine_mode mode1;
1259 int byte_offset;
1260
1261 if (tmode == VOIDmode)
1262 tmode = mode;
1263
1264 while (GET_CODE (op0) == SUBREG)
1265 {
1266 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1267 op0 = SUBREG_REG (op0);
1268 }
1269
1270 /* If we have an out-of-bounds access to a register, just return an
1271 uninitialized register of the required mode. This can occur if the
1272 source code contains an out-of-bounds access to a small array. */
1273 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1274 return gen_reg_rtx (tmode);
1275
1276 if (REG_P (op0)
1277 && mode == GET_MODE (op0)
1278 && bitnum == 0
1279 && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1280 {
1281 /* We're trying to extract a full register from itself. */
1282 return op0;
1283 }
1284
1285 /* See if we can get a better vector mode before extracting. */
1286 if (VECTOR_MODE_P (GET_MODE (op0))
1287 && !MEM_P (op0)
1288 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1289 {
1290 enum machine_mode new_mode;
1291
1292 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1293 new_mode = MIN_MODE_VECTOR_FLOAT;
1294 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1295 new_mode = MIN_MODE_VECTOR_FRACT;
1296 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1297 new_mode = MIN_MODE_VECTOR_UFRACT;
1298 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1299 new_mode = MIN_MODE_VECTOR_ACCUM;
1300 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1301 new_mode = MIN_MODE_VECTOR_UACCUM;
1302 else
1303 new_mode = MIN_MODE_VECTOR_INT;
1304
1305 for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1306 if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1307 && targetm.vector_mode_supported_p (new_mode))
1308 break;
1309 if (new_mode != VOIDmode)
1310 op0 = gen_lowpart (new_mode, op0);
1311 }
1312
1313 /* Use vec_extract patterns for extracting parts of vectors whenever
1314 available. */
1315 if (VECTOR_MODE_P (GET_MODE (op0))
1316 && !MEM_P (op0)
1317 && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1318 && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1319 == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1320 {
1321 struct expand_operand ops[3];
1322 enum machine_mode outermode = GET_MODE (op0);
1323 enum machine_mode innermode = GET_MODE_INNER (outermode);
1324 enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1325 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1326
1327 create_output_operand (&ops[0], target, innermode);
1328 create_input_operand (&ops[1], op0, outermode);
1329 create_integer_operand (&ops[2], pos);
1330 if (maybe_expand_insn (icode, 3, ops))
1331 {
1332 target = ops[0].value;
1333 if (GET_MODE (target) != mode)
1334 return gen_lowpart (tmode, target);
1335 return target;
1336 }
1337 }
1338
1339 /* Make sure we are playing with integral modes. Pun with subregs
1340 if we aren't. */
1341 {
1342 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1343 if (imode != GET_MODE (op0))
1344 {
1345 if (MEM_P (op0))
1346 op0 = adjust_address (op0, imode, 0);
1347 else if (imode != BLKmode)
1348 {
1349 op0 = gen_lowpart (imode, op0);
1350
1351 /* If we got a SUBREG, force it into a register since we
1352 aren't going to be able to do another SUBREG on it. */
1353 if (GET_CODE (op0) == SUBREG)
1354 op0 = force_reg (imode, op0);
1355 }
1356 else if (REG_P (op0))
1357 {
1358 rtx reg, subreg;
1359 imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1360 MODE_INT);
1361 reg = gen_reg_rtx (imode);
1362 subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1363 emit_move_insn (subreg, op0);
1364 op0 = reg;
1365 bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1366 }
1367 else
1368 {
1369 rtx mem = assign_stack_temp (GET_MODE (op0),
1370 GET_MODE_SIZE (GET_MODE (op0)));
1371 emit_move_insn (mem, op0);
1372 op0 = adjust_address (mem, BLKmode, 0);
1373 }
1374 }
1375 }
1376
1377 /* We may be accessing data outside the field, which means
1378 we can alias adjacent data. */
1379 if (MEM_P (op0))
1380 {
1381 op0 = shallow_copy_rtx (op0);
1382 set_mem_alias_set (op0, 0);
1383 set_mem_expr (op0, 0);
1384 }
1385
1386 /* Extraction of a full-word or multi-word value from a structure
1387 in a register or aligned memory can be done with just a SUBREG.
1388 A subword value in the least significant part of a register
1389 can also be extracted with a SUBREG. For this, we need the
1390 byte offset of the value in op0. */
1391
1392 bitpos = bitnum % unit;
1393 offset = bitnum / unit;
1394 byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;
1395
1396 /* If OP0 is a register, BITPOS must count within a word.
1397 But as we have it, it counts within whatever size OP0 now has.
1398 On a bigendian machine, these are not the same, so convert. */
1399 if (BYTES_BIG_ENDIAN
1400 && !MEM_P (op0)
1401 && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
1402 bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1403
1404 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1405 If that's wrong, the solution is to test for it and set TARGET to 0
1406 if needed. */
1407
1408 /* Only scalar integer modes can be converted via subregs. There is an
1409 additional problem for FP modes here in that they can have a precision
1410 which is different from the size. mode_for_size uses precision, but
1411 we want a mode based on the size, so we must avoid calling it for FP
1412 modes. */
1413 mode1 = (SCALAR_INT_MODE_P (tmode)
1414 ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
1415 : mode);
1416
1417 /* If the bitfield is volatile, we need to make sure the access
1418 remains on a type-aligned boundary. */
1419 if (GET_CODE (op0) == MEM
1420 && MEM_VOLATILE_P (op0)
1421 && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
1422 && flag_strict_volatile_bitfields > 0)
1423 goto no_subreg_mode_swap;
1424
1425 if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
1426 && bitpos % BITS_PER_WORD == 0)
1427 || (mode1 != BLKmode
1428 /* ??? The big endian test here is wrong. This is correct
1429 if the value is in a register, and if mode_for_size is not
1430 the same mode as op0. This causes us to get unnecessarily
1431 inefficient code from the Thumb port when -mbig-endian. */
1432 && (BYTES_BIG_ENDIAN
1433 ? bitpos + bitsize == BITS_PER_WORD
1434 : bitpos == 0)))
1435 && ((!MEM_P (op0)
1436 && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0))
1437 && GET_MODE_SIZE (mode1) != 0
1438 && byte_offset % GET_MODE_SIZE (mode1) == 0)
1439 || (MEM_P (op0)
1440 && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
1441 || (offset * BITS_PER_UNIT % bitsize == 0
1442 && MEM_ALIGN (op0) % bitsize == 0)))))
1443 {
1444 if (MEM_P (op0))
1445 op0 = adjust_address (op0, mode1, offset);
1446 else if (mode1 != GET_MODE (op0))
1447 {
1448 rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1449 byte_offset);
1450 if (sub == NULL)
1451 goto no_subreg_mode_swap;
1452 op0 = sub;
1453 }
1454 if (mode1 != mode)
1455 return convert_to_mode (tmode, op0, unsignedp);
1456 return op0;
1457 }
1458 no_subreg_mode_swap:
1459
1460 /* Handle fields bigger than a word. */
1461
1462 if (bitsize > BITS_PER_WORD)
1463 {
1464 /* Here we transfer the words of the field
1465 in the order least significant first.
1466 This is because the most significant word is the one which may
1467 be less than full. */
1468
1469 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1470 unsigned int i;
1471
1472 if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1473 target = gen_reg_rtx (mode);
1474
1475 /* Indicate for flow that the entire target reg is being set. */
1476 emit_clobber (target);
1477
1478 for (i = 0; i < nwords; i++)
1479 {
1480 /* If I is 0, use the low-order word in both field and target;
1481 if I is 1, use the next to lowest word; and so on. */
1482 /* Word number in TARGET to use. */
1483 unsigned int wordnum
1484 = (WORDS_BIG_ENDIAN
1485 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1486 : i);
1487 /* Offset from start of field in OP0. */
1488 unsigned int bit_offset = (WORDS_BIG_ENDIAN
1489 ? MAX (0, ((int) bitsize - ((int) i + 1)
1490 * (int) BITS_PER_WORD))
1491 : (int) i * BITS_PER_WORD);
1492 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1493 rtx result_part
1494 = extract_bit_field (op0, MIN (BITS_PER_WORD,
1495 bitsize - i * BITS_PER_WORD),
1496 bitnum + bit_offset, 1, false, target_part, mode,
1497 word_mode);
1498
1499 gcc_assert (target_part);
1500
1501 if (result_part != target_part)
1502 emit_move_insn (target_part, result_part);
1503 }
1504
1505 if (unsignedp)
1506 {
1507 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1508 need to be zero'd out. */
1509 if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1510 {
1511 unsigned int i, total_words;
1512
1513 total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1514 for (i = nwords; i < total_words; i++)
1515 emit_move_insn
1516 (operand_subword (target,
1517 WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1518 1, VOIDmode),
1519 const0_rtx);
1520 }
1521 return target;
1522 }
1523
1524 /* Signed bit field: sign-extend with two arithmetic shifts. */
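/* E.g. a 12-bit field extracted into a 32-bit mode is shifted left by 20 and
   then arithmetically right by 20, replicating the field's sign bit across
   the upper bits.  */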
1525 target = expand_shift (LSHIFT_EXPR, mode, target,
1526 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1527 return expand_shift (RSHIFT_EXPR, mode, target,
1528 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1529 }
1530
1531 /* From here on we know the desired field is smaller than a word. */
1532
1533 /* Check if there is a correspondingly-sized integer field, so we can
1534 safely extract it as one size of integer, if necessary; then
1535 truncate or extend to the size that is wanted; then use SUBREGs or
1536 convert_to_mode to get one of the modes we really wanted. */
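/* E.g. for an SFmode result (32 bits wide on most targets) the extraction
   itself is done in SImode, and convert_extracted_bit_field produces the
   SFmode view afterwards.  */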
1537
1538 int_mode = int_mode_for_mode (tmode);
1539 if (int_mode == BLKmode)
1540 int_mode = int_mode_for_mode (mode);
1541 /* Should probably push op0 out to memory and then do a load. */
1542 gcc_assert (int_mode != BLKmode);
1543
1544 /* OFFSET is the number of words or bytes (UNIT says which)
1545 from STR_RTX to the first word or byte containing part of the field. */
1546 if (!MEM_P (op0))
1547 {
1548 if (offset != 0
1549 || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1550 {
1551 if (!REG_P (op0))
1552 op0 = copy_to_reg (op0);
1553 op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
1554 op0, (offset * UNITS_PER_WORD));
1555 }
1556 offset = 0;
1557 }
1558
1559 /* Now OFFSET is nonzero only for memory operands. */
1560 ext_mode = mode_for_extraction (unsignedp ? EP_extzv : EP_extv, 0);
1561 if (ext_mode != MAX_MACHINE_MODE
1562 && bitsize > 0
1563 && GET_MODE_BITSIZE (ext_mode) >= bitsize
1564 /* Do not use extv/extzv for volatile bitfields when
1565 -fstrict-volatile-bitfields is in effect. */
1566 && !(MEM_P (op0) && MEM_VOLATILE_P (op0)
1567 && flag_strict_volatile_bitfields > 0)
1568 /* If op0 is a register, we need it in EXT_MODE to make it
1569 acceptable to the format of ext(z)v. */
1570 && !(GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1571 && !((REG_P (op0) || GET_CODE (op0) == SUBREG)
1572 && (bitsize + bitpos > GET_MODE_BITSIZE (ext_mode))))
1573 {
1574 struct expand_operand ops[4];
1575 unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
1576 rtx xop0 = op0;
1577 rtx xtarget = target;
1578 rtx xspec_target = target;
1579 rtx xspec_target_subreg = 0;
1580
1581 /* If op0 is a register, we need it in EXT_MODE to make it
1582 acceptable to the format of ext(z)v. */
1583 if (REG_P (xop0) && GET_MODE (xop0) != ext_mode)
1584 xop0 = gen_lowpart_SUBREG (ext_mode, xop0);
1585 if (MEM_P (xop0))
1586 /* Get ref to first byte containing part of the field. */
1587 xop0 = adjust_address (xop0, byte_mode, xoffset);
1588
1589 /* Now convert from counting within UNIT to counting in EXT_MODE. */
1590 if (BYTES_BIG_ENDIAN && !MEM_P (xop0))
1591 xbitpos += GET_MODE_BITSIZE (ext_mode) - unit;
1592
1593 unit = GET_MODE_BITSIZE (ext_mode);
1594
1595 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1596 "backwards" from the size of the unit we are extracting from.
1597 Otherwise, we count bits from the most significant on a
1598 BYTES/BITS_BIG_ENDIAN machine. */
1599
1600 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1601 xbitpos = unit - bitsize - xbitpos;
1602
1603 if (xtarget == 0)
1604 xtarget = xspec_target = gen_reg_rtx (tmode);
1605
1606 if (GET_MODE (xtarget) != ext_mode)
1607 {
1608 /* Don't use LHS paradoxical subreg if explicit truncation is needed
1609 between the mode of the extraction (word_mode) and the target
1610 mode. Instead, create a temporary and use convert_move to set
1611 the target. */
1612 if (REG_P (xtarget)
1613 && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (xtarget), ext_mode))
1614 {
1615 xtarget = gen_lowpart (ext_mode, xtarget);
1616 if (GET_MODE_PRECISION (ext_mode)
1617 > GET_MODE_PRECISION (GET_MODE (xspec_target)))
1618 xspec_target_subreg = xtarget;
1619 }
1620 else
1621 xtarget = gen_reg_rtx (ext_mode);
1622 }
1623
1624 create_output_operand (&ops[0], xtarget, ext_mode);
1625 create_fixed_operand (&ops[1], xop0);
1626 create_integer_operand (&ops[2], bitsize);
1627 create_integer_operand (&ops[3], xbitpos);
1628 if (maybe_expand_insn (unsignedp ? CODE_FOR_extzv : CODE_FOR_extv,
1629 4, ops))
1630 {
1631 xtarget = ops[0].value;
1632 if (xtarget == xspec_target)
1633 return xtarget;
1634 if (xtarget == xspec_target_subreg)
1635 return xspec_target;
1636 return convert_extracted_bit_field (xtarget, mode, tmode, unsignedp);
1637 }
1638 }
1639
1640 /* If OP0 is a memory, try copying it to a register and seeing if a
1641 cheap register alternative is available. */
1642 if (ext_mode != MAX_MACHINE_MODE && MEM_P (op0))
1643 {
1644 enum machine_mode bestmode;
1645
1646 /* Get the mode to use for inserting into this field. If
1647 OP0 is BLKmode, get the smallest mode consistent with the
1648 alignment. If OP0 is a non-BLKmode object that is no
1649 wider than EXT_MODE, use its mode. Otherwise, use the
1650 smallest mode containing the field. */
1651
1652 if (GET_MODE (op0) == BLKmode
1653 || (ext_mode != MAX_MACHINE_MODE
1654 && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (ext_mode)))
1655 bestmode = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0),
1656 (ext_mode == MAX_MACHINE_MODE
1657 ? VOIDmode : ext_mode),
1658 MEM_VOLATILE_P (op0));
1659 else
1660 bestmode = GET_MODE (op0);
1661
1662 if (bestmode != VOIDmode
1663 && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
1664 && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
1665 {
1666 unsigned HOST_WIDE_INT xoffset, xbitpos;
1667
1668 /* Compute the offset as a multiple of this unit,
1669 counting in bytes. */
1670 unit = GET_MODE_BITSIZE (bestmode);
1671 xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1672 xbitpos = bitnum % unit;
1673
1674 /* Make sure the register is big enough for the whole field. */
1675 if (xoffset * BITS_PER_UNIT + unit
1676 >= offset * BITS_PER_UNIT + bitsize)
1677 {
1678 rtx last, result, xop0;
1679
1680 last = get_last_insn ();
1681
1682 /* Fetch it to a register in that size. */
1683 xop0 = adjust_address (op0, bestmode, xoffset);
1684 xop0 = force_reg (bestmode, xop0);
1685 result = extract_bit_field_1 (xop0, bitsize, xbitpos,
1686 unsignedp, packedp, target,
1687 mode, tmode, false);
1688 if (result)
1689 return result;
1690
1691 delete_insns_since (last);
1692 }
1693 }
1694 }
1695
1696 if (!fallback_p)
1697 return NULL;
1698
1699 target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1700 bitpos, target, unsignedp, packedp);
1701 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1702 }
1703
1704 /* Generate code to extract a byte-field from STR_RTX
1705 containing BITSIZE bits, starting at BITNUM,
1706 and put it in TARGET if possible (if TARGET is nonzero).
1707 Regardless of TARGET, we return the rtx for where the value is placed.
1708
1709 STR_RTX is the structure containing the byte (a REG or MEM).
1710 UNSIGNEDP is nonzero if this is an unsigned bit field.
1711 PACKEDP is nonzero if the field has the packed attribute.
1712 MODE is the natural mode of the field value once extracted.
1713 TMODE is the mode the caller would like the value to have;
1714 but the value may be returned with type MODE instead.
1715
1716 If a TARGET is specified and we can store in it at no extra cost,
1717 we do so, and return TARGET.
1718 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1719 if they are equally easy. */
1720
1721 rtx
1722 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1723 unsigned HOST_WIDE_INT bitnum, int unsignedp, bool packedp,
1724 rtx target, enum machine_mode mode, enum machine_mode tmode)
1725 {
1726 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp, packedp,
1727 target, mode, tmode, true);
1728 }
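/* As an illustrative sketch only (this call does not appear in expmed.c),
 a caller wanting the unsigned 8-bit field that starts at bit 4 of a
 SImode register REG could use the entry point above roughly as

 rtx val = extract_bit_field (reg, 8, 4, 1, false,
 NULL_RTX, QImode, QImode);

 and extract_bit_field_1 then chooses between an ext(z)v pattern, a
 shift-and-mask sequence, or a split extraction. */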
1729 \f
1730 /* Extract a bit field using shifts and boolean operations.
1731 Returns an rtx to represent the value.
1732 OP0 addresses a register (word) or memory (byte).
1733 BITPOS says which bit within the word or byte the bit field starts in.
1734 OFFSET says how many bytes farther the bit field starts;
1735 it is 0 if OP0 is a register.
1736 BITSIZE says how many bits long the bit field is.
1737 (If OP0 is a register, it may be narrower than a full word,
1738 but BITPOS still counts within a full word,
1739 which is significant on bigendian machines.)
1740
1741 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1742 PACKEDP is true if the field has the packed attribute.
1743
1744 If TARGET is nonzero, attempts to store the value there
1745 and return TARGET, but this is not guaranteed.
1746 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
1747
1748 static rtx
1749 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1750 unsigned HOST_WIDE_INT offset,
1751 unsigned HOST_WIDE_INT bitsize,
1752 unsigned HOST_WIDE_INT bitpos, rtx target,
1753 int unsignedp, bool packedp)
1754 {
1755 unsigned int total_bits = BITS_PER_WORD;
1756 enum machine_mode mode;
1757
1758 if (GET_CODE (op0) == SUBREG || REG_P (op0))
1759 {
1760 /* Special treatment for a bit field split across two registers. */
1761 if (bitsize + bitpos > BITS_PER_WORD)
1762 return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
1763 }
1764 else
1765 {
1766 /* Get the proper mode to use for this field. We want a mode that
1767 includes the entire field. If such a mode would be larger than
1768 a word, we won't be doing the extraction the normal way. */
1769
1770 if (MEM_VOLATILE_P (op0)
1771 && flag_strict_volatile_bitfields > 0)
1772 {
1773 if (GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1774 mode = GET_MODE (op0);
1775 else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1776 mode = GET_MODE (target);
1777 else
1778 mode = tmode;
1779 }
1780 else
1781 mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT, 0, 0,
1782 MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1783
1784 if (mode == VOIDmode)
1785 /* The only way this should occur is if the field spans word
1786 boundaries. */
1787 return extract_split_bit_field (op0, bitsize,
1788 bitpos + offset * BITS_PER_UNIT,
1789 unsignedp);
1790
1791 total_bits = GET_MODE_BITSIZE (mode);
1792
1793 /* Make sure bitpos is valid for the chosen mode. Adjust BITPOS to
1794 be in the range 0 to total_bits-1, and put any excess bytes in
1795 OFFSET. */
1796 if (bitpos >= total_bits)
1797 {
1798 offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1799 bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1800 * BITS_PER_UNIT);
1801 }
1802
1803 /* If we're accessing a volatile MEM, we can't do the next
1804 alignment step if it results in a multi-word access where we
1805 otherwise wouldn't have one. So, check for that case
1806 here. */
1807 if (MEM_P (op0)
1808 && MEM_VOLATILE_P (op0)
1809 && flag_strict_volatile_bitfields > 0
1810 && bitpos + bitsize <= total_bits
1811 && bitpos + bitsize + (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT > total_bits)
1812 {
1813 if (STRICT_ALIGNMENT)
1814 {
1815 static bool informed_about_misalignment = false;
1816 bool warned;
1817
1818 if (packedp)
1819 {
1820 if (bitsize == total_bits)
1821 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1822 "multiple accesses to volatile structure member"
1823 " because of packed attribute");
1824 else
1825 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1826 "multiple accesses to volatile structure bitfield"
1827 " because of packed attribute");
1828
1829 return extract_split_bit_field (op0, bitsize,
1830 bitpos + offset * BITS_PER_UNIT,
1831 unsignedp);
1832 }
1833
1834 if (bitsize == total_bits)
1835 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1836 "mis-aligned access used for structure member");
1837 else
1838 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1839 "mis-aligned access used for structure bitfield");
1840
1841 if (! informed_about_misalignment && warned)
1842 {
1843 informed_about_misalignment = true;
1844 inform (input_location,
1845 "when a volatile object spans multiple type-sized locations,"
1846 " the compiler must choose between using a single mis-aligned access to"
1847 " preserve the volatility, or using multiple aligned accesses to avoid"
1848 " runtime faults; this code may fail at runtime if the hardware does"
1849 " not allow this access");
1850 }
1851 }
1852 }
1853 else
1854 {
1855
1856 /* Get ref to an aligned byte, halfword, or word containing the field.
1857 Adjust BITPOS to be position within a word,
1858 and OFFSET to be the offset of that word.
1859 Then alter OP0 to refer to that word. */
1860 bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1861 offset -= (offset % (total_bits / BITS_PER_UNIT));
1862 }
1863
1864 op0 = adjust_address (op0, mode, offset);
1865 }
1866
1867 mode = GET_MODE (op0);
1868
1869 if (BYTES_BIG_ENDIAN)
1870 /* BITPOS is the distance between our msb and that of OP0.
1871 Convert it to the distance from the lsb. */
1872 bitpos = total_bits - bitsize - bitpos;
1873
1874 /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1875 We have reduced the big-endian case to the little-endian case. */
1876
1877 if (unsignedp)
1878 {
1879 if (bitpos)
1880 {
1881 /* If the field does not already start at the lsb,
1882 shift it so it does. */
1883 /* Maybe propagate the target for the shift. */
1884 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1885 if (tmode != mode)
1886 subtarget = 0;
1887 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitpos, subtarget, 1);
1888 }
1889 /* Convert the value to the desired mode. */
1890 if (mode != tmode)
1891 op0 = convert_to_mode (tmode, op0, 1);
1892
1893 /* Unless the msb of the field used to be the msb when we shifted,
1894 mask out the upper bits. */
1895
1896 if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
1897 return expand_binop (GET_MODE (op0), and_optab, op0,
1898 mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1899 target, 1, OPTAB_LIB_WIDEN);
1900 return op0;
1901 }
1902
1903 /* To extract a signed bit-field, first shift its msb to the msb of the word,
1904 then arithmetic-shift its lsb to the lsb of the word. */
1905 op0 = force_reg (mode, op0);
1906
1907 /* Find the narrowest integer mode that contains the field. */
1908
1909 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1910 mode = GET_MODE_WIDER_MODE (mode))
1911 if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1912 {
1913 op0 = convert_to_mode (mode, op0, 0);
1914 break;
1915 }
1916
1917 if (mode != tmode)
1918 target = 0;
1919
1920 if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1921 {
1922 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitpos);
1923 /* Maybe propagate the target for the shift. */
1924 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1925 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1926 }
1927
1928 return expand_shift (RSHIFT_EXPR, mode, op0,
1929 GET_MODE_BITSIZE (mode) - bitsize, target, 0);
1930 }
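/* Host-side model (an illustration, not part of the compiler) of the
 shift-and-mask strategy implemented above, for a field of BITSIZE bits
 at little-endian bit position BITPOS of a 32-bit word, with
 0 < BITSIZE < 32 and assuming arithmetic right shifts of signed ints:

 unsigned int
 extract_unsigned (unsigned int word, int bitpos, int bitsize)
 {
 return (word >> bitpos) & ((1u << bitsize) - 1);
 }

 int
 extract_signed (unsigned int word, int bitpos, int bitsize)
 {
 return (int) (word << (32 - bitsize - bitpos)) >> (32 - bitsize);
 }

 For example, extract_unsigned (0x12345678, 8, 8) is 0x56; the signed
 variant gives the same value here because bit 15 of the word is clear. */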
1931 \f
1932 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1933 of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1934 complement of that if COMPLEMENT. The mask is truncated if
1935 necessary to the width of mode MODE. The mask is zero-extended if
1936 BITSIZE+BITPOS is too small for MODE. */
1937
1938 static rtx
1939 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1940 {
1941 double_int mask;
1942
1943 mask = double_int_mask (bitsize);
1944 mask = double_int_lshift (mask, bitpos, HOST_BITS_PER_DOUBLE_INT, false);
1945
1946 if (complement)
1947 mask = double_int_not (mask);
1948
1949 return immed_double_int_const (mask, mode);
1950 }
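/* Two concrete values, purely for illustration: mask_rtx (SImode, 4, 8, 0)
 yields a CONST_INT whose value is 0x00000ff0 (eight ones shifted left by
 four), and mask_rtx (SImode, 4, 8, 1) yields the complement 0xfffff00f
 after truncation to SImode. */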
1951
1952 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1953 VALUE truncated to BITSIZE bits and then shifted left BITPOS bits. */
1954
1955 static rtx
1956 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1957 {
1958 double_int val;
1959
1960 val = double_int_zext (uhwi_to_double_int (INTVAL (value)), bitsize);
1961 val = double_int_lshift (val, bitpos, HOST_BITS_PER_DOUBLE_INT, false);
1962
1963 return immed_double_int_const (val, mode);
1964 }
1965 \f
1966 /* Extract a bit field that is split across two words
1967 and return an RTX for the result.
1968
1969 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1970 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1971 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend. */
1972
1973 static rtx
1974 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1975 unsigned HOST_WIDE_INT bitpos, int unsignedp)
1976 {
1977 unsigned int unit;
1978 unsigned int bitsdone = 0;
1979 rtx result = NULL_RTX;
1980 int first = 1;
1981
1982 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1983 much at a time. */
1984 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1985 unit = BITS_PER_WORD;
1986 else
1987 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1988
1989 while (bitsdone < bitsize)
1990 {
1991 unsigned HOST_WIDE_INT thissize;
1992 rtx part, word;
1993 unsigned HOST_WIDE_INT thispos;
1994 unsigned HOST_WIDE_INT offset;
1995
1996 offset = (bitpos + bitsdone) / unit;
1997 thispos = (bitpos + bitsdone) % unit;
1998
1999 /* THISSIZE must not overrun a word boundary. Otherwise,
2000 extract_fixed_bit_field will call us again, and we will mutually
2001 recurse forever. */
2002 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2003 thissize = MIN (thissize, unit - thispos);
2004
2005 /* If OP0 is a register, then handle OFFSET here.
2006
2007 When handling multiword bitfields, extract_bit_field may pass
2008 down a word_mode SUBREG of a larger REG for a bitfield that actually
2009 crosses a word boundary. Thus, for a SUBREG, we must find
2010 the current word starting from the base register. */
2011 if (GET_CODE (op0) == SUBREG)
2012 {
2013 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
2014 word = operand_subword_force (SUBREG_REG (op0), word_offset,
2015 GET_MODE (SUBREG_REG (op0)));
2016 offset = 0;
2017 }
2018 else if (REG_P (op0))
2019 {
2020 word = operand_subword_force (op0, offset, GET_MODE (op0));
2021 offset = 0;
2022 }
2023 else
2024 word = op0;
2025
2026 /* Extract the parts in bit-counting order,
2027 whose meaning is determined by BYTES_BIG_ENDIAN.
2028 OFFSET is in UNITs, and UNIT is in bits.
2029 extract_fixed_bit_field wants offset in bytes. */
2030 part = extract_fixed_bit_field (word_mode, word,
2031 offset * unit / BITS_PER_UNIT,
2032 thissize, thispos, 0, 1, false);
2033 bitsdone += thissize;
2034
2035 /* Shift this part into place for the result. */
2036 if (BYTES_BIG_ENDIAN)
2037 {
2038 if (bitsize != bitsdone)
2039 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2040 bitsize - bitsdone, 0, 1);
2041 }
2042 else
2043 {
2044 if (bitsdone != thissize)
2045 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2046 bitsdone - thissize, 0, 1);
2047 }
2048
2049 if (first)
2050 result = part;
2051 else
2052 /* Combine the parts with bitwise or. This works
2053 because we extracted each part as an unsigned bit field. */
2054 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2055 OPTAB_LIB_WIDEN);
2056
2057 first = 0;
2058 }
2059
2060 /* Unsigned bit field: we are done. */
2061 if (unsignedp)
2062 return result;
2063 /* Signed bit field: sign-extend with two arithmetic shifts. */
2064 result = expand_shift (LSHIFT_EXPR, word_mode, result,
2065 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2066 return expand_shift (RSHIFT_EXPR, word_mode, result,
2067 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2068 }
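/* Host-side illustration (not part of expmed.c) of the split extraction,
 for an unsigned 16-bit field that starts at bit 24 of a little-endian
 pair of 32-bit words:

 unsigned int
 extract_split_16_at_24 (unsigned int word0, unsigned int word1)
 {
 unsigned int lo = (word0 >> 24) & 0xff;
 unsigned int hi = word1 & 0xff;
 return lo | (hi << 8);
 }

 This mirrors the loop above: each part is fetched as an unsigned field
 with extract_fixed_bit_field, shifted into place, and IORed into the
 result; the two trailing arithmetic shifts then sign-extend the whole
 field when UNSIGNEDP is zero. */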
2069 \f
2070 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2071 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
2072 MODE, fill the upper bits with zeros. Fail if the layout of either
2073 mode is unknown (as for CC modes) or if the extraction would involve
2074 unprofitable mode punning. Return the value on success, otherwise
2075 return null.
2076
2077 This is different from gen_lowpart* in these respects:
2078
2079 - the returned value must always be considered an rvalue
2080
2081 - when MODE is wider than SRC_MODE, the extraction involves
2082 a zero extension
2083
2084 - when MODE is smaller than SRC_MODE, the extraction involves
2085 a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2086
2087 In other words, this routine performs a computation, whereas the
2088 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2089 operations. */
2090
2091 rtx
2092 extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src)
2093 {
2094 enum machine_mode int_mode, src_int_mode;
2095
2096 if (mode == src_mode)
2097 return src;
2098
2099 if (CONSTANT_P (src))
2100 {
2101 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2102 fails, it will happily create (subreg (symbol_ref)) or similar
2103 invalid SUBREGs. */
2104 unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2105 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2106 if (ret)
2107 return ret;
2108
2109 if (GET_MODE (src) == VOIDmode
2110 || !validate_subreg (mode, src_mode, src, byte))
2111 return NULL_RTX;
2112
2113 src = force_reg (GET_MODE (src), src);
2114 return gen_rtx_SUBREG (mode, src, byte);
2115 }
2116
2117 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2118 return NULL_RTX;
2119
2120 if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2121 && MODES_TIEABLE_P (mode, src_mode))
2122 {
2123 rtx x = gen_lowpart_common (mode, src);
2124 if (x)
2125 return x;
2126 }
2127
2128 src_int_mode = int_mode_for_mode (src_mode);
2129 int_mode = int_mode_for_mode (mode);
2130 if (src_int_mode == BLKmode || int_mode == BLKmode)
2131 return NULL_RTX;
2132
2133 if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2134 return NULL_RTX;
2135 if (!MODES_TIEABLE_P (int_mode, mode))
2136 return NULL_RTX;
2137
2138 src = gen_lowpart (src_int_mode, src);
2139 src = convert_modes (int_mode, src_int_mode, src, true);
2140 src = gen_lowpart (mode, src);
2141 return src;
2142 }
2143 \f
2144 /* Add INC into TARGET. */
2145
2146 void
2147 expand_inc (rtx target, rtx inc)
2148 {
2149 rtx value = expand_binop (GET_MODE (target), add_optab,
2150 target, inc,
2151 target, 0, OPTAB_LIB_WIDEN);
2152 if (value != target)
2153 emit_move_insn (target, value);
2154 }
2155
2156 /* Subtract DEC from TARGET. */
2157
2158 void
2159 expand_dec (rtx target, rtx dec)
2160 {
2161 rtx value = expand_binop (GET_MODE (target), sub_optab,
2162 target, dec,
2163 target, 0, OPTAB_LIB_WIDEN);
2164 if (value != target)
2165 emit_move_insn (target, value);
2166 }
2167 \f
2168 /* Output a shift instruction for expression code CODE,
2169 with SHIFTED being the rtx for the value to shift,
2170 and AMOUNT the rtx for the amount to shift by.
2171 Store the result in the rtx TARGET, if that is convenient.
2172 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2173 Return the rtx for where the value is. */
2174
2175 static rtx
2176 expand_shift_1 (enum tree_code code, enum machine_mode mode, rtx shifted,
2177 rtx amount, rtx target, int unsignedp)
2178 {
2179 rtx op1, temp = 0;
2180 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2181 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2182 optab lshift_optab = ashl_optab;
2183 optab rshift_arith_optab = ashr_optab;
2184 optab rshift_uns_optab = lshr_optab;
2185 optab lrotate_optab = rotl_optab;
2186 optab rrotate_optab = rotr_optab;
2187 enum machine_mode op1_mode;
2188 int attempt;
2189 bool speed = optimize_insn_for_speed_p ();
2190
2191 op1 = amount;
2192 op1_mode = GET_MODE (op1);
2193
2194 /* Determine whether the shift/rotate amount is a vector or a scalar. If the
2195 shift amount is a vector, use the vector/vector shift patterns. */
2196 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2197 {
2198 lshift_optab = vashl_optab;
2199 rshift_arith_optab = vashr_optab;
2200 rshift_uns_optab = vlshr_optab;
2201 lrotate_optab = vrotl_optab;
2202 rrotate_optab = vrotr_optab;
2203 }
2204
2205 /* We used to detect shift counts computed by NEGATE_EXPR
2206 and shift in the other direction, but that does not work
2207 on all machines. */
2208
2209 if (SHIFT_COUNT_TRUNCATED)
2210 {
2211 if (CONST_INT_P (op1)
2212 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2213 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2214 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2215 % GET_MODE_BITSIZE (mode));
2216 else if (GET_CODE (op1) == SUBREG
2217 && subreg_lowpart_p (op1)
2218 && INTEGRAL_MODE_P (GET_MODE (SUBREG_REG (op1))))
2219 op1 = SUBREG_REG (op1);
2220 }
2221
2222 if (op1 == const0_rtx)
2223 return shifted;
2224
2225 /* Check whether it's cheaper to implement a left shift by a constant
2226 bit count by a sequence of additions. */
2227 if (code == LSHIFT_EXPR
2228 && CONST_INT_P (op1)
2229 && INTVAL (op1) > 0
2230 && INTVAL (op1) < GET_MODE_PRECISION (mode)
2231 && INTVAL (op1) < MAX_BITS_PER_WORD
2232 && shift_cost[speed][mode][INTVAL (op1)] > INTVAL (op1) * add_cost[speed][mode]
2233 && shift_cost[speed][mode][INTVAL (op1)] != MAX_COST)
2234 {
2235 int i;
2236 for (i = 0; i < INTVAL (op1); i++)
2237 {
2238 temp = force_reg (mode, shifted);
2239 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2240 unsignedp, OPTAB_LIB_WIDEN);
2241 }
2242 return shifted;
2243 }
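/* Purely as an illustration: on a target where the cost test above
 succeeds, X << 2 is expanded as two doublings,

 t1 = x + x;
 t2 = t1 + t1;

 i.e. one addition per bit of the shift count. */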
2244
2245 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2246 {
2247 enum optab_methods methods;
2248
2249 if (attempt == 0)
2250 methods = OPTAB_DIRECT;
2251 else if (attempt == 1)
2252 methods = OPTAB_WIDEN;
2253 else
2254 methods = OPTAB_LIB_WIDEN;
2255
2256 if (rotate)
2257 {
2258 /* Widening does not work for rotation. */
2259 if (methods == OPTAB_WIDEN)
2260 continue;
2261 else if (methods == OPTAB_LIB_WIDEN)
2262 {
2263 /* If we have been unable to open-code this by a rotation,
2264 do it as the IOR of two shifts. I.e., to rotate A
2265 by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2266 where C is the bitsize of A.
2267
2268 It is theoretically possible that the target machine might
2269 not be able to perform either shift and hence we would
2270 be making two libcalls rather than just the one for the
2271 shift (similarly if IOR could not be done). We will allow
2272 this extremely unlikely lossage to avoid complicating the
2273 code below. */
2274
2275 rtx subtarget = target == shifted ? 0 : target;
2276 rtx new_amount, other_amount;
2277 rtx temp1;
2278
2279 new_amount = op1;
2280 if (CONST_INT_P (op1))
2281 other_amount = GEN_INT (GET_MODE_BITSIZE (mode)
2282 - INTVAL (op1));
2283 else
2284 other_amount
2285 = simplify_gen_binary (MINUS, GET_MODE (op1),
2286 GEN_INT (GET_MODE_PRECISION (mode)),
2287 op1);
2288
2289 shifted = force_reg (mode, shifted);
2290
2291 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2292 mode, shifted, new_amount, 0, 1);
2293 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2294 mode, shifted, other_amount,
2295 subtarget, 1);
2296 return expand_binop (mode, ior_optab, temp, temp1, target,
2297 unsignedp, methods);
2298 }
2299
2300 temp = expand_binop (mode,
2301 left ? lrotate_optab : rrotate_optab,
2302 shifted, op1, target, unsignedp, methods);
2303 }
2304 else if (unsignedp)
2305 temp = expand_binop (mode,
2306 left ? lshift_optab : rshift_uns_optab,
2307 shifted, op1, target, unsignedp, methods);
2308
2309 /* Do arithmetic shifts.
2310 Also, if we are going to widen the operand, we can just as well
2311 use an arithmetic right-shift instead of a logical one. */
2312 if (temp == 0 && ! rotate
2313 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2314 {
2315 enum optab_methods methods1 = methods;
2316
2317 /* If trying to widen a log shift to an arithmetic shift,
2318 don't accept an arithmetic shift of the same size. */
2319 if (unsignedp)
2320 methods1 = OPTAB_MUST_WIDEN;
2321
2322 /* Arithmetic shift */
2323
2324 temp = expand_binop (mode,
2325 left ? lshift_optab : rshift_arith_optab,
2326 shifted, op1, target, unsignedp, methods1);
2327 }
2328
2329 /* We used to try extzv here for logical right shifts, but that was
2330 only useful for one machine, the VAX, and caused poor code
2331 generation there for lshrdi3, so the code was deleted and a
2332 define_expand for lshrsi3 was added to vax.md. */
2333 }
2334
2335 gcc_assert (temp);
2336 return temp;
2337 }
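/* An illustrative sketch (not part of expmed.c) of the IOR-of-two-shifts
 fallback used above when no rotate pattern can be open-coded; for a
 32-bit left rotation by N with 0 < N < 32:

 unsigned int
 rotl32 (unsigned int a, int n)
 {
 return (a << n) | (a >> (32 - n));
 }

 At RTL level the second shift count is formed as
 GET_MODE_PRECISION (mode) - N when N is not a constant. */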
2338
2339 /* Output a shift instruction for expression code CODE,
2340 with SHIFTED being the rtx for the value to shift,
2341 and AMOUNT the amount to shift by.
2342 Store the result in the rtx TARGET, if that is convenient.
2343 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2344 Return the rtx for where the value is. */
2345
2346 rtx
2347 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2348 int amount, rtx target, int unsignedp)
2349 {
2350 return expand_shift_1 (code, mode,
2351 shifted, GEN_INT (amount), target, unsignedp);
2352 }
2353
2354 /* Output a shift instruction for expression code CODE,
2355 with SHIFTED being the rtx for the value to shift,
2356 and AMOUNT the tree for the amount to shift by.
2357 Store the result in the rtx TARGET, if that is convenient.
2358 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2359 Return the rtx for where the value is. */
2360
2361 rtx
2362 expand_variable_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2363 tree amount, rtx target, int unsignedp)
2364 {
2365 return expand_shift_1 (code, mode,
2366 shifted, expand_normal (amount), target, unsignedp);
2367 }
2368
2369 \f
2370 /* Indicates the type of fixup needed after a constant multiplication.
2371 BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2372 the result should be negated, and ADD_VARIANT means that the
2373 multiplicand should be added to the result. */
2374 enum mult_variant {basic_variant, negate_variant, add_variant};
2375
2376 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2377 const struct mult_cost *, enum machine_mode mode);
2378 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2379 struct algorithm *, enum mult_variant *, int);
2380 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2381 const struct algorithm *, enum mult_variant);
2382 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2383 static rtx extract_high_half (enum machine_mode, rtx);
2384 static rtx expmed_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2385 static rtx expmed_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2386 int, int);
2387 /* Compute and return the best algorithm for multiplying by T.
2388 The algorithm must cost less than COST_LIMIT.
2389 If retval.cost >= COST_LIMIT, no algorithm was found and all
2390 other fields of the returned struct are undefined.
2391 MODE is the machine mode of the multiplication. */
2392
2393 static void
2394 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2395 const struct mult_cost *cost_limit, enum machine_mode mode)
2396 {
2397 int m;
2398 struct algorithm *alg_in, *best_alg;
2399 struct mult_cost best_cost;
2400 struct mult_cost new_limit;
2401 int op_cost, op_latency;
2402 unsigned HOST_WIDE_INT orig_t = t;
2403 unsigned HOST_WIDE_INT q;
2404 int maxm, hash_index;
2405 bool cache_hit = false;
2406 enum alg_code cache_alg = alg_zero;
2407 bool speed = optimize_insn_for_speed_p ();
2408 enum machine_mode imode;
2409
2410 /* Indicate that no algorithm is yet found. If no algorithm
2411 is found, this value will be returned and indicate failure. */
2412 alg_out->cost.cost = cost_limit->cost + 1;
2413 alg_out->cost.latency = cost_limit->latency + 1;
2414
2415 if (cost_limit->cost < 0
2416 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2417 return;
2418
2419 /* Be prepared for vector modes. */
2420 imode = GET_MODE_INNER (mode);
2421 if (imode == VOIDmode)
2422 imode = mode;
2423
2424 maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2425
2426 /* Restrict the bits of "t" to the multiplication's mode. */
2427 t &= GET_MODE_MASK (imode);
2428
2429 /* t == 1 can be done at zero cost. */
2430 if (t == 1)
2431 {
2432 alg_out->ops = 1;
2433 alg_out->cost.cost = 0;
2434 alg_out->cost.latency = 0;
2435 alg_out->op[0] = alg_m;
2436 return;
2437 }
2438
2439 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2440 fail now. */
2441 if (t == 0)
2442 {
2443 if (MULT_COST_LESS (cost_limit, zero_cost[speed]))
2444 return;
2445 else
2446 {
2447 alg_out->ops = 1;
2448 alg_out->cost.cost = zero_cost[speed];
2449 alg_out->cost.latency = zero_cost[speed];
2450 alg_out->op[0] = alg_zero;
2451 return;
2452 }
2453 }
2454
2455 /* We'll be needing a couple extra algorithm structures now. */
2456
2457 alg_in = XALLOCA (struct algorithm);
2458 best_alg = XALLOCA (struct algorithm);
2459 best_cost = *cost_limit;
2460
2461 /* Compute the hash index. */
2462 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2463
2464 /* See if we already know what to do for T. */
2465 if (alg_hash[hash_index].t == t
2466 && alg_hash[hash_index].mode == mode
2468 && alg_hash[hash_index].speed == speed
2469 && alg_hash[hash_index].alg != alg_unknown)
2470 {
2471 cache_alg = alg_hash[hash_index].alg;
2472
2473 if (cache_alg == alg_impossible)
2474 {
2475 /* The cache tells us that it's impossible to synthesize
2476 multiplication by T within alg_hash[hash_index].cost. */
2477 if (!CHEAPER_MULT_COST (&alg_hash[hash_index].cost, cost_limit))
2478 /* COST_LIMIT is at least as restrictive as the one
2479 recorded in the hash table, in which case we have no
2480 hope of synthesizing a multiplication. Just
2481 return. */
2482 return;
2483
2484 /* If we get here, COST_LIMIT is less restrictive than the
2485 one recorded in the hash table, so we may be able to
2486 synthesize a multiplication. Proceed as if we didn't
2487 have the cache entry. */
2488 }
2489 else
2490 {
2491 if (CHEAPER_MULT_COST (cost_limit, &alg_hash[hash_index].cost))
2492 /* The cached algorithm shows that this multiplication
2493 requires more cost than COST_LIMIT. Just return. This
2494 way, we don't clobber this cache entry with
2495 alg_impossible but retain useful information. */
2496 return;
2497
2498 cache_hit = true;
2499
2500 switch (cache_alg)
2501 {
2502 case alg_shift:
2503 goto do_alg_shift;
2504
2505 case alg_add_t_m2:
2506 case alg_sub_t_m2:
2507 goto do_alg_addsub_t_m2;
2508
2509 case alg_add_factor:
2510 case alg_sub_factor:
2511 goto do_alg_addsub_factor;
2512
2513 case alg_add_t2_m:
2514 goto do_alg_add_t2_m;
2515
2516 case alg_sub_t2_m:
2517 goto do_alg_sub_t2_m;
2518
2519 default:
2520 gcc_unreachable ();
2521 }
2522 }
2523 }
2524
2525 /* If we have a group of zero bits at the low-order part of T, try
2526 multiplying by the remaining bits and then doing a shift. */
2527
2528 if ((t & 1) == 0)
2529 {
2530 do_alg_shift:
2531 m = floor_log2 (t & -t); /* m = number of low zero bits */
2532 if (m < maxm)
2533 {
2534 q = t >> m;
2535 /* The function expand_shift will choose between a shift and
2536 a sequence of additions, so the observed cost is given as
2537 MIN (m * add_cost[speed][mode], shift_cost[speed][mode][m]). */
2538 op_cost = m * add_cost[speed][mode];
2539 if (shift_cost[speed][mode][m] < op_cost)
2540 op_cost = shift_cost[speed][mode][m];
2541 new_limit.cost = best_cost.cost - op_cost;
2542 new_limit.latency = best_cost.latency - op_cost;
2543 synth_mult (alg_in, q, &new_limit, mode);
2544
2545 alg_in->cost.cost += op_cost;
2546 alg_in->cost.latency += op_cost;
2547 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2548 {
2549 struct algorithm *x;
2550 best_cost = alg_in->cost;
2551 x = alg_in, alg_in = best_alg, best_alg = x;
2552 best_alg->log[best_alg->ops] = m;
2553 best_alg->op[best_alg->ops] = alg_shift;
2554 }
2555
2556 /* See if treating ORIG_T as a signed number yields a better
2557 sequence. Try this sequence only for a negative ORIG_T
2558 as it would be useless for a non-negative ORIG_T. */
2559 if ((HOST_WIDE_INT) orig_t < 0)
2560 {
2561 /* Shift ORIG_T as follows because a right shift of a
2562 negative-valued signed type is implementation
2563 defined. */
2564 q = ~(~orig_t >> m);
2565 /* The function expand_shift will choose between a shift
2566 and a sequence of additions, so the observed cost is
2567 given as MIN (m * add_cost[speed][mode],
2568 shift_cost[speed][mode][m]). */
2569 op_cost = m * add_cost[speed][mode];
2570 if (shift_cost[speed][mode][m] < op_cost)
2571 op_cost = shift_cost[speed][mode][m];
2572 new_limit.cost = best_cost.cost - op_cost;
2573 new_limit.latency = best_cost.latency - op_cost;
2574 synth_mult (alg_in, q, &new_limit, mode);
2575
2576 alg_in->cost.cost += op_cost;
2577 alg_in->cost.latency += op_cost;
2578 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2579 {
2580 struct algorithm *x;
2581 best_cost = alg_in->cost;
2582 x = alg_in, alg_in = best_alg, best_alg = x;
2583 best_alg->log[best_alg->ops] = m;
2584 best_alg->op[best_alg->ops] = alg_shift;
2585 }
2586 }
2587 }
2588 if (cache_hit)
2589 goto done;
2590 }
2591
2592 /* If we have an odd number, add or subtract one. */
2593 if ((t & 1) != 0)
2594 {
2595 unsigned HOST_WIDE_INT w;
2596
2597 do_alg_addsub_t_m2:
2598 for (w = 1; (w & t) != 0; w <<= 1)
2599 ;
2600 /* If T was -1, then W will be zero after the loop. This is another
2601 case where T ends with ...111. Handling this by multiplying by
2602 (T + 1) and subtracting 1 produces slightly better code and makes
2603 algorithm selection much faster than treating it like the ...0111 case
2604 below. */
2605 if (w == 0
2606 || (w > 2
2607 /* Reject the case where t is 3.
2608 Thus we prefer addition in that case. */
2609 && t != 3))
2610 {
2611 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
2612
2613 op_cost = add_cost[speed][mode];
2614 new_limit.cost = best_cost.cost - op_cost;
2615 new_limit.latency = best_cost.latency - op_cost;
2616 synth_mult (alg_in, t + 1, &new_limit, mode);
2617
2618 alg_in->cost.cost += op_cost;
2619 alg_in->cost.latency += op_cost;
2620 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2621 {
2622 struct algorithm *x;
2623 best_cost = alg_in->cost;
2624 x = alg_in, alg_in = best_alg, best_alg = x;
2625 best_alg->log[best_alg->ops] = 0;
2626 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2627 }
2628 }
2629 else
2630 {
2631 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
2632
2633 op_cost = add_cost[speed][mode];
2634 new_limit.cost = best_cost.cost - op_cost;
2635 new_limit.latency = best_cost.latency - op_cost;
2636 synth_mult (alg_in, t - 1, &new_limit, mode);
2637
2638 alg_in->cost.cost += op_cost;
2639 alg_in->cost.latency += op_cost;
2640 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2641 {
2642 struct algorithm *x;
2643 best_cost = alg_in->cost;
2644 x = alg_in, alg_in = best_alg, best_alg = x;
2645 best_alg->log[best_alg->ops] = 0;
2646 best_alg->op[best_alg->ops] = alg_add_t_m2;
2647 }
2648 }
2649
2650 /* We may be able to calculate a * -7, a * -15, a * -31, etc
2651 quickly with a - a * n for some appropriate constant n. */
2652 m = exact_log2 (-orig_t + 1);
2653 if (m >= 0 && m < maxm)
2654 {
2655 op_cost = shiftsub1_cost[speed][mode][m];
2656 new_limit.cost = best_cost.cost - op_cost;
2657 new_limit.latency = best_cost.latency - op_cost;
2658 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2659 &new_limit, mode);
2660
2661 alg_in->cost.cost += op_cost;
2662 alg_in->cost.latency += op_cost;
2663 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2664 {
2665 struct algorithm *x;
2666 best_cost = alg_in->cost;
2667 x = alg_in, alg_in = best_alg, best_alg = x;
2668 best_alg->log[best_alg->ops] = m;
2669 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2670 }
2671 }
2672
2673 if (cache_hit)
2674 goto done;
2675 }
2676
2677 /* Look for factors of t of the form
2678 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2679 If we find such a factor, we can multiply by t using an algorithm that
2680 multiplies by q, shift the result by m and add/subtract it to itself.
2681
2682 We search for large factors first and loop down, even if large factors
2683 are less probable than small; if we find a large factor we will find a
2684 good sequence quickly, and therefore be able to prune (by decreasing
2685 COST_LIMIT) the search. */
2686
2687 do_alg_addsub_factor:
2688 for (m = floor_log2 (t - 1); m >= 2; m--)
2689 {
2690 unsigned HOST_WIDE_INT d;
2691
2692 d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2693 if (t % d == 0 && t > d && m < maxm
2694 && (!cache_hit || cache_alg == alg_add_factor))
2695 {
2696 /* If the target has a cheap shift-and-add instruction use
2697 that in preference to a shift insn followed by an add insn.
2698 Assume that the shift-and-add is "atomic" with a latency
2699 equal to its cost, otherwise assume that on superscalar
2700 hardware the shift may be executed concurrently with the
2701 earlier steps in the algorithm. */
2702 op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
2703 if (shiftadd_cost[speed][mode][m] < op_cost)
2704 {
2705 op_cost = shiftadd_cost[speed][mode][m];
2706 op_latency = op_cost;
2707 }
2708 else
2709 op_latency = add_cost[speed][mode];
2710
2711 new_limit.cost = best_cost.cost - op_cost;
2712 new_limit.latency = best_cost.latency - op_latency;
2713 synth_mult (alg_in, t / d, &new_limit, mode);
2714
2715 alg_in->cost.cost += op_cost;
2716 alg_in->cost.latency += op_latency;
2717 if (alg_in->cost.latency < op_cost)
2718 alg_in->cost.latency = op_cost;
2719 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2720 {
2721 struct algorithm *x;
2722 best_cost = alg_in->cost;
2723 x = alg_in, alg_in = best_alg, best_alg = x;
2724 best_alg->log[best_alg->ops] = m;
2725 best_alg->op[best_alg->ops] = alg_add_factor;
2726 }
2727 /* Other factors will have been taken care of in the recursion. */
2728 break;
2729 }
2730
2731 d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2732 if (t % d == 0 && t > d && m < maxm
2733 && (!cache_hit || cache_alg == alg_sub_factor))
2734 {
2735 /* If the target has a cheap shift-and-subtract insn use
2736 that in preference to a shift insn followed by a sub insn.
2737 Assume that the shift-and-sub is "atomic" with a latency
2738 equal to its cost, otherwise assume that on superscalar
2739 hardware the shift may be executed concurrently with the
2740 earlier steps in the algorithm. */
2741 op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
2742 if (shiftsub0_cost[speed][mode][m] < op_cost)
2743 {
2744 op_cost = shiftsub0_cost[speed][mode][m];
2745 op_latency = op_cost;
2746 }
2747 else
2748 op_latency = add_cost[speed][mode];
2749
2750 new_limit.cost = best_cost.cost - op_cost;
2751 new_limit.latency = best_cost.latency - op_latency;
2752 synth_mult (alg_in, t / d, &new_limit, mode);
2753
2754 alg_in->cost.cost += op_cost;
2755 alg_in->cost.latency += op_latency;
2756 if (alg_in->cost.latency < op_cost)
2757 alg_in->cost.latency = op_cost;
2758 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2759 {
2760 struct algorithm *x;
2761 best_cost = alg_in->cost;
2762 x = alg_in, alg_in = best_alg, best_alg = x;
2763 best_alg->log[best_alg->ops] = m;
2764 best_alg->op[best_alg->ops] = alg_sub_factor;
2765 }
2766 break;
2767 }
2768 }
2769 if (cache_hit)
2770 goto done;
2771
2772 /* Try shift-and-add (load effective address) instructions,
2773 i.e. do a*3, a*5, a*9. */
2774 if ((t & 1) != 0)
2775 {
2776 do_alg_add_t2_m:
2777 q = t - 1;
2778 q = q & -q;
2779 m = exact_log2 (q);
2780 if (m >= 0 && m < maxm)
2781 {
2782 op_cost = shiftadd_cost[speed][mode][m];
2783 new_limit.cost = best_cost.cost - op_cost;
2784 new_limit.latency = best_cost.latency - op_cost;
2785 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2786
2787 alg_in->cost.cost += op_cost;
2788 alg_in->cost.latency += op_cost;
2789 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2790 {
2791 struct algorithm *x;
2792 best_cost = alg_in->cost;
2793 x = alg_in, alg_in = best_alg, best_alg = x;
2794 best_alg->log[best_alg->ops] = m;
2795 best_alg->op[best_alg->ops] = alg_add_t2_m;
2796 }
2797 }
2798 if (cache_hit)
2799 goto done;
2800
2801 do_alg_sub_t2_m:
2802 q = t + 1;
2803 q = q & -q;
2804 m = exact_log2 (q);
2805 if (m >= 0 && m < maxm)
2806 {
2807 op_cost = shiftsub0_cost[speed][mode][m];
2808 new_limit.cost = best_cost.cost - op_cost;
2809 new_limit.latency = best_cost.latency - op_cost;
2810 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2811
2812 alg_in->cost.cost += op_cost;
2813 alg_in->cost.latency += op_cost;
2814 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2815 {
2816 struct algorithm *x;
2817 best_cost = alg_in->cost;
2818 x = alg_in, alg_in = best_alg, best_alg = x;
2819 best_alg->log[best_alg->ops] = m;
2820 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2821 }
2822 }
2823 if (cache_hit)
2824 goto done;
2825 }
2826
2827 done:
2828 /* If best_cost has not decreased, we have not found any algorithm. */
2829 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2830 {
2831 /* We failed to find an algorithm. Record alg_impossible for
2832 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2833 we are asked to find an algorithm for T within the same or
2834 lower COST_LIMIT, we can immediately return to the
2835 caller. */
2836 alg_hash[hash_index].t = t;
2837 alg_hash[hash_index].mode = mode;
2838 alg_hash[hash_index].speed = speed;
2839 alg_hash[hash_index].alg = alg_impossible;
2840 alg_hash[hash_index].cost = *cost_limit;
2841 return;
2842 }
2843
2844 /* Cache the result. */
2845 if (!cache_hit)
2846 {
2847 alg_hash[hash_index].t = t;
2848 alg_hash[hash_index].mode = mode;
2849 alg_hash[hash_index].speed = speed;
2850 alg_hash[hash_index].alg = best_alg->op[best_alg->ops];
2851 alg_hash[hash_index].cost.cost = best_cost.cost;
2852 alg_hash[hash_index].cost.latency = best_cost.latency;
2853 }
2854
2855 /* If we are getting too long a sequence for `struct algorithm'
2856 to record, make this search fail. */
2857 if (best_alg->ops == MAX_BITS_PER_WORD)
2858 return;
2859
2860 /* Copy the algorithm from temporary space to the space at alg_out.
2861 We avoid using structure assignment because the majority of
2862 best_alg is normally undefined, and this is a critical function. */
2863 alg_out->ops = best_alg->ops + 1;
2864 alg_out->cost = best_cost;
2865 memcpy (alg_out->op, best_alg->op,
2866 alg_out->ops * sizeof *alg_out->op);
2867 memcpy (alg_out->log, best_alg->log,
2868 alg_out->ops * sizeof *alg_out->log);
2869 }
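/* Worked example (one possible outcome; the winner depends on the cost
 tables): for t = 10 on a target with a cheap shift-and-add instruction,
 synth_mult can return the three-operation algorithm

 op[0] = alg_m          accum = x                  val_so_far = 1
 op[1] = alg_add_t2_m   accum = (accum << 2) + x   val_so_far = 5
 op[2] = alg_shift      accum = accum << 1         val_so_far = 10

 which expand_mult_const below turns into RTL, provided its total cost
 beats the target's multiply cost. */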
2870 \f
2871 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2872 Try three variations:
2873
2874 - a shift/add sequence based on VAL itself
2875 - a shift/add sequence based on -VAL, followed by a negation
2876 - a shift/add sequence based on VAL - 1, followed by an addition.
2877
2878 Return true if the cheapest of these cost less than MULT_COST,
2879 describing the algorithm in *ALG and final fixup in *VARIANT. */
2880
2881 static bool
2882 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2883 struct algorithm *alg, enum mult_variant *variant,
2884 int mult_cost)
2885 {
2886 struct algorithm alg2;
2887 struct mult_cost limit;
2888 int op_cost;
2889 bool speed = optimize_insn_for_speed_p ();
2890
2891 /* Fail quickly for impossible bounds. */
2892 if (mult_cost < 0)
2893 return false;
2894
2895 /* Ensure that mult_cost provides a reasonable upper bound.
2896 Any constant multiplication can be performed with fewer
2897 than 2 * bits additions. */
2898 op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost[speed][mode];
2899 if (mult_cost > op_cost)
2900 mult_cost = op_cost;
2901
2902 *variant = basic_variant;
2903 limit.cost = mult_cost;
2904 limit.latency = mult_cost;
2905 synth_mult (alg, val, &limit, mode);
2906
2907 /* This works only if the inverted value actually fits in an
2908 `unsigned int'. */
2909 if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
2910 {
2911 op_cost = neg_cost[speed][mode];
2912 if (MULT_COST_LESS (&alg->cost, mult_cost))
2913 {
2914 limit.cost = alg->cost.cost - op_cost;
2915 limit.latency = alg->cost.latency - op_cost;
2916 }
2917 else
2918 {
2919 limit.cost = mult_cost - op_cost;
2920 limit.latency = mult_cost - op_cost;
2921 }
2922
2923 synth_mult (&alg2, -val, &limit, mode);
2924 alg2.cost.cost += op_cost;
2925 alg2.cost.latency += op_cost;
2926 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2927 *alg = alg2, *variant = negate_variant;
2928 }
2929
2930 /* This proves very useful for division-by-constant. */
2931 op_cost = add_cost[speed][mode];
2932 if (MULT_COST_LESS (&alg->cost, mult_cost))
2933 {
2934 limit.cost = alg->cost.cost - op_cost;
2935 limit.latency = alg->cost.latency - op_cost;
2936 }
2937 else
2938 {
2939 limit.cost = mult_cost - op_cost;
2940 limit.latency = mult_cost - op_cost;
2941 }
2942
2943 synth_mult (&alg2, val - 1, &limit, mode);
2944 alg2.cost.cost += op_cost;
2945 alg2.cost.latency += op_cost;
2946 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2947 *alg = alg2, *variant = add_variant;
2948
2949 return MULT_COST_LESS (&alg->cost, mult_cost);
2950 }
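/* Illustration of the three variants tried above: for val = 7 the basic
 variant gives (x << 3) - x; for val = -7 the negate variant synthesizes
 x * 7 and then negates the result; and the add variant would handle
 val = 7 as ((x << 1) + x) << 1 followed by one more addition of x,
 whichever of these the cost comparison happens to prefer. */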
2951
2952 /* A subroutine of expand_mult, used for constant multiplications.
2953 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2954 convenient. Use the shift/add sequence described by ALG and apply
2955 the final fixup specified by VARIANT. */
2956
2957 static rtx
2958 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2959 rtx target, const struct algorithm *alg,
2960 enum mult_variant variant)
2961 {
2962 HOST_WIDE_INT val_so_far;
2963 rtx insn, accum, tem;
2964 int opno;
2965 enum machine_mode nmode;
2966
2967 /* Avoid referencing memory over and over and invalid sharing
2968 on SUBREGs. */
2969 op0 = force_reg (mode, op0);
2970
2971 /* ACCUM starts out either as OP0 or as a zero, depending on
2972 the first operation. */
2973
2974 if (alg->op[0] == alg_zero)
2975 {
2976 accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
2977 val_so_far = 0;
2978 }
2979 else if (alg->op[0] == alg_m)
2980 {
2981 accum = copy_to_mode_reg (mode, op0);
2982 val_so_far = 1;
2983 }
2984 else
2985 gcc_unreachable ();
2986
2987 for (opno = 1; opno < alg->ops; opno++)
2988 {
2989 int log = alg->log[opno];
2990 rtx shift_subtarget = optimize ? 0 : accum;
2991 rtx add_target
2992 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2993 && !optimize)
2994 ? target : 0;
2995 rtx accum_target = optimize ? 0 : accum;
2996 rtx accum_inner;
2997
2998 switch (alg->op[opno])
2999 {
3000 case alg_shift:
3001 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3002 /* REG_EQUAL note will be attached to the following insn. */
3003 emit_move_insn (accum, tem);
3004 val_so_far <<= log;
3005 break;
3006
3007 case alg_add_t_m2:
3008 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3009 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3010 add_target ? add_target : accum_target);
3011 val_so_far += (HOST_WIDE_INT) 1 << log;
3012 break;
3013
3014 case alg_sub_t_m2:
3015 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3016 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3017 add_target ? add_target : accum_target);
3018 val_so_far -= (HOST_WIDE_INT) 1 << log;
3019 break;
3020
3021 case alg_add_t2_m:
3022 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3023 log, shift_subtarget, 0);
3024 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3025 add_target ? add_target : accum_target);
3026 val_so_far = (val_so_far << log) + 1;
3027 break;
3028
3029 case alg_sub_t2_m:
3030 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3031 log, shift_subtarget, 0);
3032 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3033 add_target ? add_target : accum_target);
3034 val_so_far = (val_so_far << log) - 1;
3035 break;
3036
3037 case alg_add_factor:
3038 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3039 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3040 add_target ? add_target : accum_target);
3041 val_so_far += val_so_far << log;
3042 break;
3043
3044 case alg_sub_factor:
3045 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3046 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3047 (add_target
3048 ? add_target : (optimize ? 0 : tem)));
3049 val_so_far = (val_so_far << log) - val_so_far;
3050 break;
3051
3052 default:
3053 gcc_unreachable ();
3054 }
3055
3056 if (SCALAR_INT_MODE_P (mode))
3057 {
3058 /* Write a REG_EQUAL note on the last insn so that we can cse
3059 multiplication sequences. Note that if ACCUM is a SUBREG,
3060 we've set the inner register and must properly indicate that. */
3061 tem = op0, nmode = mode;
3062 accum_inner = accum;
3063 if (GET_CODE (accum) == SUBREG)
3064 {
3065 accum_inner = SUBREG_REG (accum);
3066 nmode = GET_MODE (accum_inner);
3067 tem = gen_lowpart (nmode, op0);
3068 }
3069
3070 insn = get_last_insn ();
3071 set_dst_reg_note (insn, REG_EQUAL,
3072 gen_rtx_MULT (nmode, tem, GEN_INT (val_so_far)),
3073 accum_inner);
3074 }
3075 }
3076
3077 if (variant == negate_variant)
3078 {
3079 val_so_far = -val_so_far;
3080 accum = expand_unop (mode, neg_optab, accum, target, 0);
3081 }
3082 else if (variant == add_variant)
3083 {
3084 val_so_far = val_so_far + 1;
3085 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3086 }
3087
3088 /* Compare only the bits of val and val_so_far that are significant
3089 in the result mode, to avoid sign-/zero-extension confusion. */
3090 nmode = GET_MODE_INNER (mode);
3091 if (nmode == VOIDmode)
3092 nmode = mode;
3093 val &= GET_MODE_MASK (nmode);
3094 val_so_far &= GET_MODE_MASK (nmode);
3095 gcc_assert (val == val_so_far);
3096
3097 return accum;
3098 }
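/* Illustrative trace (one possible costing outcome): with val = 7 and the
 algorithm { alg_m; alg_shift log 3; alg_sub_t_m2 log 0 } the loop above
 emits, in effect,

 accum = x;               val_so_far = 1
 accum = accum << 3;      val_so_far = 8
 accum = accum - x;       val_so_far = 7

 attaches a REG_EQUAL note of (mult x 7) to the last insn, and finally
 asserts that val_so_far equals VAL within the mask of the result mode. */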
3099
3100 /* Perform a multiplication and return an rtx for the result.
3101 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3102 TARGET is a suggestion for where to store the result (an rtx).
3103
3104 We check specially for a constant integer as OP1.
3105 If you want this check for OP0 as well, then before calling
3106 you should swap the two operands if OP0 would be constant. */
3107
3108 rtx
3109 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3110 int unsignedp)
3111 {
3112 enum mult_variant variant;
3113 struct algorithm algorithm;
3114 rtx scalar_op1;
3115 int max_cost;
3116 bool speed = optimize_insn_for_speed_p ();
3117 bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3118
3119 if (CONSTANT_P (op0))
3120 {
3121 rtx temp = op0;
3122 op0 = op1;
3123 op1 = temp;
3124 }
3125
3126 /* For vectors, there are several simplifications that can be made if
3127 all elements of the vector constant are identical. */
3128 scalar_op1 = op1;
3129 if (GET_CODE (op1) == CONST_VECTOR)
3130 {
3131 int i, n = CONST_VECTOR_NUNITS (op1);
3132 scalar_op1 = CONST_VECTOR_ELT (op1, 0);
3133 for (i = 1; i < n; ++i)
3134 if (!rtx_equal_p (scalar_op1, CONST_VECTOR_ELT (op1, i)))
3135 goto skip_scalar;
3136 }
3137
3138 if (INTEGRAL_MODE_P (mode))
3139 {
3140 rtx fake_reg;
3141 HOST_WIDE_INT coeff = 0;
3142 bool is_neg = false;
3143 int mode_bitsize;
3144
3145 if (op1 == CONST0_RTX (mode))
3146 return op1;
3147 if (op1 == CONST1_RTX (mode))
3148 return op0;
3149 if (op1 == CONSTM1_RTX (mode))
3150 return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3151 op0, target, 0);
3152
3153 if (do_trapv)
3154 goto skip_synth;
3155
3156 /* These are the operations that are potentially turned into
3157 a sequence of shifts and additions. */
3158 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3159
3160 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3161 less than or equal in size to `unsigned int' this doesn't matter.
3162 If the mode is larger than `unsigned int', then synth_mult works
3163 only if the constant value exactly fits in an `unsigned int' without
3164 any truncation. This means that multiplying by negative values does
3165 not work; results are off by 2^32 on a 32 bit machine. */
3166
3167 if (CONST_INT_P (scalar_op1))
3168 {
3169 coeff = INTVAL (scalar_op1);
3170 is_neg = coeff < 0;
3171 }
3172 else if (CONST_DOUBLE_P (scalar_op1))
3173 {
3174 /* If we are multiplying in DImode, it may still be a win
3175 to try to work with shifts and adds. */
3176 if (CONST_DOUBLE_HIGH (scalar_op1) == 0
3177 && CONST_DOUBLE_LOW (scalar_op1) > 0)
3178 {
3179 coeff = CONST_DOUBLE_LOW (scalar_op1);
3180 is_neg = false;
3181 }
3182 else if (CONST_DOUBLE_LOW (scalar_op1) == 0)
3183 {
3184 coeff = CONST_DOUBLE_HIGH (scalar_op1);
3185 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3186 {
3187 int shift = floor_log2 (coeff) + HOST_BITS_PER_WIDE_INT;
3188 if (shift < HOST_BITS_PER_DOUBLE_INT - 1
3189 || mode_bitsize <= HOST_BITS_PER_DOUBLE_INT)
3190 return expand_shift (LSHIFT_EXPR, mode, op0,
3191 shift, target, unsignedp);
3192 }
3193 goto skip_synth;
3194 }
3195 }
3196 else
3197 goto skip_synth;
3198
3199 /* We used to test optimize here, on the grounds that it's better to
3200 produce a smaller program when -O is not used. But this causes
3201 such a terrible slowdown sometimes that it seems better to always
3202 use synth_mult. */
3203
3204 /* Special case powers of two. */
3205 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3206 return expand_shift (LSHIFT_EXPR, mode, op0,
3207 floor_log2 (coeff), target, unsignedp);
3208
3209 fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3210
3211 /* Attempt to handle multiplication of DImode values by negative
3212 coefficients, by performing the multiplication by a positive
3213 multiplier and then inverting the result. */
3214 /* ??? How is this not slightly redundant with the neg variant? */
3215 if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3216 {
3217 /* It's safe to use -coeff even for INT_MIN, as the
3218 result is interpreted as an unsigned coefficient.
3219 Exclude cost of op0 from max_cost to match the cost
3220 calculation of the synth_mult. */
3221 max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed)
3222 - neg_cost[speed][mode]);
3223 if (max_cost > 0
3224 && choose_mult_variant (mode, -coeff, &algorithm,
3225 &variant, max_cost))
3226 {
3227 rtx temp = expand_mult_const (mode, op0, -coeff, NULL_RTX,
3228 &algorithm, variant);
3229 return expand_unop (mode, neg_optab, temp, target, 0);
3230 }
3231 }
3232
3233 /* Exclude cost of op0 from max_cost to match the cost
3234 calculation of the synth_mult. */
3235 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed);
3236 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3237 return expand_mult_const (mode, op0, coeff, target,
3238 &algorithm, variant);
3239 }
3240 skip_synth:
3241
3242 /* Expand x*2.0 as x+x. */
3243 if (GET_CODE (scalar_op1) == CONST_DOUBLE && FLOAT_MODE_P (mode))
3244 {
3245 REAL_VALUE_TYPE d;
3246 REAL_VALUE_FROM_CONST_DOUBLE (d, scalar_op1);
3247
3248 if (REAL_VALUES_EQUAL (d, dconst2))
3249 {
3250 op0 = force_reg (GET_MODE (op0), op0);
3251 return expand_binop (mode, add_optab, op0, op0,
3252 target, unsignedp, OPTAB_LIB_WIDEN);
3253 }
3254 }
3255 skip_scalar:
3256
3257 /* This used to use umul_optab if unsigned, but for non-widening multiply
3258 there is no difference between signed and unsigned. */
3259 op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3260 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3261 gcc_assert (op0);
3262 return op0;
3263 }
3264
3265 /* Perform a widening multiplication and return an rtx for the result.
3266 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3267 TARGET is a suggestion for where to store the result (an rtx).
3268 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3269 or smul_widen_optab.
3270
3271 We check specially for a constant integer as OP1, comparing the
3272 cost of a widening multiply against the cost of a sequence of shifts
3273 and adds. */
3274
3275 rtx
3276 expand_widening_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3277 int unsignedp, optab this_optab)
3278 {
3279 bool speed = optimize_insn_for_speed_p ();
3280 rtx cop1;
3281
3282 if (CONST_INT_P (op1)
3283 && GET_MODE (op0) != VOIDmode
3284 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3285 this_optab == umul_widen_optab))
3286 && CONST_INT_P (cop1)
3287 && (INTVAL (cop1) >= 0
3288 || HWI_COMPUTABLE_MODE_P (mode)))
3289 {
3290 HOST_WIDE_INT coeff = INTVAL (cop1);
3291 int max_cost;
3292 enum mult_variant variant;
3293 struct algorithm algorithm;
3294
3295 /* Special case powers of two. */
3296 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3297 {
3298 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3299 return expand_shift (LSHIFT_EXPR, mode, op0,
3300 floor_log2 (coeff), target, unsignedp);
3301 }
3302
3303 /* Exclude cost of op0 from max_cost to match the cost
3304 calculation of the synth_mult. */
3305 max_cost = mul_widen_cost[speed][mode];
3306 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3307 max_cost))
3308 {
3309 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3310 return expand_mult_const (mode, op0, coeff, target,
3311 &algorithm, variant);
3312 }
3313 }
3314 return expand_binop (mode, this_optab, op0, op1, target,
3315 unsignedp, OPTAB_LIB_WIDEN);
3316 }
3317 \f
3318 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3319 replace division by D, and put the least significant N bits of the result
3320 in *MULTIPLIER_PTR and return the most significant bit.
3321
3322 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3323 needed precision is in PRECISION (should be <= N).
3324
3325 PRECISION should be as small as possible so this function can choose
3326 the multiplier more freely.
3327
3328 The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that
3329 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3330
3331 Using this function, x/D will be equal to (x * m) >> (N + *POST_SHIFT_PTR),
3332 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
3333
3334 unsigned HOST_WIDE_INT
3335 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3336 unsigned HOST_WIDE_INT *multiplier_ptr,
3337 int *post_shift_ptr, int *lgup_ptr)
3338 {
3339 HOST_WIDE_INT mhigh_hi, mlow_hi;
3340 unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
3341 int lgup, post_shift;
3342 int pow, pow2;
3343 unsigned HOST_WIDE_INT nl, dummy1;
3344 HOST_WIDE_INT nh, dummy2;
3345
3346 /* lgup = ceil(log2(divisor)); */
3347 lgup = ceil_log2 (d);
3348
3349 gcc_assert (lgup <= n);
3350
3351 pow = n + lgup;
3352 pow2 = n + lgup - precision;
3353
3354 /* We could handle this with some effort, but this case is much
3355 better handled directly with a scc insn, so rely on caller using
3356 that. */
3357 gcc_assert (pow != HOST_BITS_PER_DOUBLE_INT);
3358
3359 /* mlow = 2^(N + lgup)/d */
3360 if (pow >= HOST_BITS_PER_WIDE_INT)
3361 {
3362 nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
3363 nl = 0;
3364 }
3365 else
3366 {
3367 nh = 0;
3368 nl = (unsigned HOST_WIDE_INT) 1 << pow;
3369 }
3370 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3371 &mlow_lo, &mlow_hi, &dummy1, &dummy2);
3372
3373 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3374 if (pow2 >= HOST_BITS_PER_WIDE_INT)
3375 nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
3376 else
3377 nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
3378 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3379 &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
3380
3381 gcc_assert (!mhigh_hi || nh - d < d);
3382 gcc_assert (mhigh_hi <= 1 && mlow_hi <= 1);
3383 /* Assert that mlow < mhigh. */
3384 gcc_assert (mlow_hi < mhigh_hi
3385 || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo));
3386
3387 /* If precision == N, then mlow, mhigh exceed 2^N
3388 (but they do not exceed 2^(N+1)). */
3389
3390 /* Reduce to lowest terms. */
3391 for (post_shift = lgup; post_shift > 0; post_shift--)
3392 {
3393 unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
3394 unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
3395 if (ml_lo >= mh_lo)
3396 break;
3397
3398 mlow_hi = 0;
3399 mlow_lo = ml_lo;
3400 mhigh_hi = 0;
3401 mhigh_lo = mh_lo;
3402 }
3403
3404 *post_shift_ptr = post_shift;
3405 *lgup_ptr = lgup;
3406 if (n < HOST_BITS_PER_WIDE_INT)
3407 {
3408 unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3409 *multiplier_ptr = mhigh_lo & mask;
3410 return mhigh_lo >= mask;
3411 }
3412 else
3413 {
3414 *multiplier_ptr = mhigh_lo;
3415 return mhigh_hi;
3416 }
3417 }
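
/* Editor's worked example (standalone sketch, worked out by hand from the
   description above rather than produced by this code): for D = 7 and
   N = PRECISION = 32, the N + 1 bit multiplier is (2^35 + 2^3)/7 =
   0x124924925 with *POST_SHIFT_PTR = 3; *MULTIPLIER_PTR gets the low 32
   bits 0x24924925 and the return value is 1 (the 33rd bit).  The
   TRUNC_DIV_EXPR code in expand_divmod below then emits the following
   branch-free unsigned division by 7 (assumes a 32-bit unsigned int and a
   64-bit unsigned long long).  */

static unsigned int
example_udiv_by_7 (unsigned int x)
{
  unsigned int t1
    = (unsigned int) (((unsigned long long) x * 0x24924925u) >> 32);
  unsigned int t2 = (x - t1) >> 1;      /* t1 <= x, so no wraparound */
  return (t1 + t2) >> 2;                /* shift by post_shift - 1 */
}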
3418
3419 /* Compute the inverse of X mod 2**N, i.e., find Y such that X * Y is
3420 congruent to 1 (mod 2**N). */
3421
3422 static unsigned HOST_WIDE_INT
3423 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3424 {
3425 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3426
3427 /* The algorithm notes that the choice y = x satisfies
3428 x*y == 1 mod 2^3, since x is assumed odd.
3429 Each iteration doubles the number of bits of significance in y. */
3430
3431 unsigned HOST_WIDE_INT mask;
3432 unsigned HOST_WIDE_INT y = x;
3433 int nbit = 3;
3434
3435 mask = (n == HOST_BITS_PER_WIDE_INT
3436 ? ~(unsigned HOST_WIDE_INT) 0
3437 : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3438
3439 while (nbit < n)
3440 {
3441 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3442 nbit *= 2;
3443 }
3444 return y;
3445 }
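
/* Editor's illustrative sketch (standalone, not used by this file): the
   same Newton iteration written for a fixed 32-bit width, together with
   the use the EXACT_DIV_EXPR case below makes of the inverse.  Assumes a
   32-bit unsigned int; X must be odd.  */

static unsigned int
example_invert_mod2_32 (unsigned int x)
{
  unsigned int y = x;                   /* correct to 3 bits for odd x */
  int nbit;

  for (nbit = 3; nbit < 32; nbit *= 2)
    y = y * (2 - x * y);                /* each step doubles the correct bits */
  return y;
}

/* For instance example_invert_mod2_32 (3) == 0xAAAAAAAB, and for any X that
   is an exact multiple of 3, X * 0xAAAAAAAB (mod 2^32) == X / 3; e.g.
   9 * 0xAAAAAAAB == 0x600000003, whose low 32 bits are 3.  */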
3446
3447 /* Emit code to adjust ADJ_OPERAND after a multiplication of OP0 and OP1
3448 that used the wrong signedness flavor. ADJ_OPERAND is already the high
3449 half of the product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed
3450 product to become unsigned; if UNSIGNEDP is zero, adjust the unsigned product to
3451 become signed.
3452
3453 The result is put in TARGET if that is convenient.
3454
3455 MODE is the mode of operation. */
3456
3457 rtx
3458 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3459 rtx op1, rtx target, int unsignedp)
3460 {
3461 rtx tem;
3462 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3463
3464 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3465 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3466 tem = expand_and (mode, tem, op1, NULL_RTX);
3467 adj_operand
3468 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3469 adj_operand);
3470
3471 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3472 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3473 tem = expand_and (mode, tem, op0, NULL_RTX);
3474 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3475 target);
3476
3477 return target;
3478 }
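
/* Editor's note on the identity behind the adjustment above (standalone
   sketch for a 32-bit mode; assumes 32-bit int, 64-bit long long, two's
   complement and an arithmetic right shift for signed values): modulo 2^32,
   the unsigned high-part product equals the signed high-part product plus
   OP1 when OP0 is negative and plus OP0 when OP1 is negative; with MINUS
   instead of PLUS the correction runs the other way.  */

static unsigned int
example_umulhi_from_smulhi (int a, int b)
{
  unsigned int shigh = (unsigned int) (((long long) a * b) >> 32);
  unsigned int adj = shigh + (a < 0 ? (unsigned int) b : 0);
  return adj + (b < 0 ? (unsigned int) a : 0);
  /* Equals the unsigned high part:
     (unsigned int) (((unsigned long long) (unsigned int) a
                      * (unsigned int) b) >> 32).  */
}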
3479
3480 /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */
3481
3482 static rtx
3483 extract_high_half (enum machine_mode mode, rtx op)
3484 {
3485 enum machine_mode wider_mode;
3486
3487 if (mode == word_mode)
3488 return gen_highpart (mode, op);
3489
3490 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3491
3492 wider_mode = GET_MODE_WIDER_MODE (mode);
3493 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3494 GET_MODE_BITSIZE (mode), 0, 1);
3495 return convert_modes (mode, wider_mode, op, 0);
3496 }
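
/* Editor's sketch (standalone): the callers below pair a widening multiply
   with this helper; in C terms that combination is just a multiply in the
   wider type followed by a shift by the narrow width, here for a 32-bit
   mode (assumes 32-bit unsigned int, 64-bit unsigned long long).  */

static unsigned int
example_high_half (unsigned int a, unsigned int b)
{
  unsigned long long wide = (unsigned long long) a * b;  /* widening multiply */
  return (unsigned int) (wide >> 32);                    /* keep the top half */
}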
3497
3498 /* Like expmed_mult_highpart, but only consider using a multiplication
3499 optab. OP1 is an rtx for the constant operand. */
3500
3501 static rtx
3502 expmed_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3503 rtx target, int unsignedp, int max_cost)
3504 {
3505 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3506 enum machine_mode wider_mode;
3507 optab moptab;
3508 rtx tem;
3509 int size;
3510 bool speed = optimize_insn_for_speed_p ();
3511
3512 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3513
3514 wider_mode = GET_MODE_WIDER_MODE (mode);
3515 size = GET_MODE_BITSIZE (mode);
3516
3517 /* Firstly, try using a multiplication insn that only generates the needed
3518 high part of the product, and in the sign flavor of unsignedp. */
3519 if (mul_highpart_cost[speed][mode] < max_cost)
3520 {
3521 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3522 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3523 unsignedp, OPTAB_DIRECT);
3524 if (tem)
3525 return tem;
3526 }
3527
3528 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3529 Need to adjust the result after the multiplication. */
3530 if (size - 1 < BITS_PER_WORD
3531 && (mul_highpart_cost[speed][mode] + 2 * shift_cost[speed][mode][size-1]
3532 + 4 * add_cost[speed][mode] < max_cost))
3533 {
3534 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3535 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3536 unsignedp, OPTAB_DIRECT);
3537 if (tem)
3538 /* We used the wrong signedness. Adjust the result. */
3539 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3540 tem, unsignedp);
3541 }
3542
3543 /* Try widening multiplication. */
3544 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3545 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3546 && mul_widen_cost[speed][wider_mode] < max_cost)
3547 {
3548 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3549 unsignedp, OPTAB_WIDEN);
3550 if (tem)
3551 return extract_high_half (mode, tem);
3552 }
3553
3554 /* Try widening the mode and performing a non-widening multiplication. */
3555 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3556 && size - 1 < BITS_PER_WORD
3557 && mul_cost[speed][wider_mode] + shift_cost[speed][mode][size-1] < max_cost)
3558 {
3559 rtx insns, wop0, wop1;
3560
3561 /* We need to widen the operands, for example to ensure the
3562 constant multiplier is correctly sign or zero extended.
3563 Use a sequence to clean up any instructions emitted by
3564 the conversions if things don't work out. */
3565 start_sequence ();
3566 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3567 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3568 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3569 unsignedp, OPTAB_WIDEN);
3570 insns = get_insns ();
3571 end_sequence ();
3572
3573 if (tem)
3574 {
3575 emit_insn (insns);
3576 return extract_high_half (mode, tem);
3577 }
3578 }
3579
3580 /* Try widening multiplication of opposite signedness, and adjust. */
3581 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3582 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3583 && size - 1 < BITS_PER_WORD
3584 && (mul_widen_cost[speed][wider_mode] + 2 * shift_cost[speed][mode][size-1]
3585 + 4 * add_cost[speed][mode] < max_cost))
3586 {
3587 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3588 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3589 if (tem != 0)
3590 {
3591 tem = extract_high_half (mode, tem);
3592 /* We used the wrong signedness. Adjust the result. */
3593 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3594 target, unsignedp);
3595 }
3596 }
3597
3598 return 0;
3599 }
3600
3601 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3602 putting the high half of the result in TARGET if that is convenient,
3603 and returning where the result is. If the operation cannot be performed,
3604 0 is returned.
3605
3606 MODE is the mode of operation and result.
3607
3608 UNSIGNEDP nonzero means unsigned multiply.
3609
3610 MAX_COST is the total allowed cost for the expanded RTL. */
3611
3612 static rtx
3613 expmed_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3614 rtx target, int unsignedp, int max_cost)
3615 {
3616 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3617 unsigned HOST_WIDE_INT cnst1;
3618 int extra_cost;
3619 bool sign_adjust = false;
3620 enum mult_variant variant;
3621 struct algorithm alg;
3622 rtx tem;
3623 bool speed = optimize_insn_for_speed_p ();
3624
3625 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3626 /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */
3627 gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3628
3629 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3630
3631 /* We can't optimize modes wider than BITS_PER_WORD.
3632 ??? We might be able to perform double-word arithmetic if
3633 mode == word_mode, however all the cost calculations in
3634 synth_mult etc. assume single-word operations. */
3635 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3636 return expmed_mult_highpart_optab (mode, op0, op1, target,
3637 unsignedp, max_cost);
3638
3639 extra_cost = shift_cost[speed][mode][GET_MODE_BITSIZE (mode) - 1];
3640
3641 /* Check whether we try to multiply by a negative constant. */
3642 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3643 {
3644 sign_adjust = true;
3645 extra_cost += add_cost[speed][mode];
3646 }
3647
3648 /* See whether shift/add multiplication is cheap enough. */
3649 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3650 max_cost - extra_cost))
3651 {
3652 /* See whether the specialized multiplication optabs are
3653 cheaper than the shift/add version. */
3654 tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3655 alg.cost.cost + extra_cost);
3656 if (tem)
3657 return tem;
3658
3659 tem = convert_to_mode (wider_mode, op0, unsignedp);
3660 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3661 tem = extract_high_half (mode, tem);
3662
3663 /* Adjust result for signedness. */
3664 if (sign_adjust)
3665 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3666
3667 return tem;
3668 }
3669 return expmed_mult_highpart_optab (mode, op0, op1, target,
3670 unsignedp, max_cost);
3671 }
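
/* Editor's note on the sign_adjust path above (standalone sketch for a
   32-bit mode; assumes 32-bit int, 64-bit long long, two's complement and
   an arithmetic right shift for signed values): for a negative constant C,
   the signed high-part product of OP0 and C is the high part of
   OP0 * (C mod 2^32) minus OP0, which is exactly the MINUS emitted above.  */

static unsigned int
example_smulhi_by_negative_const (int a, int c)
{
  /* Assumes c < 0.  */
  unsigned int cu = (unsigned int) c;                        /* C mod 2^32 */
  unsigned int hi = (unsigned int) (((long long) a * cu) >> 32);
  return hi - (unsigned int) a;
  /* Equals (unsigned int) (((long long) a * c) >> 32).  */
}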
3672
3673
3674 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3675
3676 static rtx
3677 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3678 {
3679 unsigned HOST_WIDE_INT masklow, maskhigh;
3680 rtx result, temp, shift, label;
3681 int logd;
3682
3683 logd = floor_log2 (d);
3684 result = gen_reg_rtx (mode);
3685
3686 /* Avoid conditional branches when they're expensive. */
3687 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3688 && optimize_insn_for_speed_p ())
3689 {
3690 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3691 mode, 0, -1);
3692 if (signmask)
3693 {
3694 signmask = force_reg (mode, signmask);
3695 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3696 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3697
3698 /* Use the rtx_cost of an LSHIFTRT instruction to determine
3699 which instruction sequence to use. If logical right shifts
3700 are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
3701 use an LSHIFTRT, 1 ADD, 1 SUB and an AND. */
3702
3703 temp = gen_rtx_LSHIFTRT (mode, result, shift);
3704 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3705 || (set_src_cost (temp, optimize_insn_for_speed_p ())
3706 > COSTS_N_INSNS (2)))
3707 {
3708 temp = expand_binop (mode, xor_optab, op0, signmask,
3709 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3710 temp = expand_binop (mode, sub_optab, temp, signmask,
3711 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3712 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3713 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3714 temp = expand_binop (mode, xor_optab, temp, signmask,
3715 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3716 temp = expand_binop (mode, sub_optab, temp, signmask,
3717 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3718 }
3719 else
3720 {
3721 signmask = expand_binop (mode, lshr_optab, signmask, shift,
3722 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3723 signmask = force_reg (mode, signmask);
3724
3725 temp = expand_binop (mode, add_optab, op0, signmask,
3726 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3727 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3728 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3729 temp = expand_binop (mode, sub_optab, temp, signmask,
3730 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3731 }
3732 return temp;
3733 }
3734 }
3735
3736 /* Mask contains the mode's signbit and the significant bits of the
3737 modulus. By including the signbit in the operation, many targets
3738 can avoid an explicit compare operation in the following comparison
3739 against zero. */
3740
3741 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3742 if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3743 {
3744 masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
3745 maskhigh = -1;
3746 }
3747 else
3748 maskhigh = (HOST_WIDE_INT) -1
3749 << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3750
3751 temp = expand_binop (mode, and_optab, op0,
3752 immed_double_const (masklow, maskhigh, mode),
3753 result, 1, OPTAB_LIB_WIDEN);
3754 if (temp != result)
3755 emit_move_insn (result, temp);
3756
3757 label = gen_label_rtx ();
3758 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3759
3760 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3761 0, OPTAB_LIB_WIDEN);
3762 masklow = (HOST_WIDE_INT) -1 << logd;
3763 maskhigh = -1;
3764 temp = expand_binop (mode, ior_optab, temp,
3765 immed_double_const (masklow, maskhigh, mode),
3766 result, 1, OPTAB_LIB_WIDEN);
3767 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3768 0, OPTAB_LIB_WIDEN);
3769 if (temp != result)
3770 emit_move_insn (result, temp);
3771 emit_label (label);
3772 return result;
3773 }
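
/* Editor's illustrative sketch of the branch-free path above, for a 32-bit
   mode and D = 2^LOGD (standalone; assumes 32-bit int and two's
   complement).  Negative dividends are biased up by D - 1, masked, and
   then un-biased, so the result keeps the sign of X just like
   TRUNC_MOD_EXPR.  The code above materializes the bias either as a
   logical right shift of the all-ones sign mask (cheap-shift variant) or
   folds the sign handling into the XOR/SUB sequence (expensive-shift
   variant); both compute the same value.  */

static int
example_smod_pow2 (int x, int logd)
{
  unsigned int mask = (1u << logd) - 1;         /* D - 1 */
  unsigned int bias = x < 0 ? mask : 0;         /* D - 1 for negative X */

  return (int) ((((unsigned int) x + bias) & mask) - bias);
}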
3774
3775 /* Expand signed division of OP0 by a power of two D in mode MODE.
3776 This routine is only called for positive values of D. */
3777
3778 static rtx
3779 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3780 {
3781 rtx temp, label;
3782 int logd;
3783
3784 logd = floor_log2 (d);
3785
3786 if (d == 2
3787 && BRANCH_COST (optimize_insn_for_speed_p (),
3788 false) >= 1)
3789 {
3790 temp = gen_reg_rtx (mode);
3791 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3792 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3793 0, OPTAB_LIB_WIDEN);
3794 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3795 }
3796
3797 #ifdef HAVE_conditional_move
3798 if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3799 >= 2)
3800 {
3801 rtx temp2;
3802
3803 /* ??? emit_conditional_move forces a stack adjustment via
3804 compare_from_rtx, so if the sequence is discarded, it will
3805 be lost. Do it now instead. */
3806 do_pending_stack_adjust ();
3807
3808 start_sequence ();
3809 temp2 = copy_to_mode_reg (mode, op0);
3810 temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3811 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3812 temp = force_reg (mode, temp);
3813
3814 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3815 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3816 mode, temp, temp2, mode, 0);
3817 if (temp2)
3818 {
3819 rtx seq = get_insns ();
3820 end_sequence ();
3821 emit_insn (seq);
3822 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3823 }
3824 end_sequence ();
3825 }
3826 #endif
3827
3828 if (BRANCH_COST (optimize_insn_for_speed_p (),
3829 false) >= 2)
3830 {
3831 int ushift = GET_MODE_BITSIZE (mode) - logd;
3832
3833 temp = gen_reg_rtx (mode);
3834 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3835 if (shift_cost[optimize_insn_for_speed_p ()][mode][ushift] > COSTS_N_INSNS (1))
3836 temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3837 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3838 else
3839 temp = expand_shift (RSHIFT_EXPR, mode, temp,
3840 ushift, NULL_RTX, 1);
3841 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3842 0, OPTAB_LIB_WIDEN);
3843 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3844 }
3845
3846 label = gen_label_rtx ();
3847 temp = copy_to_mode_reg (mode, op0);
3848 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3849 expand_inc (temp, GEN_INT (d - 1));
3850 emit_label (label);
3851 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3852 }
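
/* Editor's illustrative sketch of the branch-free sequence above, for a
   32-bit mode and D = 2^LOGD (standalone; assumes 32-bit int, two's
   complement and an arithmetic right shift for signed values): biasing a
   negative dividend by D - 1 makes the arithmetic shift round toward zero,
   as TRUNC_DIV_EXPR requires, instead of toward minus infinity.  The
   D == 2 and conditional-move variants above are just cheaper ways of
   forming the same bias.  */

static int
example_sdiv_pow2 (int x, int logd)
{
  int bias = x < 0 ? (1 << logd) - 1 : 0;   /* derived from the sign bit */
  return (x + bias) >> logd;                /* == x / (1 << logd) */
}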
3853 \f
3854 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3855 if that is convenient, and returning where the result is.
3856 You may request either the quotient or the remainder as the result;
3857 specify REM_FLAG nonzero to get the remainder.
3858
3859 CODE is the expression code for which kind of division this is;
3860 it controls how rounding is done. MODE is the machine mode to use.
3861 UNSIGNEDP nonzero means do unsigned division. */
3862
3863 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3864 and then correct it by or'ing in missing high bits
3865 if result of ANDI is nonzero.
3866 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3867 This could optimize to a bfexts instruction.
3868 But C doesn't use these operations, so their optimizations are
3869 left for later. */
3870 /* ??? For modulo, we don't actually need the highpart of the first product,
3871 the low part will do nicely. And for small divisors, the second multiply
3872 can also be a low-part only multiply or even be completely left out.
3873 E.g. to calculate the remainder of a division by 3 with a 32 bit
3874 multiply, multiply with 0x55555556 and extract the upper two bits;
3875 the result is exact for inputs up to 0x1fffffff.
3876 The input range can be reduced by using cross-sum rules.
3877 For odd divisors >= 3, the following table gives right shift counts
3878 so that if a number is shifted by an integer multiple of the given
3879 amount, the remainder stays the same:
3880 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3881 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3882 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3883 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3884 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3885
3886 Cross-sum rules for even numbers can be derived by leaving as many bits
3887 to the right alone as the divisor has zeros to the right.
3888 E.g. if x is an unsigned 32 bit number:
3889 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3890 */
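
/* Editor's sketch of the divide-by-3 remainder trick mentioned above
   (standalone; assumes a 32-bit unsigned int): one wrapping 32-bit
   multiply, then the top two bits of the low half are X mod 3.  As the
   comment says, the result is exact for X up to 0x1fffffff.  */

static unsigned int
example_umod_3 (unsigned int x)
{
  return (x * 0x55555556u) >> 30;
}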
3891
3892 rtx
3893 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3894 rtx op0, rtx op1, rtx target, int unsignedp)
3895 {
3896 enum machine_mode compute_mode;
3897 rtx tquotient;
3898 rtx quotient = 0, remainder = 0;
3899 rtx last;
3900 int size;
3901 rtx insn;
3902 optab optab1, optab2;
3903 int op1_is_constant, op1_is_pow2 = 0;
3904 int max_cost, extra_cost;
3905 static HOST_WIDE_INT last_div_const = 0;
3906 static HOST_WIDE_INT ext_op1;
3907 bool speed = optimize_insn_for_speed_p ();
3908
3909 op1_is_constant = CONST_INT_P (op1);
3910 if (op1_is_constant)
3911 {
3912 ext_op1 = INTVAL (op1);
3913 if (unsignedp)
3914 ext_op1 &= GET_MODE_MASK (mode);
3915 op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3916 || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3917 }
3918
3919 /*
3920 This is the structure of expand_divmod:
3921
3922 First comes code to fix up the operands so we can perform the operations
3923 correctly and efficiently.
3924
3925 Second comes a switch statement with code specific for each rounding mode.
3926 For some special operands this code emits all RTL for the desired
3927 operation, for other cases, it generates only a quotient and stores it in
3928 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
3929 to indicate that it has not done anything.
3930
3931 Last comes code that finishes the operation. If QUOTIENT is set and
3932 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
3933 QUOTIENT is not set, it is computed using trunc rounding.
3934
3935 We try to generate special code for division and remainder when OP1 is a
3936 constant. If |OP1| = 2**n we can use shifts and some other fast
3937 operations. For other values of OP1, we compute a carefully selected
3938 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3939 by m.
3940
3941 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3942 half of the product. Different strategies for generating the product are
3943 implemented in expmed_mult_highpart.
3944
3945 If what we actually want is the remainder, we generate that by another
3946 by-constant multiplication and a subtraction. */
3947
3948 /* We shouldn't be called with OP1 == const1_rtx, but some of the
3949 code below will malfunction if we are, so check here and handle
3950 the special case if so. */
3951 if (op1 == const1_rtx)
3952 return rem_flag ? const0_rtx : op0;
3953
3954 /* When dividing by -1, we could get an overflow.
3955 negv_optab can handle overflows. */
3956 if (! unsignedp && op1 == constm1_rtx)
3957 {
3958 if (rem_flag)
3959 return const0_rtx;
3960 return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3961 ? negv_optab : neg_optab, op0, target, 0);
3962 }
3963
3964 if (target
3965 /* Don't use the function value register as a target
3966 since we have to read it as well as write it,
3967 and function-inlining gets confused by this. */
3968 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3969 /* Don't clobber an operand while doing a multi-step calculation. */
3970 || ((rem_flag || op1_is_constant)
3971 && (reg_mentioned_p (target, op0)
3972 || (MEM_P (op0) && MEM_P (target))))
3973 || reg_mentioned_p (target, op1)
3974 || (MEM_P (op1) && MEM_P (target))))
3975 target = 0;
3976
3977 /* Get the mode in which to perform this computation. Normally it will
3978 be MODE, but sometimes we can't do the desired operation in MODE.
3979 If so, pick a wider mode in which we can do the operation. Convert
3980 to that mode at the start to avoid repeated conversions.
3981
3982 First see what operations we need. These depend on the expression
3983 we are evaluating. (We assume that divxx3 insns exist under the
3984 same conditions that modxx3 insns do, and that these insns don't normally
3985 fail. If these assumptions are not correct, we may generate less
3986 efficient code in some cases.)
3987
3988 Then see if we find a mode in which we can open-code that operation
3989 (either a division, modulus, or shift). Finally, check for the smallest
3990 mode for which we can do the operation with a library call. */
3991
3992 /* We might want to refine this now that we have division-by-constant
3993 optimization. Since expmed_mult_highpart tries so many variants, it is
3994 not straightforward to generalize this. Maybe we should make an array
3995 of possible modes in init_expmed? Save this for GCC 2.7. */
3996
3997 optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3998 ? (unsignedp ? lshr_optab : ashr_optab)
3999 : (unsignedp ? udiv_optab : sdiv_optab));
4000 optab2 = ((op1_is_pow2 && op1 != const0_rtx)
4001 ? optab1
4002 : (unsignedp ? udivmod_optab : sdivmod_optab));
4003
4004 for (compute_mode = mode; compute_mode != VOIDmode;
4005 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4006 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4007 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4008 break;
4009
4010 if (compute_mode == VOIDmode)
4011 for (compute_mode = mode; compute_mode != VOIDmode;
4012 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4013 if (optab_libfunc (optab1, compute_mode)
4014 || optab_libfunc (optab2, compute_mode))
4015 break;
4016
4017 /* If we still couldn't find a mode, use MODE, but expand_binop will
4018 probably die. */
4019 if (compute_mode == VOIDmode)
4020 compute_mode = mode;
4021
4022 if (target && GET_MODE (target) == compute_mode)
4023 tquotient = target;
4024 else
4025 tquotient = gen_reg_rtx (compute_mode);
4026
4027 size = GET_MODE_BITSIZE (compute_mode);
4028 #if 0
4029 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4030 (mode), and thereby get better code when OP1 is a constant. Do that
4031 later. It will require going over all usages of SIZE below. */
4032 size = GET_MODE_BITSIZE (mode);
4033 #endif
4034
4035 /* Only deduct something for a REM if the last divide done was
4036 for a different constant. Then set the constant of the last
4037 divide. */
4038 max_cost = unsignedp ? udiv_cost[speed][compute_mode] : sdiv_cost[speed][compute_mode];
4039 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4040 && INTVAL (op1) == last_div_const))
4041 max_cost -= mul_cost[speed][compute_mode] + add_cost[speed][compute_mode];
4042
4043 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4044
4045 /* Now convert to the best mode to use. */
4046 if (compute_mode != mode)
4047 {
4048 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4049 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4050
4051 /* convert_modes may have placed op1 into a register, so we
4052 must recompute the following. */
4053 op1_is_constant = CONST_INT_P (op1);
4054 op1_is_pow2 = (op1_is_constant
4055 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4056 || (! unsignedp
4057 && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
4058 }
4059
4060 /* If one of the operands is a volatile MEM, copy it into a register. */
4061
4062 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4063 op0 = force_reg (compute_mode, op0);
4064 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4065 op1 = force_reg (compute_mode, op1);
4066
4067 /* If we need the remainder or if OP1 is constant, we need to
4068 put OP0 in a register in case it has any queued subexpressions. */
4069 if (rem_flag || op1_is_constant)
4070 op0 = force_reg (compute_mode, op0);
4071
4072 last = get_last_insn ();
4073
4074 /* Promote floor rounding to trunc rounding for unsigned operations. */
4075 if (unsignedp)
4076 {
4077 if (code == FLOOR_DIV_EXPR)
4078 code = TRUNC_DIV_EXPR;
4079 if (code == FLOOR_MOD_EXPR)
4080 code = TRUNC_MOD_EXPR;
4081 if (code == EXACT_DIV_EXPR && op1_is_pow2)
4082 code = TRUNC_DIV_EXPR;
4083 }
4084
4085 if (op1 != const0_rtx)
4086 switch (code)
4087 {
4088 case TRUNC_MOD_EXPR:
4089 case TRUNC_DIV_EXPR:
4090 if (op1_is_constant)
4091 {
4092 if (unsignedp)
4093 {
4094 unsigned HOST_WIDE_INT mh, ml;
4095 int pre_shift, post_shift;
4096 int dummy;
4097 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4098 & GET_MODE_MASK (compute_mode));
4099
4100 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4101 {
4102 pre_shift = floor_log2 (d);
4103 if (rem_flag)
4104 {
4105 remainder
4106 = expand_binop (compute_mode, and_optab, op0,
4107 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4108 remainder, 1,
4109 OPTAB_LIB_WIDEN);
4110 if (remainder)
4111 return gen_lowpart (mode, remainder);
4112 }
4113 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4114 pre_shift, tquotient, 1);
4115 }
4116 else if (size <= HOST_BITS_PER_WIDE_INT)
4117 {
4118 if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4119 {
4120 /* Most significant bit of divisor is set; emit an scc
4121 insn. */
4122 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4123 compute_mode, 1, 1);
4124 }
4125 else
4126 {
4127 /* Find a suitable multiplier and right shift count
4128 instead of multiplying with D. */
4129
4130 mh = choose_multiplier (d, size, size,
4131 &ml, &post_shift, &dummy);
4132
4133 /* If the suggested multiplier is more than SIZE bits,
4134 we can do better for even divisors, using an
4135 initial right shift. */
4136 if (mh != 0 && (d & 1) == 0)
4137 {
4138 pre_shift = floor_log2 (d & -d);
4139 mh = choose_multiplier (d >> pre_shift, size,
4140 size - pre_shift,
4141 &ml, &post_shift, &dummy);
4142 gcc_assert (!mh);
4143 }
4144 else
4145 pre_shift = 0;
4146
4147 if (mh != 0)
4148 {
4149 rtx t1, t2, t3, t4;
4150
4151 if (post_shift - 1 >= BITS_PER_WORD)
4152 goto fail1;
4153
4154 extra_cost
4155 = (shift_cost[speed][compute_mode][post_shift - 1]
4156 + shift_cost[speed][compute_mode][1]
4157 + 2 * add_cost[speed][compute_mode]);
4158 t1 = expmed_mult_highpart (compute_mode, op0,
4159 GEN_INT (ml),
4160 NULL_RTX, 1,
4161 max_cost - extra_cost);
4162 if (t1 == 0)
4163 goto fail1;
4164 t2 = force_operand (gen_rtx_MINUS (compute_mode,
4165 op0, t1),
4166 NULL_RTX);
4167 t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4168 t2, 1, NULL_RTX, 1);
4169 t4 = force_operand (gen_rtx_PLUS (compute_mode,
4170 t1, t3),
4171 NULL_RTX);
4172 quotient = expand_shift
4173 (RSHIFT_EXPR, compute_mode, t4,
4174 post_shift - 1, tquotient, 1);
4175 }
4176 else
4177 {
4178 rtx t1, t2;
4179
4180 if (pre_shift >= BITS_PER_WORD
4181 || post_shift >= BITS_PER_WORD)
4182 goto fail1;
4183
4184 t1 = expand_shift
4185 (RSHIFT_EXPR, compute_mode, op0,
4186 pre_shift, NULL_RTX, 1);
4187 extra_cost
4188 = (shift_cost[speed][compute_mode][pre_shift]
4189 + shift_cost[speed][compute_mode][post_shift]);
4190 t2 = expmed_mult_highpart (compute_mode, t1,
4191 GEN_INT (ml),
4192 NULL_RTX, 1,
4193 max_cost - extra_cost);
4194 if (t2 == 0)
4195 goto fail1;
4196 quotient = expand_shift
4197 (RSHIFT_EXPR, compute_mode, t2,
4198 post_shift, tquotient, 1);
4199 }
4200 }
4201 }
4202 else /* Too wide mode to use tricky code */
4203 break;
4204
4205 insn = get_last_insn ();
4206 if (insn != last)
4207 set_dst_reg_note (insn, REG_EQUAL,
4208 gen_rtx_UDIV (compute_mode, op0, op1),
4209 quotient);
4210 }
4211 else /* TRUNC_DIV, signed */
4212 {
4213 unsigned HOST_WIDE_INT ml;
4214 int lgup, post_shift;
4215 rtx mlr;
4216 HOST_WIDE_INT d = INTVAL (op1);
4217 unsigned HOST_WIDE_INT abs_d;
4218
4219 /* Since d might be INT_MIN, we have to cast to
4220 unsigned HOST_WIDE_INT before negating to avoid
4221 undefined signed overflow. */
4222 abs_d = (d >= 0
4223 ? (unsigned HOST_WIDE_INT) d
4224 : - (unsigned HOST_WIDE_INT) d);
4225
4226 /* n rem d = n rem -d */
4227 if (rem_flag && d < 0)
4228 {
4229 d = abs_d;
4230 op1 = gen_int_mode (abs_d, compute_mode);
4231 }
4232
4233 if (d == 1)
4234 quotient = op0;
4235 else if (d == -1)
4236 quotient = expand_unop (compute_mode, neg_optab, op0,
4237 tquotient, 0);
4238 else if (HOST_BITS_PER_WIDE_INT >= size
4239 && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4240 {
4241 /* This case is not handled correctly below. */
4242 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4243 compute_mode, 1, 1);
4244 if (quotient == 0)
4245 goto fail1;
4246 }
4247 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4248 && (rem_flag ? smod_pow2_cheap[speed][compute_mode]
4249 : sdiv_pow2_cheap[speed][compute_mode])
4250 /* We assume that the cheap metric is true if the
4251 optab has an expander for this mode. */
4252 && ((optab_handler ((rem_flag ? smod_optab
4253 : sdiv_optab),
4254 compute_mode)
4255 != CODE_FOR_nothing)
4256 || (optab_handler (sdivmod_optab,
4257 compute_mode)
4258 != CODE_FOR_nothing)))
4259 ;
4260 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4261 {
4262 if (rem_flag)
4263 {
4264 remainder = expand_smod_pow2 (compute_mode, op0, d);
4265 if (remainder)
4266 return gen_lowpart (mode, remainder);
4267 }
4268
4269 if (sdiv_pow2_cheap[speed][compute_mode]
4270 && ((optab_handler (sdiv_optab, compute_mode)
4271 != CODE_FOR_nothing)
4272 || (optab_handler (sdivmod_optab, compute_mode)
4273 != CODE_FOR_nothing)))
4274 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4275 compute_mode, op0,
4276 gen_int_mode (abs_d,
4277 compute_mode),
4278 NULL_RTX, 0);
4279 else
4280 quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4281
4282 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4283 negate the quotient. */
4284 if (d < 0)
4285 {
4286 insn = get_last_insn ();
4287 if (insn != last
4288 && abs_d < ((unsigned HOST_WIDE_INT) 1
4289 << (HOST_BITS_PER_WIDE_INT - 1)))
4290 set_dst_reg_note (insn, REG_EQUAL,
4291 gen_rtx_DIV (compute_mode, op0,
4292 gen_int_mode
4293 (abs_d,
4294 compute_mode)),
4295 quotient);
4296
4297 quotient = expand_unop (compute_mode, neg_optab,
4298 quotient, quotient, 0);
4299 }
4300 }
4301 else if (size <= HOST_BITS_PER_WIDE_INT)
4302 {
4303 choose_multiplier (abs_d, size, size - 1,
4304 &ml, &post_shift, &lgup);
4305 if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4306 {
4307 rtx t1, t2, t3;
4308
4309 if (post_shift >= BITS_PER_WORD
4310 || size - 1 >= BITS_PER_WORD)
4311 goto fail1;
4312
4313 extra_cost = (shift_cost[speed][compute_mode][post_shift]
4314 + shift_cost[speed][compute_mode][size - 1]
4315 + add_cost[speed][compute_mode]);
4316 t1 = expmed_mult_highpart (compute_mode, op0,
4317 GEN_INT (ml), NULL_RTX, 0,
4318 max_cost - extra_cost);
4319 if (t1 == 0)
4320 goto fail1;
4321 t2 = expand_shift
4322 (RSHIFT_EXPR, compute_mode, t1,
4323 post_shift, NULL_RTX, 0);
4324 t3 = expand_shift
4325 (RSHIFT_EXPR, compute_mode, op0,
4326 size - 1, NULL_RTX, 0);
4327 if (d < 0)
4328 quotient
4329 = force_operand (gen_rtx_MINUS (compute_mode,
4330 t3, t2),
4331 tquotient);
4332 else
4333 quotient
4334 = force_operand (gen_rtx_MINUS (compute_mode,
4335 t2, t3),
4336 tquotient);
4337 }
4338 else
4339 {
4340 rtx t1, t2, t3, t4;
4341
4342 if (post_shift >= BITS_PER_WORD
4343 || size - 1 >= BITS_PER_WORD)
4344 goto fail1;
4345
4346 ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4347 mlr = gen_int_mode (ml, compute_mode);
4348 extra_cost = (shift_cost[speed][compute_mode][post_shift]
4349 + shift_cost[speed][compute_mode][size - 1]
4350 + 2 * add_cost[speed][compute_mode]);
4351 t1 = expmed_mult_highpart (compute_mode, op0, mlr,
4352 NULL_RTX, 0,
4353 max_cost - extra_cost);
4354 if (t1 == 0)
4355 goto fail1;
4356 t2 = force_operand (gen_rtx_PLUS (compute_mode,
4357 t1, op0),
4358 NULL_RTX);
4359 t3 = expand_shift
4360 (RSHIFT_EXPR, compute_mode, t2,
4361 post_shift, NULL_RTX, 0);
4362 t4 = expand_shift
4363 (RSHIFT_EXPR, compute_mode, op0,
4364 size - 1, NULL_RTX, 0);
4365 if (d < 0)
4366 quotient
4367 = force_operand (gen_rtx_MINUS (compute_mode,
4368 t4, t3),
4369 tquotient);
4370 else
4371 quotient
4372 = force_operand (gen_rtx_MINUS (compute_mode,
4373 t3, t4),
4374 tquotient);
4375 }
4376 }
4377 else /* Too wide mode to use tricky code */
4378 break;
4379
4380 insn = get_last_insn ();
4381 if (insn != last)
4382 set_dst_reg_note (insn, REG_EQUAL,
4383 gen_rtx_DIV (compute_mode, op0, op1),
4384 quotient);
4385 }
4386 break;
4387 }
4388 fail1:
4389 delete_insns_since (last);
4390 break;
4391
4392 case FLOOR_DIV_EXPR:
4393 case FLOOR_MOD_EXPR:
4394 /* We will come here only for signed operations. */
4395 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4396 {
4397 unsigned HOST_WIDE_INT mh, ml;
4398 int pre_shift, lgup, post_shift;
4399 HOST_WIDE_INT d = INTVAL (op1);
4400
4401 if (d > 0)
4402 {
4403 /* We could just as easily deal with negative constants here,
4404 but it does not seem worth the trouble for GCC 2.6. */
4405 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4406 {
4407 pre_shift = floor_log2 (d);
4408 if (rem_flag)
4409 {
4410 remainder = expand_binop (compute_mode, and_optab, op0,
4411 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4412 remainder, 0, OPTAB_LIB_WIDEN);
4413 if (remainder)
4414 return gen_lowpart (mode, remainder);
4415 }
4416 quotient = expand_shift
4417 (RSHIFT_EXPR, compute_mode, op0,
4418 pre_shift, tquotient, 0);
4419 }
4420 else
4421 {
4422 rtx t1, t2, t3, t4;
4423
4424 mh = choose_multiplier (d, size, size - 1,
4425 &ml, &post_shift, &lgup);
4426 gcc_assert (!mh);
4427
4428 if (post_shift < BITS_PER_WORD
4429 && size - 1 < BITS_PER_WORD)
4430 {
4431 t1 = expand_shift
4432 (RSHIFT_EXPR, compute_mode, op0,
4433 size - 1, NULL_RTX, 0);
4434 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4435 NULL_RTX, 0, OPTAB_WIDEN);
4436 extra_cost = (shift_cost[speed][compute_mode][post_shift]
4437 + shift_cost[speed][compute_mode][size - 1]
4438 + 2 * add_cost[speed][compute_mode]);
4439 t3 = expmed_mult_highpart (compute_mode, t2,
4440 GEN_INT (ml), NULL_RTX, 1,
4441 max_cost - extra_cost);
4442 if (t3 != 0)
4443 {
4444 t4 = expand_shift
4445 (RSHIFT_EXPR, compute_mode, t3,
4446 post_shift, NULL_RTX, 1);
4447 quotient = expand_binop (compute_mode, xor_optab,
4448 t4, t1, tquotient, 0,
4449 OPTAB_WIDEN);
4450 }
4451 }
4452 }
4453 }
4454 else
4455 {
4456 rtx nsign, t1, t2, t3, t4;
4457 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4458 op0, constm1_rtx), NULL_RTX);
4459 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4460 0, OPTAB_WIDEN);
4461 nsign = expand_shift
4462 (RSHIFT_EXPR, compute_mode, t2,
4463 size - 1, NULL_RTX, 0);
4464 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4465 NULL_RTX);
4466 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4467 NULL_RTX, 0);
4468 if (t4)
4469 {
4470 rtx t5;
4471 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4472 NULL_RTX, 0);
4473 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4474 t4, t5),
4475 tquotient);
4476 }
4477 }
4478 }
4479
4480 if (quotient != 0)
4481 break;
4482 delete_insns_since (last);
4483
4484 /* Try using an instruction that produces both the quotient and
4485 remainder, using truncation. We can easily compensate the quotient
4486 or remainder to get floor rounding, once we have the remainder.
4487 Notice that we also compute the final remainder value here,
4488 and return the result right away. */
4489 if (target == 0 || GET_MODE (target) != compute_mode)
4490 target = gen_reg_rtx (compute_mode);
4491
4492 if (rem_flag)
4493 {
4494 remainder
4495 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4496 quotient = gen_reg_rtx (compute_mode);
4497 }
4498 else
4499 {
4500 quotient
4501 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4502 remainder = gen_reg_rtx (compute_mode);
4503 }
4504
4505 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4506 quotient, remainder, 0))
4507 {
4508 /* This could be computed with a branch-less sequence.
4509 Save that for later. */
4510 rtx tem;
4511 rtx label = gen_label_rtx ();
4512 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4513 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4514 NULL_RTX, 0, OPTAB_WIDEN);
4515 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4516 expand_dec (quotient, const1_rtx);
4517 expand_inc (remainder, op1);
4518 emit_label (label);
4519 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4520 }
4521
4522 /* No luck with division elimination or divmod. Have to do it
4523 by conditionally adjusting op0 *and* the result. */
4524 {
4525 rtx label1, label2, label3, label4, label5;
4526 rtx adjusted_op0;
4527 rtx tem;
4528
4529 quotient = gen_reg_rtx (compute_mode);
4530 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4531 label1 = gen_label_rtx ();
4532 label2 = gen_label_rtx ();
4533 label3 = gen_label_rtx ();
4534 label4 = gen_label_rtx ();
4535 label5 = gen_label_rtx ();
4536 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4537 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4538 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4539 quotient, 0, OPTAB_LIB_WIDEN);
4540 if (tem != quotient)
4541 emit_move_insn (quotient, tem);
4542 emit_jump_insn (gen_jump (label5));
4543 emit_barrier ();
4544 emit_label (label1);
4545 expand_inc (adjusted_op0, const1_rtx);
4546 emit_jump_insn (gen_jump (label4));
4547 emit_barrier ();
4548 emit_label (label2);
4549 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4550 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4551 quotient, 0, OPTAB_LIB_WIDEN);
4552 if (tem != quotient)
4553 emit_move_insn (quotient, tem);
4554 emit_jump_insn (gen_jump (label5));
4555 emit_barrier ();
4556 emit_label (label3);
4557 expand_dec (adjusted_op0, const1_rtx);
4558 emit_label (label4);
4559 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4560 quotient, 0, OPTAB_LIB_WIDEN);
4561 if (tem != quotient)
4562 emit_move_insn (quotient, tem);
4563 expand_dec (quotient, const1_rtx);
4564 emit_label (label5);
4565 }
4566 break;
4567
4568 case CEIL_DIV_EXPR:
4569 case CEIL_MOD_EXPR:
4570 if (unsignedp)
4571 {
4572 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4573 {
4574 rtx t1, t2, t3;
4575 unsigned HOST_WIDE_INT d = INTVAL (op1);
4576 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4577 floor_log2 (d), tquotient, 1);
4578 t2 = expand_binop (compute_mode, and_optab, op0,
4579 GEN_INT (d - 1),
4580 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4581 t3 = gen_reg_rtx (compute_mode);
4582 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4583 compute_mode, 1, 1);
4584 if (t3 == 0)
4585 {
4586 rtx lab;
4587 lab = gen_label_rtx ();
4588 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4589 expand_inc (t1, const1_rtx);
4590 emit_label (lab);
4591 quotient = t1;
4592 }
4593 else
4594 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4595 t1, t3),
4596 tquotient);
4597 break;
4598 }
4599
4600 /* Try using an instruction that produces both the quotient and
4601 remainder, using truncation. We can easily compensate the
4602 quotient or remainder to get ceiling rounding, once we have the
4603 remainder. Notice that we also compute the final remainder
4604 value here, and return the result right away. */
4605 if (target == 0 || GET_MODE (target) != compute_mode)
4606 target = gen_reg_rtx (compute_mode);
4607
4608 if (rem_flag)
4609 {
4610 remainder = (REG_P (target)
4611 ? target : gen_reg_rtx (compute_mode));
4612 quotient = gen_reg_rtx (compute_mode);
4613 }
4614 else
4615 {
4616 quotient = (REG_P (target)
4617 ? target : gen_reg_rtx (compute_mode));
4618 remainder = gen_reg_rtx (compute_mode);
4619 }
4620
4621 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4622 remainder, 1))
4623 {
4624 /* This could be computed with a branch-less sequence.
4625 Save that for later. */
4626 rtx label = gen_label_rtx ();
4627 do_cmp_and_jump (remainder, const0_rtx, EQ,
4628 compute_mode, label);
4629 expand_inc (quotient, const1_rtx);
4630 expand_dec (remainder, op1);
4631 emit_label (label);
4632 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4633 }
4634
4635 /* No luck with division elimination or divmod. Have to do it
4636 by conditionally adjusting op0 *and* the result. */
4637 {
4638 rtx label1, label2;
4639 rtx adjusted_op0, tem;
4640
4641 quotient = gen_reg_rtx (compute_mode);
4642 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4643 label1 = gen_label_rtx ();
4644 label2 = gen_label_rtx ();
4645 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4646 compute_mode, label1);
4647 emit_move_insn (quotient, const0_rtx);
4648 emit_jump_insn (gen_jump (label2));
4649 emit_barrier ();
4650 emit_label (label1);
4651 expand_dec (adjusted_op0, const1_rtx);
4652 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4653 quotient, 1, OPTAB_LIB_WIDEN);
4654 if (tem != quotient)
4655 emit_move_insn (quotient, tem);
4656 expand_inc (quotient, const1_rtx);
4657 emit_label (label2);
4658 }
4659 }
4660 else /* signed */
4661 {
4662 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4663 && INTVAL (op1) >= 0)
4664 {
4665 /* This is extremely similar to the code for the unsigned case
4666 above. For 2.7 we should merge these variants, but for
4667 2.6.1 I don't want to touch the code for unsigned since that
4668 gets used in C. The signed case will only be used by other
4669 languages (Ada). */
4670
4671 rtx t1, t2, t3;
4672 unsigned HOST_WIDE_INT d = INTVAL (op1);
4673 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4674 floor_log2 (d), tquotient, 0);
4675 t2 = expand_binop (compute_mode, and_optab, op0,
4676 GEN_INT (d - 1),
4677 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4678 t3 = gen_reg_rtx (compute_mode);
4679 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4680 compute_mode, 1, 1);
4681 if (t3 == 0)
4682 {
4683 rtx lab;
4684 lab = gen_label_rtx ();
4685 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4686 expand_inc (t1, const1_rtx);
4687 emit_label (lab);
4688 quotient = t1;
4689 }
4690 else
4691 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4692 t1, t3),
4693 tquotient);
4694 break;
4695 }
4696
4697 /* Try using an instruction that produces both the quotient and
4698 remainder, using truncation. We can easily compensate the
4699 quotient or remainder to get ceiling rounding, once we have the
4700 remainder. Notice that we also compute the final remainder
4701 value here, and return the result right away. */
4702 if (target == 0 || GET_MODE (target) != compute_mode)
4703 target = gen_reg_rtx (compute_mode);
4704 if (rem_flag)
4705 {
4706 remainder= (REG_P (target)
4707 ? target : gen_reg_rtx (compute_mode));
4708 quotient = gen_reg_rtx (compute_mode);
4709 }
4710 else
4711 {
4712 quotient = (REG_P (target)
4713 ? target : gen_reg_rtx (compute_mode));
4714 remainder = gen_reg_rtx (compute_mode);
4715 }
4716
4717 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4718 remainder, 0))
4719 {
4720 /* This could be computed with a branch-less sequence.
4721 Save that for later. */
4722 rtx tem;
4723 rtx label = gen_label_rtx ();
4724 do_cmp_and_jump (remainder, const0_rtx, EQ,
4725 compute_mode, label);
4726 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4727 NULL_RTX, 0, OPTAB_WIDEN);
4728 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4729 expand_inc (quotient, const1_rtx);
4730 expand_dec (remainder, op1);
4731 emit_label (label);
4732 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4733 }
4734
4735 /* No luck with division elimination or divmod. Have to do it
4736 by conditionally adjusting op0 *and* the result. */
4737 {
4738 rtx label1, label2, label3, label4, label5;
4739 rtx adjusted_op0;
4740 rtx tem;
4741
4742 quotient = gen_reg_rtx (compute_mode);
4743 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4744 label1 = gen_label_rtx ();
4745 label2 = gen_label_rtx ();
4746 label3 = gen_label_rtx ();
4747 label4 = gen_label_rtx ();
4748 label5 = gen_label_rtx ();
4749 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4750 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4751 compute_mode, label1);
4752 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4753 quotient, 0, OPTAB_LIB_WIDEN);
4754 if (tem != quotient)
4755 emit_move_insn (quotient, tem);
4756 emit_jump_insn (gen_jump (label5));
4757 emit_barrier ();
4758 emit_label (label1);
4759 expand_dec (adjusted_op0, const1_rtx);
4760 emit_jump_insn (gen_jump (label4));
4761 emit_barrier ();
4762 emit_label (label2);
4763 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4764 compute_mode, label3);
4765 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4766 quotient, 0, OPTAB_LIB_WIDEN);
4767 if (tem != quotient)
4768 emit_move_insn (quotient, tem);
4769 emit_jump_insn (gen_jump (label5));
4770 emit_barrier ();
4771 emit_label (label3);
4772 expand_inc (adjusted_op0, const1_rtx);
4773 emit_label (label4);
4774 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4775 quotient, 0, OPTAB_LIB_WIDEN);
4776 if (tem != quotient)
4777 emit_move_insn (quotient, tem);
4778 expand_inc (quotient, const1_rtx);
4779 emit_label (label5);
4780 }
4781 }
4782 break;
4783
4784 case EXACT_DIV_EXPR:
4785 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4786 {
4787 HOST_WIDE_INT d = INTVAL (op1);
4788 unsigned HOST_WIDE_INT ml;
4789 int pre_shift;
4790 rtx t1;
4791
4792 pre_shift = floor_log2 (d & -d);
4793 ml = invert_mod2n (d >> pre_shift, size);
4794 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4795 pre_shift, NULL_RTX, unsignedp);
4796 quotient = expand_mult (compute_mode, t1,
4797 gen_int_mode (ml, compute_mode),
4798 NULL_RTX, 1);
4799
4800 insn = get_last_insn ();
4801 set_dst_reg_note (insn, REG_EQUAL,
4802 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4803 compute_mode, op0, op1),
4804 quotient);
4805 }
4806 break;
4807
4808 case ROUND_DIV_EXPR:
4809 case ROUND_MOD_EXPR:
4810 if (unsignedp)
4811 {
4812 rtx tem;
4813 rtx label;
4814 label = gen_label_rtx ();
4815 quotient = gen_reg_rtx (compute_mode);
4816 remainder = gen_reg_rtx (compute_mode);
4817 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4818 {
4819 rtx tem;
4820 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4821 quotient, 1, OPTAB_LIB_WIDEN);
4822 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4823 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4824 remainder, 1, OPTAB_LIB_WIDEN);
4825 }
4826 tem = plus_constant (compute_mode, op1, -1);
4827 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4828 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4829 expand_inc (quotient, const1_rtx);
4830 expand_dec (remainder, op1);
4831 emit_label (label);
4832 }
4833 else
4834 {
4835 rtx abs_rem, abs_op1, tem, mask;
4836 rtx label;
4837 label = gen_label_rtx ();
4838 quotient = gen_reg_rtx (compute_mode);
4839 remainder = gen_reg_rtx (compute_mode);
4840 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4841 {
4842 rtx tem;
4843 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4844 quotient, 0, OPTAB_LIB_WIDEN);
4845 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4846 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4847 remainder, 0, OPTAB_LIB_WIDEN);
4848 }
4849 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4850 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4851 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4852 1, NULL_RTX, 1);
4853 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4854 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4855 NULL_RTX, 0, OPTAB_WIDEN);
4856 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4857 size - 1, NULL_RTX, 0);
4858 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4859 NULL_RTX, 0, OPTAB_WIDEN);
4860 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4861 NULL_RTX, 0, OPTAB_WIDEN);
4862 expand_inc (quotient, tem);
4863 tem = expand_binop (compute_mode, xor_optab, mask, op1,
4864 NULL_RTX, 0, OPTAB_WIDEN);
4865 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4866 NULL_RTX, 0, OPTAB_WIDEN);
4867 expand_dec (remainder, tem);
4868 emit_label (label);
4869 }
4870 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4871
4872 default:
4873 gcc_unreachable ();
4874 }
4875
4876 if (quotient == 0)
4877 {
4878 if (target && GET_MODE (target) != compute_mode)
4879 target = 0;
4880
4881 if (rem_flag)
4882 {
4883 /* Try to produce the remainder without producing the quotient.
4884 If we seem to have a divmod pattern that does not require widening,
4885 don't try widening here. We should really have a WIDEN argument
4886 to expand_twoval_binop, since what we'd really like to do here is
4887 1) try a mod insn in compute_mode
4888 2) try a divmod insn in compute_mode
4889 3) try a div insn in compute_mode and multiply-subtract to get
4890 remainder
4891 4) try the same things with widening allowed. */
4892 remainder
4893 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4894 op0, op1, target,
4895 unsignedp,
4896 ((optab_handler (optab2, compute_mode)
4897 != CODE_FOR_nothing)
4898 ? OPTAB_DIRECT : OPTAB_WIDEN));
4899 if (remainder == 0)
4900 {
4901 /* No luck there. Can we do remainder and divide at once
4902 without a library call? */
4903 remainder = gen_reg_rtx (compute_mode);
4904 if (! expand_twoval_binop ((unsignedp
4905 ? udivmod_optab
4906 : sdivmod_optab),
4907 op0, op1,
4908 NULL_RTX, remainder, unsignedp))
4909 remainder = 0;
4910 }
4911
4912 if (remainder)
4913 return gen_lowpart (mode, remainder);
4914 }
4915
4916 /* Produce the quotient. Try a quotient insn, but not a library call.
4917 If we have a divmod in this mode, use it in preference to widening
4918 the div (for this test we assume it will not fail). Note that optab2
4919 is set to the one of the two optabs that the call below will use. */
4920 quotient
4921 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4922 op0, op1, rem_flag ? NULL_RTX : target,
4923 unsignedp,
4924 ((optab_handler (optab2, compute_mode)
4925 != CODE_FOR_nothing)
4926 ? OPTAB_DIRECT : OPTAB_WIDEN));
4927
4928 if (quotient == 0)
4929 {
4930 /* No luck there. Try a quotient-and-remainder insn,
4931 keeping the quotient alone. */
4932 quotient = gen_reg_rtx (compute_mode);
4933 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4934 op0, op1,
4935 quotient, NULL_RTX, unsignedp))
4936 {
4937 quotient = 0;
4938 if (! rem_flag)
4939 /* Still no luck. If we are not computing the remainder,
4940 use a library call for the quotient. */
4941 quotient = sign_expand_binop (compute_mode,
4942 udiv_optab, sdiv_optab,
4943 op0, op1, target,
4944 unsignedp, OPTAB_LIB_WIDEN);
4945 }
4946 }
4947 }
4948
4949 if (rem_flag)
4950 {
4951 if (target && GET_MODE (target) != compute_mode)
4952 target = 0;
4953
4954 if (quotient == 0)
4955 {
4956 /* No divide instruction either. Use library for remainder. */
4957 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4958 op0, op1, target,
4959 unsignedp, OPTAB_LIB_WIDEN);
4960 /* No remainder function. Try a quotient-and-remainder
4961 function, keeping the remainder. */
4962 if (!remainder)
4963 {
4964 remainder = gen_reg_rtx (compute_mode);
4965 if (!expand_twoval_binop_libfunc
4966 (unsignedp ? udivmod_optab : sdivmod_optab,
4967 op0, op1,
4968 NULL_RTX, remainder,
4969 unsignedp ? UMOD : MOD))
4970 remainder = NULL_RTX;
4971 }
4972 }
4973 else
4974 {
4975 /* We divided. Now finish doing X - Y * (X / Y). */
4976 remainder = expand_mult (compute_mode, quotient, op1,
4977 NULL_RTX, unsignedp);
4978 remainder = expand_binop (compute_mode, sub_optab, op0,
4979 remainder, target, unsignedp,
4980 OPTAB_LIB_WIDEN);
4981 }
4982 }
4983
4984 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4985 }
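
/* Editor's summary sketch of the quotient adjustments implemented above
   (standalone, illustrative only; relies on C's / and % truncating toward
   zero, on D != 0, and on avoiding signed-overflow corner cases such as
   INT_MIN / -1 or 2 * |R| overflowing).  The ROUNDING argument is an ad hoc
   selector for this sketch: 0 for FLOOR_DIV_EXPR, 1 for CEIL_DIV_EXPR,
   2 for ROUND_DIV_EXPR.  */

static int
example_div_rounded (int x, int d, int rounding)
{
  int q = x / d, r = x % d;

  if (r != 0)
    {
      if (rounding == 0 && (r < 0) != (d < 0))
        q--;                                    /* floor: round toward -inf */
      else if (rounding == 1 && (r < 0) == (d < 0))
        q++;                                    /* ceil: round toward +inf */
      else if (rounding == 2
               && 2 * (r < 0 ? -r : r) >= (d < 0 ? -d : d))
        q += ((r < 0) == (d < 0)) ? 1 : -1;     /* nearest, ties away from 0 */
    }
  return q;
}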
4986 \f
4987 /* Return a tree node with data type TYPE, describing the value of X.
4988 Usually this is a VAR_DECL, if there is no obvious better choice.
4989 X may be an expression; however, we only support those expressions
4990 generated by loop.c. */
4991
4992 tree
4993 make_tree (tree type, rtx x)
4994 {
4995 tree t;
4996
4997 switch (GET_CODE (x))
4998 {
4999 case CONST_INT:
5000 {
5001 HOST_WIDE_INT hi = 0;
5002
5003 if (INTVAL (x) < 0
5004 && !(TYPE_UNSIGNED (type)
5005 && (GET_MODE_BITSIZE (TYPE_MODE (type))
5006 < HOST_BITS_PER_WIDE_INT)))
5007 hi = -1;
5008
5009 t = build_int_cst_wide (type, INTVAL (x), hi);
5010
5011 return t;
5012 }
5013
5014 case CONST_DOUBLE:
5015 if (GET_MODE (x) == VOIDmode)
5016 t = build_int_cst_wide (type,
5017 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
5018 else
5019 {
5020 REAL_VALUE_TYPE d;
5021
5022 REAL_VALUE_FROM_CONST_DOUBLE (d, x);
5023 t = build_real (type, d);
5024 }
5025
5026 return t;
5027
5028 case CONST_VECTOR:
5029 {
5030 int units = CONST_VECTOR_NUNITS (x);
5031 tree itype = TREE_TYPE (type);
5032 tree *elts;
5033 int i;
5034
5035 /* Build a tree with vector elements. */
5036 elts = XALLOCAVEC (tree, units);
5037 for (i = units - 1; i >= 0; --i)
5038 {
5039 rtx elt = CONST_VECTOR_ELT (x, i);
5040 elts[i] = make_tree (itype, elt);
5041 }
5042
5043 return build_vector (type, elts);
5044 }
5045
5046 case PLUS:
5047 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5048 make_tree (type, XEXP (x, 1)));
5049
5050 case MINUS:
5051 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5052 make_tree (type, XEXP (x, 1)));
5053
5054 case NEG:
5055 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5056
5057 case MULT:
5058 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5059 make_tree (type, XEXP (x, 1)));
5060
5061 case ASHIFT:
5062 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5063 make_tree (type, XEXP (x, 1)));
5064
5065 case LSHIFTRT:
5066 t = unsigned_type_for (type);
5067 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5068 make_tree (t, XEXP (x, 0)),
5069 make_tree (type, XEXP (x, 1))));
5070
5071 case ASHIFTRT:
5072 t = signed_type_for (type);
5073 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5074 make_tree (t, XEXP (x, 0)),
5075 make_tree (type, XEXP (x, 1))));
5076
5077 case DIV:
5078 if (TREE_CODE (type) != REAL_TYPE)
5079 t = signed_type_for (type);
5080 else
5081 t = type;
5082
5083 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5084 make_tree (t, XEXP (x, 0)),
5085 make_tree (t, XEXP (x, 1))));
5086 case UDIV:
5087 t = unsigned_type_for (type);
5088 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5089 make_tree (t, XEXP (x, 0)),
5090 make_tree (t, XEXP (x, 1))));
5091
5092 case SIGN_EXTEND:
5093 case ZERO_EXTEND:
5094 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5095 GET_CODE (x) == ZERO_EXTEND);
5096 return fold_convert (type, make_tree (t, XEXP (x, 0)));
5097
5098 case CONST:
5099 return make_tree (type, XEXP (x, 0));
5100
5101 case SYMBOL_REF:
5102 t = SYMBOL_REF_DECL (x);
5103 if (t)
5104 return fold_convert (type, build_fold_addr_expr (t));
5105 /* else fall through. */
5106
5107 default:
5108 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5109
5110 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5111 address mode to pointer mode. */
5112 if (POINTER_TYPE_P (type))
5113 x = convert_memory_address_addr_space
5114 (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5115
5116 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5117 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5118 t->decl_with_rtl.rtl = x;
5119
5120 return t;
5121 }
5122 }
5123 \f
5124 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5125 and returning TARGET.
5126
5127 If TARGET is 0, a pseudo-register or constant is returned. */
5128
5129 rtx
5130 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
5131 {
5132 rtx tem = 0;
5133
5134 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5135 tem = simplify_binary_operation (AND, mode, op0, op1);
5136 if (tem == 0)
5137 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5138
5139 if (target == 0)
5140 target = tem;
5141 else if (tem != target)
5142 emit_move_insn (target, tem);
5143 return target;
5144 }
5145
5146 /* Helper function for emit_store_flag: emit a cstore insn ICODE comparing X and Y, then normalize the result into TARGET per NORMALIZEP. */
5147 static rtx
5148 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5149 enum machine_mode mode, enum machine_mode compare_mode,
5150 int unsignedp, rtx x, rtx y, int normalizep,
5151 enum machine_mode target_mode)
5152 {
5153 struct expand_operand ops[4];
5154 rtx op0, last, comparison, subtarget;
5155 enum machine_mode result_mode = insn_data[(int) icode].operand[0].mode;
5156
5157 last = get_last_insn ();
5158 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5159 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5160 if (!x || !y)
5161 {
5162 delete_insns_since (last);
5163 return NULL_RTX;
5164 }
5165
5166 if (target_mode == VOIDmode)
5167 target_mode = result_mode;
5168 if (!target)
5169 target = gen_reg_rtx (target_mode);
5170
5171 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5172
5173 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5174 create_fixed_operand (&ops[1], comparison);
5175 create_fixed_operand (&ops[2], x);
5176 create_fixed_operand (&ops[3], y);
5177 if (!maybe_expand_insn (icode, 4, ops))
5178 {
5179 delete_insns_since (last);
5180 return NULL_RTX;
5181 }
5182 subtarget = ops[0].value;
5183
5184 /* If we are converting to a wider mode, first convert to
5185 TARGET_MODE, then normalize. This produces better combining
5186 opportunities on machines that have a SIGN_EXTRACT when we are
5187 testing a single bit. This mostly benefits the 68k.
5188
5189 If STORE_FLAG_VALUE does not have the sign bit set when
5190 interpreted in MODE, we can do this conversion as unsigned, which
5191 is usually more efficient. */
5192 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5193 {
5194 convert_move (target, subtarget,
5195 val_signbit_known_clear_p (result_mode,
5196 STORE_FLAG_VALUE));
5197 op0 = target;
5198 result_mode = target_mode;
5199 }
5200 else
5201 op0 = subtarget;
5202
5203 /* If we want to keep subexpressions around, don't reuse our last
5204 target. */
5205 if (optimize)
5206 subtarget = 0;
5207
5208 /* Now normalize to the proper value in MODE. Sometimes we don't
5209 have to do anything. */
5210 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5211 ;
5212 /* STORE_FLAG_VALUE might be the most negative number, so write
5213 the comparison this way to avoid a compile-time warning. */
5214 else if (- normalizep == STORE_FLAG_VALUE)
5215 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5216
5217 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5218 it hard to use a value of just the sign bit due to ANSI integer
5219 constant typing rules. */
5220 else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5221 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5222 GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5223 normalizep == 1);
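/* Editorial worked example (not part of the original source): if the
   pattern sets only the sign bit, e.g. 0x80000000 in SImode, a logical
   right shift by 31 turns it into 0 or 1 while an arithmetic shift
   turns it into 0 or -1, matching NORMALIZEP == 1 and NORMALIZEP == -1
   respectively. */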
5224 else
5225 {
5226 gcc_assert (STORE_FLAG_VALUE & 1);
5227
5228 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5229 if (normalizep == -1)
5230 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5231 }
5232
5233 /* If we were converting to a smaller mode, do the conversion now. */
5234 if (target_mode != result_mode)
5235 {
5236 convert_move (target, op0, 0);
5237 return target;
5238 }
5239 else
5240 return op0;
5241 }
5242
5243
5244 /* A subroutine of emit_store_flag only including "tricks" that do not
5245 need a recursive call. These are kept separate to avoid infinite
5246 loops. */
5247
5248 static rtx
5249 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5250 enum machine_mode mode, int unsignedp, int normalizep,
5251 enum machine_mode target_mode)
5252 {
5253 rtx subtarget;
5254 enum insn_code icode;
5255 enum machine_mode compare_mode;
5256 enum mode_class mclass;
5257 enum rtx_code scode;
5258 rtx tem;
5259
5260 if (unsignedp)
5261 code = unsigned_condition (code);
5262 scode = swap_condition (code);
5263
5264 /* If one operand is constant, make it the second one. Only do this
5265 if the other operand is not constant as well. */
5266
5267 if (swap_commutative_operands_p (op0, op1))
5268 {
5269 tem = op0;
5270 op0 = op1;
5271 op1 = tem;
5272 code = swap_condition (code);
5273 }
5274
5275 if (mode == VOIDmode)
5276 mode = GET_MODE (op0);
5277
5278 /* For some comparisons with 1 and -1, we can convert this to
5279 comparisons with zero. This will often produce more opportunities for
5280 store-flag insns. */
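/* Editorial examples (not part of the original source): for signed
   operands "X < 1" is the same test as "X <= 0" and "X > -1" the same
   as "X >= 0"; for unsigned operands "X >= 1" means "X != 0" and
   "X < 1" means "X == 0". */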
5281
5282 switch (code)
5283 {
5284 case LT:
5285 if (op1 == const1_rtx)
5286 op1 = const0_rtx, code = LE;
5287 break;
5288 case LE:
5289 if (op1 == constm1_rtx)
5290 op1 = const0_rtx, code = LT;
5291 break;
5292 case GE:
5293 if (op1 == const1_rtx)
5294 op1 = const0_rtx, code = GT;
5295 break;
5296 case GT:
5297 if (op1 == constm1_rtx)
5298 op1 = const0_rtx, code = GE;
5299 break;
5300 case GEU:
5301 if (op1 == const1_rtx)
5302 op1 = const0_rtx, code = NE;
5303 break;
5304 case LTU:
5305 if (op1 == const1_rtx)
5306 op1 = const0_rtx, code = EQ;
5307 break;
5308 default:
5309 break;
5310 }
5311
5312 /* If we are comparing a double-word integer with zero or -1, we can
5313 convert the comparison into one involving a single word. */
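/* Editorial example (not part of the original source): on a 32-bit
   target a DImode value is zero exactly when (low | high) == 0 and is
   -1 exactly when (low & high) == -1, which is what the IOR/AND of the
   two subwords below checks; a plain sign test (LT/GE against zero)
   only needs the high word. */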
5314 if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5315 && GET_MODE_CLASS (mode) == MODE_INT
5316 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5317 {
5318 if ((code == EQ || code == NE)
5319 && (op1 == const0_rtx || op1 == constm1_rtx))
5320 {
5321 rtx op00, op01;
5322
5323 /* Do a logical OR or AND of the two words and compare the
5324 result. */
5325 op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5326 op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5327 tem = expand_binop (word_mode,
5328 op1 == const0_rtx ? ior_optab : and_optab,
5329 op00, op01, NULL_RTX, unsignedp,
5330 OPTAB_DIRECT);
5331
5332 if (tem != 0)
5333 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5334 unsignedp, normalizep);
5335 }
5336 else if ((code == LT || code == GE) && op1 == const0_rtx)
5337 {
5338 rtx op0h;
5339
5340 /* If testing the sign bit, we can just test the high word. */
5341 op0h = simplify_gen_subreg (word_mode, op0, mode,
5342 subreg_highpart_offset (word_mode,
5343 mode));
5344 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5345 unsignedp, normalizep);
5346 }
5347 else
5348 tem = NULL_RTX;
5349
5350 if (tem)
5351 {
5352 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5353 return tem;
5354 if (!target)
5355 target = gen_reg_rtx (target_mode);
5356
5357 convert_move (target, tem,
5358 !val_signbit_known_set_p (word_mode,
5359 (normalizep ? normalizep
5360 : STORE_FLAG_VALUE)));
5361 return target;
5362 }
5363 }
5364
5365 /* If this is A < 0 or A >= 0, we can do this by taking the ones
5366 complement of A (for GE) and shifting the sign bit to the low bit. */
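/* Editorial worked example (not part of the original source): A < 0 is
   just the sign bit of A, so shifting that bit down to bit 0 gives the
   0/1 (logical shift) or 0/-1 (arithmetic shift) result; for A >= 0
   the complement ~A is formed first, since its sign bit is set exactly
   when A is non-negative. */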
5367 if (op1 == const0_rtx && (code == LT || code == GE)
5368 && GET_MODE_CLASS (mode) == MODE_INT
5369 && (normalizep || STORE_FLAG_VALUE == 1
5370 || val_signbit_p (mode, STORE_FLAG_VALUE)))
5371 {
5372 subtarget = target;
5373
5374 if (!target)
5375 target_mode = mode;
5376
5377 /* If the result is to be wider than OP0, it is best to convert it
5378 first. If it is to be narrower, it is *incorrect* to convert it
5379 first. */
5380 else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5381 {
5382 op0 = convert_modes (target_mode, mode, op0, 0);
5383 mode = target_mode;
5384 }
5385
5386 if (target_mode != mode)
5387 subtarget = 0;
5388
5389 if (code == GE)
5390 op0 = expand_unop (mode, one_cmpl_optab, op0,
5391 ((STORE_FLAG_VALUE == 1 || normalizep)
5392 ? 0 : subtarget), 0);
5393
5394 if (STORE_FLAG_VALUE == 1 || normalizep)
5395 /* If we are supposed to produce a 0/1 value, we want to do
5396 a logical shift from the sign bit to the low-order bit; for
5397 a -1/0 value, we do an arithmetic shift. */
5398 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5399 GET_MODE_BITSIZE (mode) - 1,
5400 subtarget, normalizep != -1);
5401
5402 if (mode != target_mode)
5403 op0 = convert_modes (target_mode, mode, op0, 0);
5404
5405 return op0;
5406 }
5407
5408 mclass = GET_MODE_CLASS (mode);
5409 for (compare_mode = mode; compare_mode != VOIDmode;
5410 compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5411 {
5412 enum machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5413 icode = optab_handler (cstore_optab, optab_mode);
5414 if (icode != CODE_FOR_nothing)
5415 {
5416 do_pending_stack_adjust ();
5417 tem = emit_cstore (target, icode, code, mode, compare_mode,
5418 unsignedp, op0, op1, normalizep, target_mode);
5419 if (tem)
5420 return tem;
5421
5422 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5423 {
5424 tem = emit_cstore (target, icode, scode, mode, compare_mode,
5425 unsignedp, op1, op0, normalizep, target_mode);
5426 if (tem)
5427 return tem;
5428 }
5429 break;
5430 }
5431 }
5432
5433 return 0;
5434 }
5435
5436 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1,
5437 storing the result in TARGET. Normally return TARGET.
5438 Return 0 if that cannot be done.
5439
5440 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5441 it is VOIDmode, they cannot both be CONST_INT.
5442
5443 UNSIGNEDP is for the case where we have to widen the operands
5444 to perform the operation. It says to use zero-extension.
5445
5446 NORMALIZEP is 1 if we should convert the result to be either zero
5447 or one. NORMALIZEP is -1 if we should convert the result to be
5448 either zero or -1. If NORMALIZEP is zero, the result will be left
5449 "raw" out of the scc insn. */
5450
5451 rtx
5452 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5453 enum machine_mode mode, int unsignedp, int normalizep)
5454 {
5455 enum machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5456 enum rtx_code rcode;
5457 rtx subtarget;
5458 rtx tem, last, trueval;
5459
5460 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5461 target_mode);
5462 if (tem)
5463 return tem;
5464
5465 /* If we reached here, we can't do this with a scc insn; however, there
5466 are some comparisons that can be done in other ways. Don't do any
5467 of these cases if branches are very cheap. */
5468 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5469 return 0;
5470
5471 /* See what we need to return. We can only return a 1, -1, or the
5472 sign bit. */
5473
5474 if (normalizep == 0)
5475 {
5476 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5477 normalizep = STORE_FLAG_VALUE;
5478
5479 else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5480 ;
5481 else
5482 return 0;
5483 }
5484
5485 last = get_last_insn ();
5486
5487 /* If optimizing, use a different pseudo register for each insn, instead
5488 of reusing the same pseudo. This leads to better CSE, but slows
5489 down the compiler, since there are more pseudos. */
5490 subtarget = (!optimize
5491 && (target_mode == mode)) ? target : NULL_RTX;
5492 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5493
5494 /* For floating-point comparisons, try the reverse comparison or try
5495 changing the "orderedness" of the comparison. */
5496 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5497 {
5498 enum rtx_code first_code;
5499 bool and_them;
5500
5501 rcode = reverse_condition_maybe_unordered (code);
5502 if (can_compare_p (rcode, mode, ccp_store_flag)
5503 && (code == ORDERED || code == UNORDERED
5504 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5505 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5506 {
5507 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5508 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5509
5510 /* For the reverse comparison, use either an addition or a XOR. */
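/* Editorial worked example (not part of the original source): with
   STORE_FLAG_VALUE == 1 and NORMALIZEP == -1, the reverse comparison
   yields 1 when the original comparison is false and 0 when it is
   true; adding NORMALIZEP (-1) then gives 0 and -1 respectively, which
   is the wanted result. Likewise, XORing a normalized reverse result
   with TRUEVAL swaps 0 and TRUEVAL. */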
5511 if (want_add
5512 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5513 optimize_insn_for_speed_p ()) == 0)
5514 {
5515 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5516 STORE_FLAG_VALUE, target_mode);
5517 if (tem)
5518 return expand_binop (target_mode, add_optab, tem,
5519 GEN_INT (normalizep),
5520 target, 0, OPTAB_WIDEN);
5521 }
5522 else if (!want_add
5523 && rtx_cost (trueval, XOR, 1,
5524 optimize_insn_for_speed_p ()) == 0)
5525 {
5526 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5527 normalizep, target_mode);
5528 if (tem)
5529 return expand_binop (target_mode, xor_optab, tem, trueval,
5530 target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5531 }
5532 }
5533
5534 delete_insns_since (last);
5535
5536 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
5537 if (code == ORDERED || code == UNORDERED)
5538 return 0;
5539
5540 and_them = split_comparison (code, mode, &first_code, &code);
5541
5542 /* If there are no NaNs, the first comparison should always fall through.
5543 Effectively change the comparison to the other one. */
5544 if (!HONOR_NANS (mode))
5545 {
5546 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5547 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5548 target_mode);
5549 }
5550
5551 #ifdef HAVE_conditional_move
5552 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5553 conditional move. */
5554 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5555 normalizep, target_mode);
5556 if (tem == 0)
5557 return 0;
5558
5559 if (and_them)
5560 tem = emit_conditional_move (target, code, op0, op1, mode,
5561 tem, const0_rtx, GET_MODE (tem), 0);
5562 else
5563 tem = emit_conditional_move (target, code, op0, op1, mode,
5564 trueval, tem, GET_MODE (tem), 0);
5565
5566 if (tem == 0)
5567 delete_insns_since (last);
5568 return tem;
5569 #else
5570 return 0;
5571 #endif
5572 }
5573
5574 /* The remaining tricks only apply to integer comparisons. */
5575
5576 if (GET_MODE_CLASS (mode) != MODE_INT)
5577 return 0;
5578
5579 /* If this is an equality comparison of integers, we can try to exclusive-or
5580 (or subtract) the two operands and use a recursive call to try the
5581 comparison with zero. Don't do any of these cases if branches are
5582 very cheap. */
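/* Editorial note (not part of the original source): this relies on the
   identities A == B <=> (A ^ B) == 0 and A == B <=> (A - B) == 0,
   which hold equally for NE with != in place of ==. */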
5583
5584 if ((code == EQ || code == NE) && op1 != const0_rtx)
5585 {
5586 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5587 OPTAB_WIDEN);
5588
5589 if (tem == 0)
5590 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5591 OPTAB_WIDEN);
5592 if (tem != 0)
5593 tem = emit_store_flag (target, code, tem, const0_rtx,
5594 mode, unsignedp, normalizep);
5595 if (tem != 0)
5596 return tem;
5597
5598 delete_insns_since (last);
5599 }
5600
5601 /* For integer comparisons, try the reverse comparison. However, for
5602 small X and if we'd have to extend anyway, implementing "X != 0"
5603 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5604 rcode = reverse_condition (code);
5605 if (can_compare_p (rcode, mode, ccp_store_flag)
5606 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5607 && code == NE
5608 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5609 && op1 == const0_rtx))
5610 {
5611 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5612 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5613
5614 /* Again, for the reverse comparison, use either an addition or a XOR. */
5615 if (want_add
5616 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5617 optimize_insn_for_speed_p ()) == 0)
5618 {
5619 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5620 STORE_FLAG_VALUE, target_mode);
5621 if (tem != 0)
5622 tem = expand_binop (target_mode, add_optab, tem,
5623 GEN_INT (normalizep), target, 0, OPTAB_WIDEN);
5624 }
5625 else if (!want_add
5626 && rtx_cost (trueval, XOR, 1,
5627 optimize_insn_for_speed_p ()) == 0)
5628 {
5629 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5630 normalizep, target_mode);
5631 if (tem != 0)
5632 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5633 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5634 }
5635
5636 if (tem != 0)
5637 return tem;
5638 delete_insns_since (last);
5639 }
5640
5641 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5642 the constant zero. Reject all other comparisons at this point. Only
5643 do LE and GT if branches are expensive since they are expensive on
5644 2-operand machines. */
5645
5646 if (op1 != const0_rtx
5647 || (code != EQ && code != NE
5648 && (BRANCH_COST (optimize_insn_for_speed_p (),
5649 false) <= 1 || (code != LE && code != GT))))
5650 return 0;
5651
5652 /* Try to put the result of the comparison in the sign bit. Assume we can't
5653 do the necessary operation below. */
5654
5655 tem = 0;
5656
5657 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5658 the sign bit set. */
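/* Editorial worked example (not part of the original source): for
   A = 0, 0 | -1 = -1 (sign bit set); for A = 5, 5 | 4 = 5 (clear); for
   A = -3, -3 | -4 = -3 (set). Even for A = INT_MIN, where A - 1 wraps
   to INT_MAX, the IOR is still negative, so the test stays correct. */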
5659
5660 if (code == LE)
5661 {
5662 /* This is destructive, so SUBTARGET can't be OP0. */
5663 if (rtx_equal_p (subtarget, op0))
5664 subtarget = 0;
5665
5666 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5667 OPTAB_WIDEN);
5668 if (tem)
5669 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5670 OPTAB_WIDEN);
5671 }
5672
5673 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5674 number of bits in the mode of OP0, minus one. */
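/* Editorial worked example (not part of the original source), in SImode
   where the shift count is 31: A = 5 gives 0 - 5 = -5 (sign bit set);
   A = 0 gives 0 - 0 = 0 (clear); A = -3 gives -1 - (-3) = 2 (clear).
   The sign bit of the result is therefore set exactly when A > 0. */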
5675
5676 if (code == GT)
5677 {
5678 if (rtx_equal_p (subtarget, op0))
5679 subtarget = 0;
5680
5681 tem = expand_shift (RSHIFT_EXPR, mode, op0,
5682 GET_MODE_BITSIZE (mode) - 1,
5683 subtarget, 0);
5684 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5685 OPTAB_WIDEN);
5686 }
5687
5688 if (code == EQ || code == NE)
5689 {
5690 /* For EQ or NE, one way to do the comparison is to apply an operation
5691 that converts the operand into a positive number if it is nonzero
5692 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5693 for NE we negate. This puts the result in the sign bit. Then we
5694 normalize with a shift, if needed.
5695
5696 Two operations that can do the above actions are ABS and FFS, so try
5697 them. If that doesn't work, and MODE is smaller than a full word,
5698 we can use zero-extension to the wider mode (an unsigned conversion)
5699 as the operation. */
5700
5701 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5702 that is compensated by the subsequent overflow when subtracting
5703 one / negating. */
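/* Editorial worked example (not part of the original source), using ABS
   for EQ: A = 0 gives abs 0, minus 1 is -1 (sign bit set, i.e. A == 0
   holds); A = 7 or A = -7 gives abs 7, minus 1 is 6 (sign bit clear).
   For NE, negating the abs value is negative exactly when A is
   nonzero. */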
5704
5705 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5706 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5707 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5708 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5709 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5710 {
5711 tem = convert_modes (word_mode, mode, op0, 1);
5712 mode = word_mode;
5713 }
5714
5715 if (tem != 0)
5716 {
5717 if (code == EQ)
5718 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5719 0, OPTAB_WIDEN);
5720 else
5721 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5722 }
5723
5724 /* If we couldn't do it that way, for NE we can "or" the two's complement
5725 of the value with itself. For EQ, we take the one's complement of
5726 that "or", which is an extra insn, so we only handle EQ if branches
5727 are expensive. */
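/* Editorial note (not part of the original source): (-A) | A has the
   sign bit set exactly when A is nonzero, since for any nonzero A
   either A or its negation is negative (INT_MIN negates to itself);
   for A = 0 the result is 0. */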
5728
5729 if (tem == 0
5730 && (code == NE
5731 || BRANCH_COST (optimize_insn_for_speed_p (),
5732 false) > 1))
5733 {
5734 if (rtx_equal_p (subtarget, op0))
5735 subtarget = 0;
5736
5737 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5738 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5739 OPTAB_WIDEN);
5740
5741 if (tem && code == EQ)
5742 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5743 }
5744 }
5745
5746 if (tem && normalizep)
5747 tem = expand_shift (RSHIFT_EXPR, mode, tem,
5748 GET_MODE_BITSIZE (mode) - 1,
5749 subtarget, normalizep == 1);
5750
5751 if (tem)
5752 {
5753 if (!target)
5754 ;
5755 else if (GET_MODE (tem) != target_mode)
5756 {
5757 convert_move (target, tem, 0);
5758 tem = target;
5759 }
5760 else if (!subtarget)
5761 {
5762 emit_move_insn (target, tem);
5763 tem = target;
5764 }
5765 }
5766 else
5767 delete_insns_since (last);
5768
5769 return tem;
5770 }
5771
5772 /* Like emit_store_flag, but always succeeds. */
5773
5774 rtx
5775 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5776 enum machine_mode mode, int unsignedp, int normalizep)
5777 {
5778 rtx tem, label;
5779 rtx trueval, falseval;
5780
5781 /* First see if emit_store_flag can do the job. */
5782 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5783 if (tem != 0)
5784 return tem;
5785
5786 if (!target)
5787 target = gen_reg_rtx (word_mode);
5788
5789 /* If this failed, we have to do this with set/compare/jump/set code.
5790 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
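/* Editorial note (not part of the original source): when TARGET already
   holds the value being tested against zero, a zero value means TARGET
   already contains the desired result, so the code below only needs to
   overwrite it with TRUEVAL when the value is nonzero, saving one
   move. */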
5791 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5792 if (code == NE
5793 && GET_MODE_CLASS (mode) == MODE_INT
5794 && REG_P (target)
5795 && op0 == target
5796 && op1 == const0_rtx)
5797 {
5798 label = gen_label_rtx ();
5799 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
5800 mode, NULL_RTX, NULL_RTX, label, -1);
5801 emit_move_insn (target, trueval);
5802 emit_label (label);
5803 return target;
5804 }
5805
5806 if (!REG_P (target)
5807 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5808 target = gen_reg_rtx (GET_MODE (target));
5809
5810 /* Jump in the right direction if the target cannot implement CODE
5811 but can jump on its reverse condition. */
5812 falseval = const0_rtx;
5813 if (! can_compare_p (code, mode, ccp_jump)
5814 && (! FLOAT_MODE_P (mode)
5815 || code == ORDERED || code == UNORDERED
5816 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5817 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5818 {
5819 enum rtx_code rcode;
5820 if (FLOAT_MODE_P (mode))
5821 rcode = reverse_condition_maybe_unordered (code);
5822 else
5823 rcode = reverse_condition (code);
5824
5825 /* Canonicalize to UNORDERED for the libcall. */
5826 if (can_compare_p (rcode, mode, ccp_jump)
5827 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5828 {
5829 falseval = trueval;
5830 trueval = const0_rtx;
5831 code = rcode;
5832 }
5833 }
5834
5835 emit_move_insn (target, trueval);
5836 label = gen_label_rtx ();
5837 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5838 NULL_RTX, label, -1);
5839
5840 emit_move_insn (target, falseval);
5841 emit_label (label);
5842
5843 return target;
5844 }
5845 \f
5846 /* Perform possibly multi-word comparison and conditional jump to LABEL
5847 if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE. This is
5848 now a thin wrapper around do_compare_rtx_and_jump. */
5849
5850 static void
5851 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5852 rtx label)
5853 {
5854 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5855 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5856 NULL_RTX, NULL_RTX, label, -1);
5857 }