/* Medium-level subroutines: convert bit-field store and extract
   and shifts, multiplies and divides to rtl instructions.
   Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
   1999, 2000, 2001 Free Software Foundation, Inc.

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */


#include "config.h"
#include "system.h"
#include "toplev.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "flags.h"
#include "insn-flags.h"
#include "insn-codes.h"
#include "insn-config.h"
#include "expr.h"
#include "real.h"
#include "recog.h"

static void store_fixed_bit_field	PARAMS ((rtx, unsigned HOST_WIDE_INT,
						 unsigned HOST_WIDE_INT,
						 unsigned HOST_WIDE_INT, rtx,
						 unsigned int));
static void store_split_bit_field	PARAMS ((rtx, unsigned HOST_WIDE_INT,
						 unsigned HOST_WIDE_INT, rtx,
						 unsigned int));
static rtx extract_fixed_bit_field	PARAMS ((enum machine_mode, rtx,
						 unsigned HOST_WIDE_INT,
						 unsigned HOST_WIDE_INT,
						 unsigned HOST_WIDE_INT,
						 rtx, int, unsigned int));
static rtx mask_rtx			PARAMS ((enum machine_mode, int,
						 int, int));
static rtx lshift_value		PARAMS ((enum machine_mode, rtx,
						 int, int));
static rtx extract_split_bit_field	PARAMS ((rtx, unsigned HOST_WIDE_INT,
						 unsigned HOST_WIDE_INT, int,
						 unsigned int));
static void do_cmp_and_jump		PARAMS ((rtx, rtx, enum rtx_code,
						 enum machine_mode, rtx));
/* Non-zero means divide or modulus operations are relatively cheap for
   powers of two, so don't use branches; emit the operation instead.
   Usually, this will mean that the MD file will emit non-branch
   sequences.  */

static int sdiv_pow2_cheap, smod_pow2_cheap;

#ifndef SLOW_UNALIGNED_ACCESS
#define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
#endif

/* For compilers that support multiple targets with different word sizes,
   MAX_BITS_PER_WORD contains the biggest value of BITS_PER_WORD.  An example
   is the H8/300(H) compiler.  */

#ifndef MAX_BITS_PER_WORD
#define MAX_BITS_PER_WORD BITS_PER_WORD
#endif

/* Cost of various pieces of RTL.  Note that some of these are indexed by
   shift count and some by mode.  */
static int add_cost, negate_cost, zero_cost;
static int shift_cost[MAX_BITS_PER_WORD];
static int shiftadd_cost[MAX_BITS_PER_WORD];
static int shiftsub_cost[MAX_BITS_PER_WORD];
static int mul_cost[NUM_MACHINE_MODES];
static int div_cost[NUM_MACHINE_MODES];
static int mul_widen_cost[NUM_MACHINE_MODES];
static int mul_highpart_cost[NUM_MACHINE_MODES];

void
init_expmed ()
{
  /* This is "some random pseudo register" for purposes of calling recog
     to see what insns exist.  */
  rtx reg = gen_rtx_REG (word_mode, 10000);
  rtx shift_insn, shiftadd_insn, shiftsub_insn;
  int dummy;
  int m;
  enum machine_mode mode, wider_mode;

  start_sequence ();

  reg = gen_rtx_REG (word_mode, 10000);

  zero_cost = rtx_cost (const0_rtx, 0);
  add_cost = rtx_cost (gen_rtx_PLUS (word_mode, reg, reg), SET);

  shift_insn = emit_insn (gen_rtx_SET (VOIDmode, reg,
				       gen_rtx_ASHIFT (word_mode, reg,
						       const0_rtx)));

  shiftadd_insn
    = emit_insn (gen_rtx_SET (VOIDmode, reg,
			      gen_rtx_PLUS (word_mode,
					    gen_rtx_MULT (word_mode,
							  reg, const0_rtx),
					    reg)));

  shiftsub_insn
    = emit_insn (gen_rtx_SET (VOIDmode, reg,
			      gen_rtx_MINUS (word_mode,
					     gen_rtx_MULT (word_mode,
							   reg, const0_rtx),
					     reg)));

  init_recog ();

  shift_cost[0] = 0;
  shiftadd_cost[0] = shiftsub_cost[0] = add_cost;

  for (m = 1; m < MAX_BITS_PER_WORD; m++)
    {
      shift_cost[m] = shiftadd_cost[m] = shiftsub_cost[m] = 32000;

      XEXP (SET_SRC (PATTERN (shift_insn)), 1) = GEN_INT (m);
      if (recog (PATTERN (shift_insn), shift_insn, &dummy) >= 0)
	shift_cost[m] = rtx_cost (SET_SRC (PATTERN (shift_insn)), SET);

      XEXP (XEXP (SET_SRC (PATTERN (shiftadd_insn)), 0), 1)
	= GEN_INT ((HOST_WIDE_INT) 1 << m);
      if (recog (PATTERN (shiftadd_insn), shiftadd_insn, &dummy) >= 0)
	shiftadd_cost[m] = rtx_cost (SET_SRC (PATTERN (shiftadd_insn)), SET);

      XEXP (XEXP (SET_SRC (PATTERN (shiftsub_insn)), 0), 1)
	= GEN_INT ((HOST_WIDE_INT) 1 << m);
      if (recog (PATTERN (shiftsub_insn), shiftsub_insn, &dummy) >= 0)
	shiftsub_cost[m] = rtx_cost (SET_SRC (PATTERN (shiftsub_insn)), SET);
    }

  negate_cost = rtx_cost (gen_rtx_NEG (word_mode, reg), SET);

  sdiv_pow2_cheap
    = (rtx_cost (gen_rtx_DIV (word_mode, reg, GEN_INT (32)), SET)
       <= 2 * add_cost);
  smod_pow2_cheap
    = (rtx_cost (gen_rtx_MOD (word_mode, reg, GEN_INT (32)), SET)
       <= 2 * add_cost);

  for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
       mode != VOIDmode;
       mode = GET_MODE_WIDER_MODE (mode))
    {
      reg = gen_rtx_REG (mode, 10000);
      div_cost[(int) mode] = rtx_cost (gen_rtx_UDIV (mode, reg, reg), SET);
      mul_cost[(int) mode] = rtx_cost (gen_rtx_MULT (mode, reg, reg), SET);
      wider_mode = GET_MODE_WIDER_MODE (mode);
      if (wider_mode != VOIDmode)
	{
	  mul_widen_cost[(int) wider_mode]
	    = rtx_cost (gen_rtx_MULT (wider_mode,
				      gen_rtx_ZERO_EXTEND (wider_mode, reg),
				      gen_rtx_ZERO_EXTEND (wider_mode, reg)),
			SET);
	  mul_highpart_cost[(int) mode]
	    = rtx_cost (gen_rtx_TRUNCATE
			(mode,
			 gen_rtx_LSHIFTRT (wider_mode,
					   gen_rtx_MULT (wider_mode,
							 gen_rtx_ZERO_EXTEND
							 (wider_mode, reg),
							 gen_rtx_ZERO_EXTEND
							 (wider_mode, reg)),
					   GEN_INT (GET_MODE_BITSIZE (mode)))),
			SET);
	}
    }

  end_sequence ();
}
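
/* Illustrative note (not part of the original file): each probe above
   works by rewriting one operand of a prebuilt insn and asking recog
   whether the target recognizes the result.  For m == 2 the shift-add
   probe effectively asks whether the backend has an insn matching

       (set (reg) (plus (mult (reg) (const_int 4)) (reg)))

   (e.g. an x86 "lea").  A single probe iteration, written out by hand,
   might look like this sketch; register number 10000 is the same
   arbitrary pseudo used above.  */
#if 0
{
  int dummy;
  rtx reg = gen_rtx_REG (word_mode, 10000);
  rtx probe
    = emit_insn (gen_rtx_SET (VOIDmode, reg,
			      gen_rtx_PLUS (word_mode,
					    gen_rtx_MULT (word_mode, reg,
							  GEN_INT (4)),
					    reg)));
  if (recog (PATTERN (probe), probe, &dummy) >= 0)
    /* Recognized: rtx_cost of its SET_SRC is the shift-add cost.  */
    shiftadd_cost[2] = rtx_cost (SET_SRC (PATTERN (probe)), SET);
}
#endif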

/* Return an rtx representing minus the value of X.
   MODE is the intended mode of the result,
   useful if X is a CONST_INT.  */

rtx
negate_rtx (mode, x)
     enum machine_mode mode;
     rtx x;
{
  rtx result = simplify_unary_operation (NEG, mode, x, mode);

  if (result == 0)
    result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);

  return result;
}
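
/* Usage sketch (illustrative, not from the original source): for a
   CONST_INT operand the negation folds at compile time via
   simplify_unary_operation; for anything else expand_unop emits a
   neg insn into the current sequence.  */
#if 0
  rtx c = negate_rtx (SImode, GEN_INT (5));		  /* (const_int -5) */
  rtx r = negate_rtx (SImode, gen_rtx_REG (SImode, 10000)); /* emits a neg */
#endif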
\f
/* Generate code to store value from rtx VALUE
   into a bit-field within structure STR_RTX
   containing BITSIZE bits starting at bit BITNUM.
   FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
   ALIGN is the alignment that STR_RTX is known to have.
   TOTAL_SIZE is the size of the structure in bytes, or -1 if varying.  */

/* ??? Note that there are two different ideas here for how
   to determine the size to count bits within, for a register.
   One is BITS_PER_WORD, and the other is the size of operand 3
   of the insv pattern.

   If operand 3 of the insv pattern is VOIDmode, then we will use
   BITS_PER_WORD; otherwise, we use the mode of operand 3.  */

rtx
store_bit_field (str_rtx, bitsize, bitnum, fieldmode, value, align, total_size)
     rtx str_rtx;
     unsigned HOST_WIDE_INT bitsize;
     unsigned HOST_WIDE_INT bitnum;
     enum machine_mode fieldmode;
     rtx value;
     unsigned int align;
     HOST_WIDE_INT total_size;
{
  unsigned int unit
    = (GET_CODE (str_rtx) == MEM) ? BITS_PER_UNIT : BITS_PER_WORD;
  unsigned HOST_WIDE_INT offset = bitnum / unit;
  unsigned HOST_WIDE_INT bitpos = bitnum % unit;
  register rtx op0 = str_rtx;
#ifdef HAVE_insv
  unsigned HOST_WIDE_INT insv_bitsize;
  enum machine_mode op_mode;

  op_mode = insn_data[(int) CODE_FOR_insv].operand[3].mode;
  if (op_mode == VOIDmode)
    op_mode = word_mode;
  insv_bitsize = GET_MODE_BITSIZE (op_mode);
#endif

  /* It is wrong to have align==0, since every object is aligned at
     least at a bit boundary.  This usually means a bug elsewhere.  */
  if (align == 0)
    abort ();

  /* Discount the part of the structure before the desired byte.
     We need to know how many bytes are safe to reference after it.  */
  if (total_size >= 0)
    total_size -= (bitpos / BIGGEST_ALIGNMENT
		   * (BIGGEST_ALIGNMENT / BITS_PER_UNIT));

  while (GET_CODE (op0) == SUBREG)
    {
      /* The following line once was done only if WORDS_BIG_ENDIAN,
	 but I think that is a mistake.  WORDS_BIG_ENDIAN is
	 meaningful at a much higher level; when structures are copied
	 between memory and regs, the higher-numbered regs
	 always get higher addresses.  */
      offset += SUBREG_WORD (op0);
      /* We used to adjust BITPOS here, but now we do the whole adjustment
	 right after the loop.  */
      op0 = SUBREG_REG (op0);
    }

  /* If OP0 is a register, BITPOS must count within a word.
     But as we have it, it counts within whatever size OP0 now has.
     On a bigendian machine, these are not the same, so convert.  */
  if (BYTES_BIG_ENDIAN
      && GET_CODE (op0) != MEM
      && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
    bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));

  value = protect_from_queue (value, 0);

  if (flag_force_mem)
    value = force_not_mem (value);

  /* If the target is a register, overwriting the entire object, or storing
     a full-word or multi-word field can be done with just a SUBREG.

     If the target is memory, storing any naturally aligned field can be
     done with a simple store.  For targets that support fast unaligned
     memory, any naturally sized, unit aligned field can be done directly.  */

  if (bitsize == GET_MODE_BITSIZE (fieldmode)
      && (GET_CODE (op0) != MEM
	  ? (GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
	     || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
	  : (! SLOW_UNALIGNED_ACCESS (fieldmode, align)
	     || (offset * BITS_PER_UNIT % bitsize == 0
		 && align % GET_MODE_BITSIZE (fieldmode) == 0)))
      && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0))
    {
      if (GET_MODE (op0) != fieldmode)
	{
	  if (GET_CODE (op0) == SUBREG)
	    {
	      if (GET_MODE (SUBREG_REG (op0)) == fieldmode
		  || GET_MODE_CLASS (fieldmode) == MODE_INT
		  || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT)
		op0 = SUBREG_REG (op0);
	      else
		/* Else we've got some float mode source being extracted into
		   a different float mode destination -- this combination of
		   subregs results in Severe Tire Damage.  */
		abort ();
	    }
	  if (GET_CODE (op0) == REG)
	    op0 = gen_rtx_SUBREG (fieldmode, op0, offset);
	  else
	    op0 = change_address (op0, fieldmode,
				  plus_constant (XEXP (op0, 0), offset));
	}
      emit_move_insn (op0, value);
      return value;
    }

  /* Make sure we are playing with integral modes.  Pun with subregs
     if we aren't.  This must come after the entire register case above,
     since that case is valid for any mode.  The following cases are only
     valid for integral modes.  */
  {
    enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
    if (imode != GET_MODE (op0))
      {
	if (GET_CODE (op0) == MEM)
	  op0 = change_address (op0, imode, NULL_RTX);
	else if (imode != BLKmode)
	  op0 = gen_lowpart (imode, op0);
	else
	  abort ();
      }
  }

  /* Storing an lsb-aligned field in a register
     can be done with a movestrict instruction.  */

  if (GET_CODE (op0) != MEM
      && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
      && bitsize == GET_MODE_BITSIZE (fieldmode)
      && (movstrict_optab->handlers[(int) fieldmode].insn_code
	  != CODE_FOR_nothing))
    {
      int icode = movstrict_optab->handlers[(int) fieldmode].insn_code;

      /* Get appropriate low part of the value being stored.  */
      if (GET_CODE (value) == CONST_INT || GET_CODE (value) == REG)
	value = gen_lowpart (fieldmode, value);
      else if (!(GET_CODE (value) == SYMBOL_REF
		 || GET_CODE (value) == LABEL_REF
		 || GET_CODE (value) == CONST))
	value = convert_to_mode (fieldmode, value, 0);

      if (! (*insn_data[icode].operand[1].predicate) (value, fieldmode))
	value = copy_to_mode_reg (fieldmode, value);

      if (GET_CODE (op0) == SUBREG)
	{
	  if (GET_MODE (SUBREG_REG (op0)) == fieldmode
	      || GET_MODE_CLASS (fieldmode) == MODE_INT
	      || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT)
	    op0 = SUBREG_REG (op0);
	  else
	    /* Else we've got some float mode source being extracted into
	       a different float mode destination -- this combination of
	       subregs results in Severe Tire Damage.  */
	    abort ();
	}

      emit_insn (GEN_FCN (icode)
		 (gen_rtx_SUBREG (fieldmode, op0, offset), value));

      return value;
    }

  /* Handle fields bigger than a word.  */

  if (bitsize > BITS_PER_WORD)
    {
      /* Here we transfer the words of the field
	 in the order least significant first.
	 This is because the most significant word is the one which may
	 be less than full.
	 However, only do that if the value is not BLKmode.  */

      unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
      unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
      unsigned int i;

      /* This is the mode we must force value to, so that there will be enough
	 subwords to extract.  Note that fieldmode will often (always?) be
	 VOIDmode, because that is what store_field uses to indicate that this
	 is a bit field, but passing VOIDmode to operand_subword_force will
	 result in an abort.  */
      fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);

      for (i = 0; i < nwords; i++)
	{
	  /* If I is 0, use the low-order word in both field and target;
	     if I is 1, use the next to lowest word; and so on.  */
	  unsigned int wordnum = (backwards ? nwords - i - 1 : i);
	  unsigned int bit_offset = (backwards
				     ? MAX ((int) bitsize - ((int) i + 1)
					    * BITS_PER_WORD,
					    0)
				     : (int) i * BITS_PER_WORD);

	  store_bit_field (op0, MIN (BITS_PER_WORD,
				     bitsize - i * BITS_PER_WORD),
			   bitnum + bit_offset, word_mode,
			   operand_subword_force (value, wordnum,
						  (GET_MODE (value) == VOIDmode
						   ? fieldmode
						   : GET_MODE (value))),
			   align, total_size);
	}
      return value;
    }

  /* From here on we can assume that the field to be stored fits within
     a single word, since anything wider was handled above.  */

  /* OFFSET is the number of words or bytes (UNIT says which)
     from STR_RTX to the first word or byte containing part of the field.  */

  if (GET_CODE (op0) != MEM)
    {
      if (offset != 0
	  || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
	{
	  if (GET_CODE (op0) != REG)
	    {
	      /* Since this is a destination (lvalue), we can't copy it to a
		 pseudo.  We can trivially remove a SUBREG that does not
		 change the size of the operand.  Such a SUBREG may have been
		 added above.  Otherwise, abort.  */
	      if (GET_CODE (op0) == SUBREG
		  && (GET_MODE_SIZE (GET_MODE (op0))
		      == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))))
		op0 = SUBREG_REG (op0);
	      else
		abort ();
	    }
	  op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
				op0, offset);
	}
      offset = 0;
    }
  else
    {
      op0 = protect_from_queue (op0, 1);
    }

  /* If VALUE is a floating-point mode, access it as an integer of the
     corresponding size.  This can occur on a machine with 64 bit registers
     that uses SFmode for float.  This can also occur for unaligned float
     structure fields.  */
  if (GET_MODE_CLASS (GET_MODE (value)) == MODE_FLOAT)
    {
      if (GET_CODE (value) != REG)
	value = copy_to_reg (value);
      value = gen_rtx_SUBREG (word_mode, value, 0);
    }

  /* Now OFFSET is nonzero only if OP0 is memory
     and is therefore always measured in bytes.  */

#ifdef HAVE_insv
  if (HAVE_insv
      && GET_MODE (value) != BLKmode
      && !(bitsize == 1 && GET_CODE (value) == CONST_INT)
      /* Ensure insv's size is wide enough for this field.  */
      && (insv_bitsize >= bitsize)
      && ! ((GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
	    && (bitsize + bitpos > insv_bitsize)))
    {
      int xbitpos = bitpos;
      rtx value1;
      rtx xop0 = op0;
      rtx last = get_last_insn ();
      rtx pat;
      enum machine_mode maxmode;
      int save_volatile_ok = volatile_ok;

      maxmode = insn_data[(int) CODE_FOR_insv].operand[3].mode;
      if (maxmode == VOIDmode)
	maxmode = word_mode;

      volatile_ok = 1;

      /* If this machine's insv can only insert into a register, copy OP0
	 into a register and save it back later.  */
      /* This used to check flag_force_mem, but that was a serious
	 de-optimization now that flag_force_mem is enabled by -O2.  */
      if (GET_CODE (op0) == MEM
	  && ! ((*insn_data[(int) CODE_FOR_insv].operand[0].predicate)
		(op0, VOIDmode)))
	{
	  rtx tempreg;
	  enum machine_mode bestmode;

	  /* Get the mode to use for inserting into this field.  If OP0 is
	     BLKmode, get the smallest mode consistent with the alignment.  If
	     OP0 is a non-BLKmode object that is no wider than MAXMODE, use its
	     mode.  Otherwise, use the smallest mode containing the field.  */

	  if (GET_MODE (op0) == BLKmode
	      || GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (maxmode))
	    bestmode
	      = get_best_mode (bitsize, bitnum, align, maxmode,
			       MEM_VOLATILE_P (op0));
	  else
	    bestmode = GET_MODE (op0);

	  if (bestmode == VOIDmode
	      || (SLOW_UNALIGNED_ACCESS (bestmode, align)
		  && GET_MODE_BITSIZE (bestmode) > align))
	    goto insv_loses;

	  /* Adjust address to point to the containing unit of that mode.  */
	  unit = GET_MODE_BITSIZE (bestmode);
	  /* Compute offset as multiple of this unit, counting in bytes.  */
	  offset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
	  bitpos = bitnum % unit;
	  op0 = change_address (op0, bestmode,
				plus_constant (XEXP (op0, 0), offset));

	  /* Fetch that unit, store the bitfield in it, then store
	     the unit.  */
	  tempreg = copy_to_reg (op0);
	  store_bit_field (tempreg, bitsize, bitpos, fieldmode, value,
			   align, total_size);
	  emit_move_insn (op0, tempreg);
	  return value;
	}
      volatile_ok = save_volatile_ok;

      /* Add OFFSET into OP0's address.  */
      if (GET_CODE (xop0) == MEM)
	xop0 = change_address (xop0, byte_mode,
			       plus_constant (XEXP (xop0, 0), offset));

      /* If xop0 is a register, we need it in MAXMODE
	 to make it acceptable to the format of insv.  */
      if (GET_CODE (xop0) == SUBREG)
	/* We can't just change the mode, because this might clobber op0,
	   and we will need the original value of op0 if insv fails.  */
	xop0 = gen_rtx_SUBREG (maxmode, SUBREG_REG (xop0), SUBREG_WORD (xop0));
      if (GET_CODE (xop0) == REG && GET_MODE (xop0) != maxmode)
	xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);

      /* On big-endian machines, we count bits from the most significant.
	 If the bit field insn does not, we must invert.  */

      if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
	xbitpos = unit - bitsize - xbitpos;

      /* We have been counting XBITPOS within UNIT.
	 Count instead within the size of the register.  */
      if (BITS_BIG_ENDIAN && GET_CODE (xop0) != MEM)
	xbitpos += GET_MODE_BITSIZE (maxmode) - unit;

      unit = GET_MODE_BITSIZE (maxmode);

      /* Convert VALUE to maxmode (which insv insn wants) in VALUE1.  */
      value1 = value;
      if (GET_MODE (value) != maxmode)
	{
	  if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
	    {
	      /* Optimization: Don't bother really extending VALUE
		 if it has all the bits we will actually use.  However,
		 if we must narrow it, be sure we do it correctly.  */

	      if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (maxmode))
		{
		  /* Avoid making subreg of a subreg, or of a mem.  */
		  if (GET_CODE (value1) != REG)
		    value1 = copy_to_reg (value1);
		  value1 = gen_rtx_SUBREG (maxmode, value1, 0);
		}
	      else
		value1 = gen_lowpart (maxmode, value1);
	    }
	  else if (!CONSTANT_P (value))
	    /* Parse phase is supposed to make VALUE's data type
	       match that of the component reference, which is a type
	       at least as wide as the field; so VALUE should have
	       a mode that corresponds to that type.  */
	    abort ();
	}

      /* If this machine's insv insists on a register,
	 get VALUE1 into a register.  */
      if (! ((*insn_data[(int) CODE_FOR_insv].operand[3].predicate)
	     (value1, maxmode)))
	value1 = force_reg (maxmode, value1);

      pat = gen_insv (xop0, GEN_INT (bitsize), GEN_INT (xbitpos), value1);
      if (pat)
	emit_insn (pat);
      else
	{
	  delete_insns_since (last);
	  store_fixed_bit_field (op0, offset, bitsize, bitpos, value, align);
	}
    }
  else
  insv_loses:
#endif
    /* Insv is not available; store using shifts and boolean ops.  */
    store_fixed_bit_field (op0, offset, bitsize, bitpos, value, align);
  return value;
}
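
/* Usage sketch (illustrative; STRUCT_RTX is a hypothetical MEM for the
   containing object): store the constant 7 into a 5-bit field starting
   3 bits into the structure.  Passing VOIDmode as FIELDMODE marks a true
   bit field, as the store_field comment above notes; ALIGN is in bits,
   as the SLOW_UNALIGNED_ACCESS comparisons above imply.  */
#if 0
  store_bit_field (struct_rtx, 5 /* bitsize */, 3 /* bitnum */,
		   VOIDmode, GEN_INT (7), 8 /* align */, -1 /* varying */);
#endif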
\f
/* Use shifts and boolean operations to store VALUE
   into a bit field of width BITSIZE
   in a memory location specified by OP0, offset by OFFSET bytes.
   (OFFSET must be 0 if OP0 is a register.)
   The field starts at position BITPOS within the byte.
   (If OP0 is a register, it may be a full word or a narrower mode,
   but BITPOS still counts within a full word,
   which is significant on bigendian machines.)
   STRUCT_ALIGN is the alignment the structure is known to have.

   Note that protect_from_queue has already been done on OP0 and VALUE.  */

static void
store_fixed_bit_field (op0, offset, bitsize, bitpos, value, struct_align)
     register rtx op0;
     unsigned HOST_WIDE_INT offset, bitsize, bitpos;
     register rtx value;
     unsigned int struct_align;
{
  register enum machine_mode mode;
  unsigned int total_bits = BITS_PER_WORD;
  rtx subtarget, temp;
  int all_zero = 0;
  int all_one = 0;

  if (! SLOW_UNALIGNED_ACCESS (word_mode, struct_align))
    struct_align = BIGGEST_ALIGNMENT;

  /* There is a case not handled here:
     a structure with a known alignment of just a halfword
     and a field split across two aligned halfwords within the structure.
     Or likewise a structure with a known alignment of just a byte
     and a field split across two bytes.
     Such cases are not supposed to be able to occur.  */

  if (GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
    {
      if (offset != 0)
	abort ();
      /* Special treatment for a bit field split across two registers.  */
      if (bitsize + bitpos > BITS_PER_WORD)
	{
	  store_split_bit_field (op0, bitsize, bitpos,
				 value, BITS_PER_WORD);
	  return;
	}
    }
  else
    {
      /* Get the proper mode to use for this field.  We want a mode that
	 includes the entire field.  If such a mode would be larger than
	 a word, we won't be doing the extraction the normal way.  */

      mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
			    struct_align, word_mode,
			    GET_CODE (op0) == MEM && MEM_VOLATILE_P (op0));

      if (mode == VOIDmode)
	{
	  /* The only way this should occur is if the field spans word
	     boundaries.  */
	  store_split_bit_field (op0,
				 bitsize, bitpos + offset * BITS_PER_UNIT,
				 value, struct_align);
	  return;
	}

      total_bits = GET_MODE_BITSIZE (mode);

      /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
	 be in the range 0 to total_bits-1, and put any excess bytes in
	 OFFSET.  */
      if (bitpos >= total_bits)
	{
	  offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
	  bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
		     * BITS_PER_UNIT);
	}

      /* Get ref to an aligned byte, halfword, or word containing the field.
	 Adjust BITPOS to be position within a word,
	 and OFFSET to be the offset of that word.
	 Then alter OP0 to refer to that word.  */
      bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
      offset -= (offset % (total_bits / BITS_PER_UNIT));
      op0 = change_address (op0, mode,
			    plus_constant (XEXP (op0, 0), offset));
    }

  mode = GET_MODE (op0);

  /* Now MODE is either some integral mode for a MEM as OP0,
     or is a full-word for a REG as OP0.  TOTAL_BITS corresponds.
     The bit field is contained entirely within OP0.
     BITPOS is the starting bit number within OP0.
     (OP0's mode may actually be narrower than MODE.)  */

  if (BYTES_BIG_ENDIAN)
    /* BITPOS is the distance between our msb
       and that of the containing datum.
       Convert it to the distance from the lsb.  */
    bitpos = total_bits - bitsize - bitpos;

  /* Now BITPOS is always the distance between our lsb
     and that of OP0.  */

  /* Shift VALUE left by BITPOS bits.  If VALUE is not constant,
     we must first convert its mode to MODE.  */

  if (GET_CODE (value) == CONST_INT)
    {
      register HOST_WIDE_INT v = INTVAL (value);

      if (bitsize < HOST_BITS_PER_WIDE_INT)
	v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;

      if (v == 0)
	all_zero = 1;
      else if ((bitsize < HOST_BITS_PER_WIDE_INT
		&& v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
	       || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
	all_one = 1;

      value = lshift_value (mode, value, bitpos, bitsize);
    }
  else
    {
      int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
		      && bitpos + bitsize != GET_MODE_BITSIZE (mode));

      if (GET_MODE (value) != mode)
	{
	  if ((GET_CODE (value) == REG || GET_CODE (value) == SUBREG)
	      && GET_MODE_SIZE (mode) < GET_MODE_SIZE (GET_MODE (value)))
	    value = gen_lowpart (mode, value);
	  else
	    value = convert_to_mode (mode, value, 1);
	}

      if (must_and)
	value = expand_binop (mode, and_optab, value,
			      mask_rtx (mode, 0, bitsize, 0),
			      NULL_RTX, 1, OPTAB_LIB_WIDEN);
      if (bitpos > 0)
	value = expand_shift (LSHIFT_EXPR, mode, value,
			      build_int_2 (bitpos, 0), NULL_RTX, 1);
    }

  /* Now clear the chosen bits in OP0,
     except that if VALUE is -1 we need not bother.  */

  subtarget = (GET_CODE (op0) == REG || ! flag_force_mem) ? op0 : 0;

  if (! all_one)
    {
      temp = expand_binop (mode, and_optab, op0,
			   mask_rtx (mode, bitpos, bitsize, 1),
			   subtarget, 1, OPTAB_LIB_WIDEN);
      subtarget = temp;
    }
  else
    temp = op0;

  /* Now logical-or VALUE into OP0, unless it is zero.  */

  if (! all_zero)
    temp = expand_binop (mode, ior_optab, temp, value,
			 subtarget, 1, OPTAB_LIB_WIDEN);
  if (op0 != temp)
    emit_move_insn (op0, temp);
}
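
/* Worked example (illustrative, not from the original source): storing
   the value 5 into a 3-bit field at BITPOS 2 of a little-endian SImode
   datum reduces to the mask/shift sequence built above.  In plain C on
   a 32-bit word:  */
#if 0
{
  unsigned int word = 0xdeadbeef;		/* existing contents */
  unsigned int mask = ((1u << 3) - 1) << 2;	/* mask_rtx (SImode, 2, 3, 0): 0x1c */
  unsigned int val  = (5u & 7u) << 2;		/* lshift_value (SImode, 5, 2, 3): 0x14 */
  word = (word & ~mask) | val;			/* the and_optab / ior_optab steps */
}
#endif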
\f
/* Store a bit field that is split across multiple accessible memory objects.

   OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
   BITSIZE is the field width; BITPOS the position of its first bit
   (within the word).
   VALUE is the value to store.
   ALIGN is the known alignment of OP0.
   This is also the size of the memory objects to be used.

   This does not yet handle fields wider than BITS_PER_WORD.  */

static void
store_split_bit_field (op0, bitsize, bitpos, value, align)
     rtx op0;
     unsigned HOST_WIDE_INT bitsize, bitpos;
     rtx value;
     unsigned int align;
{
  unsigned int unit;
  unsigned int bitsdone = 0;

  /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle
     that much at a time.  */
  if (GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
    unit = BITS_PER_WORD;
  else
    unit = MIN (align, BITS_PER_WORD);

  /* If VALUE is a constant other than a CONST_INT, get it into a register in
     WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
     that VALUE might be a floating-point constant.  */
  if (CONSTANT_P (value) && GET_CODE (value) != CONST_INT)
    {
      rtx word = gen_lowpart_common (word_mode, value);

      if (word && (value != word))
	value = word;
      else
	value = gen_lowpart_common (word_mode,
				    force_reg (GET_MODE (value) != VOIDmode
					       ? GET_MODE (value)
					       : word_mode, value));
    }
  else if (GET_CODE (value) == ADDRESSOF)
    value = copy_to_reg (value);

  while (bitsdone < bitsize)
    {
      unsigned HOST_WIDE_INT thissize;
      rtx part, word;
      unsigned HOST_WIDE_INT thispos;
      unsigned HOST_WIDE_INT offset;

      offset = (bitpos + bitsdone) / unit;
      thispos = (bitpos + bitsdone) % unit;

      /* THISSIZE must not overrun a word boundary.  Otherwise,
	 store_fixed_bit_field will call us again, and we will mutually
	 recurse forever.  */
      thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
      thissize = MIN (thissize, unit - thispos);

      if (BYTES_BIG_ENDIAN)
	{
	  int total_bits;

	  /* We must do an endian conversion exactly the same way as it is
	     done in extract_bit_field, so that the two calls to
	     extract_fixed_bit_field will have comparable arguments.  */
	  if (GET_CODE (value) != MEM || GET_MODE (value) == BLKmode)
	    total_bits = BITS_PER_WORD;
	  else
	    total_bits = GET_MODE_BITSIZE (GET_MODE (value));

	  /* Fetch successively less significant portions.  */
	  if (GET_CODE (value) == CONST_INT)
	    part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
			     >> (bitsize - bitsdone - thissize))
			    & (((HOST_WIDE_INT) 1 << thissize) - 1));
	  else
	    /* The args are chosen so that the last part includes the
	       lsb.  Give extract_bit_field the value it needs (with
	       endianness compensation) to fetch the piece we want.

	       ??? We have no idea what the alignment of VALUE is, so
	       we have to use a guess.  */
	    part
	      = extract_fixed_bit_field
		(word_mode, value, 0, thissize,
		 total_bits - bitsize + bitsdone, NULL_RTX, 1,
		 GET_MODE (value) == VOIDmode
		 ? UNITS_PER_WORD
		 : (GET_MODE (value) == BLKmode
		    ? 1 : GET_MODE_ALIGNMENT (GET_MODE (value))));
	}
      else
	{
	  /* Fetch successively more significant portions.  */
	  if (GET_CODE (value) == CONST_INT)
	    part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
			     >> bitsdone)
			    & (((HOST_WIDE_INT) 1 << thissize) - 1));
	  else
	    part
	      = extract_fixed_bit_field
		(word_mode, value, 0, thissize, bitsdone, NULL_RTX, 1,
		 GET_MODE (value) == VOIDmode
		 ? UNITS_PER_WORD
		 : (GET_MODE (value) == BLKmode
		    ? 1 : GET_MODE_ALIGNMENT (GET_MODE (value))));
	}

      /* If OP0 is a register, then handle OFFSET here.

	 When handling multiword bitfields, extract_bit_field may pass
	 down a word_mode SUBREG of a larger REG for a bitfield that actually
	 crosses a word boundary.  Thus, for a SUBREG, we must find
	 the current word starting from the base register.  */
      if (GET_CODE (op0) == SUBREG)
	{
	  word = operand_subword_force (SUBREG_REG (op0),
					SUBREG_WORD (op0) + offset,
					GET_MODE (SUBREG_REG (op0)));
	  offset = 0;
	}
      else if (GET_CODE (op0) == REG)
	{
	  word = operand_subword_force (op0, offset, GET_MODE (op0));
	  offset = 0;
	}
      else
	word = op0;

      /* OFFSET is in UNITs, and UNIT is in bits.
	 store_fixed_bit_field wants offset in bytes.  */
      store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT,
			     thissize, thispos, part, align);
      bitsdone += thissize;
    }
}
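
/* Worked example (illustrative): a 10-bit field at BITPOS 28 in a
   little-endian register pair, with UNIT == BITS_PER_WORD == 32, takes
   two passes through the loop above:

       pass 1: offset 0, thispos 28, thissize 4   (bits 0..3 of VALUE)
       pass 2: offset 1, thispos 0,  thissize 6   (bits 4..9 of VALUE)

   Each pass hands one word-sized piece to store_fixed_bit_field.  */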
\f
/* Generate code to extract a bit-field from STR_RTX
   containing BITSIZE bits, starting at BITNUM,
   and put it in TARGET if possible (if TARGET is nonzero).
   Regardless of TARGET, we return the rtx for where the value is placed.
   It may be a QUEUED.

   STR_RTX is the structure containing the byte (a REG or MEM).
   UNSIGNEDP is nonzero if this is an unsigned bit field.
   MODE is the natural mode of the field value once extracted.
   TMODE is the mode the caller would like the value to have;
   but the value may be returned with type MODE instead.

   ALIGN is the alignment that STR_RTX is known to have.
   TOTAL_SIZE is the size in bytes of the containing structure,
   or -1 if varying.

   If a TARGET is specified and we can store in it at no extra cost,
   we do so, and return TARGET.
   Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
   if they are equally easy.  */

rtx
extract_bit_field (str_rtx, bitsize, bitnum, unsignedp,
		   target, mode, tmode, align, total_size)
     rtx str_rtx;
     unsigned HOST_WIDE_INT bitsize;
     unsigned HOST_WIDE_INT bitnum;
     int unsignedp;
     rtx target;
     enum machine_mode mode, tmode;
     unsigned int align;
     HOST_WIDE_INT total_size;
{
  unsigned int unit
    = (GET_CODE (str_rtx) == MEM) ? BITS_PER_UNIT : BITS_PER_WORD;
  unsigned HOST_WIDE_INT offset = bitnum / unit;
  unsigned HOST_WIDE_INT bitpos = bitnum % unit;
  register rtx op0 = str_rtx;
  rtx spec_target = target;
  rtx spec_target_subreg = 0;
  enum machine_mode int_mode;
#ifdef HAVE_extv
  unsigned HOST_WIDE_INT extv_bitsize;
  enum machine_mode extv_mode;
#endif
#ifdef HAVE_extzv
  unsigned HOST_WIDE_INT extzv_bitsize;
  enum machine_mode extzv_mode;
#endif

#ifdef HAVE_extv
  extv_mode = insn_data[(int) CODE_FOR_extv].operand[0].mode;
  if (extv_mode == VOIDmode)
    extv_mode = word_mode;
  extv_bitsize = GET_MODE_BITSIZE (extv_mode);
#endif

#ifdef HAVE_extzv
  extzv_mode = insn_data[(int) CODE_FOR_extzv].operand[0].mode;
  if (extzv_mode == VOIDmode)
    extzv_mode = word_mode;
  extzv_bitsize = GET_MODE_BITSIZE (extzv_mode);
#endif

  /* Discount the part of the structure before the desired byte.
     We need to know how many bytes are safe to reference after it.  */
  if (total_size >= 0)
    total_size -= (bitpos / BIGGEST_ALIGNMENT
		   * (BIGGEST_ALIGNMENT / BITS_PER_UNIT));

  if (tmode == VOIDmode)
    tmode = mode;
  while (GET_CODE (op0) == SUBREG)
    {
      int outer_size = GET_MODE_BITSIZE (GET_MODE (op0));
      int inner_size = GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op0)));

      offset += SUBREG_WORD (op0);

      inner_size = MIN (inner_size, BITS_PER_WORD);

      if (BYTES_BIG_ENDIAN && (outer_size < inner_size))
	{
	  bitpos += inner_size - outer_size;
	  if (bitpos > unit)
	    {
	      offset += (bitpos / unit);
	      bitpos %= unit;
	    }
	}

      op0 = SUBREG_REG (op0);
    }

  if (GET_CODE (op0) == REG
      && mode == GET_MODE (op0)
      && bitnum == 0
      && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
    {
      /* We're trying to extract a full register from itself.  */
      return op0;
    }

  /* Make sure we are playing with integral modes.  Pun with subregs
     if we aren't.  */
  {
    enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
    if (imode != GET_MODE (op0))
      {
	if (GET_CODE (op0) == MEM)
	  op0 = change_address (op0, imode, NULL_RTX);
	else if (imode != BLKmode)
	  op0 = gen_lowpart (imode, op0);
	else
	  abort ();
      }
  }

  /* ??? We currently assume TARGET is at least as big as BITSIZE.
     If that's wrong, the solution is to test for it and set TARGET to 0
     if needed.  */

  /* If OP0 is a register, BITPOS must count within a word.
     But as we have it, it counts within whatever size OP0 now has.
     On a bigendian machine, these are not the same, so convert.  */
  if (BYTES_BIG_ENDIAN
      && GET_CODE (op0) != MEM
      && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
    bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));

  /* Extracting a full-word or multi-word value
     from a structure in a register or aligned memory.
     This can be done with just SUBREG.
     So too extracting a subword value in
     the least significant part of the register.  */

  if (((GET_CODE (op0) != MEM
	&& TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode),
				  GET_MODE_BITSIZE (GET_MODE (op0))))
       || (GET_CODE (op0) == MEM
	   && (! SLOW_UNALIGNED_ACCESS (mode, align)
	       || (offset * BITS_PER_UNIT % bitsize == 0
		   && align % bitsize == 0))))
      && ((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
	   && bitpos % BITS_PER_WORD == 0)
	  || (mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0) != BLKmode
	      /* ??? The big endian test here is wrong.  This is correct
		 if the value is in a register, and if mode_for_size is not
		 the same mode as op0.  This causes us to get unnecessarily
		 inefficient code from the Thumb port when -mbig-endian.  */
	      && (BYTES_BIG_ENDIAN
		  ? bitpos + bitsize == BITS_PER_WORD
		  : bitpos == 0))))
    {
      enum machine_mode mode1
	= (VECTOR_MODE_P (tmode) ? mode
	   : mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0));

      if (mode1 != GET_MODE (op0))
	{
	  if (GET_CODE (op0) == SUBREG)
	    {
	      if (GET_MODE (SUBREG_REG (op0)) == mode1
		  || GET_MODE_CLASS (mode1) == MODE_INT
		  || GET_MODE_CLASS (mode1) == MODE_PARTIAL_INT)
		op0 = SUBREG_REG (op0);
	      else
		/* Else we've got some float mode source being extracted into
		   a different float mode destination -- this combination of
		   subregs results in Severe Tire Damage.  */
		abort ();
	    }
	  if (GET_CODE (op0) == REG)
	    op0 = gen_rtx_SUBREG (mode1, op0, offset);
	  else
	    op0 = change_address (op0, mode1,
				  plus_constant (XEXP (op0, 0), offset));
	}
      if (mode1 != mode)
	return convert_to_mode (tmode, op0, unsignedp);
      return op0;
    }

  /* Handle fields bigger than a word.  */

  if (bitsize > BITS_PER_WORD)
    {
      /* Here we transfer the words of the field
	 in the order least significant first.
	 This is because the most significant word is the one which may
	 be less than full.  */

      unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
      unsigned int i;

      if (target == 0 || GET_CODE (target) != REG)
	target = gen_reg_rtx (mode);

      /* Indicate for flow that the entire target reg is being set.  */
      emit_insn (gen_rtx_CLOBBER (VOIDmode, target));

      for (i = 0; i < nwords; i++)
	{
	  /* If I is 0, use the low-order word in both field and target;
	     if I is 1, use the next to lowest word; and so on.  */
	  /* Word number in TARGET to use.  */
	  unsigned int wordnum
	    = (WORDS_BIG_ENDIAN
	       ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
	       : i);
	  /* Offset from start of field in OP0.  */
	  unsigned int bit_offset = (WORDS_BIG_ENDIAN
				     ? MAX (0, ((int) bitsize - ((int) i + 1)
						* (int) BITS_PER_WORD))
				     : (int) i * BITS_PER_WORD);
	  rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
	  rtx result_part
	    = extract_bit_field (op0, MIN (BITS_PER_WORD,
					   bitsize - i * BITS_PER_WORD),
				 bitnum + bit_offset, 1, target_part, mode,
				 word_mode, align, total_size);

	  if (target_part == 0)
	    abort ();

	  if (result_part != target_part)
	    emit_move_insn (target_part, result_part);
	}

      if (unsignedp)
	{
	  /* Unless we've filled TARGET, the upper regs in a multi-reg value
	     need to be zero'd out.  */
	  if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
	    {
	      unsigned int i, total_words;

	      total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
	      for (i = nwords; i < total_words; i++)
		{
		  int wordnum = WORDS_BIG_ENDIAN ? total_words - i - 1 : i;
		  rtx target_part = operand_subword (target, wordnum, 1,
						     VOIDmode);
		  emit_move_insn (target_part, const0_rtx);
		}
	    }
	  return target;
	}

      /* Signed bit field: sign-extend with two arithmetic shifts.  */
      target = expand_shift (LSHIFT_EXPR, mode, target,
			     build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
			     NULL_RTX, 0);
      return expand_shift (RSHIFT_EXPR, mode, target,
			   build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
			   NULL_RTX, 0);
    }

  /* From here on we know the desired field is smaller than a word.  */

  /* Check if there is a correspondingly-sized integer field, so we can
     safely extract it as one size of integer, if necessary; then
     truncate or extend to the size that is wanted; then use SUBREGs or
     convert_to_mode to get one of the modes we really wanted.  */

  int_mode = int_mode_for_mode (tmode);
  if (int_mode == BLKmode)
    int_mode = int_mode_for_mode (mode);
  if (int_mode == BLKmode)
    abort ();	/* Should probably push op0 out to memory and then
		   do a load.  */

  /* OFFSET is the number of words or bytes (UNIT says which)
     from STR_RTX to the first word or byte containing part of the field.  */

  if (GET_CODE (op0) != MEM)
    {
      if (offset != 0
	  || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
	{
	  if (GET_CODE (op0) != REG)
	    op0 = copy_to_reg (op0);
	  op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
				op0, offset);
	}
      offset = 0;
    }
  else
    {
      op0 = protect_from_queue (str_rtx, 1);
    }

  /* Now OFFSET is nonzero only for memory operands.  */

  if (unsignedp)
    {
#ifdef HAVE_extzv
      if (HAVE_extzv
	  && (extzv_bitsize >= bitsize)
	  && ! ((GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
		&& (bitsize + bitpos > extzv_bitsize)))
	{
	  unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
	  rtx bitsize_rtx, bitpos_rtx;
	  rtx last = get_last_insn ();
	  rtx xop0 = op0;
	  rtx xtarget = target;
	  rtx xspec_target = spec_target;
	  rtx xspec_target_subreg = spec_target_subreg;
	  rtx pat;
	  enum machine_mode maxmode;

	  maxmode = insn_data[(int) CODE_FOR_extzv].operand[0].mode;
	  if (maxmode == VOIDmode)
	    maxmode = word_mode;

	  if (GET_CODE (xop0) == MEM)
	    {
	      int save_volatile_ok = volatile_ok;
	      volatile_ok = 1;

	      /* Is the memory operand acceptable?  */
	      if (! ((*insn_data[(int) CODE_FOR_extzv].operand[1].predicate)
		     (xop0, GET_MODE (xop0))))
		{
		  /* No, load into a reg and extract from there.  */
		  enum machine_mode bestmode;

		  /* Get the mode to use for inserting into this field.  If
		     OP0 is BLKmode, get the smallest mode consistent with the
		     alignment.  If OP0 is a non-BLKmode object that is no
		     wider than MAXMODE, use its mode.  Otherwise, use the
		     smallest mode containing the field.  */

		  if (GET_MODE (xop0) == BLKmode
		      || (GET_MODE_SIZE (GET_MODE (op0))
			  > GET_MODE_SIZE (maxmode)))
		    bestmode = get_best_mode (bitsize, bitnum, align, maxmode,
					      MEM_VOLATILE_P (xop0));
		  else
		    bestmode = GET_MODE (xop0);

		  if (bestmode == VOIDmode
		      || (SLOW_UNALIGNED_ACCESS (bestmode, align)
			  && GET_MODE_BITSIZE (bestmode) > align))
		    goto extzv_loses;

		  /* Compute offset as multiple of this unit,
		     counting in bytes.  */
		  unit = GET_MODE_BITSIZE (bestmode);
		  xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
		  xbitpos = bitnum % unit;
		  xop0 = change_address (xop0, bestmode,
					 plus_constant (XEXP (xop0, 0),
							xoffset));
		  /* Fetch it to a register in that size.  */
		  xop0 = force_reg (bestmode, xop0);

		  /* XBITPOS counts within UNIT, which is what is expected.  */
		}
	      else
		/* Get ref to first byte containing part of the field.  */
		xop0 = change_address (xop0, byte_mode,
				       plus_constant (XEXP (xop0, 0), xoffset));

	      volatile_ok = save_volatile_ok;
	    }

	  /* If op0 is a register, we need it in MAXMODE (which is usually
	     SImode) to make it acceptable to the format of extzv.  */
	  if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
	    goto extzv_loses;
	  if (GET_CODE (xop0) == REG && GET_MODE (xop0) != maxmode)
	    xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);

	  /* On big-endian machines, we count bits from the most significant.
	     If the bit field insn does not, we must invert.  */
	  if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
	    xbitpos = unit - bitsize - xbitpos;

	  /* Now convert from counting within UNIT to counting in MAXMODE.  */
	  if (BITS_BIG_ENDIAN && GET_CODE (xop0) != MEM)
	    xbitpos += GET_MODE_BITSIZE (maxmode) - unit;

	  unit = GET_MODE_BITSIZE (maxmode);

	  if (xtarget == 0
	      || (flag_force_mem && GET_CODE (xtarget) == MEM))
	    xtarget = xspec_target = gen_reg_rtx (tmode);

	  if (GET_MODE (xtarget) != maxmode)
	    {
	      if (GET_CODE (xtarget) == REG)
		{
		  int wider = (GET_MODE_SIZE (maxmode)
			       > GET_MODE_SIZE (GET_MODE (xtarget)));
		  xtarget = gen_lowpart (maxmode, xtarget);
		  if (wider)
		    xspec_target_subreg = xtarget;
		}
	      else
		xtarget = gen_reg_rtx (maxmode);
	    }

	  /* If this machine's extzv insists on a register target,
	     make sure we have one.  */
	  if (! ((*insn_data[(int) CODE_FOR_extzv].operand[0].predicate)
		 (xtarget, maxmode)))
	    xtarget = gen_reg_rtx (maxmode);

	  bitsize_rtx = GEN_INT (bitsize);
	  bitpos_rtx = GEN_INT (xbitpos);

	  pat = gen_extzv (protect_from_queue (xtarget, 1),
			   xop0, bitsize_rtx, bitpos_rtx);
	  if (pat)
	    {
	      emit_insn (pat);
	      target = xtarget;
	      spec_target = xspec_target;
	      spec_target_subreg = xspec_target_subreg;
	    }
	  else
	    {
	      delete_insns_since (last);
	      target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
						bitpos, target, 1, align);
	    }
	}
      else
      extzv_loses:
#endif
	target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
					  bitpos, target, 1, align);
    }
  else
    {
#ifdef HAVE_extv
      if (HAVE_extv
	  && (extv_bitsize >= bitsize)
	  && ! ((GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
		&& (bitsize + bitpos > extv_bitsize)))
	{
	  int xbitpos = bitpos, xoffset = offset;
	  rtx bitsize_rtx, bitpos_rtx;
	  rtx last = get_last_insn ();
	  rtx xop0 = op0, xtarget = target;
	  rtx xspec_target = spec_target;
	  rtx xspec_target_subreg = spec_target_subreg;
	  rtx pat;
	  enum machine_mode maxmode;

	  maxmode = insn_data[(int) CODE_FOR_extv].operand[0].mode;
	  if (maxmode == VOIDmode)
	    maxmode = word_mode;

	  if (GET_CODE (xop0) == MEM)
	    {
	      /* Is the memory operand acceptable?  */
	      if (! ((*insn_data[(int) CODE_FOR_extv].operand[1].predicate)
		     (xop0, GET_MODE (xop0))))
		{
		  /* No, load into a reg and extract from there.  */
		  enum machine_mode bestmode;

		  /* Get the mode to use for inserting into this field.  If
		     OP0 is BLKmode, get the smallest mode consistent with the
		     alignment.  If OP0 is a non-BLKmode object that is no
		     wider than MAXMODE, use its mode.  Otherwise, use the
		     smallest mode containing the field.  */

		  if (GET_MODE (xop0) == BLKmode
		      || (GET_MODE_SIZE (GET_MODE (op0))
			  > GET_MODE_SIZE (maxmode)))
		    bestmode = get_best_mode (bitsize, bitnum, align, maxmode,
					      MEM_VOLATILE_P (xop0));
		  else
		    bestmode = GET_MODE (xop0);

		  if (bestmode == VOIDmode
		      || (SLOW_UNALIGNED_ACCESS (bestmode, align)
			  && GET_MODE_BITSIZE (bestmode) > align))
		    goto extv_loses;

		  /* Compute offset as multiple of this unit,
		     counting in bytes.  */
		  unit = GET_MODE_BITSIZE (bestmode);
		  xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
		  xbitpos = bitnum % unit;
		  xop0 = change_address (xop0, bestmode,
					 plus_constant (XEXP (xop0, 0),
							xoffset));
		  /* Fetch it to a register in that size.  */
		  xop0 = force_reg (bestmode, xop0);

		  /* XBITPOS counts within UNIT, which is what is expected.  */
		}
	      else
		/* Get ref to first byte containing part of the field.  */
		xop0 = change_address (xop0, byte_mode,
				       plus_constant (XEXP (xop0, 0), xoffset));
	    }

	  /* If op0 is a register, we need it in MAXMODE (which is usually
	     SImode) to make it acceptable to the format of extv.  */
	  if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
	    goto extv_loses;
	  if (GET_CODE (xop0) == REG && GET_MODE (xop0) != maxmode)
	    xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);

	  /* On big-endian machines, we count bits from the most significant.
	     If the bit field insn does not, we must invert.  */
	  if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
	    xbitpos = unit - bitsize - xbitpos;

	  /* XBITPOS counts within a size of UNIT.
	     Adjust to count within a size of MAXMODE.  */
	  if (BITS_BIG_ENDIAN && GET_CODE (xop0) != MEM)
	    xbitpos += (GET_MODE_BITSIZE (maxmode) - unit);

	  unit = GET_MODE_BITSIZE (maxmode);

	  if (xtarget == 0
	      || (flag_force_mem && GET_CODE (xtarget) == MEM))
	    xtarget = xspec_target = gen_reg_rtx (tmode);

	  if (GET_MODE (xtarget) != maxmode)
	    {
	      if (GET_CODE (xtarget) == REG)
		{
		  int wider = (GET_MODE_SIZE (maxmode)
			       > GET_MODE_SIZE (GET_MODE (xtarget)));
		  xtarget = gen_lowpart (maxmode, xtarget);
		  if (wider)
		    xspec_target_subreg = xtarget;
		}
	      else
		xtarget = gen_reg_rtx (maxmode);
	    }

	  /* If this machine's extv insists on a register target,
	     make sure we have one.  */
	  if (! ((*insn_data[(int) CODE_FOR_extv].operand[0].predicate)
		 (xtarget, maxmode)))
	    xtarget = gen_reg_rtx (maxmode);

	  bitsize_rtx = GEN_INT (bitsize);
	  bitpos_rtx = GEN_INT (xbitpos);

	  pat = gen_extv (protect_from_queue (xtarget, 1),
			  xop0, bitsize_rtx, bitpos_rtx);
	  if (pat)
	    {
	      emit_insn (pat);
	      target = xtarget;
	      spec_target = xspec_target;
	      spec_target_subreg = xspec_target_subreg;
	    }
	  else
	    {
	      delete_insns_since (last);
	      target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
						bitpos, target, 0, align);
	    }
	}
      else
      extv_loses:
#endif
	target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
					  bitpos, target, 0, align);
    }
  if (target == spec_target)
    return target;
  if (target == spec_target_subreg)
    return spec_target;
  if (GET_MODE (target) != tmode && GET_MODE (target) != mode)
    {
      /* If the target mode is floating-point, first convert to the
	 integer mode of that size and then access it as a floating-point
	 value via a SUBREG.  */
      if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
	{
	  target = convert_to_mode (mode_for_size (GET_MODE_BITSIZE (tmode),
						   MODE_INT, 0),
				    target, unsignedp);
	  if (GET_CODE (target) != REG)
	    target = copy_to_reg (target);
	  return gen_rtx_SUBREG (tmode, target, 0);
	}
      else
	return convert_to_mode (tmode, target, unsignedp);
    }
  return target;
}
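
/* Usage sketch (illustrative; STRUCT_RTX is a hypothetical MEM):
   extract an unsigned 5-bit field starting 3 bits into the structure,
   widened to SImode.  A zero TARGET lets the expander choose a pseudo.  */
#if 0
  rtx x = extract_bit_field (struct_rtx, 5 /* bitsize */, 3 /* bitnum */,
			     1 /* unsignedp */, NULL_RTX, SImode, SImode,
			     8 /* align, in bits */, -1 /* varying size */);
#endif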
\f
/* Extract a bit field using shifts and boolean operations.
   Returns an rtx to represent the value.
   OP0 addresses a register (word) or memory (byte).
   BITPOS says which bit within the word or byte the bit field starts in.
   OFFSET says how many bytes farther the bit field starts;
   it is 0 if OP0 is a register.
   BITSIZE says how many bits long the bit field is.
   (If OP0 is a register, it may be narrower than a full word,
   but BITPOS still counts within a full word,
   which is significant on bigendian machines.)

   UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
   If TARGET is nonzero, attempts to store the value there
   and return TARGET, but this is not guaranteed.
   If TARGET is not used, create a pseudo-reg of mode TMODE for the value.

   ALIGN is the alignment that STR_RTX is known to have.  */

static rtx
extract_fixed_bit_field (tmode, op0, offset, bitsize, bitpos,
			 target, unsignedp, align)
     enum machine_mode tmode;
     register rtx op0, target;
     unsigned HOST_WIDE_INT offset, bitsize, bitpos;
     int unsignedp;
     unsigned int align;
{
  unsigned int total_bits = BITS_PER_WORD;
  enum machine_mode mode;

  if (GET_CODE (op0) == SUBREG || GET_CODE (op0) == REG)
    {
      /* Special treatment for a bit field split across two registers.  */
      if (bitsize + bitpos > BITS_PER_WORD)
	return extract_split_bit_field (op0, bitsize, bitpos,
					unsignedp, align);
    }
  else
    {
      /* Get the proper mode to use for this field.  We want a mode that
	 includes the entire field.  If such a mode would be larger than
	 a word, we won't be doing the extraction the normal way.  */

      mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT, align,
			    word_mode,
			    GET_CODE (op0) == MEM && MEM_VOLATILE_P (op0));

      if (mode == VOIDmode)
	/* The only way this should occur is if the field spans word
	   boundaries.  */
	return extract_split_bit_field (op0, bitsize,
					bitpos + offset * BITS_PER_UNIT,
					unsignedp, align);

      total_bits = GET_MODE_BITSIZE (mode);

      /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
	 be in the range 0 to total_bits-1, and put any excess bytes in
	 OFFSET.  */
      if (bitpos >= total_bits)
	{
	  offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
	  bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
		     * BITS_PER_UNIT);
	}

      /* Get ref to an aligned byte, halfword, or word containing the field.
	 Adjust BITPOS to be position within a word,
	 and OFFSET to be the offset of that word.
	 Then alter OP0 to refer to that word.  */
      bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
      offset -= (offset % (total_bits / BITS_PER_UNIT));
      op0 = change_address (op0, mode,
			    plus_constant (XEXP (op0, 0), offset));
    }

  mode = GET_MODE (op0);

  if (BYTES_BIG_ENDIAN)
    {
      /* BITPOS is the distance between our msb and that of OP0.
	 Convert it to the distance from the lsb.  */

      bitpos = total_bits - bitsize - bitpos;
    }

  /* Now BITPOS is always the distance between the field's lsb and that of OP0.
     We have reduced the big-endian case to the little-endian case.  */

  if (unsignedp)
    {
      if (bitpos)
	{
	  /* If the field does not already start at the lsb,
	     shift it so it does.  */
	  tree amount = build_int_2 (bitpos, 0);
	  /* Maybe propagate the target for the shift.  */
	  /* But not if we will return it--could confuse integrate.c.  */
	  rtx subtarget = (target != 0 && GET_CODE (target) == REG
			   && !REG_FUNCTION_VALUE_P (target)
			   ? target : 0);
	  if (tmode != mode) subtarget = 0;
	  op0 = expand_shift (RSHIFT_EXPR, mode, op0, amount, subtarget, 1);
	}
      /* Convert the value to the desired mode.  */
      if (mode != tmode)
	op0 = convert_to_mode (tmode, op0, 1);

      /* Unless the msb of the field used to be the msb when we shifted,
	 mask out the upper bits.  */

      if (GET_MODE_BITSIZE (mode) != bitpos + bitsize
#if 0
#ifdef SLOW_ZERO_EXTEND
	  /* Always generate an `and' if
	     we just zero-extended op0 and SLOW_ZERO_EXTEND, since it
	     will combine fruitfully with the zero-extend.  */
	  || tmode != mode
#endif
#endif
	  )
	return expand_binop (GET_MODE (op0), and_optab, op0,
			     mask_rtx (GET_MODE (op0), 0, bitsize, 0),
			     target, 1, OPTAB_LIB_WIDEN);
      return op0;
    }

  /* To extract a signed bit-field, first shift its msb to the msb of the word,
     then arithmetic-shift its lsb to the lsb of the word.  */
  op0 = force_reg (mode, op0);
  if (mode != tmode)
    target = 0;

  /* Find the narrowest integer mode that contains the field.  */

  for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
       mode = GET_MODE_WIDER_MODE (mode))
    if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
      {
	op0 = convert_to_mode (mode, op0, 0);
	break;
      }

  if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
    {
      tree amount
	= build_int_2 (GET_MODE_BITSIZE (mode) - (bitsize + bitpos), 0);
      /* Maybe propagate the target for the shift.  */
      /* But not if we will return the result--could confuse integrate.c.  */
      rtx subtarget = (target != 0 && GET_CODE (target) == REG
		       && ! REG_FUNCTION_VALUE_P (target)
		       ? target : 0);
      op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
    }

  return expand_shift (RSHIFT_EXPR, mode, op0,
		       build_int_2 (GET_MODE_BITSIZE (mode) - bitsize, 0),
		       target, 0);
}
1688 \f
1689 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1690 of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1691 complement of that if COMPLEMENT. The mask is truncated if
1692 necessary to the width of mode MODE. The mask is zero-extended if
1693 BITSIZE+BITPOS is too small for MODE. */
1694
1695 static rtx
1696 mask_rtx (mode, bitpos, bitsize, complement)
1697 enum machine_mode mode;
1698 int bitpos, bitsize, complement;
1699 {
1700 HOST_WIDE_INT masklow, maskhigh;
1701
1702 if (bitpos < HOST_BITS_PER_WIDE_INT)
1703 masklow = (HOST_WIDE_INT) -1 << bitpos;
1704 else
1705 masklow = 0;
1706
1707 if (bitpos + bitsize < HOST_BITS_PER_WIDE_INT)
1708 masklow &= ((unsigned HOST_WIDE_INT) -1
1709 >> (HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1710
1711 if (bitpos <= HOST_BITS_PER_WIDE_INT)
1712 maskhigh = -1;
1713 else
1714 maskhigh = (HOST_WIDE_INT) -1 << (bitpos - HOST_BITS_PER_WIDE_INT);
1715
1716 if (bitpos + bitsize > HOST_BITS_PER_WIDE_INT)
1717 maskhigh &= ((unsigned HOST_WIDE_INT) -1
1718 >> (2 * HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1719 else
1720 maskhigh = 0;
1721
1722 if (complement)
1723 {
1724 maskhigh = ~maskhigh;
1725 masklow = ~masklow;
1726 }
1727
1728 return immed_double_const (masklow, maskhigh, mode);
1729 }
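/* For illustration (example added; not in the original): on a host with
64-bit HOST_WIDE_INT, mask_rtx (SImode, 4, 8, 0) yields 0x00000ff0 --
eight ones above four zeros -- and with COMPLEMENT nonzero it yields
the bitwise inverse, 0xfffff00f after truncation to SImode. */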
1730
1731 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1732 VALUE truncated to BITSIZE bits and then shifted left BITPOS bits. */
1733
1734 static rtx
1735 lshift_value (mode, value, bitpos, bitsize)
1736 enum machine_mode mode;
1737 rtx value;
1738 int bitpos, bitsize;
1739 {
1740 unsigned HOST_WIDE_INT v = INTVAL (value);
1741 HOST_WIDE_INT low, high;
1742
1743 if (bitsize < HOST_BITS_PER_WIDE_INT)
1744 v &= ~((HOST_WIDE_INT) -1 << bitsize);
1745
1746 if (bitpos < HOST_BITS_PER_WIDE_INT)
1747 {
1748 low = v << bitpos;
1749 high = (bitpos > 0 ? (v >> (HOST_BITS_PER_WIDE_INT - bitpos)) : 0);
1750 }
1751 else
1752 {
1753 low = 0;
1754 high = v << (bitpos - HOST_BITS_PER_WIDE_INT);
1755 }
1756
1757 return immed_double_const (low, high, mode);
1758 }
1759 \f
1760 /* Extract a bit field that is split across two words
1761 and return an RTX for the result.
1762
1763 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1764 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1765 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.
1766
1767 ALIGN is the known alignment of OP0. This is also the size of the
1768 memory objects to be used. */
1769
1770 static rtx
1771 extract_split_bit_field (op0, bitsize, bitpos, unsignedp, align)
1772 rtx op0;
1773 unsigned HOST_WIDE_INT bitsize, bitpos;
1774 int unsignedp;
1775 unsigned int align;
1776 {
1777 unsigned int unit;
1778 unsigned int bitsdone = 0;
1779 rtx result = NULL_RTX;
1780 int first = 1;
1781
1782 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
1783 much at a time. */
1784 if (GET_CODE (op0) == REG || GET_CODE (op0) == SUBREG)
1785 unit = BITS_PER_WORD;
1786 else
1787 unit = MIN (align, BITS_PER_WORD);
1788
1789 while (bitsdone < bitsize)
1790 {
1791 unsigned HOST_WIDE_INT thissize;
1792 rtx part, word;
1793 unsigned HOST_WIDE_INT thispos;
1794 unsigned HOST_WIDE_INT offset;
1795
1796 offset = (bitpos + bitsdone) / unit;
1797 thispos = (bitpos + bitsdone) % unit;
1798
1799 /* THISSIZE must not overrun a word boundary. Otherwise,
1800 extract_fixed_bit_field will call us again, and we will mutually
1801 recurse forever. */
1802 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1803 thissize = MIN (thissize, unit - thispos);
1804
1805 /* If OP0 is a register, then handle OFFSET here.
1806
1807 When handling multiword bitfields, extract_bit_field may pass
1808 down a word_mode SUBREG of a larger REG for a bitfield that actually
1809 crosses a word boundary. Thus, for a SUBREG, we must find
1810 the current word starting from the base register. */
1811 if (GET_CODE (op0) == SUBREG)
1812 {
1813 word = operand_subword_force (SUBREG_REG (op0),
1814 SUBREG_WORD (op0) + offset,
1815 GET_MODE (SUBREG_REG (op0)));
1816 offset = 0;
1817 }
1818 else if (GET_CODE (op0) == REG)
1819 {
1820 word = operand_subword_force (op0, offset, GET_MODE (op0));
1821 offset = 0;
1822 }
1823 else
1824 word = op0;
1825
1826 /* Extract the parts in bit-counting order,
1827 whose meaning is determined by BYTES_PER_UNIT.
1828 OFFSET is in UNITs, and UNIT is in bits.
1829 extract_fixed_bit_field wants offset in bytes. */
1830 part = extract_fixed_bit_field (word_mode, word,
1831 offset * unit / BITS_PER_UNIT,
1832 thissize, thispos, 0, 1, align);
1833 bitsdone += thissize;
1834
1835 /* Shift this part into place for the result. */
1836 if (BYTES_BIG_ENDIAN)
1837 {
1838 if (bitsize != bitsdone)
1839 part = expand_shift (LSHIFT_EXPR, word_mode, part,
1840 build_int_2 (bitsize - bitsdone, 0), 0, 1);
1841 }
1842 else
1843 {
1844 if (bitsdone != thissize)
1845 part = expand_shift (LSHIFT_EXPR, word_mode, part,
1846 build_int_2 (bitsdone - thissize, 0), 0, 1);
1847 }
1848
1849 if (first)
1850 result = part;
1851 else
1852 /* Combine the parts with bitwise or. This works
1853 because we extracted each part as an unsigned bit field. */
1854 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
1855 OPTAB_LIB_WIDEN);
1856
1857 first = 0;
1858 }
1859
1860 /* Unsigned bit field: we are done. */
1861 if (unsignedp)
1862 return result;
1863 /* Signed bit field: sign-extend with two arithmetic shifts. */
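/* For illustration (example added; not in the original): with a 32-bit
word and BITSIZE == 5, the field value 0b10110 (-10) is shifted left
by 27 into the sign position and back down arithmetically, giving
0xfffffff6. */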
1864 result = expand_shift (LSHIFT_EXPR, word_mode, result,
1865 build_int_2 (BITS_PER_WORD - bitsize, 0),
1866 NULL_RTX, 0);
1867 return expand_shift (RSHIFT_EXPR, word_mode, result,
1868 build_int_2 (BITS_PER_WORD - bitsize, 0), NULL_RTX, 0);
1869 }
1870 \f
1871 /* Add INC into TARGET. */
1872
1873 void
1874 expand_inc (target, inc)
1875 rtx target, inc;
1876 {
1877 rtx value = expand_binop (GET_MODE (target), add_optab,
1878 target, inc,
1879 target, 0, OPTAB_LIB_WIDEN);
1880 if (value != target)
1881 emit_move_insn (target, value);
1882 }
1883
1884 /* Subtract DEC from TARGET. */
1885
1886 void
1887 expand_dec (target, dec)
1888 rtx target, dec;
1889 {
1890 rtx value = expand_binop (GET_MODE (target), sub_optab,
1891 target, dec,
1892 target, 0, OPTAB_LIB_WIDEN);
1893 if (value != target)
1894 emit_move_insn (target, value);
1895 }
1896 \f
1897 /* Output a shift instruction for expression code CODE,
1898 with SHIFTED being the rtx for the value to shift,
1899 and AMOUNT the tree for the amount to shift by.
1900 Store the result in the rtx TARGET, if that is convenient.
1901 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
1902 Return the rtx for where the value is. */
1903
1904 rtx
1905 expand_shift (code, mode, shifted, amount, target, unsignedp)
1906 enum tree_code code;
1907 register enum machine_mode mode;
1908 rtx shifted;
1909 tree amount;
1910 register rtx target;
1911 int unsignedp;
1912 {
1913 register rtx op1, temp = 0;
1914 register int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
1915 register int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
1916 int try;
1917
1918 /* Previously detected shift-counts computed by NEGATE_EXPR
1919 and shifted in the other direction; but that does not work
1920 on all machines. */
1921
1922 op1 = expand_expr (amount, NULL_RTX, VOIDmode, 0);
1923
1924 #ifdef SHIFT_COUNT_TRUNCATED
1925 if (SHIFT_COUNT_TRUNCATED)
1926 {
1927 if (GET_CODE (op1) == CONST_INT
1928 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
1929 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
1930 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
1931 % GET_MODE_BITSIZE (mode));
1932 else if (GET_CODE (op1) == SUBREG
1933 && SUBREG_WORD (op1) == 0)
1934 op1 = SUBREG_REG (op1);
1935 }
1936 #endif
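/* For illustration (example added; not in the original): on a
SHIFT_COUNT_TRUNCATED target, a constant shift of a 32-bit value by 37
is reduced here to a shift by 37 % 32 == 5. */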
1937
1938 if (op1 == const0_rtx)
1939 return shifted;
1940
1941 for (try = 0; temp == 0 && try < 3; try++)
1942 {
1943 enum optab_methods methods;
1944
1945 if (try == 0)
1946 methods = OPTAB_DIRECT;
1947 else if (try == 1)
1948 methods = OPTAB_WIDEN;
1949 else
1950 methods = OPTAB_LIB_WIDEN;
1951
1952 if (rotate)
1953 {
1954 /* Widening does not work for rotation. */
1955 if (methods == OPTAB_WIDEN)
1956 continue;
1957 else if (methods == OPTAB_LIB_WIDEN)
1958 {
1959 /* If we have been unable to open-code this by a rotation,
1960 do it as the IOR of two shifts. I.e., to rotate A
1961 by N bits, compute (A << N) | ((unsigned) A >> (C - N))
1962 where C is the bitsize of A.
1963
1964 It is theoretically possible that the target machine might
1965 not be able to perform either shift and hence we would
1966 be making two libcalls rather than just the one for the
1967 shift (similarly if IOR could not be done). We will allow
1968 this extremely unlikely lossage to avoid complicating the
1969 code below. */
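/* For illustration (example added; not in the original): a 32-bit
left rotate by 3 is emitted as (A << 3) | ((unsigned) A >> 29). */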
1970
1971 rtx subtarget = target == shifted ? 0 : target;
1972 rtx temp1;
1973 tree type = TREE_TYPE (amount);
1974 tree new_amount = make_tree (type, op1);
1975 tree other_amount
1976 = fold (build (MINUS_EXPR, type,
1977 convert (type,
1978 build_int_2 (GET_MODE_BITSIZE (mode),
1979 0)),
1980 amount));
1981
1982 shifted = force_reg (mode, shifted);
1983
1984 temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
1985 mode, shifted, new_amount, subtarget, 1);
1986 temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
1987 mode, shifted, other_amount, 0, 1);
1988 return expand_binop (mode, ior_optab, temp, temp1, target,
1989 unsignedp, methods);
1990 }
1991
1992 temp = expand_binop (mode,
1993 left ? rotl_optab : rotr_optab,
1994 shifted, op1, target, unsignedp, methods);
1995
1996 /* If we don't have the rotate, but we are rotating by a constant
1997 that is in range, try a rotate in the opposite direction. */
1998
1999 if (temp == 0 && GET_CODE (op1) == CONST_INT
2000 && INTVAL (op1) > 0 && INTVAL (op1) < GET_MODE_BITSIZE (mode))
2001 temp = expand_binop (mode,
2002 left ? rotr_optab : rotl_optab,
2003 shifted,
2004 GEN_INT (GET_MODE_BITSIZE (mode)
2005 - INTVAL (op1)),
2006 target, unsignedp, methods);
2007 }
2008 else if (unsignedp)
2009 temp = expand_binop (mode,
2010 left ? ashl_optab : lshr_optab,
2011 shifted, op1, target, unsignedp, methods);
2012
2013 /* Do arithmetic shifts.
2014 Also, if we are going to widen the operand, we can just as well
2015 use an arithmetic right-shift instead of a logical one. */
2016 if (temp == 0 && ! rotate
2017 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2018 {
2019 enum optab_methods methods1 = methods;
2020
2021 /* If trying to widen a log shift to an arithmetic shift,
2022 don't accept an arithmetic shift of the same size. */
2023 if (unsignedp)
2024 methods1 = OPTAB_MUST_WIDEN;
2025
2026 /* Arithmetic shift */
2027
2028 temp = expand_binop (mode,
2029 left ? ashl_optab : ashr_optab,
2030 shifted, op1, target, unsignedp, methods1);
2031 }
2032
2033 /* We used to try extzv here for logical right shifts, but that was
2034 only useful for one machine, the VAX, and caused poor code
2035 generation there for lshrdi3, so the code was deleted and a
2036 define_expand for lshrsi3 was added to vax.md. */
2037 }
2038
2039 if (temp == 0)
2040 abort ();
2041 return temp;
2042 }
2043 \f
2044 enum alg_code { alg_zero, alg_m, alg_shift,
2045 alg_add_t_m2, alg_sub_t_m2,
2046 alg_add_factor, alg_sub_factor,
2047 alg_add_t2_m, alg_sub_t2_m,
2048 alg_add, alg_subtract, alg_factor, alg_shiftop };
2049
2050 /* This structure records a sequence of operations.
2051 `ops' is the number of operations recorded.
2052 `cost' is their total cost.
2053 The operations are stored in `op' and the corresponding
2054 logarithms of the integer coefficients in `log'.
2055
2056 These are the operations:
2057 alg_zero total := 0;
2058 alg_m total := multiplicand;
2059 alg_shift total := total * coeff;
2060 alg_add_t_m2 total := total + multiplicand * coeff;
2061 alg_sub_t_m2 total := total - multiplicand * coeff;
2062 alg_add_factor total := total * coeff + total;
2063 alg_sub_factor total := total * coeff - total;
2064 alg_add_t2_m total := total * coeff + multiplicand;
2065 alg_sub_t2_m total := total * coeff - multiplicand;
2066
2067 The first operand must be either alg_zero or alg_m. */
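/* For illustration (example added; not in the original): t == 10 can be
recorded as alg_m, then alg_add_t2_m with log 2, then alg_shift with
log 1:
alg_m total := multiplicand;
alg_add_t2_m total := (total << 2) + multiplicand; -- 5 * multiplicand
alg_shift total := total << 1; -- 10 * multiplicand */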
2068
2069 struct algorithm
2070 {
2071 short cost;
2072 short ops;
2073 /* The sizes of the OP and LOG fields are not directly related to the
2074 word size, but the worst-case algorithms arise when we have few
2075 consecutive ones or zeros, i.e., a multiplicand like 10101010101...
2076 In that case we will generate shift-by-2, add, shift-by-2, add, ...,
2077 in total wordsize operations. */
2078 enum alg_code op[MAX_BITS_PER_WORD];
2079 char log[MAX_BITS_PER_WORD];
2080 };
2081
2082 static void synth_mult PARAMS ((struct algorithm *,
2083 unsigned HOST_WIDE_INT,
2084 int));
2085 static unsigned HOST_WIDE_INT choose_multiplier PARAMS ((unsigned HOST_WIDE_INT,
2086 int, int,
2087 unsigned HOST_WIDE_INT *,
2088 int *, int *));
2089 static unsigned HOST_WIDE_INT invert_mod2n PARAMS ((unsigned HOST_WIDE_INT,
2090 int));
2091 /* Compute and return the best algorithm for multiplying by T.
2092 The algorithm must cost less than COST_LIMIT.
2093 If retval.cost >= COST_LIMIT, no algorithm was found and all
2094 other fields of the returned struct are undefined. */
2095
2096 static void
2097 synth_mult (alg_out, t, cost_limit)
2098 struct algorithm *alg_out;
2099 unsigned HOST_WIDE_INT t;
2100 int cost_limit;
2101 {
2102 int m;
2103 struct algorithm *alg_in, *best_alg;
2104 int cost;
2105 unsigned HOST_WIDE_INT q;
2106
2107 /* Indicate that no algorithm has been found yet. If none is
2108 found, this value will be returned and will indicate failure. */
2109 alg_out->cost = cost_limit;
2110
2111 if (cost_limit <= 0)
2112 return;
2113
2114 /* t == 1 can be done at zero cost. */
2115 if (t == 1)
2116 {
2117 alg_out->ops = 1;
2118 alg_out->cost = 0;
2119 alg_out->op[0] = alg_m;
2120 return;
2121 }
2122
2123 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2124 fail now. */
2125 if (t == 0)
2126 {
2127 if (zero_cost >= cost_limit)
2128 return;
2129 else
2130 {
2131 alg_out->ops = 1;
2132 alg_out->cost = zero_cost;
2133 alg_out->op[0] = alg_zero;
2134 return;
2135 }
2136 }
2137
2138 /* We'll be needing a couple extra algorithm structures now. */
2139
2140 alg_in = (struct algorithm *)alloca (sizeof (struct algorithm));
2141 best_alg = (struct algorithm *)alloca (sizeof (struct algorithm));
2142
2143 /* If we have a group of zero bits at the low-order part of T, try
2144 multiplying by the remaining bits and then doing a shift. */
2145
2146 if ((t & 1) == 0)
2147 {
2148 m = floor_log2 (t & -t); /* m = number of low zero bits */
2149 if (m < BITS_PER_WORD)
2150 {
2151 q = t >> m;
2152 cost = shift_cost[m];
2153 synth_mult (alg_in, q, cost_limit - cost);
2154
2155 cost += alg_in->cost;
2156 if (cost < cost_limit)
2157 {
2158 struct algorithm *x;
2159 x = alg_in, alg_in = best_alg, best_alg = x;
2160 best_alg->log[best_alg->ops] = m;
2161 best_alg->op[best_alg->ops] = alg_shift;
2162 cost_limit = cost;
2163 }
2164 }
2165 }
2166
2167 /* If we have an odd number, add or subtract one. */
2168 if ((t & 1) != 0)
2169 {
2170 unsigned HOST_WIDE_INT w;
2171
2172 for (w = 1; (w & t) != 0; w <<= 1)
2173 ;
2174 /* If T was -1, then W will be zero after the loop. This is another
2175 case where T ends with ...111. Handling it by multiplying by (T + 1)
2176 and subtracting 1 produces slightly better code and selects an
2177 algorithm much faster than treating it like the ...0111 case
2178 below. */
2179 if (w == 0
2180 || (w > 2
2181 /* Reject the case where t is 3.
2182 Thus we prefer addition in that case. */
2183 && t != 3))
2184 {
2185 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
2186
2187 cost = add_cost;
2188 synth_mult (alg_in, t + 1, cost_limit - cost);
2189
2190 cost += alg_in->cost;
2191 if (cost < cost_limit)
2192 {
2193 struct algorithm *x;
2194 x = alg_in, alg_in = best_alg, best_alg = x;
2195 best_alg->log[best_alg->ops] = 0;
2196 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2197 cost_limit = cost;
2198 }
2199 }
2200 else
2201 {
2202 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
2203
2204 cost = add_cost;
2205 synth_mult (alg_in, t - 1, cost_limit - cost);
2206
2207 cost += alg_in->cost;
2208 if (cost < cost_limit)
2209 {
2210 struct algorithm *x;
2211 x = alg_in, alg_in = best_alg, best_alg = x;
2212 best_alg->log[best_alg->ops] = 0;
2213 best_alg->op[best_alg->ops] = alg_add_t_m2;
2214 cost_limit = cost;
2215 }
2216 }
2217 }
2218
2219 /* Look for factors of t of the form
2220 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2221 If we find such a factor, we can multiply by t using an algorithm that
2222 multiplies by q, shift the result by m and add/subtract it to itself.
2223
2224 We search for large factors first and loop down, even if large factors
2225 are less probable than small; if we find a large factor we will find a
2226 good sequence quickly, and therefore be able to prune the search
2227 (by decreasing COST_LIMIT). */
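/* For illustration (example added; not in the original): t == 15
factors as (2**2 + 1) * 3, so the search can record alg_add_factor
with log 2 (total := total * 4 + total) on top of a multiply-by-3
sub-sequence, computing x * 15 in two shift-and-add steps. */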
2228
2229 for (m = floor_log2 (t - 1); m >= 2; m--)
2230 {
2231 unsigned HOST_WIDE_INT d;
2232
2233 d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2234 if (t % d == 0 && t > d && m < BITS_PER_WORD)
2235 {
2236 cost = MIN (shiftadd_cost[m], add_cost + shift_cost[m]);
2237 synth_mult (alg_in, t / d, cost_limit - cost);
2238
2239 cost += alg_in->cost;
2240 if (cost < cost_limit)
2241 {
2242 struct algorithm *x;
2243 x = alg_in, alg_in = best_alg, best_alg = x;
2244 best_alg->log[best_alg->ops] = m;
2245 best_alg->op[best_alg->ops] = alg_add_factor;
2246 cost_limit = cost;
2247 }
2248 /* Other factors will have been taken care of in the recursion. */
2249 break;
2250 }
2251
2252 d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2253 if (t % d == 0 && t > d && m < BITS_PER_WORD)
2254 {
2255 cost = MIN (shiftsub_cost[m], add_cost + shift_cost[m]);
2256 synth_mult (alg_in, t / d, cost_limit - cost);
2257
2258 cost += alg_in->cost;
2259 if (cost < cost_limit)
2260 {
2261 struct algorithm *x;
2262 x = alg_in, alg_in = best_alg, best_alg = x;
2263 best_alg->log[best_alg->ops] = m;
2264 best_alg->op[best_alg->ops] = alg_sub_factor;
2265 cost_limit = cost;
2266 }
2267 break;
2268 }
2269 }
2270
2271 /* Try shift-and-add (load effective address) instructions,
2272 i.e. do a*3, a*5, a*9. */
2273 if ((t & 1) != 0)
2274 {
2275 q = t - 1;
2276 q = q & -q;
2277 m = exact_log2 (q);
2278 if (m >= 0 && m < BITS_PER_WORD)
2279 {
2280 cost = shiftadd_cost[m];
2281 synth_mult (alg_in, (t - 1) >> m, cost_limit - cost);
2282
2283 cost += alg_in->cost;
2284 if (cost < cost_limit)
2285 {
2286 struct algorithm *x;
2287 x = alg_in, alg_in = best_alg, best_alg = x;
2288 best_alg->log[best_alg->ops] = m;
2289 best_alg->op[best_alg->ops] = alg_add_t2_m;
2290 cost_limit = cost;
2291 }
2292 }
2293
2294 q = t + 1;
2295 q = q & -q;
2296 m = exact_log2 (q);
2297 if (m >= 0 && m < BITS_PER_WORD)
2298 {
2299 cost = shiftsub_cost[m];
2300 synth_mult (alg_in, (t + 1) >> m, cost_limit - cost);
2301
2302 cost += alg_in->cost;
2303 if (cost < cost_limit)
2304 {
2305 struct algorithm *x;
2306 x = alg_in, alg_in = best_alg, best_alg = x;
2307 best_alg->log[best_alg->ops] = m;
2308 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2309 cost_limit = cost;
2310 }
2311 }
2312 }
2313
2314 /* If cost_limit has not decreased since we stored it in alg_out->cost,
2315 we have not found any algorithm. */
2316 if (cost_limit == alg_out->cost)
2317 return;
2318
2319 /* If the sequence is too long for `struct algorithm'
2320 to record, make this search fail. */
2321 if (best_alg->ops == MAX_BITS_PER_WORD)
2322 return;
2323
2324 /* Copy the algorithm from temporary space to the space at alg_out.
2325 We avoid using structure assignment because the majority of
2326 best_alg is normally undefined, and this is a critical function. */
2327 alg_out->ops = best_alg->ops + 1;
2328 alg_out->cost = cost_limit;
2329 memcpy (alg_out->op, best_alg->op,
2330 alg_out->ops * sizeof *alg_out->op);
2331 memcpy (alg_out->log, best_alg->log,
2332 alg_out->ops * sizeof *alg_out->log);
2333 }
2334 \f
2335 /* Perform a multiplication and return an rtx for the result.
2336 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
2337 TARGET is a suggestion for where to store the result (an rtx).
2338
2339 We check specially for a constant integer as OP1.
2340 If you want this check for OP0 as well, then before calling
2341 you should swap the two operands if OP0 would be constant. */
2342
2343 rtx
2344 expand_mult (mode, op0, op1, target, unsignedp)
2345 enum machine_mode mode;
2346 register rtx op0, op1, target;
2347 int unsignedp;
2348 {
2349 rtx const_op1 = op1;
2350
2351 /* synth_mult does an `unsigned int' multiply. As long as the mode is
2352 less than or equal in size to `unsigned int' this doesn't matter.
2353 If the mode is larger than `unsigned int', then synth_mult works only
2354 if the constant value exactly fits in an `unsigned int' without any
2355 truncation. This means that multiplying by negative values does
2356 not work; results are off by 2^32 on a 32 bit machine. */
2357
2358 /* If we are multiplying in DImode, it may still be a win
2359 to try to work with shifts and adds. */
2360 if (GET_CODE (op1) == CONST_DOUBLE
2361 && GET_MODE_CLASS (GET_MODE (op1)) == MODE_INT
2362 && HOST_BITS_PER_INT >= BITS_PER_WORD
2363 && CONST_DOUBLE_HIGH (op1) == 0)
2364 const_op1 = GEN_INT (CONST_DOUBLE_LOW (op1));
2365 else if (HOST_BITS_PER_INT < GET_MODE_BITSIZE (mode)
2366 && GET_CODE (op1) == CONST_INT
2367 && INTVAL (op1) < 0)
2368 const_op1 = 0;
2369
2370 /* We used to test optimize here, on the grounds that it's better to
2371 produce a smaller program when -O is not used.
2372 But this causes such a terrible slowdown sometimes
2373 that it seems better to use synth_mult always. */
2374
2375 if (const_op1 && GET_CODE (const_op1) == CONST_INT
2376 && (unsignedp || ! flag_trapv))
2377 {
2378 struct algorithm alg;
2379 struct algorithm alg2;
2380 HOST_WIDE_INT val = INTVAL (op1);
2381 HOST_WIDE_INT val_so_far;
2382 rtx insn;
2383 int mult_cost;
2384 enum {basic_variant, negate_variant, add_variant} variant = basic_variant;
2385
2386 /* Try to do the computation three ways: multiply by the negative of OP1
2387 and then negate, do the multiplication directly, or do multiplication
2388 by OP1 - 1. */
2389
2390 mult_cost = rtx_cost (gen_rtx_MULT (mode, op0, op1), SET);
2391 mult_cost = MIN (12 * add_cost, mult_cost);
2392
2393 synth_mult (&alg, val, mult_cost);
2394
2395 /* This works only if the inverted value actually fits in an
2396 `unsigned int'. */
2397 if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2398 {
2399 synth_mult (&alg2, - val,
2400 (alg.cost < mult_cost ? alg.cost : mult_cost) - negate_cost);
2401 if (alg2.cost + negate_cost < alg.cost)
2402 alg = alg2, variant = negate_variant;
2403 }
2404
2405 /* This proves very useful for division-by-constant. */
2406 synth_mult (&alg2, val - 1,
2407 (alg.cost < mult_cost ? alg.cost : mult_cost) - add_cost);
2408 if (alg2.cost + add_cost < alg.cost)
2409 alg = alg2, variant = add_variant;
2410
2411 if (alg.cost < mult_cost)
2412 {
2413 /* We found something cheaper than a multiply insn. */
2414 int opno;
2415 rtx accum, tem;
2416 enum machine_mode nmode;
2417
2418 op0 = protect_from_queue (op0, 0);
2419
2420 /* Avoid referencing memory over and over.
2421 For speed, but also for correctness when mem is volatile. */
2422 if (GET_CODE (op0) == MEM)
2423 op0 = force_reg (mode, op0);
2424
2425 /* ACCUM starts out either as OP0 or as a zero, depending on
2426 the first operation. */
2427
2428 if (alg.op[0] == alg_zero)
2429 {
2430 accum = copy_to_mode_reg (mode, const0_rtx);
2431 val_so_far = 0;
2432 }
2433 else if (alg.op[0] == alg_m)
2434 {
2435 accum = copy_to_mode_reg (mode, op0);
2436 val_so_far = 1;
2437 }
2438 else
2439 abort ();
2440
2441 for (opno = 1; opno < alg.ops; opno++)
2442 {
2443 int log = alg.log[opno];
2444 int preserve = preserve_subexpressions_p ();
2445 rtx shift_subtarget = preserve ? 0 : accum;
2446 rtx add_target
2447 = (opno == alg.ops - 1 && target != 0 && variant != add_variant
2448 && ! preserve)
2449 ? target : 0;
2450 rtx accum_target = preserve ? 0 : accum;
2451
2452 switch (alg.op[opno])
2453 {
2454 case alg_shift:
2455 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2456 build_int_2 (log, 0), NULL_RTX, 0);
2457 val_so_far <<= log;
2458 break;
2459
2460 case alg_add_t_m2:
2461 tem = expand_shift (LSHIFT_EXPR, mode, op0,
2462 build_int_2 (log, 0), NULL_RTX, 0);
2463 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2464 add_target
2465 ? add_target : accum_target);
2466 val_so_far += (HOST_WIDE_INT) 1 << log;
2467 break;
2468
2469 case alg_sub_t_m2:
2470 tem = expand_shift (LSHIFT_EXPR, mode, op0,
2471 build_int_2 (log, 0), NULL_RTX, 0);
2472 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2473 add_target
2474 ? add_target : accum_target);
2475 val_so_far -= (HOST_WIDE_INT) 1 << log;
2476 break;
2477
2478 case alg_add_t2_m:
2479 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2480 build_int_2 (log, 0), shift_subtarget,
2481 0);
2482 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2483 add_target
2484 ? add_target : accum_target);
2485 val_so_far = (val_so_far << log) + 1;
2486 break;
2487
2488 case alg_sub_t2_m:
2489 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2490 build_int_2 (log, 0), shift_subtarget,
2491 0);
2492 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2493 add_target
2494 ? add_target : accum_target);
2495 val_so_far = (val_so_far << log) - 1;
2496 break;
2497
2498 case alg_add_factor:
2499 tem = expand_shift (LSHIFT_EXPR, mode, accum,
2500 build_int_2 (log, 0), NULL_RTX, 0);
2501 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2502 add_target
2503 ? add_target : accum_target);
2504 val_so_far += val_so_far << log;
2505 break;
2506
2507 case alg_sub_factor:
2508 tem = expand_shift (LSHIFT_EXPR, mode, accum,
2509 build_int_2 (log, 0), NULL_RTX, 0);
2510 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
2511 (add_target ? add_target
2512 : preserve ? 0 : tem));
2513 val_so_far = (val_so_far << log) - val_so_far;
2514 break;
2515
2516 default:
2517 abort ();
2518 }
2519
2520 /* Write a REG_EQUAL note on the last insn so that we can cse
2521 multiplication sequences. Note that if ACCUM is a SUBREG,
2522 we've set the inner register and must properly indicate
2523 that. */
2524
2525 tem = op0, nmode = mode;
2526 if (GET_CODE (accum) == SUBREG)
2527 {
2528 nmode = GET_MODE (SUBREG_REG (accum));
2529 tem = gen_lowpart (nmode, op0);
2530 }
2531
2532 insn = get_last_insn ();
2533 set_unique_reg_note (insn,
2534 REG_EQUAL,
2535 gen_rtx_MULT (nmode, tem,
2536 GEN_INT (val_so_far)));
2537 }
2538
2539 if (variant == negate_variant)
2540 {
2541 val_so_far = - val_so_far;
2542 accum = expand_unop (mode, neg_optab, accum, target, 0);
2543 }
2544 else if (variant == add_variant)
2545 {
2546 val_so_far = val_so_far + 1;
2547 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
2548 }
2549
2550 if (val != val_so_far)
2551 abort ();
2552
2553 return accum;
2554 }
2555 }
2556
2557 /* This used to use umul_optab if unsigned, but for non-widening multiply
2558 there is no difference between signed and unsigned. */
2559 op0 = expand_binop (mode,
2560 ! unsignedp
2561 && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
2562 ? smulv_optab : smul_optab,
2563 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
2564 if (op0 == 0)
2565 abort ();
2566 return op0;
2567 }
2568 \f
2569 /* Return the smallest n such that 2**n >= X. */
2570
2571 int
2572 ceil_log2 (x)
2573 unsigned HOST_WIDE_INT x;
2574 {
2575 return floor_log2 (x - 1) + 1;
2576 }
2577
2578 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
2579 replace division by D, and put the least significant N bits of the result
2580 in *MULTIPLIER_PTR and return the most significant bit.
2581
2582 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), and
2583 the needed precision is in PRECISION (should be <= N).
2584
2585 PRECISION should be as small as possible so this function can choose
2586 the multiplier more freely.
2587
2588 The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that
2589 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
2590
2591 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
2592 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
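/* For illustration (example added; not in the original): for D == 7 with
N == PRECISION == 32, lgup == 3 and the chosen N + 1 bit multiplier is
2**32 + 0x24924925, so the function returns 1 (the extra high bit) with
*MULTIPLIER_PTR == 0x24924925 and *POST_SHIFT_PTR == 3. */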
2593
2594 static
2595 unsigned HOST_WIDE_INT
2596 choose_multiplier (d, n, precision, multiplier_ptr, post_shift_ptr, lgup_ptr)
2597 unsigned HOST_WIDE_INT d;
2598 int n;
2599 int precision;
2600 unsigned HOST_WIDE_INT *multiplier_ptr;
2601 int *post_shift_ptr;
2602 int *lgup_ptr;
2603 {
2604 HOST_WIDE_INT mhigh_hi, mlow_hi;
2605 unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
2606 int lgup, post_shift;
2607 int pow, pow2;
2608 unsigned HOST_WIDE_INT nl, dummy1;
2609 HOST_WIDE_INT nh, dummy2;
2610
2611 /* lgup = ceil(log2(divisor)); */
2612 lgup = ceil_log2 (d);
2613
2614 if (lgup > n)
2615 abort ();
2616
2617 pow = n + lgup;
2618 pow2 = n + lgup - precision;
2619
2620 if (pow == 2 * HOST_BITS_PER_WIDE_INT)
2621 {
2622 /* We could handle this with some effort, but this case is much better
2623 handled directly with a scc insn, so rely on caller using that. */
2624 abort ();
2625 }
2626
2627 /* mlow = 2^(N + lgup)/d */
2628 if (pow >= HOST_BITS_PER_WIDE_INT)
2629 {
2630 nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
2631 nl = 0;
2632 }
2633 else
2634 {
2635 nh = 0;
2636 nl = (unsigned HOST_WIDE_INT) 1 << pow;
2637 }
2638 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
2639 &mlow_lo, &mlow_hi, &dummy1, &dummy2);
2640
2641 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
2642 if (pow2 >= HOST_BITS_PER_WIDE_INT)
2643 nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
2644 else
2645 nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
2646 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
2647 &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
2648
2649 if (mhigh_hi && nh - d >= d)
2650 abort ();
2651 if (mhigh_hi > 1 || mlow_hi > 1)
2652 abort ();
2653 /* assert that mlow < mhigh. */
2654 if (! (mlow_hi < mhigh_hi || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo)))
2655 abort ();
2656
2657 /* If precision == N, then mlow, mhigh exceed 2^N
2658 (but they do not exceed 2^(N+1)). */
2659
2660 /* Reduce to lowest terms. */
2661 for (post_shift = lgup; post_shift > 0; post_shift--)
2662 {
2663 unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
2664 unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
2665 if (ml_lo >= mh_lo)
2666 break;
2667
2668 mlow_hi = 0;
2669 mlow_lo = ml_lo;
2670 mhigh_hi = 0;
2671 mhigh_lo = mh_lo;
2672 }
2673
2674 *post_shift_ptr = post_shift;
2675 *lgup_ptr = lgup;
2676 if (n < HOST_BITS_PER_WIDE_INT)
2677 {
2678 unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
2679 *multiplier_ptr = mhigh_lo & mask;
2680 return mhigh_lo >= mask;
2681 }
2682 else
2683 {
2684 *multiplier_ptr = mhigh_lo;
2685 return mhigh_hi;
2686 }
2687 }
2688
2689 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
2690 congruent to 1 (mod 2**N). */
2691
2692 static unsigned HOST_WIDE_INT
2693 invert_mod2n (x, n)
2694 unsigned HOST_WIDE_INT x;
2695 int n;
2696 {
2697 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
2698
2699 /* The algorithm notes that the choice y = x satisfies
2700 x*y == 1 mod 2^3, since x is assumed odd.
2701 Each iteration doubles the number of bits of significance in y. */
2702
2703 unsigned HOST_WIDE_INT mask;
2704 unsigned HOST_WIDE_INT y = x;
2705 int nbit = 3;
2706
2707 mask = (n == HOST_BITS_PER_WIDE_INT
2708 ? ~(unsigned HOST_WIDE_INT) 0
2709 : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
2710
2711 while (nbit < n)
2712 {
2713 y = y * (2 - x*y) & mask; /* Modulo 2^N */
2714 nbit *= 2;
2715 }
2716 return y;
2717 }
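/* For illustration (sketch added; not part of the original): the same
doubling iteration specialized to a 32-bit word. The name is
hypothetical; assumes X is odd and `unsigned int' is exactly 32 bits. */
#if 0
static unsigned int
example_invert_mod2_32 (x)
unsigned int x;
{
unsigned int y = x; /* x*y == 1 (mod 2^3), since x is odd */
int nbit;

for (nbit = 3; nbit < 32; nbit *= 2)
y = y * (2 - x * y); /* bits of significance double each pass */

return y; /* e.g. x == 3 yields y == 0xaaaaaaab */
}
#endif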
2718
2719 /* Emit code to adjust ADJ_OPERAND after a multiplication of OP0 and OP1
2720 performed with the wrong signedness. ADJ_OPERAND is already the high
2721 half of the product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the
2722 signed product to become unsigned; if UNSIGNEDP is zero, adjust the
2723 unsigned product to become signed.
2724
2725 The result is put in TARGET if that is convenient.
2726
2727 MODE is the mode of operation. */
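/* For illustration (note added; not in the original): with N-bit
operands the two high parts differ, mod 2^N, by
high_u (op0, op1) == high_s (op0, op1)
+ (op0 < 0 ? op1 : 0) + (op1 < 0 ? op0 : 0),
and the two masked arithmetic right shifts below compute exactly those
conditional addends, applied with PLUS or MINUS per UNSIGNEDP. */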
2728
2729 rtx
2730 expand_mult_highpart_adjust (mode, adj_operand, op0, op1, target, unsignedp)
2731 enum machine_mode mode;
2732 register rtx adj_operand, op0, op1, target;
2733 int unsignedp;
2734 {
2735 rtx tem;
2736 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
2737
2738 tem = expand_shift (RSHIFT_EXPR, mode, op0,
2739 build_int_2 (GET_MODE_BITSIZE (mode) - 1, 0),
2740 NULL_RTX, 0);
2741 tem = expand_and (tem, op1, NULL_RTX);
2742 adj_operand
2743 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
2744 adj_operand);
2745
2746 tem = expand_shift (RSHIFT_EXPR, mode, op1,
2747 build_int_2 (GET_MODE_BITSIZE (mode) - 1, 0),
2748 NULL_RTX, 0);
2749 tem = expand_and (tem, op0, NULL_RTX);
2750 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
2751 target);
2752
2753 return target;
2754 }
2755
2756 /* Emit code to multiply OP0 and CNST1, putting the high half of the result
2757 in TARGET if that is convenient, and return where the result is. If the
2758 operation cannot be performed, 0 is returned.
2759
2760 MODE is the mode of operation and result.
2761
2762 UNSIGNEDP nonzero means unsigned multiply.
2763
2764 MAX_COST is the total allowed cost for the expanded RTL. */
2765
2766 rtx
2767 expand_mult_highpart (mode, op0, cnst1, target, unsignedp, max_cost)
2768 enum machine_mode mode;
2769 register rtx op0, target;
2770 unsigned HOST_WIDE_INT cnst1;
2771 int unsignedp;
2772 int max_cost;
2773 {
2774 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
2775 optab mul_highpart_optab;
2776 optab moptab;
2777 rtx tem;
2778 int size = GET_MODE_BITSIZE (mode);
2779 rtx op1, wide_op1;
2780
2781 /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */
2782 if (size > HOST_BITS_PER_WIDE_INT)
2783 abort ();
2784
2785 op1 = GEN_INT (cnst1);
2786
2787 if (GET_MODE_BITSIZE (wider_mode) <= HOST_BITS_PER_INT)
2788 wide_op1 = op1;
2789 else
2790 wide_op1
2791 = immed_double_const (cnst1,
2792 (unsignedp
2793 ? (HOST_WIDE_INT) 0
2794 : -(cnst1 >> (HOST_BITS_PER_WIDE_INT - 1))),
2795 wider_mode);
2796
2797 /* expand_mult handles constant multiplication of word_mode
2798 or narrower. It does a poor job for large modes. */
2799 if (size < BITS_PER_WORD
2800 && mul_cost[(int) wider_mode] + shift_cost[size-1] < max_cost)
2801 {
2802 /* We have to do this, since expand_binop doesn't do conversion for
2803 multiply. Maybe change expand_binop to handle widening multiply? */
2804 op0 = convert_to_mode (wider_mode, op0, unsignedp);
2805
2806 /* We know that this can't have signed overflow, so pretend this is
2807 an unsigned multiply. */
2808 tem = expand_mult (wider_mode, op0, wide_op1, NULL_RTX, 0);
2809 tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
2810 build_int_2 (size, 0), NULL_RTX, 1);
2811 return convert_modes (mode, wider_mode, tem, unsignedp);
2812 }
2813
2814 if (target == 0)
2815 target = gen_reg_rtx (mode);
2816
2817 /* First, try using a multiplication insn that generates only the needed
2818 high part of the product, with the signedness given by UNSIGNEDP. */
2819 if (mul_highpart_cost[(int) mode] < max_cost)
2820 {
2821 mul_highpart_optab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
2822 target = expand_binop (mode, mul_highpart_optab,
2823 op0, op1, target, unsignedp, OPTAB_DIRECT);
2824 if (target)
2825 return target;
2826 }
2827
2828 /* Second, try the same, but with the opposite signedness.
2829 We then need to adjust the result after the multiplication. */
2830 if (size - 1 < BITS_PER_WORD
2831 && (mul_highpart_cost[(int) mode] + 2 * shift_cost[size-1] + 4 * add_cost
2832 < max_cost))
2833 {
2834 mul_highpart_optab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
2835 target = expand_binop (mode, mul_highpart_optab,
2836 op0, op1, target, unsignedp, OPTAB_DIRECT);
2837 if (target)
2838 /* We used the wrong signedness. Adjust the result. */
2839 return expand_mult_highpart_adjust (mode, target, op0,
2840 op1, target, unsignedp);
2841 }
2842
2843 /* Try widening multiplication. */
2844 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
2845 if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
2846 && mul_widen_cost[(int) wider_mode] < max_cost)
2847 {
2848 op1 = force_reg (mode, op1);
2849 goto try;
2850 }
2851
2852 /* Try widening the mode and performing a non-widening multiplication. */
2853 moptab = smul_optab;
2854 if (smul_optab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
2855 && size - 1 < BITS_PER_WORD
2856 && mul_cost[(int) wider_mode] + shift_cost[size-1] < max_cost)
2857 {
2858 op1 = wide_op1;
2859 goto try;
2860 }
2861
2862 /* Try widening multiplication of opposite signedness, and adjust. */
2863 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
2864 if (moptab->handlers[(int) wider_mode].insn_code != CODE_FOR_nothing
2865 && size - 1 < BITS_PER_WORD
2866 && (mul_widen_cost[(int) wider_mode]
2867 + 2 * shift_cost[size-1] + 4 * add_cost < max_cost))
2868 {
2869 rtx regop1 = force_reg (mode, op1);
2870 tem = expand_binop (wider_mode, moptab, op0, regop1,
2871 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
2872 if (tem != 0)
2873 {
2874 /* Extract the high half of the just generated product. */
2875 tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
2876 build_int_2 (size, 0), NULL_RTX, 1);
2877 tem = convert_modes (mode, wider_mode, tem, unsignedp);
2878 /* We used the wrong signedness. Adjust the result. */
2879 return expand_mult_highpart_adjust (mode, tem, op0, op1,
2880 target, unsignedp);
2881 }
2882 }
2883
2884 return 0;
2885
2886 try:
2887 /* Pass NULL_RTX as target since TARGET has wrong mode. */
2888 tem = expand_binop (wider_mode, moptab, op0, op1,
2889 NULL_RTX, unsignedp, OPTAB_WIDEN);
2890 if (tem == 0)
2891 return 0;
2892
2893 /* Extract the high half of the just generated product. */
2894 if (mode == word_mode)
2895 {
2896 return gen_highpart (mode, tem);
2897 }
2898 else
2899 {
2900 tem = expand_shift (RSHIFT_EXPR, wider_mode, tem,
2901 build_int_2 (size, 0), NULL_RTX, 1);
2902 return convert_modes (mode, wider_mode, tem, unsignedp);
2903 }
2904 }
2905 \f
2906 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
2907 if that is convenient, and returning where the result is.
2908 You may request either the quotient or the remainder as the result;
2909 specify REM_FLAG nonzero to get the remainder.
2910
2911 CODE is the expression code for which kind of division this is;
2912 it controls how rounding is done. MODE is the machine mode to use.
2913 UNSIGNEDP nonzero means do unsigned division. */
2914
2915 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
2916 and then correct it by or'ing in missing high bits
2917 if result of ANDI is nonzero.
2918 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
2919 This could optimize to a bfexts instruction.
2920 But C doesn't use these operations, so their optimizations are
2921 left for later. */
2922 /* ??? For modulo, we don't actually need the highpart of the first product,
2923 the low part will do nicely. And for small divisors, the second multiply
2924 can also be a low-part only multiply or even be completely left out.
2925 E.g. to calculate the remainder of a division by 3 with a 32 bit
2926 multiply, multiply with 0x55555556 and extract the upper two bits;
2927 the result is exact for inputs up to 0x1fffffff.
2928 The input range can be reduced by using cross-sum rules.
2929 For odd divisors >= 3, the following table gives right shift counts
2930 so that if a number is shifted by an integer multiple of the given
2931 amount, the remainder stays the same:
2932 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
2933 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
2934 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
2935 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
2936 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
2937
2938 Cross-sum rules for even numbers can be derived by leaving as many bits
2939 to the right alone as the divisor has zeros to the right.
2940 E.g. if x is an unsigned 32 bit number:
2941 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
2942 */
2943
2944 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
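/* For illustration (note added; not in the original): X & (X - 1) clears
the lowest set bit, so it is zero exactly for powers of two, e.g.
8 & 7 == 0 while 12 & 11 == 8; zero itself also passes, hence the
_OR_ZERO in the name. */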
2945
2946 rtx
2947 expand_divmod (rem_flag, code, mode, op0, op1, target, unsignedp)
2948 int rem_flag;
2949 enum tree_code code;
2950 enum machine_mode mode;
2951 register rtx op0, op1, target;
2952 int unsignedp;
2953 {
2954 enum machine_mode compute_mode;
2955 register rtx tquotient;
2956 rtx quotient = 0, remainder = 0;
2957 rtx last;
2958 int size;
2959 rtx insn, set;
2960 optab optab1, optab2;
2961 int op1_is_constant, op1_is_pow2;
2962 int max_cost, extra_cost;
2963 static HOST_WIDE_INT last_div_const = 0;
2964
2965 op1_is_constant = GET_CODE (op1) == CONST_INT;
2966 op1_is_pow2 = (op1_is_constant
2967 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
2968 || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1))))));
2969
2970 /*
2971 This is the structure of expand_divmod:
2972
2973 First comes code to fix up the operands so we can perform the operations
2974 correctly and efficiently.
2975
2976 Second comes a switch statement with code specific for each rounding mode.
2977 For some special operands this code emits all RTL for the desired
2978 operation, for other cases, it generates only a quotient and stores it in
2979 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
2980 to indicate that it has not done anything.
2981
2982 Last comes code that finishes the operation. If QUOTIENT is set and
2983 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
2984 QUOTIENT is not set, it is computed using trunc rounding.
2985
2986 We try to generate special code for division and remainder when OP1 is a
2987 constant. If |OP1| = 2**n we can use shifts and some other fast
2988 operations. For other values of OP1, we compute a carefully selected
2989 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
2990 by m.
2991
2992 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
2993 half of the product. Different strategies for generating the product are
2994 implemented in expand_mult_highpart.
2995
2996 If what we actually want is the remainder, we generate that by another
2997 by-constant multiplication and a subtraction. */
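/* For illustration (sketch added; not part of the original): the shape
of the code emitted below for unsigned division by 7 in 32 bits, using
the multiplier/post-shift pair discussed at choose_multiplier. The
name is hypothetical; assumes a 64-bit `unsigned long long' for the
widening multiply. */
#if 0
static unsigned int
example_udiv7 (x)
unsigned int x;
{
unsigned int t1 = ((unsigned long long) x * 0x24924925) >> 32;
unsigned int t2 = (x - t1) >> 1; /* fixup for the 33rd multiplier bit */

return (t1 + t2) >> 2; /* post_shift - 1 == 2 */
}
#endif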
2998
2999 /* We shouldn't be called with OP1 == const1_rtx, but some of the
3000 code below will malfunction if we are, so check here and handle
3001 the special case if so. */
3002 if (op1 == const1_rtx)
3003 return rem_flag ? const0_rtx : op0;
3004
3005 /* When dividing by -1, we could get an overflow.
3006 negv_optab can handle overflows. */
3007 if (! unsignedp && op1 == constm1_rtx)
3008 {
3009 if (rem_flag)
3010 return const0_rtx;
3011 return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3012 ? negv_optab : neg_optab, op0, target, 0);
3013 }
3014
3015 if (target
3016 /* Don't use the function value register as a target
3017 since we have to read it as well as write it,
3018 and function-inlining gets confused by this. */
3019 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3020 /* Don't clobber an operand while doing a multi-step calculation. */
3021 || ((rem_flag || op1_is_constant)
3022 && (reg_mentioned_p (target, op0)
3023 || (GET_CODE (op0) == MEM && GET_CODE (target) == MEM)))
3024 || reg_mentioned_p (target, op1)
3025 || (GET_CODE (op1) == MEM && GET_CODE (target) == MEM)))
3026 target = 0;
3027
3028 /* Get the mode in which to perform this computation. Normally it will
3029 be MODE, but sometimes we can't do the desired operation in MODE.
3030 If so, pick a wider mode in which we can do the operation. Convert
3031 to that mode at the start to avoid repeated conversions.
3032
3033 First see what operations we need. These depend on the expression
3034 we are evaluating. (We assume that divxx3 insns exist under the
3035 same conditions that modxx3 insns do and that these insns don't normally
3036 fail. If these assumptions are not correct, we may generate less
3037 efficient code in some cases.)
3038
3039 Then see if we find a mode in which we can open-code that operation
3040 (either a division, modulus, or shift). Finally, check for the smallest
3041 mode for which we can do the operation with a library call. */
3042
3043 /* We might want to refine this now that we have division-by-constant
3044 optimization. Since expand_mult_highpart tries so many variants, it is
3045 not straightforward to generalize this. Maybe we should make an array
3046 of possible modes in init_expmed? Save this for GCC 2.7. */
3047
3048 optab1 = (op1_is_pow2 ? (unsignedp ? lshr_optab : ashr_optab)
3049 : (unsignedp ? udiv_optab : sdiv_optab));
3050 optab2 = (op1_is_pow2 ? optab1 : (unsignedp ? udivmod_optab : sdivmod_optab));
3051
3052 for (compute_mode = mode; compute_mode != VOIDmode;
3053 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3054 if (optab1->handlers[(int) compute_mode].insn_code != CODE_FOR_nothing
3055 || optab2->handlers[(int) compute_mode].insn_code != CODE_FOR_nothing)
3056 break;
3057
3058 if (compute_mode == VOIDmode)
3059 for (compute_mode = mode; compute_mode != VOIDmode;
3060 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3061 if (optab1->handlers[(int) compute_mode].libfunc
3062 || optab2->handlers[(int) compute_mode].libfunc)
3063 break;
3064
3065 /* If we still couldn't find a mode, use MODE, but we'll probably abort
3066 in expand_binop. */
3067 if (compute_mode == VOIDmode)
3068 compute_mode = mode;
3069
3070 if (target && GET_MODE (target) == compute_mode)
3071 tquotient = target;
3072 else
3073 tquotient = gen_reg_rtx (compute_mode);
3074
3075 size = GET_MODE_BITSIZE (compute_mode);
3076 #if 0
3077 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3078 (mode), and thereby get better code when OP1 is a constant. Do that
3079 later. It will require going over all usages of SIZE below. */
3080 size = GET_MODE_BITSIZE (mode);
3081 #endif
3082
3083 /* Only deduct something for a REM if the last divide done was
3084 for a different constant. Then set the constant of the last
3085 divide. */
3086 max_cost = div_cost[(int) compute_mode]
3087 - (rem_flag && ! (last_div_const != 0 && op1_is_constant
3088 && INTVAL (op1) == last_div_const)
3089 ? mul_cost[(int) compute_mode] + add_cost : 0);
3090
3091 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3092
3093 /* Now convert to the best mode to use. */
3094 if (compute_mode != mode)
3095 {
3096 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3097 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3098
3099 /* convert_modes may have placed op1 into a register, so we
3100 must recompute the following. */
3101 op1_is_constant = GET_CODE (op1) == CONST_INT;
3102 op1_is_pow2 = (op1_is_constant
3103 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3104 || (! unsignedp
3105 && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1))))));
3106 }
3107
3108 /* If one of the operands is a volatile MEM, copy it into a register. */
3109
3110 if (GET_CODE (op0) == MEM && MEM_VOLATILE_P (op0))
3111 op0 = force_reg (compute_mode, op0);
3112 if (GET_CODE (op1) == MEM && MEM_VOLATILE_P (op1))
3113 op1 = force_reg (compute_mode, op1);
3114
3115 /* If we need the remainder or if OP1 is constant, we need to
3116 put OP0 in a register in case it has any queued subexpressions. */
3117 if (rem_flag || op1_is_constant)
3118 op0 = force_reg (compute_mode, op0);
3119
3120 last = get_last_insn ();
3121
3122 /* Promote floor rounding to trunc rounding for unsigned operations. */
3123 if (unsignedp)
3124 {
3125 if (code == FLOOR_DIV_EXPR)
3126 code = TRUNC_DIV_EXPR;
3127 if (code == FLOOR_MOD_EXPR)
3128 code = TRUNC_MOD_EXPR;
3129 if (code == EXACT_DIV_EXPR && op1_is_pow2)
3130 code = TRUNC_DIV_EXPR;
3131 }
3132
3133 if (op1 != const0_rtx)
3134 switch (code)
3135 {
3136 case TRUNC_MOD_EXPR:
3137 case TRUNC_DIV_EXPR:
3138 if (op1_is_constant)
3139 {
3140 if (unsignedp)
3141 {
3142 unsigned HOST_WIDE_INT mh, ml;
3143 int pre_shift, post_shift;
3144 int dummy;
3145 unsigned HOST_WIDE_INT d = INTVAL (op1);
3146
3147 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3148 {
3149 pre_shift = floor_log2 (d);
3150 if (rem_flag)
3151 {
3152 remainder
3153 = expand_binop (compute_mode, and_optab, op0,
3154 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3155 remainder, 1,
3156 OPTAB_LIB_WIDEN);
3157 if (remainder)
3158 return gen_lowpart (mode, remainder);
3159 }
3160 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3161 build_int_2 (pre_shift, 0),
3162 tquotient, 1);
3163 }
3164 else if (size <= HOST_BITS_PER_WIDE_INT)
3165 {
3166 if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
3167 {
3168 /* Most significant bit of divisor is set; emit an scc
3169 insn. */
3170 quotient = emit_store_flag (tquotient, GEU, op0, op1,
3171 compute_mode, 1, 1);
3172 if (quotient == 0)
3173 goto fail1;
3174 }
3175 else
3176 {
3177 /* Find a suitable multiplier and right shift count
3178 instead of multiplying with D. */
3179
3180 mh = choose_multiplier (d, size, size,
3181 &ml, &post_shift, &dummy);
3182
3183 /* If the suggested multiplier is more than SIZE bits,
3184 we can do better for even divisors, using an
3185 initial right shift. */
3186 if (mh != 0 && (d & 1) == 0)
3187 {
3188 pre_shift = floor_log2 (d & -d);
3189 mh = choose_multiplier (d >> pre_shift, size,
3190 size - pre_shift,
3191 &ml, &post_shift, &dummy);
3192 if (mh)
3193 abort ();
3194 }
3195 else
3196 pre_shift = 0;
3197
3198 if (mh != 0)
3199 {
3200 rtx t1, t2, t3, t4;
3201
3202 if (post_shift - 1 >= BITS_PER_WORD)
3203 goto fail1;
3204
3205 extra_cost = (shift_cost[post_shift - 1]
3206 + shift_cost[1] + 2 * add_cost);
3207 t1 = expand_mult_highpart (compute_mode, op0, ml,
3208 NULL_RTX, 1,
3209 max_cost - extra_cost);
3210 if (t1 == 0)
3211 goto fail1;
3212 t2 = force_operand (gen_rtx_MINUS (compute_mode,
3213 op0, t1),
3214 NULL_RTX);
3215 t3 = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3216 build_int_2 (1, 0), NULL_RTX, 1);
3217 t4 = force_operand (gen_rtx_PLUS (compute_mode,
3218 t1, t3),
3219 NULL_RTX);
3220 quotient
3221 = expand_shift (RSHIFT_EXPR, compute_mode, t4,
3222 build_int_2 (post_shift - 1, 0),
3223 tquotient, 1);
3224 }
3225 else
3226 {
3227 rtx t1, t2;
3228
3229 if (pre_shift >= BITS_PER_WORD
3230 || post_shift >= BITS_PER_WORD)
3231 goto fail1;
3232
3233 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3234 build_int_2 (pre_shift, 0),
3235 NULL_RTX, 1);
3236 extra_cost = (shift_cost[pre_shift]
3237 + shift_cost[post_shift]);
3238 t2 = expand_mult_highpart (compute_mode, t1, ml,
3239 NULL_RTX, 1,
3240 max_cost - extra_cost);
3241 if (t2 == 0)
3242 goto fail1;
3243 quotient
3244 = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3245 build_int_2 (post_shift, 0),
3246 tquotient, 1);
3247 }
3248 }
3249 }
3250 else /* Mode is too wide to use the tricky code. */
3251 break;
3252
3253 insn = get_last_insn ();
3254 if (insn != last
3255 && (set = single_set (insn)) != 0
3256 && SET_DEST (set) == quotient)
3257 set_unique_reg_note (insn,
3258 REG_EQUAL,
3259 gen_rtx_UDIV (compute_mode, op0, op1));
3260 }
3261 else /* TRUNC_DIV, signed */
3262 {
3263 unsigned HOST_WIDE_INT ml;
3264 int lgup, post_shift;
3265 HOST_WIDE_INT d = INTVAL (op1);
3266 unsigned HOST_WIDE_INT abs_d = d >= 0 ? d : -d;
3267
3268 /* n rem d = n rem -d */
3269 if (rem_flag && d < 0)
3270 {
3271 d = abs_d;
3272 op1 = GEN_INT (abs_d);
3273 }
3274
3275 if (d == 1)
3276 quotient = op0;
3277 else if (d == -1)
3278 quotient = expand_unop (compute_mode, neg_optab, op0,
3279 tquotient, 0);
3280 else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
3281 {
3282 /* This case is not handled correctly below. */
3283 quotient = emit_store_flag (tquotient, EQ, op0, op1,
3284 compute_mode, 1, 1);
3285 if (quotient == 0)
3286 goto fail1;
3287 }
3288 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
3289 && (rem_flag ? smod_pow2_cheap : sdiv_pow2_cheap))
3290 ;
3291 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
3292 {
3293 lgup = floor_log2 (abs_d);
3294 if (BRANCH_COST < 1 || (abs_d != 2 && BRANCH_COST < 3))
3295 {
3296 rtx label = gen_label_rtx ();
3297 rtx t1;
3298
3299 t1 = copy_to_mode_reg (compute_mode, op0);
3300 do_cmp_and_jump (t1, const0_rtx, GE,
3301 compute_mode, label);
3302 expand_inc (t1, GEN_INT (abs_d - 1));
3303 emit_label (label);
3304 quotient = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3305 build_int_2 (lgup, 0),
3306 tquotient, 0);
3307 }
3308 else
3309 {
3310 rtx t1, t2, t3;
3311 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3312 build_int_2 (size - 1, 0),
3313 NULL_RTX, 0);
3314 t2 = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3315 build_int_2 (size - lgup, 0),
3316 NULL_RTX, 1);
3317 t3 = force_operand (gen_rtx_PLUS (compute_mode,
3318 op0, t2),
3319 NULL_RTX);
3320 quotient = expand_shift (RSHIFT_EXPR, compute_mode, t3,
3321 build_int_2 (lgup, 0),
3322 tquotient, 0);
3323 }
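/* For illustration (example added; not in the original): at 32 bits
with abs_d == 4 the branch-free form above computes t2 as 3 when OP0
is negative and 0 otherwise, so the quotient is (op0 + t2) >> 2,
matching truncation toward zero. */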
3324
3325 /* We have computed OP0 / abs(OP1). If OP1 is negative, negate
3326 the quotient. */
3327 if (d < 0)
3328 {
3329 insn = get_last_insn ();
3330 if (insn != last
3331 && (set = single_set (insn)) != 0
3332 && SET_DEST (set) == quotient
3333 && abs_d < ((unsigned HOST_WIDE_INT) 1
3334 << (HOST_BITS_PER_WIDE_INT - 1)))
3335 set_unique_reg_note (insn,
3336 REG_EQUAL,
3337 gen_rtx_DIV (compute_mode,
3338 op0,
3339 GEN_INT (abs_d)));
3340
3341 quotient = expand_unop (compute_mode, neg_optab,
3342 quotient, quotient, 0);
3343 }
3344 }
3345 else if (size <= HOST_BITS_PER_WIDE_INT)
3346 {
3347 choose_multiplier (abs_d, size, size - 1,
3348 &ml, &post_shift, &lgup);
3349 if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
3350 {
3351 rtx t1, t2, t3;
3352
3353 if (post_shift >= BITS_PER_WORD
3354 || size - 1 >= BITS_PER_WORD)
3355 goto fail1;
3356
3357 extra_cost = (shift_cost[post_shift]
3358 + shift_cost[size - 1] + add_cost);
3359 t1 = expand_mult_highpart (compute_mode, op0, ml,
3360 NULL_RTX, 0,
3361 max_cost - extra_cost);
3362 if (t1 == 0)
3363 goto fail1;
3364 t2 = expand_shift (RSHIFT_EXPR, compute_mode, t1,
3365 build_int_2 (post_shift, 0), NULL_RTX, 0);
3366 t3 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3367 build_int_2 (size - 1, 0), NULL_RTX, 0);
3368 if (d < 0)
3369 quotient
3370 = force_operand (gen_rtx_MINUS (compute_mode,
3371 t3, t2),
3372 tquotient);
3373 else
3374 quotient
3375 = force_operand (gen_rtx_MINUS (compute_mode,
3376 t2, t3),
3377 tquotient);
3378 }
3379 else
3380 {
3381 rtx t1, t2, t3, t4;
3382
3383 if (post_shift >= BITS_PER_WORD
3384 || size - 1 >= BITS_PER_WORD)
3385 goto fail1;
3386
3387 ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
3388 extra_cost = (shift_cost[post_shift]
3389 + shift_cost[size - 1] + 2 * add_cost);
3390 t1 = expand_mult_highpart (compute_mode, op0, ml,
3391 NULL_RTX, 0,
3392 max_cost - extra_cost);
3393 if (t1 == 0)
3394 goto fail1;
3395 t2 = force_operand (gen_rtx_PLUS (compute_mode,
3396 t1, op0),
3397 NULL_RTX);
3398 t3 = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3399 build_int_2 (post_shift, 0),
3400 NULL_RTX, 0);
3401 t4 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3402 build_int_2 (size - 1, 0),
3403 NULL_RTX, 0);
3404 if (d < 0)
3405 quotient
3406 = force_operand (gen_rtx_MINUS (compute_mode,
3407 t4, t3),
3408 tquotient);
3409 else
3410 quotient
3411 = force_operand (gen_rtx_MINUS (compute_mode,
3412 t3, t4),
3413 tquotient);
3414 }
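/* For illustration (example added; not in the original): for d == 7 at
32 bits this second form computes
q = ((op0 + mulhi (op0, 0x92492493)) >> 2) - (op0 >> 31),
where 0x92492493 is ML after the sign-extending `|=' above and mulhi
stands for a signed high-part multiply. */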
3415 }
3416 else /* Mode is too wide to use the tricky code. */
3417 break;
3418
3419 insn = get_last_insn ();
3420 if (insn != last
3421 && (set = single_set (insn)) != 0
3422 && SET_DEST (set) == quotient)
3423 set_unique_reg_note (insn,
3424 REG_EQUAL,
3425 gen_rtx_DIV (compute_mode, op0, op1));
3426 }
3427 break;
3428 }
3429 fail1:
3430 delete_insns_since (last);
3431 break;
3432
3433 case FLOOR_DIV_EXPR:
3434 case FLOOR_MOD_EXPR:
3435 /* We will come here only for signed operations. */
3436 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
3437 {
3438 unsigned HOST_WIDE_INT mh, ml;
3439 int pre_shift, lgup, post_shift;
3440 HOST_WIDE_INT d = INTVAL (op1);
3441
3442 if (d > 0)
3443 {
3444 /* We could just as easily deal with negative constants here,
3445 but it does not seem worth the trouble for GCC 2.6. */
3446 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3447 {
3448 pre_shift = floor_log2 (d);
3449 if (rem_flag)
3450 {
3451 remainder = expand_binop (compute_mode, and_optab, op0,
3452 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3453 remainder, 0, OPTAB_LIB_WIDEN);
3454 if (remainder)
3455 return gen_lowpart (mode, remainder);
3456 }
3457 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3458 build_int_2 (pre_shift, 0),
3459 tquotient, 0);
3460 }
3461 else
3462 {
3463 rtx t1, t2, t3, t4;
3464
3465 mh = choose_multiplier (d, size, size - 1,
3466 &ml, &post_shift, &lgup);
3467 if (mh)
3468 abort ();
3469
3470 if (post_shift < BITS_PER_WORD
3471 && size - 1 < BITS_PER_WORD)
3472 {
3473 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3474 build_int_2 (size - 1, 0),
3475 NULL_RTX, 0);
3476 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
3477 NULL_RTX, 0, OPTAB_WIDEN);
3478 extra_cost = (shift_cost[post_shift]
3479 + shift_cost[size - 1] + 2 * add_cost);
3480 t3 = expand_mult_highpart (compute_mode, t2, ml,
3481 NULL_RTX, 1,
3482 max_cost - extra_cost);
3483 if (t3 != 0)
3484 {
3485 t4 = expand_shift (RSHIFT_EXPR, compute_mode, t3,
3486 build_int_2 (post_shift, 0),
3487 NULL_RTX, 1);
3488 quotient = expand_binop (compute_mode, xor_optab,
3489 t4, t1, tquotient, 0,
3490 OPTAB_WIDEN);
3491 }
3492 }
3493 }
3494 }
3495 else
3496 {
3497 rtx nsign, t1, t2, t3, t4;
3498 t1 = force_operand (gen_rtx_PLUS (compute_mode,
3499 op0, constm1_rtx), NULL_RTX);
3500 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
3501 0, OPTAB_WIDEN);
3502 nsign = expand_shift (RSHIFT_EXPR, compute_mode, t2,
3503 build_int_2 (size - 1, 0), NULL_RTX, 0);
3504 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
3505 NULL_RTX);
3506 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
3507 NULL_RTX, 0);
3508 if (t4)
3509 {
3510 rtx t5;
3511 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
3512 NULL_RTX, 0);
3513 quotient = force_operand (gen_rtx_PLUS (compute_mode,
3514 t4, t5),
3515 tquotient);
3516 }
3517 }
3518 }
3519
3520 if (quotient != 0)
3521 break;
3522 delete_insns_since (last);
3523
3524 /* Try using an instruction that produces both the quotient and
3525 remainder, using truncation. We can easily compensate the quotient
3526 or remainder to get floor rounding, once we have the remainder.
3527 Notice that we also compute the final remainder value here
3528 and return the result right away. */
3529 if (target == 0 || GET_MODE (target) != compute_mode)
3530 target = gen_reg_rtx (compute_mode);
3531
3532 if (rem_flag)
3533 {
3534 remainder
3535 = GET_CODE (target) == REG ? target : gen_reg_rtx (compute_mode);
3536 quotient = gen_reg_rtx (compute_mode);
3537 }
3538 else
3539 {
3540 quotient
3541 = GET_CODE (target) == REG ? target : gen_reg_rtx (compute_mode);
3542 remainder = gen_reg_rtx (compute_mode);
3543 }
3544
3545 if (expand_twoval_binop (sdivmod_optab, op0, op1,
3546 quotient, remainder, 0))
3547 {
3548 /* This could be computed with a branch-less sequence.
3549 Save that for later. */
3550 rtx tem;
3551 rtx label = gen_label_rtx ();
3552 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
3553 tem = expand_binop (compute_mode, xor_optab, op0, op1,
3554 NULL_RTX, 0, OPTAB_WIDEN);
3555 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
3556 expand_dec (quotient, const1_rtx);
3557 expand_inc (remainder, op1);
3558 emit_label (label);
3559 return gen_lowpart (mode, rem_flag ? remainder : quotient);
3560 }
3561
3562 /* No luck with division elimination or divmod. Have to do it
3563 by conditionally adjusting op0 *and* the result. */
3564 {
3565 rtx label1, label2, label3, label4, label5;
3566 rtx adjusted_op0;
3567 rtx tem;
3568
3569 quotient = gen_reg_rtx (compute_mode);
3570 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
3571 label1 = gen_label_rtx ();
3572 label2 = gen_label_rtx ();
3573 label3 = gen_label_rtx ();
3574 label4 = gen_label_rtx ();
3575 label5 = gen_label_rtx ();
3576 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
3577 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
3578 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3579 quotient, 0, OPTAB_LIB_WIDEN);
3580 if (tem != quotient)
3581 emit_move_insn (quotient, tem);
3582 emit_jump_insn (gen_jump (label5));
3583 emit_barrier ();
3584 emit_label (label1);
3585 expand_inc (adjusted_op0, const1_rtx);
3586 emit_jump_insn (gen_jump (label4));
3587 emit_barrier ();
3588 emit_label (label2);
3589 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
3590 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3591 quotient, 0, OPTAB_LIB_WIDEN);
3592 if (tem != quotient)
3593 emit_move_insn (quotient, tem);
3594 emit_jump_insn (gen_jump (label5));
3595 emit_barrier ();
3596 emit_label (label3);
3597 expand_dec (adjusted_op0, const1_rtx);
3598 emit_label (label4);
3599 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3600 quotient, 0, OPTAB_LIB_WIDEN);
3601 if (tem != quotient)
3602 emit_move_insn (quotient, tem);
3603 expand_dec (quotient, const1_rtx);
3604 emit_label (label5);
3605 }
3606 break;
3607
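/* Editor's sketch (hypothetical helper name): the floor-rounding
   compensation emitted above, in plain C.  Start from a truncating
   divide; when the remainder is non-zero and the operand signs differ
   (op0 ^ op1 negative), decrement the quotient and add the divisor back
   into the remainder.  */

static void
floor_divmod (long x, long y, long *q, long *r)
{
  *q = x / y;                     /* C division truncates toward zero.  */
  *r = x % y;
  if (*r != 0 && (x ^ y) < 0)     /* inexact and signs differ: adjust.  */
    {
      *q -= 1;                    /* expand_dec (quotient, const1_rtx)  */
      *r += y;                    /* expand_inc (remainder, op1)  */
    }
}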
3608 case CEIL_DIV_EXPR:
3609 case CEIL_MOD_EXPR:
3610 if (unsignedp)
3611 {
3612 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
3613 {
3614 rtx t1, t2, t3;
3615 unsigned HOST_WIDE_INT d = INTVAL (op1);
3616 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3617 build_int_2 (floor_log2 (d), 0),
3618 tquotient, 1);
3619 t2 = expand_binop (compute_mode, and_optab, op0,
3620 GEN_INT (d - 1),
3621 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3622 t3 = gen_reg_rtx (compute_mode);
3623 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
3624 compute_mode, 1, 1);
3625 if (t3 == 0)
3626 {
3627 rtx lab;
3628 lab = gen_label_rtx ();
3629 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
3630 expand_inc (t1, const1_rtx);
3631 emit_label (lab);
3632 quotient = t1;
3633 }
3634 else
3635 quotient = force_operand (gen_rtx_PLUS (compute_mode,
3636 t1, t3),
3637 tquotient);
3638 break;
3639 }
3640
3641 /* Try using an instruction that produces both the quotient and
3642 remainder, using truncation. We can easily compensate the
3643 quotient or remainder to get ceiling rounding, once we have the
3644 remainder. Notice that we also compute the final remainder
3645 value here and return the result right away. */
3646 if (target == 0 || GET_MODE (target) != compute_mode)
3647 target = gen_reg_rtx (compute_mode);
3648
3649 if (rem_flag)
3650 {
3651 remainder = (GET_CODE (target) == REG
3652 ? target : gen_reg_rtx (compute_mode));
3653 quotient = gen_reg_rtx (compute_mode);
3654 }
3655 else
3656 {
3657 quotient = (GET_CODE (target) == REG
3658 ? target : gen_reg_rtx (compute_mode));
3659 remainder = gen_reg_rtx (compute_mode);
3660 }
3661
3662 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
3663 remainder, 1))
3664 {
3665 /* This could be computed with a branch-less sequence.
3666 Save that for later. */
3667 rtx label = gen_label_rtx ();
3668 do_cmp_and_jump (remainder, const0_rtx, EQ,
3669 compute_mode, label);
3670 expand_inc (quotient, const1_rtx);
3671 expand_dec (remainder, op1);
3672 emit_label (label);
3673 return gen_lowpart (mode, rem_flag ? remainder : quotient);
3674 }
3675
3676 /* No luck with division elimination or divmod. Have to do it
3677 by conditionally adjusting op0 *and* the result. */
3678 {
3679 rtx label1, label2;
3680 rtx adjusted_op0, tem;
3681
3682 quotient = gen_reg_rtx (compute_mode);
3683 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
3684 label1 = gen_label_rtx ();
3685 label2 = gen_label_rtx ();
3686 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
3687 compute_mode, label1);
3688 emit_move_insn (quotient, const0_rtx);
3689 emit_jump_insn (gen_jump (label2));
3690 emit_barrier ();
3691 emit_label (label1);
3692 expand_dec (adjusted_op0, const1_rtx);
3693 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
3694 quotient, 1, OPTAB_LIB_WIDEN);
3695 if (tem != quotient)
3696 emit_move_insn (quotient, tem);
3697 expand_inc (quotient, const1_rtx);
3698 emit_label (label2);
3699 }
3700 }
3701 else /* signed */
3702 {
3703 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3704 && INTVAL (op1) >= 0)
3705 {
3706 /* This is extremely similar to the code for the unsigned case
3707 above. For 2.7 we should merge these variants, but for
3708 2.6.1 I don't want to touch the code for unsigned since that
3709 gets used in C. The signed case will only be used by other
3710 languages (Ada). */
3711
3712 rtx t1, t2, t3;
3713 unsigned HOST_WIDE_INT d = INTVAL (op1);
3714 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3715 build_int_2 (floor_log2 (d), 0),
3716 tquotient, 0);
3717 t2 = expand_binop (compute_mode, and_optab, op0,
3718 GEN_INT (d - 1),
3719 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3720 t3 = gen_reg_rtx (compute_mode);
3721 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
3722 compute_mode, 1, 1);
3723 if (t3 == 0)
3724 {
3725 rtx lab;
3726 lab = gen_label_rtx ();
3727 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
3728 expand_inc (t1, const1_rtx);
3729 emit_label (lab);
3730 quotient = t1;
3731 }
3732 else
3733 quotient = force_operand (gen_rtx_PLUS (compute_mode,
3734 t1, t3),
3735 tquotient);
3736 break;
3737 }
3738
3739 /* Try using an instruction that produces both the quotient and
3740 remainder, using truncation. We can easily compensate the
3741 quotient or remainder to get ceiling rounding, once we have the
3742 remainder. Notice that we also compute the final remainder
3743 value here and return the result right away. */
3744 if (target == 0 || GET_MODE (target) != compute_mode)
3745 target = gen_reg_rtx (compute_mode);
3746 if (rem_flag)
3747 {
3748 remainder = (GET_CODE (target) == REG
3749 ? target : gen_reg_rtx (compute_mode));
3750 quotient = gen_reg_rtx (compute_mode);
3751 }
3752 else
3753 {
3754 quotient = (GET_CODE (target) == REG
3755 ? target : gen_reg_rtx (compute_mode));
3756 remainder = gen_reg_rtx (compute_mode);
3757 }
3758
3759 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
3760 remainder, 0))
3761 {
3762 /* This could be computed with a branch-less sequence.
3763 Save that for later. */
3764 rtx tem;
3765 rtx label = gen_label_rtx ();
3766 do_cmp_and_jump (remainder, const0_rtx, EQ,
3767 compute_mode, label);
3768 tem = expand_binop (compute_mode, xor_optab, op0, op1,
3769 NULL_RTX, 0, OPTAB_WIDEN);
3770 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
3771 expand_inc (quotient, const1_rtx);
3772 expand_dec (remainder, op1);
3773 emit_label (label);
3774 return gen_lowpart (mode, rem_flag ? remainder : quotient);
3775 }
3776
3777 /* No luck with division elimination or divmod. Have to do it
3778 by conditionally adjusting op0 *and* the result. */
3779 {
3780 rtx label1, label2, label3, label4, label5;
3781 rtx adjusted_op0;
3782 rtx tem;
3783
3784 quotient = gen_reg_rtx (compute_mode);
3785 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
3786 label1 = gen_label_rtx ();
3787 label2 = gen_label_rtx ();
3788 label3 = gen_label_rtx ();
3789 label4 = gen_label_rtx ();
3790 label5 = gen_label_rtx ();
3791 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
3792 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
3793 compute_mode, label1);
3794 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3795 quotient, 0, OPTAB_LIB_WIDEN);
3796 if (tem != quotient)
3797 emit_move_insn (quotient, tem);
3798 emit_jump_insn (gen_jump (label5));
3799 emit_barrier ();
3800 emit_label (label1);
3801 expand_dec (adjusted_op0, const1_rtx);
3802 emit_jump_insn (gen_jump (label4));
3803 emit_barrier ();
3804 emit_label (label2);
3805 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
3806 compute_mode, label3);
3807 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3808 quotient, 0, OPTAB_LIB_WIDEN);
3809 if (tem != quotient)
3810 emit_move_insn (quotient, tem);
3811 emit_jump_insn (gen_jump (label5));
3812 emit_barrier ();
3813 emit_label (label3);
3814 expand_inc (adjusted_op0, const1_rtx);
3815 emit_label (label4);
3816 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
3817 quotient, 0, OPTAB_LIB_WIDEN);
3818 if (tem != quotient)
3819 emit_move_insn (quotient, tem);
3820 expand_inc (quotient, const1_rtx);
3821 emit_label (label5);
3822 }
3823 }
3824 break;
3825
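/* Editor's sketch (hypothetical helper name): the unsigned power-of-two
   path above as one line of C.  Shift for the truncated quotient, then
   add the emit_store_flag result, which is 1 exactly when some low bits
   were shifted out.  The signed power-of-two variant has the same shape.  */

static unsigned int
ceil_div_pow2 (unsigned int x, unsigned int k)    /* divisor d = 1u << k */
{
  return (x >> k) + ((x & ((1u << k) - 1)) != 0);
}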
3826 case EXACT_DIV_EXPR:
3827 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
3828 {
3829 HOST_WIDE_INT d = INTVAL (op1);
3830 unsigned HOST_WIDE_INT ml;
3831 int pre_shift;
3832 rtx t1;
3833
3834 pre_shift = floor_log2 (d & -d);
3835 ml = invert_mod2n (d >> pre_shift, size);
3836 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3837 build_int_2 (pre_shift, 0), NULL_RTX, unsignedp);
3838 quotient = expand_mult (compute_mode, t1, GEN_INT (ml), NULL_RTX,
3839 0);
3840
3841 insn = get_last_insn ();
3842 set_unique_reg_note (insn,
3843 REG_EQUAL,
3844 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
3845 compute_mode,
3846 op0, op1));
3847 }
3848 break;
3849
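/* Editor's sketch (hypothetical helper name): EXACT_DIV_EXPR promises a
   zero remainder, so after shifting out the divisor's trailing zero bits
   a single multiply by the inverse of its odd part modulo 2^N recovers
   the quotient.  For d = 3 on 32 bits, 3 * 0xAAAAAAAB == 1 (mod 2^32).  */

#include <stdint.h>

static uint32_t
exact_div3 (uint32_t x)
{
  return x * 0xAAAAAAABu;         /* correct only when x % 3 == 0.  */
}

/* e.g. exact_div3 (9) == 3 and exact_div3 (3000000000u) == 1000000000u.  */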
3850 case ROUND_DIV_EXPR:
3851 case ROUND_MOD_EXPR:
3852 if (unsignedp)
3853 {
3854 rtx tem;
3855 rtx label;
3856 label = gen_label_rtx ();
3857 quotient = gen_reg_rtx (compute_mode);
3858 remainder = gen_reg_rtx (compute_mode);
3859 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
3860 {
3861 rtx tem;
3862 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
3863 quotient, 1, OPTAB_LIB_WIDEN);
3864 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
3865 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
3866 remainder, 1, OPTAB_LIB_WIDEN);
3867 }
3868 tem = plus_constant (op1, -1);
3869 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem,
3870 build_int_2 (1, 0), NULL_RTX, 1);
3871 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
3872 expand_inc (quotient, const1_rtx);
3873 expand_dec (remainder, op1);
3874 emit_label (label);
3875 }
3876 else
3877 {
3878 rtx abs_rem, abs_op1, tem, mask;
3879 rtx label;
3880 label = gen_label_rtx ();
3881 quotient = gen_reg_rtx (compute_mode);
3882 remainder = gen_reg_rtx (compute_mode);
3883 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
3884 {
3885 rtx tem;
3886 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
3887 quotient, 0, OPTAB_LIB_WIDEN);
3888 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
3889 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
3890 remainder, 0, OPTAB_LIB_WIDEN);
3891 }
3892 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
3893 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
3894 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
3895 build_int_2 (1, 0), NULL_RTX, 1);
3896 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
3897 tem = expand_binop (compute_mode, xor_optab, op0, op1,
3898 NULL_RTX, 0, OPTAB_WIDEN);
3899 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
3900 build_int_2 (size - 1, 0), NULL_RTX, 0);
3901 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
3902 NULL_RTX, 0, OPTAB_WIDEN);
3903 tem = expand_binop (compute_mode, sub_optab, tem, mask,
3904 NULL_RTX, 0, OPTAB_WIDEN);
3905 expand_inc (quotient, tem);
3906 tem = expand_binop (compute_mode, xor_optab, mask, op1,
3907 NULL_RTX, 0, OPTAB_WIDEN);
3908 tem = expand_binop (compute_mode, sub_optab, tem, mask,
3909 NULL_RTX, 0, OPTAB_WIDEN);
3910 expand_dec (remainder, tem);
3911 emit_label (label);
3912 }
3913 return gen_lowpart (mode, rem_flag ? remainder : quotient);
3914
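/* Editor's sketch (hypothetical helper name): the signed round-to-nearest
   adjustment above, in plain C.  After a truncating divmod, adjust when
   twice the remainder's magnitude reaches the divisor's; the mask trick
   (op0 ^ op1) >> 31 then supplies +/-1 and op1-with-the-quotient's-sign
   without branches.  Ties round away from zero.  */

#include <stdint.h>

static void
round_divmod (int32_t x, int32_t y, int32_t *q, int32_t *r)
{
  uint32_t abs_rem, abs_y;

  *q = x / y;
  *r = x % y;
  abs_rem = *r < 0 ? - (uint32_t) *r : (uint32_t) *r;
  abs_y = y < 0 ? - (uint32_t) y : (uint32_t) y;
  if (2 * abs_rem >= abs_y)
    {
      int32_t mask = (x ^ y) >> 31;   /* 0 or -1: the quotient's sign.  */
      *q += (1 ^ mask) - mask;        /* +1 if signs agree, else -1.  */
      *r -= (y ^ mask) - mask;        /* op1 carrying the quotient's sign. */
    }
}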
3915 default:
3916 abort ();
3917 }
3918
3919 if (quotient == 0)
3920 {
3921 if (target && GET_MODE (target) != compute_mode)
3922 target = 0;
3923
3924 if (rem_flag)
3925 {
3926 /* Try to produce the remainder without producing the quotient.
3927 If we seem to have a divmod pattern that does not require widening,
3928 don't try widening here. We should really have a WIDEN argument
3929 to expand_twoval_binop, since what we'd really like to do here is
3930 1) try a mod insn in compute_mode
3931 2) try a divmod insn in compute_mode
3932 3) try a div insn in compute_mode and multiply-subtract to get
3933 remainder
3934 4) try the same things with widening allowed. */
3935 remainder
3936 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
3937 op0, op1, target,
3938 unsignedp,
3939 ((optab2->handlers[(int) compute_mode].insn_code
3940 != CODE_FOR_nothing)
3941 ? OPTAB_DIRECT : OPTAB_WIDEN));
3942 if (remainder == 0)
3943 {
3944 /* No luck there. Can we do remainder and divide at once
3945 without a library call? */
3946 remainder = gen_reg_rtx (compute_mode);
3947 if (! expand_twoval_binop ((unsignedp
3948 ? udivmod_optab
3949 : sdivmod_optab),
3950 op0, op1,
3951 NULL_RTX, remainder, unsignedp))
3952 remainder = 0;
3953 }
3954
3955 if (remainder)
3956 return gen_lowpart (mode, remainder);
3957 }
3958
3959 /* Produce the quotient. Try a quotient insn, but not a library call.
3960 If we have a divmod in this mode, use it in preference to widening
3961 the div (for this test we assume it will not fail). Note that optab2
3962 is set to the one of the two optabs that the call below will use. */
3963 quotient
3964 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
3965 op0, op1, rem_flag ? NULL_RTX : target,
3966 unsignedp,
3967 ((optab2->handlers[(int) compute_mode].insn_code
3968 != CODE_FOR_nothing)
3969 ? OPTAB_DIRECT : OPTAB_WIDEN));
3970
3971 if (quotient == 0)
3972 {
3973 /* No luck there. Try a quotient-and-remainder insn,
3974 keeping the quotient alone. */
3975 quotient = gen_reg_rtx (compute_mode);
3976 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
3977 op0, op1,
3978 quotient, NULL_RTX, unsignedp))
3979 {
3980 quotient = 0;
3981 if (! rem_flag)
3982 /* Still no luck. If we are not computing the remainder,
3983 use a library call for the quotient. */
3984 quotient = sign_expand_binop (compute_mode,
3985 udiv_optab, sdiv_optab,
3986 op0, op1, target,
3987 unsignedp, OPTAB_LIB_WIDEN);
3988 }
3989 }
3990 }
3991
3992 if (rem_flag)
3993 {
3994 if (target && GET_MODE (target) != compute_mode)
3995 target = 0;
3996
3997 if (quotient == 0)
3998 /* No divide instruction either. Use library for remainder. */
3999 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4000 op0, op1, target,
4001 unsignedp, OPTAB_LIB_WIDEN);
4002 else
4003 {
4004 /* We divided. Now finish doing X - Y * (X / Y). */
4005 remainder = expand_mult (compute_mode, quotient, op1,
4006 NULL_RTX, unsignedp);
4007 remainder = expand_binop (compute_mode, sub_optab, op0,
4008 remainder, target, unsignedp,
4009 OPTAB_LIB_WIDEN);
4010 }
4011 }
4012
4013 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4014 }
4015 \f
4016 /* Return a tree node with data type TYPE, describing the value of X.
4017 Usually this is an RTL_EXPR, if there is no obvious better choice.
4018 X may be an expression; however, we only support those expressions
4019 generated by loop.c. */
4020
4021 tree
4022 make_tree (type, x)
4023 tree type;
4024 rtx x;
4025 {
4026 tree t;
4027
4028 switch (GET_CODE (x))
4029 {
4030 case CONST_INT:
4031 t = build_int_2 (INTVAL (x),
4032 (TREE_UNSIGNED (type)
4033 && (GET_MODE_BITSIZE (TYPE_MODE (type)) < HOST_BITS_PER_WIDE_INT))
4034 || INTVAL (x) >= 0 ? 0 : -1);
4035 TREE_TYPE (t) = type;
4036 return t;
4037
4038 case CONST_DOUBLE:
4039 if (GET_MODE (x) == VOIDmode)
4040 {
4041 t = build_int_2 (CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4042 TREE_TYPE (t) = type;
4043 }
4044 else
4045 {
4046 REAL_VALUE_TYPE d;
4047
4048 REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4049 t = build_real (type, d);
4050 }
4051
4052 return t;
4053
4054 case PLUS:
4055 return fold (build (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4056 make_tree (type, XEXP (x, 1))));
4057
4058 case MINUS:
4059 return fold (build (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4060 make_tree (type, XEXP (x, 1))));
4061
4062 case NEG:
4063 return fold (build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0))));
4064
4065 case MULT:
4066 return fold (build (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
4067 make_tree (type, XEXP (x, 1))));
4068
4069 case ASHIFT:
4070 return fold (build (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
4071 make_tree (type, XEXP (x, 1))));
4072
4073 case LSHIFTRT:
4074 return fold (convert (type,
4075 build (RSHIFT_EXPR, unsigned_type (type),
4076 make_tree (unsigned_type (type),
4077 XEXP (x, 0)),
4078 make_tree (type, XEXP (x, 1)))));
4079
4080 case ASHIFTRT:
4081 return fold (convert (type,
4082 build (RSHIFT_EXPR, signed_type (type),
4083 make_tree (signed_type (type), XEXP (x, 0)),
4084 make_tree (type, XEXP (x, 1)))));
4085
4086 case DIV:
4087 if (TREE_CODE (type) != REAL_TYPE)
4088 t = signed_type (type);
4089 else
4090 t = type;
4091
4092 return fold (convert (type,
4093 build (TRUNC_DIV_EXPR, t,
4094 make_tree (t, XEXP (x, 0)),
4095 make_tree (t, XEXP (x, 1)))));
4096 case UDIV:
4097 t = unsigned_type (type);
4098 return fold (convert (type,
4099 build (TRUNC_DIV_EXPR, t,
4100 make_tree (t, XEXP (x, 0)),
4101 make_tree (t, XEXP (x, 1)))));
4102 default:
4103 t = make_node (RTL_EXPR);
4104 TREE_TYPE (t) = type;
4105
4106 #ifdef POINTERS_EXTEND_UNSIGNED
4107 /* If TYPE is a POINTER_TYPE, X might be Pmode with TYPE_MODE being
4108 ptr_mode. So convert. */
4109 if (POINTER_TYPE_P (type) && GET_MODE (x) != TYPE_MODE (type))
4110 x = convert_memory_address (TYPE_MODE (type), x);
4111 #endif
4112
4113 RTL_EXPR_RTL (t) = x;
4114 /* There are no insns to be output
4115 when this rtl_expr is used. */
4116 RTL_EXPR_SEQUENCE (t) = 0;
4117 return t;
4118 }
4119 }
4120
4121 /* Return an rtx representing the value of X * MULT + ADD.
4122 TARGET is a suggestion for where to store the result (an rtx).
4123 MODE is the machine mode for the computation.
4124 MULT must have mode MODE. ADD may have a different mode;
4125 so may X (each defaults to MODE).
4126 UNSIGNEDP is non-zero to do unsigned multiplication.
4127 This may emit insns. */
4128
4129 rtx
4130 expand_mult_add (x, target, mult, add, mode, unsignedp)
4131 rtx x, target, mult, add;
4132 enum machine_mode mode;
4133 int unsignedp;
4134 {
4135 tree type = type_for_mode (mode, unsignedp);
4136 tree add_type = (GET_MODE (add) == VOIDmode
4137 ? type : type_for_mode (GET_MODE (add), unsignedp));
4138 tree result = fold (build (PLUS_EXPR, type,
4139 fold (build (MULT_EXPR, type,
4140 make_tree (type, x),
4141 make_tree (type, mult))),
4142 make_tree (add_type, add)));
4143
4144 return expand_expr (result, target, VOIDmode, 0);
4145 }
4146 \f
4147 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
4148 and returning TARGET.
4149
4150 If TARGET is 0, a pseudo-register or constant is returned. */
4151
4152 rtx
4153 expand_and (op0, op1, target)
4154 rtx op0, op1, target;
4155 {
4156 enum machine_mode mode = VOIDmode;
4157 rtx tem;
4158
4159 if (GET_MODE (op0) != VOIDmode)
4160 mode = GET_MODE (op0);
4161 else if (GET_MODE (op1) != VOIDmode)
4162 mode = GET_MODE (op1);
4163
4164 if (mode != VOIDmode)
4165 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
4166 else if (GET_CODE (op0) == CONST_INT && GET_CODE (op1) == CONST_INT)
4167 tem = GEN_INT (INTVAL (op0) & INTVAL (op1));
4168 else
4169 abort ();
4170
4171 if (target == 0)
4172 target = tem;
4173 else if (tem != target)
4174 emit_move_insn (target, tem);
4175 return target;
4176 }
4177 \f
4178 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
4179 and storing in TARGET. Normally return TARGET.
4180 Return 0 if that cannot be done.
4181
4182 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
4183 it is VOIDmode, they cannot both be CONST_INT.
4184
4185 UNSIGNEDP is for the case where we have to widen the operands
4186 to perform the operation. It says to use zero-extension.
4187
4188 NORMALIZEP is 1 if we should convert the result to be either zero
4189 or one. NORMALIZEP is -1 if we should convert the result to be
4190 either zero or -1. If NORMALIZEP is zero, the result will be left
4191 "raw" out of the scc insn. */
4192
4193 rtx
4194 emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep)
4195 rtx target;
4196 enum rtx_code code;
4197 rtx op0, op1;
4198 enum machine_mode mode;
4199 int unsignedp;
4200 int normalizep;
4201 {
4202 rtx subtarget;
4203 enum insn_code icode;
4204 enum machine_mode compare_mode;
4205 enum machine_mode target_mode = GET_MODE (target);
4206 rtx tem;
4207 rtx last = get_last_insn ();
4208 rtx pattern, comparison;
4209
4210 if (unsignedp)
4211 code = unsigned_condition (code);
4212
4213 /* If one operand is constant, make it the second one. Only do this
4214 if the other operand is not constant as well. */
4215
4216 if ((CONSTANT_P (op0) && ! CONSTANT_P (op1))
4217 || (GET_CODE (op0) == CONST_INT && GET_CODE (op1) != CONST_INT))
4218 {
4219 tem = op0;
4220 op0 = op1;
4221 op1 = tem;
4222 code = swap_condition (code);
4223 }
4224
4225 if (mode == VOIDmode)
4226 mode = GET_MODE (op0);
4227
4228 /* For some comparisons with 1 and -1, we can convert this to
4229 comparisons with zero. This will often produce more opportunities for
4230 store-flag insns. */
4231
4232 switch (code)
4233 {
4234 case LT:
4235 if (op1 == const1_rtx)
4236 op1 = const0_rtx, code = LE;
4237 break;
4238 case LE:
4239 if (op1 == constm1_rtx)
4240 op1 = const0_rtx, code = LT;
4241 break;
4242 case GE:
4243 if (op1 == const1_rtx)
4244 op1 = const0_rtx, code = GT;
4245 break;
4246 case GT:
4247 if (op1 == constm1_rtx)
4248 op1 = const0_rtx, code = GE;
4249 break;
4250 case GEU:
4251 if (op1 == const1_rtx)
4252 op1 = const0_rtx, code = NE;
4253 break;
4254 case LTU:
4255 if (op1 == const1_rtx)
4256 op1 = const0_rtx, code = EQ;
4257 break;
4258 default:
4259 break;
4260 }
4261
4262 /* If we are comparing a double-word integer with zero, we can convert
4263 the comparison into one involving a single word. */
4264 if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
4265 && GET_MODE_CLASS (mode) == MODE_INT
4266 && op1 == const0_rtx)
4267 {
4268 if (code == EQ || code == NE)
4269 {
4270 /* Do a logical OR of the two words and compare the result. */
4271 rtx op0h = gen_highpart (word_mode, op0);
4272 rtx op0l = gen_lowpart (word_mode, op0);
4273 rtx op0both = expand_binop (word_mode, ior_optab, op0h, op0l,
4274 NULL_RTX, unsignedp, OPTAB_DIRECT);
4275 if (op0both != 0)
4276 return emit_store_flag (target, code, op0both, op1, word_mode,
4277 unsignedp, normalizep);
4278 }
4279 else if (code == LT || code == GE)
4280 /* If testing the sign bit, can just test on high word. */
4281 return emit_store_flag (target, code, gen_highpart (word_mode, op0),
4282 op1, word_mode, unsignedp, normalizep);
4283 }
4284
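/* Editor's sketch (hypothetical helper names): the double-word reduction
   above for a 64-bit value held in two 32-bit words.  Equality against
   zero needs only the OR of the halves; LT/GE needs only the sign of the
   high word.  */

#include <stdint.h>

static int dw_eq0 (uint32_t lo, uint32_t hi) { return (hi | lo) == 0; }
static int dw_lt0 (uint32_t lo, uint32_t hi) { (void) lo; return (int32_t) hi < 0; }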
4285 /* From now on, we won't change CODE, so set ICODE now. */
4286 icode = setcc_gen_code[(int) code];
4287
4288 /* If this is A < 0 or A >= 0, we can do this by taking the ones
4289 complement of A (for GE) and shifting the sign bit to the low bit. */
4290 if (op1 == const0_rtx && (code == LT || code == GE)
4291 && GET_MODE_CLASS (mode) == MODE_INT
4292 && (normalizep || STORE_FLAG_VALUE == 1
4293 || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
4294 && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
4295 == (HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))))
4296 {
4297 subtarget = target;
4298
4299 /* If the result is to be wider than OP0, it is best to convert it
4300 first. If it is to be narrower, it is *incorrect* to convert it
4301 first. */
4302 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
4303 {
4304 op0 = protect_from_queue (op0, 0);
4305 op0 = convert_modes (target_mode, mode, op0, 0);
4306 mode = target_mode;
4307 }
4308
4309 if (target_mode != mode)
4310 subtarget = 0;
4311
4312 if (code == GE)
4313 op0 = expand_unop (mode, one_cmpl_optab, op0,
4314 ((STORE_FLAG_VALUE == 1 || normalizep)
4315 ? 0 : subtarget), 0);
4316
4317 if (STORE_FLAG_VALUE == 1 || normalizep)
4318 /* If we are supposed to produce a 0/1 value, we want to do
4319 a logical shift from the sign bit to the low-order bit; for
4320 a -1/0 value, we do an arithmetic shift. */
4321 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
4322 size_int (GET_MODE_BITSIZE (mode) - 1),
4323 subtarget, normalizep != -1);
4324
4325 if (mode != target_mode)
4326 op0 = convert_modes (target_mode, mode, op0, 0);
4327
4328 return op0;
4329 }
4330
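/* Editor's sketch (hypothetical helper names): the sign-bit path above.
   A < 0 as a 0/1 value is just the sign bit shifted down logically;
   A >= 0 takes the ones' complement first.  An arithmetic shift would
   give the 0/-1 forms for normalizep == -1 instead.  */

#include <stdint.h>

static int lt0 (int32_t a) { return (uint32_t) a >> 31; }
static int ge0 (int32_t a) { return (uint32_t) ~a >> 31; }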
4331 if (icode != CODE_FOR_nothing)
4332 {
4333 insn_operand_predicate_fn pred;
4334
4335 /* We think we may be able to do this with a scc insn. Emit the
4336 comparison and then the scc insn.
4337
4338 compare_from_rtx may call emit_queue, which would be deleted below
4339 if the scc insn fails. So call it ourselves before setting LAST.
4340 Likewise for do_pending_stack_adjust. */
4341
4342 emit_queue ();
4343 do_pending_stack_adjust ();
4344 last = get_last_insn ();
4345
4346 comparison
4347 = compare_from_rtx (op0, op1, code, unsignedp, mode, NULL_RTX, 0);
4348 if (GET_CODE (comparison) == CONST_INT)
4349 return (comparison == const0_rtx ? const0_rtx
4350 : normalizep == 1 ? const1_rtx
4351 : normalizep == -1 ? constm1_rtx
4352 : const_true_rtx);
4353
4354 /* If the code of COMPARISON doesn't match CODE, something is
4355 wrong; we can no longer be sure that we have the operation.
4356 We could handle this case, but it should not happen. */
4357
4358 if (GET_CODE (comparison) != code)
4359 abort ();
4360
4361 /* Get a reference to the target in the proper mode for this insn. */
4362 compare_mode = insn_data[(int) icode].operand[0].mode;
4363 subtarget = target;
4364 pred = insn_data[(int) icode].operand[0].predicate;
4365 if (preserve_subexpressions_p ()
4366 || ! (*pred) (subtarget, compare_mode))
4367 subtarget = gen_reg_rtx (compare_mode);
4368
4369 pattern = GEN_FCN (icode) (subtarget);
4370 if (pattern)
4371 {
4372 emit_insn (pattern);
4373
4374 /* If we are converting to a wider mode, first convert to
4375 TARGET_MODE, then normalize. This produces better combining
4376 opportunities on machines that have a SIGN_EXTRACT when we are
4377 testing a single bit. This mostly benefits the 68k.
4378
4379 If STORE_FLAG_VALUE does not have the sign bit set when
4380 interpreted in COMPARE_MODE, we can do this conversion as
4381 unsigned, which is usually more efficient. */
4382 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (compare_mode))
4383 {
4384 convert_move (target, subtarget,
4385 (GET_MODE_BITSIZE (compare_mode)
4386 <= HOST_BITS_PER_WIDE_INT)
4387 && 0 == (STORE_FLAG_VALUE
4388 & ((HOST_WIDE_INT) 1
4389 << (GET_MODE_BITSIZE (compare_mode) -1))));
4390 op0 = target;
4391 compare_mode = target_mode;
4392 }
4393 else
4394 op0 = subtarget;
4395
4396 /* If we want to keep subexpressions around, don't reuse our
4397 last target. */
4398
4399 if (preserve_subexpressions_p ())
4400 subtarget = 0;
4401
4402 /* Now normalize to the proper value in COMPARE_MODE. Sometimes
4403 we don't have to do anything. */
4404 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
4405 ;
4406 /* STORE_FLAG_VALUE might be the most negative number, so write
4407 the comparison this way to avoid a compile-time warning. */
4408 else if (- normalizep == STORE_FLAG_VALUE)
4409 op0 = expand_unop (compare_mode, neg_optab, op0, subtarget, 0);
4410
4411 /* We don't want to use STORE_FLAG_VALUE < 0 below since this
4412 makes it hard to use a value of just the sign bit due to
4413 ANSI integer constant typing rules. */
4414 else if (GET_MODE_BITSIZE (compare_mode) <= HOST_BITS_PER_WIDE_INT
4415 && (STORE_FLAG_VALUE
4416 & ((HOST_WIDE_INT) 1
4417 << (GET_MODE_BITSIZE (compare_mode) - 1))))
4418 op0 = expand_shift (RSHIFT_EXPR, compare_mode, op0,
4419 size_int (GET_MODE_BITSIZE (compare_mode) - 1),
4420 subtarget, normalizep == 1);
4421 else if (STORE_FLAG_VALUE & 1)
4422 {
4423 op0 = expand_and (op0, const1_rtx, subtarget);
4424 if (normalizep == -1)
4425 op0 = expand_unop (compare_mode, neg_optab, op0, op0, 0);
4426 }
4427 else
4428 abort ();
4429
4430 /* If we were converting to a smaller mode, do the
4431 conversion now. */
4432 if (target_mode != compare_mode)
4433 {
4434 convert_move (target, op0, 0);
4435 return target;
4436 }
4437 else
4438 return op0;
4439 }
4440 }
4441
4442 delete_insns_since (last);
4443
4444 /* If expensive optimizations, use different pseudo registers for each
4445 insn, instead of reusing the same pseudo. This leads to better CSE,
4446 but slows down the compiler, since there are more pseudos. */
4447 subtarget = (!flag_expensive_optimizations
4448 && (target_mode == mode)) ? target : NULL_RTX;
4449
4450 /* If we reached here, we can't do this with a scc insn. However, there
4451 are some comparisons that can be done directly. For example, if
4452 this is an equality comparison of integers, we can try to exclusive-or
4453 (or subtract) the two operands and use a recursive call to try the
4454 comparison with zero. Don't do any of these cases if branches are
4455 very cheap. */
4456
4457 if (BRANCH_COST > 0
4458 && GET_MODE_CLASS (mode) == MODE_INT && (code == EQ || code == NE)
4459 && op1 != const0_rtx)
4460 {
4461 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
4462 OPTAB_WIDEN);
4463
4464 if (tem == 0)
4465 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
4466 OPTAB_WIDEN);
4467 if (tem != 0)
4468 tem = emit_store_flag (target, code, tem, const0_rtx,
4469 mode, unsignedp, normalizep);
4470 if (tem == 0)
4471 delete_insns_since (last);
4472 return tem;
4473 }
4474
4475 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
4476 the constant zero. Reject all other comparisons at this point. Only
4477 do LE and GT if branches are expensive since they are expensive on
4478 2-operand machines. */
4479
4480 if (BRANCH_COST == 0
4481 || GET_MODE_CLASS (mode) != MODE_INT || op1 != const0_rtx
4482 || (code != EQ && code != NE
4483 && (BRANCH_COST <= 1 || (code != LE && code != GT))))
4484 return 0;
4485
4486 /* See what we need to return. We can only return a 1, -1, or the
4487 sign bit. */
4488
4489 if (normalizep == 0)
4490 {
4491 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
4492 normalizep = STORE_FLAG_VALUE;
4493
4494 else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
4495 && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
4496 == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
4497 ;
4498 else
4499 return 0;
4500 }
4501
4502 /* Try to put the result of the comparison in the sign bit. Assume we can't
4503 do the necessary operation below. */
4504
4505 tem = 0;
4506
4507 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
4508 the sign bit set. */
4509
4510 if (code == LE)
4511 {
4512 /* This is destructive, so SUBTARGET can't be OP0. */
4513 if (rtx_equal_p (subtarget, op0))
4514 subtarget = 0;
4515
4516 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
4517 OPTAB_WIDEN);
4518 if (tem)
4519 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
4520 OPTAB_WIDEN);
4521 }
4522
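/* Editor's sketch (hypothetical helper name): the LE trick above.
   A | (A - 1) has the sign bit set exactly when A <= 0: for A == 0 the
   subtraction wraps to -1, and for A < 0 the sign bit is already set.
   Assumes wrapping arithmetic, as the RTL has.  */

#include <stdint.h>

static int le0 (int32_t a) { return (int32_t) (a | (a - 1)) < 0; }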
4523 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
4524 number of bits in the mode of OP0, minus one. */
4525
4526 if (code == GT)
4527 {
4528 if (rtx_equal_p (subtarget, op0))
4529 subtarget = 0;
4530
4531 tem = expand_shift (RSHIFT_EXPR, mode, op0,
4532 size_int (GET_MODE_BITSIZE (mode) - 1),
4533 subtarget, 0);
4534 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
4535 OPTAB_WIDEN);
4536 }
4537
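/* Editor's sketch (hypothetical helper name): the GT trick above.
   (A >> 31) - A is negative exactly when A > 0: the shift yields 0 or
   -1, so the result is -A when A >= 0, and -1 - A (somewhere in
   [0, INT_MAX]) when A < 0.  */

#include <stdint.h>

static int gt0 (int32_t a) { return (int32_t) ((a >> 31) - a) < 0; }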
4538 if (code == EQ || code == NE)
4539 {
4540 /* For EQ or NE, one way to do the comparison is to apply an operation
4541 that converts the operand into a positive number if it is non-zero
4542 or zero if it was originally zero. Then, for EQ, we subtract 1 and
4543 for NE we negate. This puts the result in the sign bit. Then we
4544 normalize with a shift, if needed.
4545
4546 Two operations that can do the above actions are ABS and FFS, so try
4547 them. If that doesn't work, and MODE is smaller than a full word,
4548 we can use zero-extension to the wider mode (an unsigned conversion)
4549 as the operation. */
4550
4551 /* Note that ABS doesn't yield a positive number for INT_MIN, but
4552 that is compensated by the subsequent overflow when subtracting
4553 one / negating. */
4554
4555 if (abs_optab->handlers[(int) mode].insn_code != CODE_FOR_nothing)
4556 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
4557 else if (ffs_optab->handlers[(int) mode].insn_code != CODE_FOR_nothing)
4558 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
4559 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
4560 {
4561 op0 = protect_from_queue (op0, 0);
4562 tem = convert_modes (word_mode, mode, op0, 1);
4563 mode = word_mode;
4564 }
4565
4566 if (tem != 0)
4567 {
4568 if (code == EQ)
4569 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
4570 0, OPTAB_WIDEN);
4571 else
4572 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
4573 }
4574
4575 /* If we couldn't do it that way, for NE we can "or" the two's complement
4576 of the value with itself. For EQ, we take the one's complement of
4577 that "or", which is an extra insn, so we only handle EQ if branches
4578 are expensive. */
4579
4580 if (tem == 0 && (code == NE || BRANCH_COST > 1))
4581 {
4582 if (rtx_equal_p (subtarget, op0))
4583 subtarget = 0;
4584
4585 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
4586 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
4587 OPTAB_WIDEN);
4588
4589 if (tem && code == EQ)
4590 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
4591 }
4592 }
4593
4594 if (tem && normalizep)
4595 tem = expand_shift (RSHIFT_EXPR, mode, tem,
4596 size_int (GET_MODE_BITSIZE (mode) - 1),
4597 subtarget, normalizep == 1);
4598
4599 if (tem)
4600 {
4601 if (GET_MODE (tem) != target_mode)
4602 {
4603 convert_move (target, tem, 0);
4604 tem = target;
4605 }
4606 else if (!subtarget)
4607 {
4608 emit_move_insn (target, tem);
4609 tem = target;
4610 }
4611 }
4612 else
4613 delete_insns_since (last);
4614
4615 return tem;
4616 }
4617
4618 /* Like emit_store_flag, but always succeeds. */
4619
4620 rtx
4621 emit_store_flag_force (target, code, op0, op1, mode, unsignedp, normalizep)
4622 rtx target;
4623 enum rtx_code code;
4624 rtx op0, op1;
4625 enum machine_mode mode;
4626 int unsignedp;
4627 int normalizep;
4628 {
4629 rtx tem, label;
4630
4631 /* First see if emit_store_flag can do the job. */
4632 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
4633 if (tem != 0)
4634 return tem;
4635
4636 if (normalizep == 0)
4637 normalizep = 1;
4638
4639 /* If this failed, we have to do this with set/compare/jump/set code. */
4640
4641 if (GET_CODE (target) != REG
4642 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
4643 target = gen_reg_rtx (GET_MODE (target));
4644
4645 emit_move_insn (target, const1_rtx);
4646 label = gen_label_rtx ();
4647 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX, 0,
4648 NULL_RTX, label);
4649
4650 emit_move_insn (target, const0_rtx);
4651 emit_label (label);
4652
4653 return target;
4654 }
4655 \f
4656 /* Perform possibly multi-word comparison and conditional jump to LABEL
4657 if ARG1 OP ARG2 true where ARG1 and ARG2 are of mode MODE
4658
4659 The algorithm is based on the code in expr.c:do_jump.
4660
4661 Note that this does not perform a general comparison. Only variants
4662 generated within expmed.c are correctly handled, others abort (but could
4663 be handled if needed). */
4664
4665 static void
4666 do_cmp_and_jump (arg1, arg2, op, mode, label)
4667 rtx arg1, arg2, label;
4668 enum rtx_code op;
4669 enum machine_mode mode;
4670 {
4671 /* If this mode is an integer too wide to compare properly,
4672 compare word by word. Rely on cse to optimize constant cases. */
4673
4674 if (GET_MODE_CLASS (mode) == MODE_INT
4675 && ! can_compare_p (op, mode, ccp_jump))
4676 {
4677 rtx label2 = gen_label_rtx ();
4678
4679 switch (op)
4680 {
4681 case LTU:
4682 do_jump_by_parts_greater_rtx (mode, 1, arg2, arg1, label2, label);
4683 break;
4684
4685 case LEU:
4686 do_jump_by_parts_greater_rtx (mode, 1, arg1, arg2, label, label2);
4687 break;
4688
4689 case LT:
4690 do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label2, label);
4691 break;
4692
4693 case GT:
4694 do_jump_by_parts_greater_rtx (mode, 0, arg1, arg2, label2, label);
4695 break;
4696
4697 case GE:
4698 do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label, label2);
4699 break;
4700
4701 /* do_jump_by_parts_equality_rtx compares with zero. Luckily
4702 those are the only equality operations we do. */
4703 case EQ:
4704 if (arg2 != const0_rtx || mode != GET_MODE (arg1))
4705 abort ();
4706 do_jump_by_parts_equality_rtx (arg1, label2, label);
4707 break;
4708
4709 case NE:
4710 if (arg2 != const0_rtx || mode != GET_MODE (arg1))
4711 abort ();
4712 do_jump_by_parts_equality_rtx (arg1, label, label2);
4713 break;
4714
4715 default:
4716 abort ();
4717 }
4718
4719 emit_label (label2);
4720 }
4721 else
4722 {
4723 emit_cmp_and_jump_insns (arg1, arg2, op, NULL_RTX, mode, 0, 0, label);
4724 }
4725 }