1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
4 1999, 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 2, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING. If not, write to the Free
20 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
21 02111-1307, USA. */
22
23
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "tm.h"
28 #include "toplev.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "tm_p.h"
32 #include "flags.h"
33 #include "insn-config.h"
34 #include "expr.h"
35 #include "optabs.h"
36 #include "real.h"
37 #include "recog.h"
38 #include "langhooks.h"
39
40 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
41 unsigned HOST_WIDE_INT,
42 unsigned HOST_WIDE_INT, rtx);
43 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
44 unsigned HOST_WIDE_INT, rtx);
45 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
46 unsigned HOST_WIDE_INT,
47 unsigned HOST_WIDE_INT,
48 unsigned HOST_WIDE_INT, rtx, int);
49 static rtx mask_rtx (enum machine_mode, int, int, int);
50 static rtx lshift_value (enum machine_mode, rtx, int, int);
51 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
52 unsigned HOST_WIDE_INT, int);
53 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
54 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
55 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
56
57 /* Nonzero means divides or modulus operations are relatively cheap for
58 powers of two, so don't use branches; emit the operation instead.
59 Usually, this will mean that the MD file will emit non-branch
60 sequences. */
61
62 static bool sdiv_pow2_cheap[NUM_MACHINE_MODES];
63 static bool smod_pow2_cheap[NUM_MACHINE_MODES];
64
65 #ifndef SLOW_UNALIGNED_ACCESS
66 #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
67 #endif
68
69 /* For compilers that support multiple targets with different word sizes,
70 MAX_BITS_PER_WORD contains the biggest value of BITS_PER_WORD. An example
71 is the H8/300(H) compiler. */
72
73 #ifndef MAX_BITS_PER_WORD
74 #define MAX_BITS_PER_WORD BITS_PER_WORD
75 #endif
76
77 /* Reduce conditional compilation elsewhere. */
78 #ifndef HAVE_insv
79 #define HAVE_insv 0
80 #define CODE_FOR_insv CODE_FOR_nothing
81 #define gen_insv(a,b,c,d) NULL_RTX
82 #endif
83 #ifndef HAVE_extv
84 #define HAVE_extv 0
85 #define CODE_FOR_extv CODE_FOR_nothing
86 #define gen_extv(a,b,c,d) NULL_RTX
87 #endif
88 #ifndef HAVE_extzv
89 #define HAVE_extzv 0
90 #define CODE_FOR_extzv CODE_FOR_nothing
91 #define gen_extzv(a,b,c,d) NULL_RTX
92 #endif
93
94 /* Cost of various pieces of RTL.  Note that some of these are indexed
95    by mode alone and some by both mode and shift count.  */
96 static int zero_cost;
97 static int add_cost[NUM_MACHINE_MODES];
98 static int neg_cost[NUM_MACHINE_MODES];
99 static int shift_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
100 static int shiftadd_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
101 static int shiftsub_cost[NUM_MACHINE_MODES][MAX_BITS_PER_WORD];
102 static int mul_cost[NUM_MACHINE_MODES];
103 static int div_cost[NUM_MACHINE_MODES];
104 static int mul_widen_cost[NUM_MACHINE_MODES];
105 static int mul_highpart_cost[NUM_MACHINE_MODES];
106
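/* Fill in the cost tables declared above.  A scratch rtl expression is
   built for each operation of interest and rtx_cost is asked to price it
   in every integer mode; the resulting tables guide the multiply and
   divide synthesis code later in this file.  */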
107 void
108 init_expmed (void)
109 {
110 struct
111 {
112 struct rtx_def reg; rtunion reg_fld[2];
113 struct rtx_def plus; rtunion plus_fld1;
114 struct rtx_def neg;
115 struct rtx_def udiv; rtunion udiv_fld1;
116 struct rtx_def mult; rtunion mult_fld1;
117 struct rtx_def div; rtunion div_fld1;
118 struct rtx_def mod; rtunion mod_fld1;
119 struct rtx_def zext;
120 struct rtx_def wide_mult; rtunion wide_mult_fld1;
121 struct rtx_def wide_lshr; rtunion wide_lshr_fld1;
122 struct rtx_def wide_trunc;
123 struct rtx_def shift; rtunion shift_fld1;
124 struct rtx_def shift_mult; rtunion shift_mult_fld1;
125 struct rtx_def shift_add; rtunion shift_add_fld1;
126 struct rtx_def shift_sub; rtunion shift_sub_fld1;
127 } all;
128
129 rtx pow2[MAX_BITS_PER_WORD];
130 rtx cint[MAX_BITS_PER_WORD];
131 int m, n;
132 enum machine_mode mode, wider_mode;
133
134 zero_cost = rtx_cost (const0_rtx, 0);
135
136 for (m = 1; m < MAX_BITS_PER_WORD; m++)
137 {
138 pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
139 cint[m] = GEN_INT (m);
140 }
141
142 memset (&all, 0, sizeof all);
143
144 PUT_CODE (&all.reg, REG);
145 REGNO (&all.reg) = 10000;
146
147 PUT_CODE (&all.plus, PLUS);
148 XEXP (&all.plus, 0) = &all.reg;
149 XEXP (&all.plus, 1) = &all.reg;
150
151 PUT_CODE (&all.neg, NEG);
152 XEXP (&all.neg, 0) = &all.reg;
153
154 PUT_CODE (&all.udiv, UDIV);
155 XEXP (&all.udiv, 0) = &all.reg;
156 XEXP (&all.udiv, 1) = &all.reg;
157
158 PUT_CODE (&all.mult, MULT);
159 XEXP (&all.mult, 0) = &all.reg;
160 XEXP (&all.mult, 1) = &all.reg;
161
162 PUT_CODE (&all.div, DIV);
163 XEXP (&all.div, 0) = &all.reg;
164 XEXP (&all.div, 1) = 32 < MAX_BITS_PER_WORD ? cint[32] : GEN_INT (32);
165
166 PUT_CODE (&all.mod, MOD);
167 XEXP (&all.mod, 0) = &all.reg;
168 XEXP (&all.mod, 1) = XEXP (&all.div, 1);
169
170 PUT_CODE (&all.zext, ZERO_EXTEND);
171 XEXP (&all.zext, 0) = &all.reg;
172
173 PUT_CODE (&all.wide_mult, MULT);
174 XEXP (&all.wide_mult, 0) = &all.zext;
175 XEXP (&all.wide_mult, 1) = &all.zext;
176
177 PUT_CODE (&all.wide_lshr, LSHIFTRT);
178 XEXP (&all.wide_lshr, 0) = &all.wide_mult;
179
180 PUT_CODE (&all.wide_trunc, TRUNCATE);
181 XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
182
183 PUT_CODE (&all.shift, ASHIFT);
184 XEXP (&all.shift, 0) = &all.reg;
185
186 PUT_CODE (&all.shift_mult, MULT);
187 XEXP (&all.shift_mult, 0) = &all.reg;
188
189 PUT_CODE (&all.shift_add, PLUS);
190 XEXP (&all.shift_add, 0) = &all.shift_mult;
191 XEXP (&all.shift_add, 1) = &all.reg;
192
193 PUT_CODE (&all.shift_sub, MINUS);
194 XEXP (&all.shift_sub, 0) = &all.shift_mult;
195 XEXP (&all.shift_sub, 1) = &all.reg;
196
197 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
198 mode != VOIDmode;
199 mode = GET_MODE_WIDER_MODE (mode))
200 {
201 PUT_MODE (&all.reg, mode);
202 PUT_MODE (&all.plus, mode);
203 PUT_MODE (&all.neg, mode);
204 PUT_MODE (&all.udiv, mode);
205 PUT_MODE (&all.mult, mode);
206 PUT_MODE (&all.div, mode);
207 PUT_MODE (&all.mod, mode);
208 PUT_MODE (&all.wide_trunc, mode);
209 PUT_MODE (&all.shift, mode);
210 PUT_MODE (&all.shift_mult, mode);
211 PUT_MODE (&all.shift_add, mode);
212 PUT_MODE (&all.shift_sub, mode);
213
214 add_cost[mode] = rtx_cost (&all.plus, SET);
215 neg_cost[mode] = rtx_cost (&all.neg, SET);
216 div_cost[mode] = rtx_cost (&all.udiv, SET);
217 mul_cost[mode] = rtx_cost (&all.mult, SET);
218
219 sdiv_pow2_cheap[mode] = (rtx_cost (&all.div, SET) <= 2 * add_cost[mode]);
220 smod_pow2_cheap[mode] = (rtx_cost (&all.mod, SET) <= 4 * add_cost[mode]);
221
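/* The cost of a highpart multiply is modeled below as: zero-extend both
   operands to the next wider mode, multiply there, shift the product
   right by the narrow mode's width, and truncate back to the narrow
   mode.  */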
222 wider_mode = GET_MODE_WIDER_MODE (mode);
223 if (wider_mode != VOIDmode)
224 {
225 PUT_MODE (&all.zext, wider_mode);
226 PUT_MODE (&all.wide_mult, wider_mode);
227 PUT_MODE (&all.wide_lshr, wider_mode);
228 XEXP (&all.wide_lshr, 1) = GEN_INT (GET_MODE_BITSIZE (mode));
229
230 mul_widen_cost[wider_mode] = rtx_cost (&all.wide_mult, SET);
231 mul_highpart_cost[mode] = rtx_cost (&all.wide_trunc, SET);
232 }
233
234 shift_cost[mode][0] = 0;
235 shiftadd_cost[mode][0] = shiftsub_cost[mode][0] = add_cost[mode];
236
237 n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode));
238 for (m = 1; m < n; m++)
239 {
240 XEXP (&all.shift, 1) = cint[m];
241 XEXP (&all.shift_mult, 1) = pow2[m];
242
243 shift_cost[mode][m] = rtx_cost (&all.shift, SET);
244 shiftadd_cost[mode][m] = rtx_cost (&all.shift_add, SET);
245 shiftsub_cost[mode][m] = rtx_cost (&all.shift_sub, SET);
246 }
247 }
248 }
249
250 /* Return an rtx representing minus the value of X.
251 MODE is the intended mode of the result,
252 useful if X is a CONST_INT. */
253
254 rtx
255 negate_rtx (enum machine_mode mode, rtx x)
256 {
257 rtx result = simplify_unary_operation (NEG, mode, x, mode);
258
259 if (result == 0)
260 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
261
262 return result;
263 }
264
265 /* Report on the availability of insv/extv/extzv and the desired mode
266 of each of their operands. Returns MAX_MACHINE_MODE if HAVE_foo
267 is false; else the mode of the specified operand. If OPNO is -1,
268 all the caller cares about is whether the insn is available. */
269 enum machine_mode
270 mode_for_extraction (enum extraction_pattern pattern, int opno)
271 {
272 const struct insn_data *data;
273
274 switch (pattern)
275 {
276 case EP_insv:
277 if (HAVE_insv)
278 {
279 data = &insn_data[CODE_FOR_insv];
280 break;
281 }
282 return MAX_MACHINE_MODE;
283
284 case EP_extv:
285 if (HAVE_extv)
286 {
287 data = &insn_data[CODE_FOR_extv];
288 break;
289 }
290 return MAX_MACHINE_MODE;
291
292 case EP_extzv:
293 if (HAVE_extzv)
294 {
295 data = &insn_data[CODE_FOR_extzv];
296 break;
297 }
298 return MAX_MACHINE_MODE;
299
300 default:
301 gcc_unreachable ();
302 }
303
304 if (opno == -1)
305 return VOIDmode;
306
307 /* Everyone who uses this function used to follow it with
308 if (result == VOIDmode) result = word_mode; */
309 if (data->operand[opno].mode == VOIDmode)
310 return word_mode;
311 return data->operand[opno].mode;
312 }
313
314 \f
315 /* Generate code to store value from rtx VALUE
316 into a bit-field within structure STR_RTX
317 containing BITSIZE bits starting at bit BITNUM.
318 FIELDMODE is the machine-mode of the FIELD_DECL node for this field.
319 ALIGN is the alignment that STR_RTX is known to have.
320 TOTAL_SIZE is the size of the structure in bytes, or -1 if varying. */
321
322 /* ??? Note that there are two different ideas here for how
323 to determine the size to count bits within, for a register.
324 One is BITS_PER_WORD, and the other is the size of operand 3
325 of the insv pattern.
326
327    If operand 3 of the insv pattern is VOIDmode, then we will use BITS_PER_WORD;
328    otherwise, we use the mode of operand 3.  */
329
330 rtx
331 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
332 unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
333 rtx value)
334 {
335 unsigned int unit
336 = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
337 unsigned HOST_WIDE_INT offset = bitnum / unit;
338 unsigned HOST_WIDE_INT bitpos = bitnum % unit;
339 rtx op0 = str_rtx;
340 int byte_offset;
341 rtx orig_value;
342
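/* Operand 3 of the insv pattern is the value being inserted; its mode
   bounds the field widths the pattern can handle directly (this is
   checked below before the insv path is taken).  */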
343 enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
344
345 while (GET_CODE (op0) == SUBREG)
346 {
347 /* The following line once was done only if WORDS_BIG_ENDIAN,
348 but I think that is a mistake. WORDS_BIG_ENDIAN is
349 meaningful at a much higher level; when structures are copied
350 between memory and regs, the higher-numbered regs
351 always get higher addresses. */
352 offset += (SUBREG_BYTE (op0) / UNITS_PER_WORD);
353 /* We used to adjust BITPOS here, but now we do the whole adjustment
354 right after the loop. */
355 op0 = SUBREG_REG (op0);
356 }
357
358 /* Use vec_set patterns for inserting parts of vectors whenever
359 available. */
360 if (VECTOR_MODE_P (GET_MODE (op0))
361 && !MEM_P (op0)
362 && (vec_set_optab->handlers[GET_MODE (op0)].insn_code
363 != CODE_FOR_nothing)
364 && fieldmode == GET_MODE_INNER (GET_MODE (op0))
365 && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
366 && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
367 {
368 enum machine_mode outermode = GET_MODE (op0);
369 enum machine_mode innermode = GET_MODE_INNER (outermode);
370 int icode = (int) vec_set_optab->handlers[outermode].insn_code;
371 int pos = bitnum / GET_MODE_BITSIZE (innermode);
372 rtx rtxpos = GEN_INT (pos);
373 rtx src = value;
374 rtx dest = op0;
375 rtx pat, seq;
376 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
377 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
378 enum machine_mode mode2 = insn_data[icode].operand[2].mode;
379
380 start_sequence ();
381
382 if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
383 src = copy_to_mode_reg (mode1, src);
384
385 if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
386       rtxpos = copy_to_mode_reg (mode2, rtxpos);
387
388 /* We could handle this, but we should always be called with a pseudo
389 for our targets and all insns should take them as outputs. */
390 gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0)
391 && (*insn_data[icode].operand[1].predicate) (src, mode1)
392 && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2));
393 pat = GEN_FCN (icode) (dest, src, rtxpos);
394 seq = get_insns ();
395 end_sequence ();
396 if (pat)
397 {
398 emit_insn (seq);
399 emit_insn (pat);
400 return dest;
401 }
402 }
403
404 if (flag_force_mem)
405 {
406 int old_generating_concat_p = generating_concat_p;
407 generating_concat_p = 0;
408 value = force_not_mem (value);
409 generating_concat_p = old_generating_concat_p;
410 }
411
412 /* If the target is a register, overwriting the entire object, or storing
413 a full-word or multi-word field can be done with just a SUBREG.
414
415 If the target is memory, storing any naturally aligned field can be
416 done with a simple store. For targets that support fast unaligned
417 memory, any naturally sized, unit aligned field can be done directly. */
418
419 byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
420 + (offset * UNITS_PER_WORD);
421
422 if (bitpos == 0
423 && bitsize == GET_MODE_BITSIZE (fieldmode)
424 && (!MEM_P (op0)
425 ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
426 || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
427 && byte_offset % GET_MODE_SIZE (fieldmode) == 0)
428 : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
429 || (offset * BITS_PER_UNIT % bitsize == 0
430 && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
431 {
432 if (GET_MODE (op0) != fieldmode)
433 {
434 if (GET_CODE (op0) == SUBREG)
435 {
436 /* Else we've got some float mode source being extracted
437 into a different float mode destination -- this
438 combination of subregs results in Severe Tire
439 Damage. */
440 gcc_assert (GET_MODE (SUBREG_REG (op0)) == fieldmode
441 || GET_MODE_CLASS (fieldmode) == MODE_INT
442 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
443 op0 = SUBREG_REG (op0);
444 }
445 if (REG_P (op0))
446 op0 = gen_rtx_SUBREG (fieldmode, op0, byte_offset);
447 else
448 op0 = adjust_address (op0, fieldmode, offset);
449 }
450 emit_move_insn (op0, value);
451 return value;
452 }
453
454 /* Make sure we are playing with integral modes. Pun with subregs
455 if we aren't. This must come after the entire register case above,
456 since that case is valid for any mode. The following cases are only
457 valid for integral modes. */
458 {
459 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
460 if (imode != GET_MODE (op0))
461 {
462 if (MEM_P (op0))
463 op0 = adjust_address (op0, imode, 0);
464 else
465 {
466 gcc_assert (imode != BLKmode);
467 op0 = gen_lowpart (imode, op0);
468 }
469 }
470 }
471
472 /* We may be accessing data outside the field, which means
473 we can alias adjacent data. */
474 if (MEM_P (op0))
475 {
476 op0 = shallow_copy_rtx (op0);
477 set_mem_alias_set (op0, 0);
478 set_mem_expr (op0, 0);
479 }
480
481 /* If OP0 is a register, BITPOS must count within a word.
482 But as we have it, it counts within whatever size OP0 now has.
483 On a bigendian machine, these are not the same, so convert. */
484 if (BYTES_BIG_ENDIAN
485 && !MEM_P (op0)
486 && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
487 bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
488
489 /* Storing an lsb-aligned field in a register
490 can be done with a movestrict instruction. */
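/* (A movstrict pattern writes only the low part of its destination
   register and leaves the remaining bits unchanged, which is exactly
   the behavior a partial-register field store needs.)  */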
491
492 if (!MEM_P (op0)
493 && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
494 && bitsize == GET_MODE_BITSIZE (fieldmode)
495 && (movstrict_optab->handlers[fieldmode].insn_code
496 != CODE_FOR_nothing))
497 {
498 int icode = movstrict_optab->handlers[fieldmode].insn_code;
499
500 /* Get appropriate low part of the value being stored. */
501 if (GET_CODE (value) == CONST_INT || REG_P (value))
502 value = gen_lowpart (fieldmode, value);
503 else if (!(GET_CODE (value) == SYMBOL_REF
504 || GET_CODE (value) == LABEL_REF
505 || GET_CODE (value) == CONST))
506 value = convert_to_mode (fieldmode, value, 0);
507
508 if (! (*insn_data[icode].operand[1].predicate) (value, fieldmode))
509 value = copy_to_mode_reg (fieldmode, value);
510
511 if (GET_CODE (op0) == SUBREG)
512 {
513 /* Else we've got some float mode source being extracted into
514 a different float mode destination -- this combination of
515 subregs results in Severe Tire Damage. */
516 gcc_assert (GET_MODE (SUBREG_REG (op0)) == fieldmode
517 || GET_MODE_CLASS (fieldmode) == MODE_INT
518 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
519 op0 = SUBREG_REG (op0);
520 }
521
522 emit_insn (GEN_FCN (icode)
523 (gen_rtx_SUBREG (fieldmode, op0,
524 (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
525 + (offset * UNITS_PER_WORD)),
526 value));
527
528 return value;
529 }
530
531 /* Handle fields bigger than a word. */
532
533 if (bitsize > BITS_PER_WORD)
534 {
535 /* Here we transfer the words of the field
536 in the order least significant first.
537 This is because the most significant word is the one which may
538 be less than full.
539 However, only do that if the value is not BLKmode. */
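/* For example, a 64-bit field on a 32-bit target is stored as two
   word-sized pieces, and a 72-bit field as two full words plus one
   final 8-bit piece.  */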
540
541 unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
542 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
543 unsigned int i;
544
545 /* This is the mode we must force value to, so that there will be enough
546 subwords to extract. Note that fieldmode will often (always?) be
547 VOIDmode, because that is what store_field uses to indicate that this
548 is a bit field, but passing VOIDmode to operand_subword_force will
549 result in an abort. */
550 fieldmode = GET_MODE (value);
551 if (fieldmode == VOIDmode)
552 fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
553
554 for (i = 0; i < nwords; i++)
555 {
556 /* If I is 0, use the low-order word in both field and target;
557 if I is 1, use the next to lowest word; and so on. */
558 unsigned int wordnum = (backwards ? nwords - i - 1 : i);
559 unsigned int bit_offset = (backwards
560 ? MAX ((int) bitsize - ((int) i + 1)
561 * BITS_PER_WORD,
562 0)
563 : (int) i * BITS_PER_WORD);
564
565 store_bit_field (op0, MIN (BITS_PER_WORD,
566 bitsize - i * BITS_PER_WORD),
567 bitnum + bit_offset, word_mode,
568 operand_subword_force (value, wordnum, fieldmode));
569 }
570 return value;
571 }
572
573   /* From here on we can assume that the field to be stored
574      is no wider than a word.  */
575
576 /* OFFSET is the number of words or bytes (UNIT says which)
577 from STR_RTX to the first word or byte containing part of the field. */
578
579 if (!MEM_P (op0))
580 {
581 if (offset != 0
582 || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
583 {
584 if (!REG_P (op0))
585 {
586 /* Since this is a destination (lvalue), we can't copy it to a
587 pseudo. We can trivially remove a SUBREG that does not
588 change the size of the operand. Such a SUBREG may have been
589 added above. Otherwise, abort. */
590 gcc_assert (GET_CODE (op0) == SUBREG
591 && (GET_MODE_SIZE (GET_MODE (op0))
592 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))));
593 op0 = SUBREG_REG (op0);
594 }
595 op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
596 op0, (offset * UNITS_PER_WORD));
597 }
598 offset = 0;
599 }
600
601 /* If VALUE is a floating-point mode, access it as an integer of the
602 corresponding size. This can occur on a machine with 64 bit registers
603 that uses SFmode for float. This can also occur for unaligned float
604 structure fields. */
605 orig_value = value;
606 if (GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
607 && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
608 value = gen_lowpart ((GET_MODE (value) == VOIDmode
609 ? word_mode : int_mode_for_mode (GET_MODE (value))),
610 value);
611
612 /* Now OFFSET is nonzero only if OP0 is memory
613 and is therefore always measured in bytes. */
614
615 if (HAVE_insv
616 && GET_MODE (value) != BLKmode
617 && !(bitsize == 1 && GET_CODE (value) == CONST_INT)
618 /* Ensure insv's size is wide enough for this field. */
619 && (GET_MODE_BITSIZE (op_mode) >= bitsize)
620 && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
621 && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode))))
622 {
623 int xbitpos = bitpos;
624 rtx value1;
625 rtx xop0 = op0;
626 rtx last = get_last_insn ();
627 rtx pat;
628 enum machine_mode maxmode = mode_for_extraction (EP_insv, 3);
629 int save_volatile_ok = volatile_ok;
630
631 volatile_ok = 1;
632
633 /* If this machine's insv can only insert into a register, copy OP0
634 into a register and save it back later. */
635 /* This used to check flag_force_mem, but that was a serious
636 de-optimization now that flag_force_mem is enabled by -O2. */
637 if (MEM_P (op0)
638 && ! ((*insn_data[(int) CODE_FOR_insv].operand[0].predicate)
639 (op0, VOIDmode)))
640 {
641 rtx tempreg;
642 enum machine_mode bestmode;
643
644 /* Get the mode to use for inserting into this field. If OP0 is
645 BLKmode, get the smallest mode consistent with the alignment. If
646 OP0 is a non-BLKmode object that is no wider than MAXMODE, use its
647 mode. Otherwise, use the smallest mode containing the field. */
648
649 if (GET_MODE (op0) == BLKmode
650 || GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (maxmode))
651 bestmode
652 = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0), maxmode,
653 MEM_VOLATILE_P (op0));
654 else
655 bestmode = GET_MODE (op0);
656
657 if (bestmode == VOIDmode
658 || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
659 && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
660 goto insv_loses;
661
662 /* Adjust address to point to the containing unit of that mode.
663 Compute offset as multiple of this unit, counting in bytes. */
664 unit = GET_MODE_BITSIZE (bestmode);
665 offset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
666 bitpos = bitnum % unit;
667 op0 = adjust_address (op0, bestmode, offset);
668
669 /* Fetch that unit, store the bitfield in it, then store
670 the unit. */
671 tempreg = copy_to_reg (op0);
672 store_bit_field (tempreg, bitsize, bitpos, fieldmode, orig_value);
673 emit_move_insn (op0, tempreg);
674 return value;
675 }
676 volatile_ok = save_volatile_ok;
677
678 /* Add OFFSET into OP0's address. */
679 if (MEM_P (xop0))
680 xop0 = adjust_address (xop0, byte_mode, offset);
681
682 /* If xop0 is a register, we need it in MAXMODE
683 to make it acceptable to the format of insv. */
684 if (GET_CODE (xop0) == SUBREG)
685 /* We can't just change the mode, because this might clobber op0,
686 and we will need the original value of op0 if insv fails. */
687 xop0 = gen_rtx_SUBREG (maxmode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
688 if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
689 xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
690
691 /* On big-endian machines, we count bits from the most significant.
692 If the bit field insn does not, we must invert. */
693
694 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
695 xbitpos = unit - bitsize - xbitpos;
696
697 /* We have been counting XBITPOS within UNIT.
698 Count instead within the size of the register. */
699 if (BITS_BIG_ENDIAN && !MEM_P (xop0))
700 xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
701
702 unit = GET_MODE_BITSIZE (maxmode);
703
704 /* Convert VALUE to maxmode (which insv insn wants) in VALUE1. */
705 value1 = value;
706 if (GET_MODE (value) != maxmode)
707 {
708 if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
709 {
710 /* Optimization: Don't bother really extending VALUE
711 if it has all the bits we will actually use. However,
712 if we must narrow it, be sure we do it correctly. */
713
714 if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (maxmode))
715 {
716 rtx tmp;
717
718 tmp = simplify_subreg (maxmode, value1, GET_MODE (value), 0);
719 if (! tmp)
720 tmp = simplify_gen_subreg (maxmode,
721 force_reg (GET_MODE (value),
722 value1),
723 GET_MODE (value), 0);
724 value1 = tmp;
725 }
726 else
727 value1 = gen_lowpart (maxmode, value1);
728 }
729 else if (GET_CODE (value) == CONST_INT)
730 value1 = gen_int_mode (INTVAL (value), maxmode);
731 else
732 /* Parse phase is supposed to make VALUE's data type
733 match that of the component reference, which is a type
734 at least as wide as the field; so VALUE should have
735 a mode that corresponds to that type. */
736 gcc_assert (CONSTANT_P (value));
737 }
738
739 /* If this machine's insv insists on a register,
740 get VALUE1 into a register. */
741 if (! ((*insn_data[(int) CODE_FOR_insv].operand[3].predicate)
742 (value1, maxmode)))
743 value1 = force_reg (maxmode, value1);
744
745 pat = gen_insv (xop0, GEN_INT (bitsize), GEN_INT (xbitpos), value1);
746 if (pat)
747 emit_insn (pat);
748 else
749 {
750 delete_insns_since (last);
751 store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
752 }
753 }
754 else
755 insv_loses:
756 /* Insv is not available; store using shifts and boolean ops. */
757 store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
758 return value;
759 }
760 \f
761 /* Use shifts and boolean operations to store VALUE
762 into a bit field of width BITSIZE
763 in a memory location specified by OP0 except offset by OFFSET bytes.
764 (OFFSET must be 0 if OP0 is a register.)
765 The field starts at position BITPOS within the byte.
766 (If OP0 is a register, it may be a full word or a narrower mode,
767 but BITPOS still counts within a full word,
768 which is significant on bigendian machines.) */
769
770 static void
771 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
772 unsigned HOST_WIDE_INT bitsize,
773 unsigned HOST_WIDE_INT bitpos, rtx value)
774 {
775 enum machine_mode mode;
776 unsigned int total_bits = BITS_PER_WORD;
777 rtx subtarget, temp;
778 int all_zero = 0;
779 int all_one = 0;
780
781 /* There is a case not handled here:
782 a structure with a known alignment of just a halfword
783 and a field split across two aligned halfwords within the structure.
784 Or likewise a structure with a known alignment of just a byte
785 and a field split across two bytes.
786 Such cases are not supposed to be able to occur. */
787
788 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
789 {
790 gcc_assert (!offset);
791 /* Special treatment for a bit field split across two registers. */
792 if (bitsize + bitpos > BITS_PER_WORD)
793 {
794 store_split_bit_field (op0, bitsize, bitpos, value);
795 return;
796 }
797 }
798 else
799 {
800 /* Get the proper mode to use for this field. We want a mode that
801 includes the entire field. If such a mode would be larger than
802 a word, we won't be doing the extraction the normal way.
803 We don't want a mode bigger than the destination. */
804
805 mode = GET_MODE (op0);
806 if (GET_MODE_BITSIZE (mode) == 0
807 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
808 mode = word_mode;
809 mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
810 MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
811
812 if (mode == VOIDmode)
813 {
814 /* The only way this should occur is if the field spans word
815 boundaries. */
816 store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
817 value);
818 return;
819 }
820
821 total_bits = GET_MODE_BITSIZE (mode);
822
823 /* Make sure bitpos is valid for the chosen mode. Adjust BITPOS to
824 be in the range 0 to total_bits-1, and put any excess bytes in
825 OFFSET. */
826 if (bitpos >= total_bits)
827 {
828 offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
829 bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
830 * BITS_PER_UNIT);
831 }
832
833 /* Get ref to an aligned byte, halfword, or word containing the field.
834 Adjust BITPOS to be position within a word,
835 and OFFSET to be the offset of that word.
836 Then alter OP0 to refer to that word. */
837 bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
838 offset -= (offset % (total_bits / BITS_PER_UNIT));
839 op0 = adjust_address (op0, mode, offset);
840 }
841
842 mode = GET_MODE (op0);
843
844 /* Now MODE is either some integral mode for a MEM as OP0,
845 or is a full-word for a REG as OP0. TOTAL_BITS corresponds.
846 The bit field is contained entirely within OP0.
847 BITPOS is the starting bit number within OP0.
848 (OP0's mode may actually be narrower than MODE.) */
849
850 if (BYTES_BIG_ENDIAN)
851 /* BITPOS is the distance between our msb
852 and that of the containing datum.
853 Convert it to the distance from the lsb. */
854 bitpos = total_bits - bitsize - bitpos;
855
856 /* Now BITPOS is always the distance between our lsb
857 and that of OP0. */
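/* For example, with TOTAL_BITS == 32, BITSIZE == 4 and a big-endian
   BITPOS of 8 (counted from the msb), the converted lsb-relative
   BITPOS is 32 - 4 - 8 == 20.  */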
858
859 /* Shift VALUE left by BITPOS bits. If VALUE is not constant,
860 we must first convert its mode to MODE. */
861
862 if (GET_CODE (value) == CONST_INT)
863 {
864 HOST_WIDE_INT v = INTVAL (value);
865
866 if (bitsize < HOST_BITS_PER_WIDE_INT)
867 v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
868
869 if (v == 0)
870 all_zero = 1;
871 else if ((bitsize < HOST_BITS_PER_WIDE_INT
872 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
873 || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
874 all_one = 1;
875
876 value = lshift_value (mode, value, bitpos, bitsize);
877 }
878 else
879 {
880 int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
881 && bitpos + bitsize != GET_MODE_BITSIZE (mode));
882
883 if (GET_MODE (value) != mode)
884 {
885 if ((REG_P (value) || GET_CODE (value) == SUBREG)
886 && GET_MODE_SIZE (mode) < GET_MODE_SIZE (GET_MODE (value)))
887 value = gen_lowpart (mode, value);
888 else
889 value = convert_to_mode (mode, value, 1);
890 }
891
892 if (must_and)
893 value = expand_binop (mode, and_optab, value,
894 mask_rtx (mode, 0, bitsize, 0),
895 NULL_RTX, 1, OPTAB_LIB_WIDEN);
896 if (bitpos > 0)
897 value = expand_shift (LSHIFT_EXPR, mode, value,
898 build_int_cst (NULL_TREE, bitpos), NULL_RTX, 1);
899 }
900
901 /* Now clear the chosen bits in OP0,
902 except that if VALUE is -1 we need not bother. */
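/* For instance, with BITSIZE == 3 and (lsb-relative) BITPOS == 2 the
   clearing mask is ~(7 << 2); OP0 is ANDed with it and then IORed with
   the shifted VALUE computed above.  */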
903
904 subtarget = (REG_P (op0) || ! flag_force_mem) ? op0 : 0;
905
906 if (! all_one)
907 {
908 temp = expand_binop (mode, and_optab, op0,
909 mask_rtx (mode, bitpos, bitsize, 1),
910 subtarget, 1, OPTAB_LIB_WIDEN);
911 subtarget = temp;
912 }
913 else
914 temp = op0;
915
916 /* Now logical-or VALUE into OP0, unless it is zero. */
917
918 if (! all_zero)
919 temp = expand_binop (mode, ior_optab, temp, value,
920 subtarget, 1, OPTAB_LIB_WIDEN);
921 if (op0 != temp)
922 emit_move_insn (op0, temp);
923 }
924 \f
925 /* Store a bit field that is split across multiple accessible memory objects.
926
927 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
928 BITSIZE is the field width; BITPOS the position of its first bit
929 (within the word).
930 VALUE is the value to store.
931
932 This does not yet handle fields wider than BITS_PER_WORD. */
933
934 static void
935 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
936 unsigned HOST_WIDE_INT bitpos, rtx value)
937 {
938 unsigned int unit;
939 unsigned int bitsdone = 0;
940
941 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
942 much at a time. */
943 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
944 unit = BITS_PER_WORD;
945 else
946 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
947
948 /* If VALUE is a constant other than a CONST_INT, get it into a register in
949 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
950 that VALUE might be a floating-point constant. */
951 if (CONSTANT_P (value) && GET_CODE (value) != CONST_INT)
952 {
953 rtx word = gen_lowpart_common (word_mode, value);
954
955 if (word && (value != word))
956 value = word;
957 else
958 value = gen_lowpart_common (word_mode,
959 force_reg (GET_MODE (value) != VOIDmode
960 ? GET_MODE (value)
961 : word_mode, value));
962 }
963
964 while (bitsdone < bitsize)
965 {
966 unsigned HOST_WIDE_INT thissize;
967 rtx part, word;
968 unsigned HOST_WIDE_INT thispos;
969 unsigned HOST_WIDE_INT offset;
970
971 offset = (bitpos + bitsdone) / unit;
972 thispos = (bitpos + bitsdone) % unit;
973
974 /* THISSIZE must not overrun a word boundary. Otherwise,
975 store_fixed_bit_field will call us again, and we will mutually
976 recurse forever. */
977 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
978 thissize = MIN (thissize, unit - thispos);
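/* For example, a 10-bit field starting at bit 28 of a 32-bit unit is
   stored as a 4-bit piece at position 28 followed by a 6-bit piece at
   position 0 of the next unit.  */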
979
980 if (BYTES_BIG_ENDIAN)
981 {
982 int total_bits;
983
984 /* We must do an endian conversion exactly the same way as it is
985 done in extract_bit_field, so that the two calls to
986 extract_fixed_bit_field will have comparable arguments. */
987 if (!MEM_P (value) || GET_MODE (value) == BLKmode)
988 total_bits = BITS_PER_WORD;
989 else
990 total_bits = GET_MODE_BITSIZE (GET_MODE (value));
991
992 /* Fetch successively less significant portions. */
993 if (GET_CODE (value) == CONST_INT)
994 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
995 >> (bitsize - bitsdone - thissize))
996 & (((HOST_WIDE_INT) 1 << thissize) - 1));
997 else
998 /* The args are chosen so that the last part includes the
999 lsb. Give extract_bit_field the value it needs (with
1000 endianness compensation) to fetch the piece we want. */
1001 part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1002 total_bits - bitsize + bitsdone,
1003 NULL_RTX, 1);
1004 }
1005 else
1006 {
1007 /* Fetch successively more significant portions. */
1008 if (GET_CODE (value) == CONST_INT)
1009 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1010 >> bitsdone)
1011 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1012 else
1013 part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1014 bitsdone, NULL_RTX, 1);
1015 }
1016
1017 /* If OP0 is a register, then handle OFFSET here.
1018
1019 When handling multiword bitfields, extract_bit_field may pass
1020 down a word_mode SUBREG of a larger REG for a bitfield that actually
1021 crosses a word boundary. Thus, for a SUBREG, we must find
1022 the current word starting from the base register. */
1023 if (GET_CODE (op0) == SUBREG)
1024 {
1025 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1026 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1027 GET_MODE (SUBREG_REG (op0)));
1028 offset = 0;
1029 }
1030 else if (REG_P (op0))
1031 {
1032 word = operand_subword_force (op0, offset, GET_MODE (op0));
1033 offset = 0;
1034 }
1035 else
1036 word = op0;
1037
1038 /* OFFSET is in UNITs, and UNIT is in bits.
1039 store_fixed_bit_field wants offset in bytes. */
1040 store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
1041 thispos, part);
1042 bitsdone += thissize;
1043 }
1044 }
1045 \f
1046 /* Generate code to extract a bit-field from STR_RTX
1047 containing BITSIZE bits, starting at BITNUM,
1048 and put it in TARGET if possible (if TARGET is nonzero).
1049 Regardless of TARGET, we return the rtx for where the value is placed.
1050
1051    STR_RTX is the structure containing the field (a REG or MEM).
1052 UNSIGNEDP is nonzero if this is an unsigned bit field.
1053 MODE is the natural mode of the field value once extracted.
1054 TMODE is the mode the caller would like the value to have;
1055 but the value may be returned with type MODE instead.
1056
1057 TOTAL_SIZE is the size in bytes of the containing structure,
1058 or -1 if varying.
1059
1060 If a TARGET is specified and we can store in it at no extra cost,
1061 we do so, and return TARGET.
1062 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1063 if they are equally easy. */
1064
1065 rtx
1066 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1067 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1068 enum machine_mode mode, enum machine_mode tmode)
1069 {
1070 unsigned int unit
1071 = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
1072 unsigned HOST_WIDE_INT offset = bitnum / unit;
1073 unsigned HOST_WIDE_INT bitpos = bitnum % unit;
1074 rtx op0 = str_rtx;
1075 rtx spec_target = target;
1076 rtx spec_target_subreg = 0;
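/* SPEC_TARGET tracks the register we would like to return;
   SPEC_TARGET_SUBREG records a wider-mode lowpart of it that the
   extv/extzv paths may extract into, in which case SPEC_TARGET
   already holds the value and can be returned directly.  */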
1077 enum machine_mode int_mode;
1078 enum machine_mode extv_mode = mode_for_extraction (EP_extv, 0);
1079 enum machine_mode extzv_mode = mode_for_extraction (EP_extzv, 0);
1080 enum machine_mode mode1;
1081 int byte_offset;
1082
1083 if (tmode == VOIDmode)
1084 tmode = mode;
1085
1086 while (GET_CODE (op0) == SUBREG)
1087 {
1088 bitpos += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1089 if (bitpos > unit)
1090 {
1091 offset += (bitpos / unit);
1092 bitpos %= unit;
1093 }
1094 op0 = SUBREG_REG (op0);
1095 }
1096
1097 if (REG_P (op0)
1098 && mode == GET_MODE (op0)
1099 && bitnum == 0
1100 && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1101 {
1102 /* We're trying to extract a full register from itself. */
1103 return op0;
1104 }
1105
1106 /* Use vec_extract patterns for extracting parts of vectors whenever
1107 available. */
1108 if (VECTOR_MODE_P (GET_MODE (op0))
1109 && !MEM_P (op0)
1110 && (vec_extract_optab->handlers[GET_MODE (op0)].insn_code
1111 != CODE_FOR_nothing)
1112 && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1113 == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1114 {
1115 enum machine_mode outermode = GET_MODE (op0);
1116 enum machine_mode innermode = GET_MODE_INNER (outermode);
1117 int icode = (int) vec_extract_optab->handlers[outermode].insn_code;
1118 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1119 rtx rtxpos = GEN_INT (pos);
1120 rtx src = op0;
1121 rtx dest = NULL, pat, seq;
1122 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
1123 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
1124 enum machine_mode mode2 = insn_data[icode].operand[2].mode;
1125
1126 if (innermode == tmode || innermode == mode)
1127 dest = target;
1128
1129 if (!dest)
1130 dest = gen_reg_rtx (innermode);
1131
1132 start_sequence ();
1133
1134 if (! (*insn_data[icode].operand[0].predicate) (dest, mode0))
1135 dest = copy_to_mode_reg (mode0, dest);
1136
1137 if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
1138 src = copy_to_mode_reg (mode1, src);
1139
1140 if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
1141       rtxpos = copy_to_mode_reg (mode2, rtxpos);
1142
1143 /* We could handle this, but we should always be called with a pseudo
1144 for our targets and all insns should take them as outputs. */
1145 gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0)
1146 && (*insn_data[icode].operand[1].predicate) (src, mode1)
1147 && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2));
1148
1149 pat = GEN_FCN (icode) (dest, src, rtxpos);
1150 seq = get_insns ();
1151 end_sequence ();
1152 if (pat)
1153 {
1154 emit_insn (seq);
1155 emit_insn (pat);
1156 return dest;
1157 }
1158 }
1159
1160 /* Make sure we are playing with integral modes. Pun with subregs
1161 if we aren't. */
1162 {
1163 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1164 if (imode != GET_MODE (op0))
1165 {
1166 op0 = gen_lowpart (imode, op0);
1167
1168 /* If we got a SUBREG, force it into a register since we aren't going
1169 to be able to do another SUBREG on it. */
1170 if (GET_CODE (op0) == SUBREG)
1171 op0 = force_reg (imode, op0);
1172 }
1173 }
1174
1175 /* We may be accessing data outside the field, which means
1176 we can alias adjacent data. */
1177 if (MEM_P (op0))
1178 {
1179 op0 = shallow_copy_rtx (op0);
1180 set_mem_alias_set (op0, 0);
1181 set_mem_expr (op0, 0);
1182 }
1183
1184 /* Extraction of a full-word or multi-word value from a structure
1185 in a register or aligned memory can be done with just a SUBREG.
1186 A subword value in the least significant part of a register
1187 can also be extracted with a SUBREG. For this, we need the
1188 byte offset of the value in op0. */
1189
1190 byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;
1191
1192 /* If OP0 is a register, BITPOS must count within a word.
1193 But as we have it, it counts within whatever size OP0 now has.
1194 On a bigendian machine, these are not the same, so convert. */
1195 if (BYTES_BIG_ENDIAN
1196 && !MEM_P (op0)
1197 && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
1198 bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1199
1200 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1201 If that's wrong, the solution is to test for it and set TARGET to 0
1202 if needed. */
1203
1204 /* Only scalar integer modes can be converted via subregs. There is an
1205 additional problem for FP modes here in that they can have a precision
1206 which is different from the size. mode_for_size uses precision, but
1207 we want a mode based on the size, so we must avoid calling it for FP
1208 modes. */
1209 mode1 = (SCALAR_INT_MODE_P (tmode)
1210 ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
1211 : mode);
1212
1213 if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
1214 && bitpos % BITS_PER_WORD == 0)
1215 || (mode1 != BLKmode
1216 /* ??? The big endian test here is wrong. This is correct
1217 if the value is in a register, and if mode_for_size is not
1218 the same mode as op0. This causes us to get unnecessarily
1219 inefficient code from the Thumb port when -mbig-endian. */
1220 && (BYTES_BIG_ENDIAN
1221 ? bitpos + bitsize == BITS_PER_WORD
1222 : bitpos == 0)))
1223 && ((!MEM_P (op0)
1224 && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode),
1225 GET_MODE_BITSIZE (GET_MODE (op0)))
1226 && GET_MODE_SIZE (mode1) != 0
1227 && byte_offset % GET_MODE_SIZE (mode1) == 0)
1228 || (MEM_P (op0)
1229 && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
1230 || (offset * BITS_PER_UNIT % bitsize == 0
1231 && MEM_ALIGN (op0) % bitsize == 0)))))
1232 {
1233 if (mode1 != GET_MODE (op0))
1234 {
1235 if (MEM_P (op0))
1236 op0 = adjust_address (op0, mode1, offset);
1237 else
1238 {
1239 rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1240 byte_offset);
1241 if (sub == NULL)
1242 goto no_subreg_mode_swap;
1243 op0 = sub;
1244 }
1245 }
1246 if (mode1 != mode)
1247 return convert_to_mode (tmode, op0, unsignedp);
1248 return op0;
1249 }
1250 no_subreg_mode_swap:
1251
1252 /* Handle fields bigger than a word. */
1253
1254 if (bitsize > BITS_PER_WORD)
1255 {
1256 /* Here we transfer the words of the field
1257 in the order least significant first.
1258 This is because the most significant word is the one which may
1259 be less than full. */
1260
1261 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1262 unsigned int i;
1263
1264 if (target == 0 || !REG_P (target))
1265 target = gen_reg_rtx (mode);
1266
1267 /* Indicate for flow that the entire target reg is being set. */
1268 emit_insn (gen_rtx_CLOBBER (VOIDmode, target));
1269
1270 for (i = 0; i < nwords; i++)
1271 {
1272 /* If I is 0, use the low-order word in both field and target;
1273 if I is 1, use the next to lowest word; and so on. */
1274 /* Word number in TARGET to use. */
1275 unsigned int wordnum
1276 = (WORDS_BIG_ENDIAN
1277 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1278 : i);
1279 /* Offset from start of field in OP0. */
1280 unsigned int bit_offset = (WORDS_BIG_ENDIAN
1281 ? MAX (0, ((int) bitsize - ((int) i + 1)
1282 * (int) BITS_PER_WORD))
1283 : (int) i * BITS_PER_WORD);
1284 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1285 rtx result_part
1286 = extract_bit_field (op0, MIN (BITS_PER_WORD,
1287 bitsize - i * BITS_PER_WORD),
1288 bitnum + bit_offset, 1, target_part, mode,
1289 word_mode);
1290
1291 gcc_assert (target_part);
1292
1293 if (result_part != target_part)
1294 emit_move_insn (target_part, result_part);
1295 }
1296
1297 if (unsignedp)
1298 {
1299 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1300 need to be zero'd out. */
1301 if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1302 {
1303 unsigned int i, total_words;
1304
1305 total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1306 for (i = nwords; i < total_words; i++)
1307 emit_move_insn
1308 (operand_subword (target,
1309 WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1310 1, VOIDmode),
1311 const0_rtx);
1312 }
1313 return target;
1314 }
1315
1316 /* Signed bit field: sign-extend with two arithmetic shifts. */
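/* E.g., a 12-bit signed field extracted into SImode is shifted left by
   20 and then arithmetically right by 20 to replicate its sign bit.  */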
1317 target = expand_shift (LSHIFT_EXPR, mode, target,
1318 build_int_cst (NULL_TREE,
1319 GET_MODE_BITSIZE (mode) - bitsize),
1320 NULL_RTX, 0);
1321 return expand_shift (RSHIFT_EXPR, mode, target,
1322 build_int_cst (NULL_TREE,
1323 GET_MODE_BITSIZE (mode) - bitsize),
1324 NULL_RTX, 0);
1325 }
1326
1327 /* From here on we know the desired field is smaller than a word. */
1328
1329 /* Check if there is a correspondingly-sized integer field, so we can
1330 safely extract it as one size of integer, if necessary; then
1331 truncate or extend to the size that is wanted; then use SUBREGs or
1332 convert_to_mode to get one of the modes we really wanted. */
1333
1334 int_mode = int_mode_for_mode (tmode);
1335 if (int_mode == BLKmode)
1336 int_mode = int_mode_for_mode (mode);
1337 /* Should probably push op0 out to memory and then do a load. */
1338 gcc_assert (int_mode != BLKmode);
1339
1340 /* OFFSET is the number of words or bytes (UNIT says which)
1341 from STR_RTX to the first word or byte containing part of the field. */
1342 if (!MEM_P (op0))
1343 {
1344 if (offset != 0
1345 || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1346 {
1347 if (!REG_P (op0))
1348 op0 = copy_to_reg (op0);
1349 op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
1350 op0, (offset * UNITS_PER_WORD));
1351 }
1352 offset = 0;
1353 }
1354
1355 /* Now OFFSET is nonzero only for memory operands. */
1356
1357 if (unsignedp)
1358 {
1359 if (HAVE_extzv
1360 && (GET_MODE_BITSIZE (extzv_mode) >= bitsize)
1361 && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
1362 && (bitsize + bitpos > GET_MODE_BITSIZE (extzv_mode))))
1363 {
1364 unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
1365 rtx bitsize_rtx, bitpos_rtx;
1366 rtx last = get_last_insn ();
1367 rtx xop0 = op0;
1368 rtx xtarget = target;
1369 rtx xspec_target = spec_target;
1370 rtx xspec_target_subreg = spec_target_subreg;
1371 rtx pat;
1372 enum machine_mode maxmode = mode_for_extraction (EP_extzv, 0);
1373
1374 if (MEM_P (xop0))
1375 {
1376 int save_volatile_ok = volatile_ok;
1377 volatile_ok = 1;
1378
1379 /* Is the memory operand acceptable? */
1380 if (! ((*insn_data[(int) CODE_FOR_extzv].operand[1].predicate)
1381 (xop0, GET_MODE (xop0))))
1382 {
1383 /* No, load into a reg and extract from there. */
1384 enum machine_mode bestmode;
1385
1386 /* Get the mode to use for inserting into this field. If
1387 OP0 is BLKmode, get the smallest mode consistent with the
1388 alignment. If OP0 is a non-BLKmode object that is no
1389 wider than MAXMODE, use its mode. Otherwise, use the
1390 smallest mode containing the field. */
1391
1392 if (GET_MODE (xop0) == BLKmode
1393 || (GET_MODE_SIZE (GET_MODE (op0))
1394 > GET_MODE_SIZE (maxmode)))
1395 bestmode = get_best_mode (bitsize, bitnum,
1396 MEM_ALIGN (xop0), maxmode,
1397 MEM_VOLATILE_P (xop0));
1398 else
1399 bestmode = GET_MODE (xop0);
1400
1401 if (bestmode == VOIDmode
1402 || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0))
1403 && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0)))
1404 goto extzv_loses;
1405
1406 /* Compute offset as multiple of this unit,
1407 counting in bytes. */
1408 unit = GET_MODE_BITSIZE (bestmode);
1409 xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1410 xbitpos = bitnum % unit;
1411 xop0 = adjust_address (xop0, bestmode, xoffset);
1412
1413 /* Fetch it to a register in that size. */
1414 xop0 = force_reg (bestmode, xop0);
1415
1416 /* XBITPOS counts within UNIT, which is what is expected. */
1417 }
1418 else
1419 /* Get ref to first byte containing part of the field. */
1420 xop0 = adjust_address (xop0, byte_mode, xoffset);
1421
1422 volatile_ok = save_volatile_ok;
1423 }
1424
1425 /* If op0 is a register, we need it in MAXMODE (which is usually
1426      SImode) to make it acceptable to the format of extzv.  */
1427 if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1428 goto extzv_loses;
1429 if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
1430 xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
1431
1432 /* On big-endian machines, we count bits from the most significant.
1433 If the bit field insn does not, we must invert. */
1434 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1435 xbitpos = unit - bitsize - xbitpos;
1436
1437 /* Now convert from counting within UNIT to counting in MAXMODE. */
1438 if (BITS_BIG_ENDIAN && !MEM_P (xop0))
1439 xbitpos += GET_MODE_BITSIZE (maxmode) - unit;
1440
1441 unit = GET_MODE_BITSIZE (maxmode);
1442
1443 if (xtarget == 0
1444 || (flag_force_mem && MEM_P (xtarget)))
1445 xtarget = xspec_target = gen_reg_rtx (tmode);
1446
1447 if (GET_MODE (xtarget) != maxmode)
1448 {
1449 if (REG_P (xtarget))
1450 {
1451 int wider = (GET_MODE_SIZE (maxmode)
1452 > GET_MODE_SIZE (GET_MODE (xtarget)));
1453 xtarget = gen_lowpart (maxmode, xtarget);
1454 if (wider)
1455 xspec_target_subreg = xtarget;
1456 }
1457 else
1458 xtarget = gen_reg_rtx (maxmode);
1459 }
1460
1461 /* If this machine's extzv insists on a register target,
1462 make sure we have one. */
1463 if (! ((*insn_data[(int) CODE_FOR_extzv].operand[0].predicate)
1464 (xtarget, maxmode)))
1465 xtarget = gen_reg_rtx (maxmode);
1466
1467 bitsize_rtx = GEN_INT (bitsize);
1468 bitpos_rtx = GEN_INT (xbitpos);
1469
1470 pat = gen_extzv (xtarget, xop0, bitsize_rtx, bitpos_rtx);
1471 if (pat)
1472 {
1473 emit_insn (pat);
1474 target = xtarget;
1475 spec_target = xspec_target;
1476 spec_target_subreg = xspec_target_subreg;
1477 }
1478 else
1479 {
1480 delete_insns_since (last);
1481 target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1482 bitpos, target, 1);
1483 }
1484 }
1485 else
1486 extzv_loses:
1487 target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1488 bitpos, target, 1);
1489 }
1490 else
1491 {
1492 if (HAVE_extv
1493 && (GET_MODE_BITSIZE (extv_mode) >= bitsize)
1494 && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
1495 && (bitsize + bitpos > GET_MODE_BITSIZE (extv_mode))))
1496 {
1497 int xbitpos = bitpos, xoffset = offset;
1498 rtx bitsize_rtx, bitpos_rtx;
1499 rtx last = get_last_insn ();
1500 rtx xop0 = op0, xtarget = target;
1501 rtx xspec_target = spec_target;
1502 rtx xspec_target_subreg = spec_target_subreg;
1503 rtx pat;
1504 enum machine_mode maxmode = mode_for_extraction (EP_extv, 0);
1505
1506 if (MEM_P (xop0))
1507 {
1508 /* Is the memory operand acceptable? */
1509 if (! ((*insn_data[(int) CODE_FOR_extv].operand[1].predicate)
1510 (xop0, GET_MODE (xop0))))
1511 {
1512 /* No, load into a reg and extract from there. */
1513 enum machine_mode bestmode;
1514
1515 /* Get the mode to use for inserting into this field. If
1516 OP0 is BLKmode, get the smallest mode consistent with the
1517 alignment. If OP0 is a non-BLKmode object that is no
1518 wider than MAXMODE, use its mode. Otherwise, use the
1519 smallest mode containing the field. */
1520
1521 if (GET_MODE (xop0) == BLKmode
1522 || (GET_MODE_SIZE (GET_MODE (op0))
1523 > GET_MODE_SIZE (maxmode)))
1524 bestmode = get_best_mode (bitsize, bitnum,
1525 MEM_ALIGN (xop0), maxmode,
1526 MEM_VOLATILE_P (xop0));
1527 else
1528 bestmode = GET_MODE (xop0);
1529
1530 if (bestmode == VOIDmode
1531 || (SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (xop0))
1532 && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (xop0)))
1533 goto extv_loses;
1534
1535 /* Compute offset as multiple of this unit,
1536 counting in bytes. */
1537 unit = GET_MODE_BITSIZE (bestmode);
1538 xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1539 xbitpos = bitnum % unit;
1540 xop0 = adjust_address (xop0, bestmode, xoffset);
1541
1542 /* Fetch it to a register in that size. */
1543 xop0 = force_reg (bestmode, xop0);
1544
1545 /* XBITPOS counts within UNIT, which is what is expected. */
1546 }
1547 else
1548 /* Get ref to first byte containing part of the field. */
1549 xop0 = adjust_address (xop0, byte_mode, xoffset);
1550 }
1551
1552 /* If op0 is a register, we need it in MAXMODE (which is usually
1553 SImode) to make it acceptable to the format of extv. */
1554 if (GET_CODE (xop0) == SUBREG && GET_MODE (xop0) != maxmode)
1555 goto extv_loses;
1556 if (REG_P (xop0) && GET_MODE (xop0) != maxmode)
1557 xop0 = gen_rtx_SUBREG (maxmode, xop0, 0);
1558
1559 /* On big-endian machines, we count bits from the most significant.
1560 If the bit field insn does not, we must invert. */
1561 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1562 xbitpos = unit - bitsize - xbitpos;
1563
1564 /* XBITPOS counts within a size of UNIT.
1565 Adjust to count within a size of MAXMODE. */
1566 if (BITS_BIG_ENDIAN && !MEM_P (xop0))
1567 xbitpos += (GET_MODE_BITSIZE (maxmode) - unit);
1568
1569 unit = GET_MODE_BITSIZE (maxmode);
1570
1571 if (xtarget == 0
1572 || (flag_force_mem && MEM_P (xtarget)))
1573 xtarget = xspec_target = gen_reg_rtx (tmode);
1574
1575 if (GET_MODE (xtarget) != maxmode)
1576 {
1577 if (REG_P (xtarget))
1578 {
1579 int wider = (GET_MODE_SIZE (maxmode)
1580 > GET_MODE_SIZE (GET_MODE (xtarget)));
1581 xtarget = gen_lowpart (maxmode, xtarget);
1582 if (wider)
1583 xspec_target_subreg = xtarget;
1584 }
1585 else
1586 xtarget = gen_reg_rtx (maxmode);
1587 }
1588
1589 /* If this machine's extv insists on a register target,
1590 make sure we have one. */
1591 if (! ((*insn_data[(int) CODE_FOR_extv].operand[0].predicate)
1592 (xtarget, maxmode)))
1593 xtarget = gen_reg_rtx (maxmode);
1594
1595 bitsize_rtx = GEN_INT (bitsize);
1596 bitpos_rtx = GEN_INT (xbitpos);
1597
1598 pat = gen_extv (xtarget, xop0, bitsize_rtx, bitpos_rtx);
1599 if (pat)
1600 {
1601 emit_insn (pat);
1602 target = xtarget;
1603 spec_target = xspec_target;
1604 spec_target_subreg = xspec_target_subreg;
1605 }
1606 else
1607 {
1608 delete_insns_since (last);
1609 target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1610 bitpos, target, 0);
1611 }
1612 }
1613 else
1614 extv_loses:
1615 target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1616 bitpos, target, 0);
1617 }
1618 if (target == spec_target)
1619 return target;
1620 if (target == spec_target_subreg)
1621 return spec_target;
1622 if (GET_MODE (target) != tmode && GET_MODE (target) != mode)
1623 {
1624 /* If the target mode is complex, then extract the two scalar elements
1625 from the value now. Creating (subreg:SC (reg:DI) 0), as we would do
1626 with the clause below, will cause gen_realpart or gen_imagpart to
1627 fail, since those functions must return lvalues. */
1628 if (COMPLEX_MODE_P (tmode))
1629 {
1630 rtx realpart, imagpart;
1631 enum machine_mode itmode = GET_MODE_INNER (tmode);
1632
1633 target = convert_to_mode (mode_for_size (GET_MODE_BITSIZE (tmode),
1634 MODE_INT, 0),
1635 target, unsignedp);
1636
1637 realpart = extract_bit_field (target, GET_MODE_BITSIZE (itmode), 0,
1638 unsignedp, NULL, itmode, itmode);
1639 imagpart = extract_bit_field (target, GET_MODE_BITSIZE (itmode),
1640 GET_MODE_BITSIZE (itmode), unsignedp,
1641 NULL, itmode, itmode);
1642
1643 return gen_rtx_CONCAT (tmode, realpart, imagpart);
1644 }
1645
1646 /* If the target mode is not a scalar integral, first convert to the
1647 integer mode of that size and then access it as a floating-point
1648 value via a SUBREG. */
1649 if (!SCALAR_INT_MODE_P (tmode))
1650 {
1651 enum machine_mode smode
1652 = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1653 target = convert_to_mode (smode, target, unsignedp);
1654 target = force_reg (smode, target);
1655 return gen_lowpart (tmode, target);
1656 }
1657
1658 return convert_to_mode (tmode, target, unsignedp);
1659 }
1660 return target;
1661 }
1662 \f
1663 /* Extract a bit field using shifts and boolean operations.
1664 Returns an rtx to represent the value.
1665 OP0 addresses a register (word) or memory (byte).
1666 BITPOS says which bit within the word or byte the bit field starts in.
1667 OFFSET says how many bytes farther the bit field starts;
1668 it is 0 if OP0 is a register.
1669 BITSIZE says how many bits long the bit field is.
1670 (If OP0 is a register, it may be narrower than a full word,
1671 but BITPOS still counts within a full word,
1672 which is significant on bigendian machines.)
1673
1674 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1675 If TARGET is nonzero, attempts to store the value there
1676 and return TARGET, but this is not guaranteed.
1677 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
1678
1679 static rtx
1680 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1681 unsigned HOST_WIDE_INT offset,
1682 unsigned HOST_WIDE_INT bitsize,
1683 unsigned HOST_WIDE_INT bitpos, rtx target,
1684 int unsignedp)
1685 {
1686 unsigned int total_bits = BITS_PER_WORD;
1687 enum machine_mode mode;
1688
1689 if (GET_CODE (op0) == SUBREG || REG_P (op0))
1690 {
1691 /* Special treatment for a bit field split across two registers. */
1692 if (bitsize + bitpos > BITS_PER_WORD)
1693 return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
1694 }
1695 else
1696 {
1697 /* Get the proper mode to use for this field. We want a mode that
1698 includes the entire field. If such a mode would be larger than
1699 a word, we won't be doing the extraction the normal way. */
1700
1701 mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
1702 MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1703
1704 if (mode == VOIDmode)
1705 /* The only way this should occur is if the field spans word
1706 boundaries. */
1707 return extract_split_bit_field (op0, bitsize,
1708 bitpos + offset * BITS_PER_UNIT,
1709 unsignedp);
1710
1711 total_bits = GET_MODE_BITSIZE (mode);
1712
1713 /* Make sure bitpos is valid for the chosen mode. Adjust BITPOS to
1714 be in the range 0 to total_bits-1, and put any excess bytes in
1715 OFFSET. */
1716 if (bitpos >= total_bits)
1717 {
1718 offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1719 bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1720 * BITS_PER_UNIT);
1721 }
1722
1723 /* Get ref to an aligned byte, halfword, or word containing the field.
1724 Adjust BITPOS to be position within a word,
1725 and OFFSET to be the offset of that word.
1726 Then alter OP0 to refer to that word. */
1727 bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1728 offset -= (offset % (total_bits / BITS_PER_UNIT));
1729 op0 = adjust_address (op0, mode, offset);
1730 }
1731
1732 mode = GET_MODE (op0);
1733
1734 if (BYTES_BIG_ENDIAN)
1735 /* BITPOS is the distance between our msb and that of OP0.
1736 Convert it to the distance from the lsb. */
1737 bitpos = total_bits - bitsize - bitpos;
1738
1739 /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1740 We have reduced the big-endian case to the little-endian case. */
1741
1742 if (unsignedp)
1743 {
1744 if (bitpos)
1745 {
1746 /* If the field does not already start at the lsb,
1747 shift it so it does. */
1748 tree amount = build_int_cst (NULL_TREE, bitpos);
1749 /* Maybe propagate the target for the shift. */
1750 /* But not if we will return it--could confuse integrate.c. */
1751 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1752 if (tmode != mode) subtarget = 0;
1753 op0 = expand_shift (RSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1754 }
1755 /* Convert the value to the desired mode. */
1756 if (mode != tmode)
1757 op0 = convert_to_mode (tmode, op0, 1);
1758
1759 /* Unless the msb of the field used to be the msb when we shifted,
1760 mask out the upper bits. */
1761
1762 if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
1763 return expand_binop (GET_MODE (op0), and_optab, op0,
1764 mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1765 target, 1, OPTAB_LIB_WIDEN);
1766 return op0;
1767 }
1768
1769 /* To extract a signed bit-field, first shift its msb to the msb of the word,
1770 then arithmetic-shift its lsb to the lsb of the word. */
1771 op0 = force_reg (mode, op0);
1772 if (mode != tmode)
1773 target = 0;
1774
1775 /* Find the narrowest integer mode that contains the field. */
1776
1777 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1778 mode = GET_MODE_WIDER_MODE (mode))
1779 if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1780 {
1781 op0 = convert_to_mode (mode, op0, 0);
1782 break;
1783 }
1784
1785 if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1786 {
1787 tree amount
1788 = build_int_cst (NULL_TREE,
1789 GET_MODE_BITSIZE (mode) - (bitsize + bitpos));
1790 /* Maybe propagate the target for the shift. */
1791 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1792 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1793 }
1794
1795 return expand_shift (RSHIFT_EXPR, mode, op0,
1796 build_int_cst (NULL_TREE,
1797 GET_MODE_BITSIZE (mode) - bitsize),
1798 target, 0);
1799 }
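
/* [Editorial sketch -- not part of expmed.c.]  The routine above reduces
   every in-word extraction to shifts and masks.  The standalone helpers
   below (hypothetical names) mirror that logic on a plain 32-bit word:
   shift the field down to bit 0 and mask for the unsigned case, or use a
   left shift followed by an arithmetic right shift for the signed case.
   Assumes the usual arithmetic right shift of negative values.  */

#include <stdint.h>

static uint32_t
extract_bits_unsigned (uint32_t word, unsigned bitpos, unsigned bitsize)
{
  uint32_t v = word >> bitpos;            /* field lsb now at bit 0 */
  if (bitpos + bitsize < 32)              /* mask unless msb is already at the top */
    v &= ((uint32_t) 1 << bitsize) - 1;
  return v;
}

static int32_t
extract_bits_signed (uint32_t word, unsigned bitpos, unsigned bitsize)
{
  /* Shift the field's msb up to the word's msb, then arithmetic-shift
     its lsb back down to bit 0, which also sign-extends it.  */
  int32_t v = (int32_t) (word << (32 - bitsize - bitpos));
  return v >> (32 - bitsize);
}

/* e.g. extract_bits_unsigned (0xF0, 4, 4) == 15,
        extract_bits_signed   (0xF0, 4, 4) == -1.  */
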
1800 \f
1801 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1802 of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1803 complement of that if COMPLEMENT. The mask is truncated if
1804 necessary to the width of mode MODE. The mask is zero-extended if
1805 BITSIZE+BITPOS is too small for MODE. */
1806
1807 static rtx
1808 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1809 {
1810 HOST_WIDE_INT masklow, maskhigh;
1811
1812 if (bitsize == 0)
1813 masklow = 0;
1814 else if (bitpos < HOST_BITS_PER_WIDE_INT)
1815 masklow = (HOST_WIDE_INT) -1 << bitpos;
1816 else
1817 masklow = 0;
1818
1819 if (bitpos + bitsize < HOST_BITS_PER_WIDE_INT)
1820 masklow &= ((unsigned HOST_WIDE_INT) -1
1821 >> (HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1822
1823 if (bitpos <= HOST_BITS_PER_WIDE_INT)
1824 maskhigh = -1;
1825 else
1826 maskhigh = (HOST_WIDE_INT) -1 << (bitpos - HOST_BITS_PER_WIDE_INT);
1827
1828 if (bitsize == 0)
1829 maskhigh = 0;
1830 else if (bitpos + bitsize > HOST_BITS_PER_WIDE_INT)
1831 maskhigh &= ((unsigned HOST_WIDE_INT) -1
1832 >> (2 * HOST_BITS_PER_WIDE_INT - bitpos - bitsize));
1833 else
1834 maskhigh = 0;
1835
1836 if (complement)
1837 {
1838 maskhigh = ~maskhigh;
1839 masklow = ~masklow;
1840 }
1841
1842 return immed_double_const (masklow, maskhigh, mode);
1843 }
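
/* [Editorial sketch -- not part of expmed.c.]  When the mask fits in a
   single 64-bit host word, the low half computed by mask_rtx reduces to
   the expression below; the high-word handling for double-word masks is
   omitted.  The helper name is illustrative only.  */

#include <stdint.h>

static uint64_t
mask64 (unsigned bitpos, unsigned bitsize, int complement)
{
  /* BITSIZE ones followed by BITPOS zeros; assumes bitpos + bitsize <= 64.  */
  uint64_t ones = (bitsize >= 64
                   ? ~(uint64_t) 0
                   : ((uint64_t) 1 << bitsize) - 1);
  uint64_t mask = ones << bitpos;
  return complement ? ~mask : mask;
}

/* e.g. mask64 (3, 2, 0) == 0x18 and mask64 (0, 8, 1) == ~(uint64_t) 0xff.  */
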
1844
1845 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1846 VALUE truncated to BITSIZE bits and then shifted left BITPOS bits. */
1847
1848 static rtx
1849 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1850 {
1851 unsigned HOST_WIDE_INT v = INTVAL (value);
1852 HOST_WIDE_INT low, high;
1853
1854 if (bitsize < HOST_BITS_PER_WIDE_INT)
1855 v &= ~((HOST_WIDE_INT) -1 << bitsize);
1856
1857 if (bitpos < HOST_BITS_PER_WIDE_INT)
1858 {
1859 low = v << bitpos;
1860 high = (bitpos > 0 ? (v >> (HOST_BITS_PER_WIDE_INT - bitpos)) : 0);
1861 }
1862 else
1863 {
1864 low = 0;
1865 high = v << (bitpos - HOST_BITS_PER_WIDE_INT);
1866 }
1867
1868 return immed_double_const (low, high, mode);
1869 }
1870 \f
1871 /* Extract a bit field from a memory by forcing the alignment of the
1872 memory. This is efficient only if the field spans at least 4 boundaries.
1873
1874 OP0 is the MEM.
1875 BITSIZE is the field width; BITPOS is the position of the first bit.
1876 UNSIGNEDP is true if the result should be zero-extended. */
1877
1878 static rtx
1879 extract_force_align_mem_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1880 unsigned HOST_WIDE_INT bitpos,
1881 int unsignedp)
1882 {
1883 enum machine_mode mode, dmode;
1884 unsigned int m_bitsize, m_size;
1885 unsigned int sign_shift_up, sign_shift_dn;
1886 rtx base, a1, a2, v1, v2, comb, shift, result, start;
1887
1888 /* Choose a mode that will fit BITSIZE. */
1889 mode = smallest_mode_for_size (bitsize, MODE_INT);
1890 m_size = GET_MODE_SIZE (mode);
1891 m_bitsize = GET_MODE_BITSIZE (mode);
1892
1893 /* Choose a mode twice as wide. Fail if no such mode exists. */
1894 dmode = mode_for_size (m_bitsize * 2, MODE_INT, false);
1895 if (dmode == BLKmode)
1896 return NULL;
1897
1898 do_pending_stack_adjust ();
1899 start = get_last_insn ();
1900
1901 /* At the end, we'll need an additional shift to deal with sign/zero
1902 extension. By default this will be a left+right shift of the
1903 appropriate size. But we may be able to eliminate one of them. */
1904 sign_shift_up = sign_shift_dn = m_bitsize - bitsize;
1905
1906 if (STRICT_ALIGNMENT)
1907 {
1908 base = plus_constant (XEXP (op0, 0), bitpos / BITS_PER_UNIT);
1909 bitpos %= BITS_PER_UNIT;
1910
1911 /* We load two values to be concatenated. There's an edge condition
1912 that bears notice -- for an aligned value at the end of a page we can
1913 only load one value lest we segfault. So the two values we load
1914 are at "base & -size" and "(base + size - 1) & -size". If base
1915 is unaligned, the addresses will be aligned and sequential; if
1916 base is aligned, the addresses will both be equal to base. */
1917
1918 a1 = expand_simple_binop (Pmode, AND, force_operand (base, NULL),
1919 GEN_INT (-(HOST_WIDE_INT)m_size),
1920 NULL, true, OPTAB_LIB_WIDEN);
1921 mark_reg_pointer (a1, m_bitsize);
1922 v1 = gen_rtx_MEM (mode, a1);
1923 set_mem_align (v1, m_bitsize);
1924 v1 = force_reg (mode, validize_mem (v1));
1925
1926 a2 = plus_constant (base, GET_MODE_SIZE (mode) - 1);
1927 a2 = expand_simple_binop (Pmode, AND, force_operand (a2, NULL),
1928 GEN_INT (-(HOST_WIDE_INT)m_size),
1929 NULL, true, OPTAB_LIB_WIDEN);
1930 v2 = gen_rtx_MEM (mode, a2);
1931 set_mem_align (v2, m_bitsize);
1932 v2 = force_reg (mode, validize_mem (v2));
1933
1934 /* Combine these two values into a double-word value. */
1935 if (m_bitsize == BITS_PER_WORD)
1936 {
1937 comb = gen_reg_rtx (dmode);
1938 emit_insn (gen_rtx_CLOBBER (VOIDmode, comb));
1939 emit_move_insn (gen_rtx_SUBREG (mode, comb, 0), v1);
1940 emit_move_insn (gen_rtx_SUBREG (mode, comb, m_size), v2);
1941 }
1942 else
1943 {
1944 if (BYTES_BIG_ENDIAN)
1945 comb = v1, v1 = v2, v2 = comb;
1946 v1 = convert_modes (dmode, mode, v1, true);
1947 if (v1 == NULL)
1948 goto fail;
1949 v2 = convert_modes (dmode, mode, v2, true);
1950 v2 = expand_simple_binop (dmode, ASHIFT, v2, GEN_INT (m_bitsize),
1951 NULL, true, OPTAB_LIB_WIDEN);
1952 if (v2 == NULL)
1953 goto fail;
1954 comb = expand_simple_binop (dmode, IOR, v1, v2, NULL,
1955 true, OPTAB_LIB_WIDEN);
1956 if (comb == NULL)
1957 goto fail;
1958 }
1959
1960 shift = expand_simple_binop (Pmode, AND, base, GEN_INT (m_size - 1),
1961 NULL, true, OPTAB_LIB_WIDEN);
1962 shift = expand_mult (Pmode, shift, GEN_INT (BITS_PER_UNIT), NULL, 1);
1963
1964 if (bitpos != 0)
1965 {
1966 if (sign_shift_up <= bitpos)
1967 bitpos -= sign_shift_up, sign_shift_up = 0;
1968 shift = expand_simple_binop (Pmode, PLUS, shift, GEN_INT (bitpos),
1969 NULL, true, OPTAB_LIB_WIDEN);
1970 }
1971 }
1972 else
1973 {
1974 unsigned HOST_WIDE_INT offset = bitpos / BITS_PER_UNIT;
1975 bitpos %= BITS_PER_UNIT;
1976
1977 /* When strict alignment is not required, we can just load directly
1978 from memory without masking. If the remaining BITPOS offset is
1979 small enough, we may be able to do all operations in MODE as
1980 opposed to DMODE. */
1981 if (bitpos + bitsize <= m_bitsize)
1982 dmode = mode;
1983 comb = adjust_address (op0, dmode, offset);
1984
1985 if (sign_shift_up <= bitpos)
1986 bitpos -= sign_shift_up, sign_shift_up = 0;
1987 shift = GEN_INT (bitpos);
1988 }
1989
1990 /* Shift down the double-word such that the requested value is at bit 0. */
1991 if (shift != const0_rtx)
1992 comb = expand_simple_binop (dmode, unsignedp ? LSHIFTRT : ASHIFTRT,
1993 comb, shift, NULL, unsignedp, OPTAB_LIB_WIDEN);
1994 if (comb == NULL)
1995 goto fail;
1996
1997 /* If the field exactly matches MODE, then all we need to do is return the
1998 lowpart. Otherwise, shift to get the sign bits set properly. */
1999 result = force_reg (mode, gen_lowpart (mode, comb));
2000
2001 if (sign_shift_up)
2002 result = expand_simple_binop (mode, ASHIFT, result,
2003 GEN_INT (sign_shift_up),
2004 NULL_RTX, 0, OPTAB_LIB_WIDEN);
2005 if (sign_shift_dn)
2006 result = expand_simple_binop (mode, unsignedp ? LSHIFTRT : ASHIFTRT,
2007 result, GEN_INT (sign_shift_dn),
2008 NULL_RTX, 0, OPTAB_LIB_WIDEN);
2009
2010 return result;
2011
2012 fail:
2013 delete_insns_since (start);
2014 return NULL;
2015 }
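
/* [Editorial sketch -- not part of expmed.c.]  The STRICT_ALIGNMENT path
   above loads the two aligned words surrounding the field and concatenates
   them so that one shift brings the field down to bit 0.  A host-side
   illustration of the same idea (slightly generalized so the loads never
   reach past the word containing the field's last byte), assuming a
   little-endian host and BITSIZE <= 32; names hypothetical:  */

#include <stdint.h>
#include <string.h>

static uint32_t
load_unaligned_field (const unsigned char *buf, unsigned bitpos,
                      unsigned bitsize)
{
  const unsigned char *first = buf + bitpos / 8;
  unsigned subbit = bitpos % 8;
  const unsigned char *last = first + (subbit + bitsize - 1) / 8;
  unsigned misalign = (unsigned) ((uintptr_t) first & 3);
  uint32_t lo, hi;
  uint64_t comb;

  /* Align the first and last byte of the field down to a word boundary;
     the two loads coincide when the field fits in one aligned word, so
     we never read past the word that actually holds the field.  */
  memcpy (&lo, first - misalign, 4);
  memcpy (&hi, last - (unsigned) ((uintptr_t) last & 3), 4);

  /* Concatenate, shift the field to bit 0, and mask.  */
  comb = ((uint64_t) hi << 32) | lo;
  comb >>= misalign * 8 + subbit;
  return bitsize >= 32
         ? (uint32_t) comb
         : (uint32_t) comb & (((uint32_t) 1 << bitsize) - 1);
}
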
2016
2017 /* Extract a bit field that is split across two words
2018 and return an RTX for the result.
2019
2020 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
2021 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
2022 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend. */
2023
2024 static rtx
2025 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
2026 unsigned HOST_WIDE_INT bitpos, int unsignedp)
2027 {
2028 unsigned int unit;
2029 unsigned int bitsdone = 0;
2030 rtx result = NULL_RTX;
2031 int first = 1;
2032
2033 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
2034 much at a time. */
2035 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
2036 unit = BITS_PER_WORD;
2037 else
2038 {
2039 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
2040 if (0 && bitsize / unit > 2)
2041 {
2042 rtx tmp = extract_force_align_mem_bit_field (op0, bitsize, bitpos,
2043 unsignedp);
2044 if (tmp)
2045 return tmp;
2046 }
2047 }
2048
2049 while (bitsdone < bitsize)
2050 {
2051 unsigned HOST_WIDE_INT thissize;
2052 rtx part, word;
2053 unsigned HOST_WIDE_INT thispos;
2054 unsigned HOST_WIDE_INT offset;
2055
2056 offset = (bitpos + bitsdone) / unit;
2057 thispos = (bitpos + bitsdone) % unit;
2058
2059 /* THISSIZE must not overrun a word boundary. Otherwise,
2060 extract_fixed_bit_field will call us again, and we will mutually
2061 recurse forever. */
2062 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2063 thissize = MIN (thissize, unit - thispos);
2064
2065 /* If OP0 is a register, then handle OFFSET here.
2066
2067 When handling multiword bitfields, extract_bit_field may pass
2068 down a word_mode SUBREG of a larger REG for a bitfield that actually
2069 crosses a word boundary. Thus, for a SUBREG, we must find
2070 the current word starting from the base register. */
2071 if (GET_CODE (op0) == SUBREG)
2072 {
2073 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
2074 word = operand_subword_force (SUBREG_REG (op0), word_offset,
2075 GET_MODE (SUBREG_REG (op0)));
2076 offset = 0;
2077 }
2078 else if (REG_P (op0))
2079 {
2080 word = operand_subword_force (op0, offset, GET_MODE (op0));
2081 offset = 0;
2082 }
2083 else
2084 word = op0;
2085
2086 /* Extract the parts in bit-counting order,
2087 whose meaning is determined by BYTES_PER_UNIT.
2088 OFFSET is in UNITs, and UNIT is in bits.
2089 extract_fixed_bit_field wants offset in bytes. */
2090 part = extract_fixed_bit_field (word_mode, word,
2091 offset * unit / BITS_PER_UNIT,
2092 thissize, thispos, 0, 1);
2093 bitsdone += thissize;
2094
2095 /* Shift this part into place for the result. */
2096 if (BYTES_BIG_ENDIAN)
2097 {
2098 if (bitsize != bitsdone)
2099 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2100 build_int_cst (NULL_TREE, bitsize - bitsdone),
2101 0, 1);
2102 }
2103 else
2104 {
2105 if (bitsdone != thissize)
2106 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2107 build_int_cst (NULL_TREE,
2108 bitsdone - thissize), 0, 1);
2109 }
2110
2111 if (first)
2112 result = part;
2113 else
2114 /* Combine the parts with bitwise or. This works
2115 because we extracted each part as an unsigned bit field. */
2116 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2117 OPTAB_LIB_WIDEN);
2118
2119 first = 0;
2120 }
2121
2122 /* Unsigned bit field: we are done. */
2123 if (unsignedp)
2124 return result;
2125 /* Signed bit field: sign-extend with two arithmetic shifts. */
2126 result = expand_shift (LSHIFT_EXPR, word_mode, result,
2127 build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
2128 NULL_RTX, 0);
2129 return expand_shift (RSHIFT_EXPR, word_mode, result,
2130 build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
2131 NULL_RTX, 0);
2132 }
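
/* [Editorial sketch -- not part of expmed.c.]  The loop above pulls the
   field out one word-sized piece at a time, ORs the pieces together, and
   finally sign-extends with two arithmetic shifts.  The same steps on a
   field straddling two host 32-bit words (little-endian word order,
   BITPOS counted from bit 0 of LOW, bitpos + bitsize > 32, bitsize <= 32;
   name hypothetical, arithmetic right shift assumed):  */

#include <stdint.h>

static int32_t
extract_split_field (uint32_t low, uint32_t high,
                     unsigned bitpos, unsigned bitsize)
{
  unsigned lowbits = 32 - bitpos;                   /* bits taken from LOW */
  uint32_t part0 = low >> bitpos;                   /* low piece, at bit 0 */
  uint32_t part1 = high & (((uint32_t) 1 << (bitsize - lowbits)) - 1);
  uint32_t field = part0 | (part1 << lowbits);      /* combine with IOR    */

  /* Sign-extend: left shift, then arithmetic right shift.  */
  return (int32_t) (field << (32 - bitsize)) >> (32 - bitsize);
}
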
2133 \f
2134 /* Add INC into TARGET. */
2135
2136 void
2137 expand_inc (rtx target, rtx inc)
2138 {
2139 rtx value = expand_binop (GET_MODE (target), add_optab,
2140 target, inc,
2141 target, 0, OPTAB_LIB_WIDEN);
2142 if (value != target)
2143 emit_move_insn (target, value);
2144 }
2145
2146 /* Subtract DEC from TARGET. */
2147
2148 void
2149 expand_dec (rtx target, rtx dec)
2150 {
2151 rtx value = expand_binop (GET_MODE (target), sub_optab,
2152 target, dec,
2153 target, 0, OPTAB_LIB_WIDEN);
2154 if (value != target)
2155 emit_move_insn (target, value);
2156 }
2157 \f
2158 /* Output a shift instruction for expression code CODE,
2159 with SHIFTED being the rtx for the value to shift,
2160 and AMOUNT the tree for the amount to shift by.
2161 Store the result in the rtx TARGET, if that is convenient.
2162 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2163 Return the rtx for where the value is. */
2164
2165 rtx
2166 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2167 tree amount, rtx target, int unsignedp)
2168 {
2169 rtx op1, temp = 0;
2170 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2171 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2172 int try;
2173
2174 /* We previously detected shift counts computed by NEGATE_EXPR and
2175 shifted in the other direction, but that does not work on all
2176 machines. */
2177
2178 op1 = expand_expr (amount, NULL_RTX, VOIDmode, 0);
2179
2180 if (SHIFT_COUNT_TRUNCATED)
2181 {
2182 if (GET_CODE (op1) == CONST_INT
2183 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2184 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2185 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2186 % GET_MODE_BITSIZE (mode));
2187 else if (GET_CODE (op1) == SUBREG
2188 && subreg_lowpart_p (op1))
2189 op1 = SUBREG_REG (op1);
2190 }
2191
2192 if (op1 == const0_rtx)
2193 return shifted;
2194
2195 /* Check whether it's cheaper to implement a left shift by a constant
2196 bit count as a sequence of additions. */
2197 if (code == LSHIFT_EXPR
2198 && GET_CODE (op1) == CONST_INT
2199 && INTVAL (op1) > 0
2200 && INTVAL (op1) < GET_MODE_BITSIZE (mode)
2201 && shift_cost[mode][INTVAL (op1)] > INTVAL (op1) * add_cost[mode])
2202 {
2203 int i;
2204 for (i = 0; i < INTVAL (op1); i++)
2205 {
2206 temp = force_reg (mode, shifted);
2207 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2208 unsignedp, OPTAB_LIB_WIDEN);
2209 }
2210 return shifted;
2211 }
2212
2213 for (try = 0; temp == 0 && try < 3; try++)
2214 {
2215 enum optab_methods methods;
2216
2217 if (try == 0)
2218 methods = OPTAB_DIRECT;
2219 else if (try == 1)
2220 methods = OPTAB_WIDEN;
2221 else
2222 methods = OPTAB_LIB_WIDEN;
2223
2224 if (rotate)
2225 {
2226 /* Widening does not work for rotation. */
2227 if (methods == OPTAB_WIDEN)
2228 continue;
2229 else if (methods == OPTAB_LIB_WIDEN)
2230 {
2231 /* If we have been unable to open-code this by a rotation,
2232 do it as the IOR of two shifts. I.e., to rotate A
2233 by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2234 where C is the bitsize of A.
2235
2236 It is theoretically possible that the target machine might
2237 not be able to perform either shift and hence we would
2238 be making two libcalls rather than just the one for the
2239 shift (similarly if IOR could not be done). We will allow
2240 this extremely unlikely lossage to avoid complicating the
2241 code below. */
2242
2243 rtx subtarget = target == shifted ? 0 : target;
2244 rtx temp1;
2245 tree type = TREE_TYPE (amount);
2246 tree new_amount = make_tree (type, op1);
2247 tree other_amount
2248 = fold (build2 (MINUS_EXPR, type, convert
2249 (type, build_int_cst
2250 (NULL_TREE, GET_MODE_BITSIZE (mode))),
2251 amount));
2252
2253 shifted = force_reg (mode, shifted);
2254
2255 temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2256 mode, shifted, new_amount, subtarget, 1);
2257 temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2258 mode, shifted, other_amount, 0, 1);
2259 return expand_binop (mode, ior_optab, temp, temp1, target,
2260 unsignedp, methods);
2261 }
2262
2263 temp = expand_binop (mode,
2264 left ? rotl_optab : rotr_optab,
2265 shifted, op1, target, unsignedp, methods);
2266
2267 /* If we don't have the rotate, but we are rotating by a constant
2268 that is in range, try a rotate in the opposite direction. */
2269
2270 if (temp == 0 && GET_CODE (op1) == CONST_INT
2271 && INTVAL (op1) > 0
2272 && (unsigned int) INTVAL (op1) < GET_MODE_BITSIZE (mode))
2273 temp = expand_binop (mode,
2274 left ? rotr_optab : rotl_optab,
2275 shifted,
2276 GEN_INT (GET_MODE_BITSIZE (mode)
2277 - INTVAL (op1)),
2278 target, unsignedp, methods);
2279 }
2280 else if (unsignedp)
2281 temp = expand_binop (mode,
2282 left ? ashl_optab : lshr_optab,
2283 shifted, op1, target, unsignedp, methods);
2284
2285 /* Do arithmetic shifts.
2286 Also, if we are going to widen the operand, we can just as well
2287 use an arithmetic right-shift instead of a logical one. */
2288 if (temp == 0 && ! rotate
2289 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2290 {
2291 enum optab_methods methods1 = methods;
2292
2293 /* If trying to widen a log shift to an arithmetic shift,
2294 don't accept an arithmetic shift of the same size. */
2295 if (unsignedp)
2296 methods1 = OPTAB_MUST_WIDEN;
2297
2298 /* Arithmetic shift */
2299
2300 temp = expand_binop (mode,
2301 left ? ashl_optab : ashr_optab,
2302 shifted, op1, target, unsignedp, methods1);
2303 }
2304
2305 /* We used to try extzv here for logical right shifts, but that was
2306 only useful for one machine, the VAX, and caused poor code
2307 generation there for lshrdi3, so the code was deleted and a
2308 define_expand for lshrsi3 was added to vax.md. */
2309 }
2310
2311 gcc_assert (temp);
2312 return temp;
2313 }
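
/* [Editorial sketch -- not part of expmed.c.]  The OPTAB_LIB_WIDEN branch
   above open-codes a missing rotate as the IOR of two shifts,
   (A << N) | ((unsigned) A >> (C - N)).  The same idiom in C, with the
   usual "& 31" so the right-shift count stays defined when N is 0:  */

#include <stdint.h>

static uint32_t
rotl32 (uint32_t a, unsigned n)   /* 0 <= n < 32 */
{
  return (a << n) | (a >> ((32 - n) & 31));
}
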
2314 \f
2315 enum alg_code { alg_unknown, alg_zero, alg_m, alg_shift,
2316 alg_add_t_m2, alg_sub_t_m2,
2317 alg_add_factor, alg_sub_factor,
2318 alg_add_t2_m, alg_sub_t2_m };
2319
2320 /* This structure holds the "cost" of a multiply sequence. The
2321 "cost" field holds the total rtx_cost of every operator in the
2322 synthetic multiplication sequence, hence cost(a op b) is defined
2323 as rtx_cost(op) + cost(a) + cost(b), where cost(leaf) is zero.
2324 The "latency" field holds the minimum possible latency of the
2325 synthetic multiply, on a hypothetical infinitely parallel CPU.
2326 This is the critical path, or the maximum height, of the expression
2327 tree which is the sum of rtx_costs on the most expensive path from
2328 any leaf to the root. Hence latency(a op b) is defined as zero for
2329 leaves and rtx_cost(op) + max(latency(a), latency(b)) otherwise. */
2330
2331 struct mult_cost {
2332 short cost; /* Total rtx_cost of the multiplication sequence. */
2333 short latency; /* The latency of the multiplication sequence. */
2334 };
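
/* [Editorial note -- not part of expmed.c.]  Applying the definitions
   above to a four-operand sum: the balanced tree (a + b) + (c + d) has
   cost 3 * add_cost but latency only 2 * add_cost, because the two inner
   additions can issue in parallel, while the serial chain ((a + b) + c) + d
   has the same cost and latency 3 * add_cost.  The comparison macros
   below prefer the lower latency when costs are tied.  */
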
2335
2336 /* This macro is used to compare a pointer to a mult_cost against a
2337 single integer "rtx_cost" value. This is equivalent to the macro
2338 CHEAPER_MULT_COST(X,Z) where Z = {Y,Y}. */
2339 #define MULT_COST_LESS(X,Y) ((X)->cost < (Y) \
2340 || ((X)->cost == (Y) && (X)->latency < (Y)))
2341
2342 /* This macro is used to compare two pointers to mult_costs against
2343 each other. The macro returns true if X is cheaper than Y.
2344 Currently, the cheaper of two mult_costs is the one with the
2345 lower "cost". If "cost"s are tied, the lower latency is cheaper. */
2346 #define CHEAPER_MULT_COST(X,Y) ((X)->cost < (Y)->cost \
2347 || ((X)->cost == (Y)->cost \
2348 && (X)->latency < (Y)->latency))
2349
2350 /* This structure records a sequence of operations.
2351 `ops' is the number of operations recorded.
2352 `cost' is their total cost.
2353 The operations are stored in `op' and the corresponding
2354 logarithms of the integer coefficients in `log'.
2355
2356 These are the operations:
2357 alg_zero total := 0;
2358 alg_m total := multiplicand;
2359 alg_shift total := total * coeff
2360 alg_add_t_m2 total := total + multiplicand * coeff;
2361 alg_sub_t_m2 total := total - multiplicand * coeff;
2362 alg_add_factor total := total * coeff + total;
2363 alg_sub_factor total := total * coeff - total;
2364 alg_add_t2_m total := total * coeff + multiplicand;
2365 alg_sub_t2_m total := total * coeff - multiplicand;
2366
2367 The first operand must be either alg_zero or alg_m. */
2368
2369 struct algorithm
2370 {
2371 struct mult_cost cost;
2372 short ops;
2373 /* The sizes of the OP and LOG fields are not directly related to the
2374 word size, but the worst case arises when the multiplicand has few
2375 consecutive ones or zeros, i.e., looks like 10101010101...
2376 In that case we will generate shift-by-2, add, shift-by-2, add, ...,
2377 wordsize operations in total. */
2378 enum alg_code op[MAX_BITS_PER_WORD];
2379 char log[MAX_BITS_PER_WORD];
2380 };
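
/* [Editorial sketch -- not part of expmed.c.]  As a concrete instance of
   the encoding above, multiplying by 10 can be recorded as
   op = { alg_m, alg_add_t2_m, alg_shift } with log = { 0, 2, 1 }, which
   expand_mult_const later replays as the sequence below (hypothetical
   helper, shown for a 32-bit word):  */

#include <stdint.h>

static uint32_t
times10 (uint32_t x)
{
  uint32_t total = x;             /* alg_m                            */
  total = (total << 2) + x;       /* alg_add_t2_m, log == 2  (5 * x)  */
  total = total << 1;             /* alg_shift,    log == 1 (10 * x)  */
  return total;
}
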
2381
2382 /* The entry for our multiplication cache/hash table. */
2383 struct alg_hash_entry {
2384 /* The number we are multiplying by. */
2385 unsigned int t;
2386
2387 /* The mode in which we are multiplying something by T. */
2388 enum machine_mode mode;
2389
2390 /* The best multiplication algorithm for t. */
2391 enum alg_code alg;
2392 };
2393
2394 /* The number of cache/hash entries. */
2395 #define NUM_ALG_HASH_ENTRIES 307
2396
2397 /* Each entry of ALG_HASH caches alg_code for some integer. This is
2398 actually a hash table. If we have a collision, the older
2399 entry is kicked out. */
2400 static struct alg_hash_entry alg_hash[NUM_ALG_HASH_ENTRIES];
2401
2402 /* Indicates the type of fixup needed after a constant multiplication.
2403 BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2404 the result should be negated, and ADD_VARIANT means that the
2405 multiplicand should be added to the result. */
2406 enum mult_variant {basic_variant, negate_variant, add_variant};
2407
2408 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2409 const struct mult_cost *, enum machine_mode mode);
2410 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2411 struct algorithm *, enum mult_variant *, int);
2412 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2413 const struct algorithm *, enum mult_variant);
2414 static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
2415 int, unsigned HOST_WIDE_INT *,
2416 int *, int *);
2417 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2418 static rtx extract_high_half (enum machine_mode, rtx);
2419 static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2420 int, int);
2421 /* Compute and return the best algorithm for multiplying by T.
2422 The algorithm must cost less than COST_LIMIT.
2423 If retval.cost >= COST_LIMIT, no algorithm was found and all
2424 other fields of the returned struct are undefined.
2425 MODE is the machine mode of the multiplication. */
2426
2427 static void
2428 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2429 const struct mult_cost *cost_limit, enum machine_mode mode)
2430 {
2431 int m;
2432 struct algorithm *alg_in, *best_alg;
2433 struct mult_cost best_cost;
2434 struct mult_cost new_limit;
2435 int op_cost, op_latency;
2436 unsigned HOST_WIDE_INT q;
2437 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
2438 int hash_index;
2439 bool cache_hit = false;
2440 enum alg_code cache_alg = alg_zero;
2441
2442 /* Indicate that no algorithm is yet found. If no algorithm
2443 is found, this value will be returned and indicate failure. */
2444 alg_out->cost.cost = cost_limit->cost + 1;
2445 alg_out->cost.latency = cost_limit->latency + 1;
2446
2447 if (cost_limit->cost < 0
2448 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2449 return;
2450
2451 /* Restrict the bits of "t" to the multiplication's mode. */
2452 t &= GET_MODE_MASK (mode);
2453
2454 /* t == 1 can be done in zero cost. */
2455 if (t == 1)
2456 {
2457 alg_out->ops = 1;
2458 alg_out->cost.cost = 0;
2459 alg_out->cost.latency = 0;
2460 alg_out->op[0] = alg_m;
2461 return;
2462 }
2463
2464 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2465 fail now. */
2466 if (t == 0)
2467 {
2468 if (MULT_COST_LESS (cost_limit, zero_cost))
2469 return;
2470 else
2471 {
2472 alg_out->ops = 1;
2473 alg_out->cost.cost = zero_cost;
2474 alg_out->cost.latency = zero_cost;
2475 alg_out->op[0] = alg_zero;
2476 return;
2477 }
2478 }
2479
2480 /* We'll be needing a couple extra algorithm structures now. */
2481
2482 alg_in = alloca (sizeof (struct algorithm));
2483 best_alg = alloca (sizeof (struct algorithm));
2484 best_cost = *cost_limit;
2485
2486 /* Compute the hash index. */
2487 hash_index = (t ^ (unsigned int) mode) % NUM_ALG_HASH_ENTRIES;
2488
2489 /* See if we already know what to do for T. */
2490 if (alg_hash[hash_index].t == t
2491 && alg_hash[hash_index].mode == mode
2492 && alg_hash[hash_index].alg != alg_unknown)
2493 {
2494 cache_hit = true;
2495 cache_alg = alg_hash[hash_index].alg;
2496 switch (cache_alg)
2497 {
2498 case alg_shift:
2499 goto do_alg_shift;
2500
2501 case alg_add_t_m2:
2502 case alg_sub_t_m2:
2503 goto do_alg_addsub_t_m2;
2504
2505 case alg_add_factor:
2506 case alg_sub_factor:
2507 goto do_alg_addsub_factor;
2508
2509 case alg_add_t2_m:
2510 goto do_alg_add_t2_m;
2511
2512 case alg_sub_t2_m:
2513 goto do_alg_sub_t2_m;
2514
2515 default:
2516 gcc_unreachable ();
2517 }
2518 }
2519
2520 /* If we have a group of zero bits at the low-order part of T, try
2521 multiplying by the remaining bits and then doing a shift. */
2522
2523 if ((t & 1) == 0)
2524 {
2525 do_alg_shift:
2526 m = floor_log2 (t & -t); /* m = number of low zero bits */
2527 if (m < maxm)
2528 {
2529 q = t >> m;
2530 /* The function expand_shift will choose between a shift and
2531 a sequence of additions, so the observed cost is given as
2532 MIN (m * add_cost[mode], shift_cost[mode][m]). */
2533 op_cost = m * add_cost[mode];
2534 if (shift_cost[mode][m] < op_cost)
2535 op_cost = shift_cost[mode][m];
2536 new_limit.cost = best_cost.cost - op_cost;
2537 new_limit.latency = best_cost.latency - op_cost;
2538 synth_mult (alg_in, q, &new_limit, mode);
2539
2540 alg_in->cost.cost += op_cost;
2541 alg_in->cost.latency += op_cost;
2542 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2543 {
2544 struct algorithm *x;
2545 best_cost = alg_in->cost;
2546 x = alg_in, alg_in = best_alg, best_alg = x;
2547 best_alg->log[best_alg->ops] = m;
2548 best_alg->op[best_alg->ops] = alg_shift;
2549 }
2550 }
2551 if (cache_hit)
2552 goto done;
2553 }
2554
2555 /* If we have an odd number, add or subtract one. */
2556 if ((t & 1) != 0)
2557 {
2558 unsigned HOST_WIDE_INT w;
2559
2560 do_alg_addsub_t_m2:
2561 for (w = 1; (w & t) != 0; w <<= 1)
2562 ;
2563 /* If T was -1, then W will be zero after the loop. This is another
2564 case where T ends with ...111. Handling this by multiplying by
2565 (T + 1) and subtracting 1 produces slightly better code and makes
2566 algorithm selection much faster than treating it like the ...0111
2567 case below. */
2568 if (w == 0
2569 || (w > 2
2570 /* Reject the case where t is 3.
2571 Thus we prefer addition in that case. */
2572 && t != 3))
2573 {
2574 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
2575
2576 op_cost = add_cost[mode];
2577 new_limit.cost = best_cost.cost - op_cost;
2578 new_limit.latency = best_cost.latency - op_cost;
2579 synth_mult (alg_in, t + 1, &new_limit, mode);
2580
2581 alg_in->cost.cost += op_cost;
2582 alg_in->cost.latency += op_cost;
2583 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2584 {
2585 struct algorithm *x;
2586 best_cost = alg_in->cost;
2587 x = alg_in, alg_in = best_alg, best_alg = x;
2588 best_alg->log[best_alg->ops] = 0;
2589 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2590 }
2591 }
2592 else
2593 {
2594 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
2595
2596 op_cost = add_cost[mode];
2597 new_limit.cost = best_cost.cost - op_cost;
2598 new_limit.latency = best_cost.latency - op_cost;
2599 synth_mult (alg_in, t - 1, &new_limit, mode);
2600
2601 alg_in->cost.cost += op_cost;
2602 alg_in->cost.latency += op_cost;
2603 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2604 {
2605 struct algorithm *x;
2606 best_cost = alg_in->cost;
2607 x = alg_in, alg_in = best_alg, best_alg = x;
2608 best_alg->log[best_alg->ops] = 0;
2609 best_alg->op[best_alg->ops] = alg_add_t_m2;
2610 }
2611 }
2612 if (cache_hit)
2613 goto done;
2614 }
2615
2616 /* Look for factors of t of the form
2617 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2618 If we find such a factor, we can multiply by t using an algorithm that
2619 multiplies by q, shift the result by m and add/subtract it to itself.
2620
2621 We search for large factors first and loop down, even if large factors
2622 are less probable than small; if we find a large factor we will find a
2623 good sequence quickly, and therefore be able to prune (by decreasing
2624 COST_LIMIT) the search. */
2625
2626 do_alg_addsub_factor:
2627 for (m = floor_log2 (t - 1); m >= 2; m--)
2628 {
2629 unsigned HOST_WIDE_INT d;
2630
2631 d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2632 if (t % d == 0 && t > d && m < maxm
2633 && (!cache_hit || cache_alg == alg_add_factor))
2634 {
2635 /* If the target has a cheap shift-and-add instruction use
2636 that in preference to a shift insn followed by an add insn.
2637 Assume that the shift-and-add is "atomic" with a latency
2638 equal to its cost, otherwise assume that on superscalar
2639 hardware the shift may be executed concurrently with the
2640 earlier steps in the algorithm. */
2641 op_cost = add_cost[mode] + shift_cost[mode][m];
2642 if (shiftadd_cost[mode][m] < op_cost)
2643 {
2644 op_cost = shiftadd_cost[mode][m];
2645 op_latency = op_cost;
2646 }
2647 else
2648 op_latency = add_cost[mode];
2649
2650 new_limit.cost = best_cost.cost - op_cost;
2651 new_limit.latency = best_cost.latency - op_latency;
2652 synth_mult (alg_in, t / d, &new_limit, mode);
2653
2654 alg_in->cost.cost += op_cost;
2655 alg_in->cost.latency += op_latency;
2656 if (alg_in->cost.latency < op_cost)
2657 alg_in->cost.latency = op_cost;
2658 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2659 {
2660 struct algorithm *x;
2661 best_cost = alg_in->cost;
2662 x = alg_in, alg_in = best_alg, best_alg = x;
2663 best_alg->log[best_alg->ops] = m;
2664 best_alg->op[best_alg->ops] = alg_add_factor;
2665 }
2666 /* Other factors will have been taken care of in the recursion. */
2667 break;
2668 }
2669
2670 d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2671 if (t % d == 0 && t > d && m < maxm
2672 && (!cache_hit || cache_alg == alg_sub_factor))
2673 {
2674 /* If the target has a cheap shift-and-subtract insn use
2675 that in preference to a shift insn followed by a sub insn.
2676 Assume that the shift-and-sub is "atomic" with a latency
2677 equal to its cost, otherwise assume that on superscalar
2678 hardware the shift may be executed concurrently with the
2679 earlier steps in the algorithm. */
2680 op_cost = add_cost[mode] + shift_cost[mode][m];
2681 if (shiftsub_cost[mode][m] < op_cost)
2682 {
2683 op_cost = shiftsub_cost[mode][m];
2684 op_latency = op_cost;
2685 }
2686 else
2687 op_latency = add_cost[mode];
2688
2689 new_limit.cost = best_cost.cost - op_cost;
2690 new_limit.latency = best_cost.latency - op_latency;
2691 synth_mult (alg_in, t / d, &new_limit, mode);
2692
2693 alg_in->cost.cost += op_cost;
2694 alg_in->cost.latency += op_latency;
2695 if (alg_in->cost.latency < op_cost)
2696 alg_in->cost.latency = op_cost;
2697 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2698 {
2699 struct algorithm *x;
2700 best_cost = alg_in->cost;
2701 x = alg_in, alg_in = best_alg, best_alg = x;
2702 best_alg->log[best_alg->ops] = m;
2703 best_alg->op[best_alg->ops] = alg_sub_factor;
2704 }
2705 break;
2706 }
2707 }
2708 if (cache_hit)
2709 goto done;
2710
2711 /* Try shift-and-add (load effective address) instructions,
2712 i.e. do a*3, a*5, a*9. */
2713 if ((t & 1) != 0)
2714 {
2715 do_alg_add_t2_m:
2716 q = t - 1;
2717 q = q & -q;
2718 m = exact_log2 (q);
2719 if (m >= 0 && m < maxm)
2720 {
2721 op_cost = shiftadd_cost[mode][m];
2722 new_limit.cost = best_cost.cost - op_cost;
2723 new_limit.latency = best_cost.latency - op_cost;
2724 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2725
2726 alg_in->cost.cost += op_cost;
2727 alg_in->cost.latency += op_cost;
2728 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2729 {
2730 struct algorithm *x;
2731 best_cost = alg_in->cost;
2732 x = alg_in, alg_in = best_alg, best_alg = x;
2733 best_alg->log[best_alg->ops] = m;
2734 best_alg->op[best_alg->ops] = alg_add_t2_m;
2735 }
2736 }
2737 if (cache_hit)
2738 goto done;
2739
2740 do_alg_sub_t2_m:
2741 q = t + 1;
2742 q = q & -q;
2743 m = exact_log2 (q);
2744 if (m >= 0 && m < maxm)
2745 {
2746 op_cost = shiftsub_cost[mode][m];
2747 new_limit.cost = best_cost.cost - op_cost;
2748 new_limit.latency = best_cost.latency - op_cost;
2749 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2750
2751 alg_in->cost.cost += op_cost;
2752 alg_in->cost.latency += op_cost;
2753 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2754 {
2755 struct algorithm *x;
2756 best_cost = alg_in->cost;
2757 x = alg_in, alg_in = best_alg, best_alg = x;
2758 best_alg->log[best_alg->ops] = m;
2759 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2760 }
2761 }
2762 if (cache_hit)
2763 goto done;
2764 }
2765
2766 done:
2767 /* If best_cost has not decreased, we have not found any algorithm. */
2768 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2769 return;
2770
2771 /* Cache the result. */
2772 if (!cache_hit)
2773 {
2774 alg_hash[hash_index].t = t;
2775 alg_hash[hash_index].mode = mode;
2776 alg_hash[hash_index].alg = best_alg->op[best_alg->ops];
2777 }
2778
2779 /* If the sequence is too long for `struct algorithm' to record,
2780 make this search fail. */
2781 if (best_alg->ops == MAX_BITS_PER_WORD)
2782 return;
2783
2784 /* Copy the algorithm from temporary space to the space at alg_out.
2785 We avoid using structure assignment because the majority of
2786 best_alg is normally undefined, and this is a critical function. */
2787 alg_out->ops = best_alg->ops + 1;
2788 alg_out->cost = best_cost;
2789 memcpy (alg_out->op, best_alg->op,
2790 alg_out->ops * sizeof *alg_out->op);
2791 memcpy (alg_out->log, best_alg->log,
2792 alg_out->ops * sizeof *alg_out->log);
2793 }
2794 \f
2795 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2796 Try three variations:
2797
2798 - a shift/add sequence based on VAL itself
2799 - a shift/add sequence based on -VAL, followed by a negation
2800 - a shift/add sequence based on VAL - 1, followed by an addition.
2801
2802 Return true if the cheapest of these cost less than MULT_COST,
2803 describing the algorithm in *ALG and final fixup in *VARIANT. */
2804
2805 static bool
2806 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2807 struct algorithm *alg, enum mult_variant *variant,
2808 int mult_cost)
2809 {
2810 struct algorithm alg2;
2811 struct mult_cost limit;
2812 int op_cost;
2813
2814 *variant = basic_variant;
2815 limit.cost = mult_cost;
2816 limit.latency = mult_cost;
2817 synth_mult (alg, val, &limit, mode);
2818
2819 /* This works only if the inverted value actually fits in an
2820 `unsigned int' */
2821 if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2822 {
2823 op_cost = neg_cost[mode];
2824 if (MULT_COST_LESS (&alg->cost, mult_cost))
2825 {
2826 limit.cost = alg->cost.cost - op_cost;
2827 limit.latency = alg->cost.latency - op_cost;
2828 }
2829 else
2830 {
2831 limit.cost = mult_cost - op_cost;
2832 limit.latency = mult_cost - op_cost;
2833 }
2834
2835 synth_mult (&alg2, -val, &limit, mode);
2836 alg2.cost.cost += op_cost;
2837 alg2.cost.latency += op_cost;
2838 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2839 *alg = alg2, *variant = negate_variant;
2840 }
2841
2842 /* This proves very useful for division-by-constant. */
2843 op_cost = add_cost[mode];
2844 if (MULT_COST_LESS (&alg->cost, mult_cost))
2845 {
2846 limit.cost = alg->cost.cost - op_cost;
2847 limit.latency = alg->cost.latency - op_cost;
2848 }
2849 else
2850 {
2851 limit.cost = mult_cost - op_cost;
2852 limit.latency = mult_cost - op_cost;
2853 }
2854
2855 synth_mult (&alg2, val - 1, &limit, mode);
2856 alg2.cost.cost += op_cost;
2857 alg2.cost.latency += op_cost;
2858 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2859 *alg = alg2, *variant = add_variant;
2860
2861 return MULT_COST_LESS (&alg->cost, mult_cost);
2862 }
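
/* [Editorial sketch -- not part of expmed.c.]  Two possible outcomes of
   the search above, assuming shifts and adds are cheap relative to a
   multiply (hypothetical helpers on a 32-bit word):  */

#include <stdint.h>

/* val == 7: basic_variant, from the shift/add sequence for 7 itself
   (multiply by 8, subtract once).  */
static uint32_t
times7 (uint32_t x)
{
  return (x << 3) - x;
}

/* val == -3: negate_variant, a cheap sequence for 3 followed by a
   negation, rather than a long sequence for 0xfffffffd itself.  */
static uint32_t
times_minus3 (uint32_t x)
{
  return 0u - ((x << 1) + x);
}
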
2863
2864 /* A subroutine of expand_mult, used for constant multiplications.
2865 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2866 convenient. Use the shift/add sequence described by ALG and apply
2867 the final fixup specified by VARIANT. */
2868
2869 static rtx
2870 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2871 rtx target, const struct algorithm *alg,
2872 enum mult_variant variant)
2873 {
2874 HOST_WIDE_INT val_so_far;
2875 rtx insn, accum, tem;
2876 int opno;
2877 enum machine_mode nmode;
2878
2879 /* Avoid referencing memory over and over.
2880 For speed, but also for correctness when mem is volatile. */
2881 if (MEM_P (op0))
2882 op0 = force_reg (mode, op0);
2883
2884 /* ACCUM starts out either as OP0 or as a zero, depending on
2885 the first operation. */
2886
2887 if (alg->op[0] == alg_zero)
2888 {
2889 accum = copy_to_mode_reg (mode, const0_rtx);
2890 val_so_far = 0;
2891 }
2892 else if (alg->op[0] == alg_m)
2893 {
2894 accum = copy_to_mode_reg (mode, op0);
2895 val_so_far = 1;
2896 }
2897 else
2898 gcc_unreachable ();
2899
2900 for (opno = 1; opno < alg->ops; opno++)
2901 {
2902 int log = alg->log[opno];
2903 rtx shift_subtarget = optimize ? 0 : accum;
2904 rtx add_target
2905 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2906 && !optimize)
2907 ? target : 0;
2908 rtx accum_target = optimize ? 0 : accum;
2909
2910 switch (alg->op[opno])
2911 {
2912 case alg_shift:
2913 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2914 build_int_cst (NULL_TREE, log),
2915 NULL_RTX, 0);
2916 val_so_far <<= log;
2917 break;
2918
2919 case alg_add_t_m2:
2920 tem = expand_shift (LSHIFT_EXPR, mode, op0,
2921 build_int_cst (NULL_TREE, log),
2922 NULL_RTX, 0);
2923 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2924 add_target ? add_target : accum_target);
2925 val_so_far += (HOST_WIDE_INT) 1 << log;
2926 break;
2927
2928 case alg_sub_t_m2:
2929 tem = expand_shift (LSHIFT_EXPR, mode, op0,
2930 build_int_cst (NULL_TREE, log),
2931 NULL_RTX, 0);
2932 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2933 add_target ? add_target : accum_target);
2934 val_so_far -= (HOST_WIDE_INT) 1 << log;
2935 break;
2936
2937 case alg_add_t2_m:
2938 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2939 build_int_cst (NULL_TREE, log),
2940 shift_subtarget,
2941 0);
2942 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2943 add_target ? add_target : accum_target);
2944 val_so_far = (val_so_far << log) + 1;
2945 break;
2946
2947 case alg_sub_t2_m:
2948 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2949 build_int_cst (NULL_TREE, log),
2950 shift_subtarget, 0);
2951 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2952 add_target ? add_target : accum_target);
2953 val_so_far = (val_so_far << log) - 1;
2954 break;
2955
2956 case alg_add_factor:
2957 tem = expand_shift (LSHIFT_EXPR, mode, accum,
2958 build_int_cst (NULL_TREE, log),
2959 NULL_RTX, 0);
2960 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2961 add_target ? add_target : accum_target);
2962 val_so_far += val_so_far << log;
2963 break;
2964
2965 case alg_sub_factor:
2966 tem = expand_shift (LSHIFT_EXPR, mode, accum,
2967 build_int_cst (NULL_TREE, log),
2968 NULL_RTX, 0);
2969 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
2970 (add_target
2971 ? add_target : (optimize ? 0 : tem)));
2972 val_so_far = (val_so_far << log) - val_so_far;
2973 break;
2974
2975 default:
2976 gcc_unreachable ();
2977 }
2978
2979 /* Write a REG_EQUAL note on the last insn so that we can cse
2980 multiplication sequences. Note that if ACCUM is a SUBREG,
2981 we've set the inner register and must properly indicate
2982 that. */
2983
2984 tem = op0, nmode = mode;
2985 if (GET_CODE (accum) == SUBREG)
2986 {
2987 nmode = GET_MODE (SUBREG_REG (accum));
2988 tem = gen_lowpart (nmode, op0);
2989 }
2990
2991 insn = get_last_insn ();
2992 set_unique_reg_note (insn, REG_EQUAL,
2993 gen_rtx_MULT (nmode, tem, GEN_INT (val_so_far)));
2994 }
2995
2996 if (variant == negate_variant)
2997 {
2998 val_so_far = -val_so_far;
2999 accum = expand_unop (mode, neg_optab, accum, target, 0);
3000 }
3001 else if (variant == add_variant)
3002 {
3003 val_so_far = val_so_far + 1;
3004 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3005 }
3006
3007 /* Compare only the bits of val and val_so_far that are significant
3008 in the result mode, to avoid sign-/zero-extension confusion. */
3009 val &= GET_MODE_MASK (mode);
3010 val_so_far &= GET_MODE_MASK (mode);
3011 gcc_assert (val == val_so_far);
3012
3013 return accum;
3014 }
3015
3016 /* Perform a multiplication and return an rtx for the result.
3017 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3018 TARGET is a suggestion for where to store the result (an rtx).
3019
3020 We check specially for a constant integer as OP1.
3021 If you want this check for OP0 as well, then before calling
3022 you should swap the two operands if OP0 would be constant. */
3023
3024 rtx
3025 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3026 int unsignedp)
3027 {
3028 rtx const_op1 = op1;
3029 enum mult_variant variant;
3030 struct algorithm algorithm;
3031
3032 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3033 less than or equal in size to `unsigned int' this doesn't matter.
3034 If the mode is larger than `unsigned int', then synth_mult works only
3035 if the constant value exactly fits in an `unsigned int' without any
3036 truncation. This means that multiplying by negative values does
3037 not work; results are off by 2^32 on a 32 bit machine. */
3038
3039 /* If we are multiplying in DImode, it may still be a win
3040 to try to work with shifts and adds. */
3041 if (GET_CODE (op1) == CONST_DOUBLE
3042 && GET_MODE_CLASS (GET_MODE (op1)) == MODE_INT
3043 && HOST_BITS_PER_INT >= BITS_PER_WORD
3044 && CONST_DOUBLE_HIGH (op1) == 0)
3045 const_op1 = GEN_INT (CONST_DOUBLE_LOW (op1));
3046 else if (HOST_BITS_PER_INT < GET_MODE_BITSIZE (mode)
3047 && GET_CODE (op1) == CONST_INT
3048 && INTVAL (op1) < 0)
3049 const_op1 = 0;
3050
3051 /* We used to test optimize here, on the grounds that it's better to
3052 produce a smaller program when -O is not used.
3053 But this causes such a terrible slowdown sometimes
3054 that it seems better to use synth_mult always. */
3055
3056 if (const_op1 && GET_CODE (const_op1) == CONST_INT
3057 && (unsignedp || !flag_trapv))
3058 {
3059 int mult_cost = rtx_cost (gen_rtx_MULT (mode, op0, op1), SET);
3060
3061 if (choose_mult_variant (mode, INTVAL (const_op1), &algorithm, &variant,
3062 mult_cost))
3063 return expand_mult_const (mode, op0, INTVAL (const_op1), target,
3064 &algorithm, variant);
3065 }
3066
3067 if (GET_CODE (op0) == CONST_DOUBLE)
3068 {
3069 rtx temp = op0;
3070 op0 = op1;
3071 op1 = temp;
3072 }
3073
3074 /* Expand x*2.0 as x+x. */
3075 if (GET_CODE (op1) == CONST_DOUBLE
3076 && GET_MODE_CLASS (mode) == MODE_FLOAT)
3077 {
3078 REAL_VALUE_TYPE d;
3079 REAL_VALUE_FROM_CONST_DOUBLE (d, op1);
3080
3081 if (REAL_VALUES_EQUAL (d, dconst2))
3082 {
3083 op0 = force_reg (GET_MODE (op0), op0);
3084 return expand_binop (mode, add_optab, op0, op0,
3085 target, unsignedp, OPTAB_LIB_WIDEN);
3086 }
3087 }
3088
3089 /* This used to use umul_optab if unsigned, but for non-widening multiply
3090 there is no difference between signed and unsigned. */
3091 op0 = expand_binop (mode,
3092 ! unsignedp
3093 && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
3094 ? smulv_optab : smul_optab,
3095 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3096 gcc_assert (op0);
3097 return op0;
3098 }
3099 \f
3100 /* Return the smallest n such that 2**n >= X. */
3101
3102 int
3103 ceil_log2 (unsigned HOST_WIDE_INT x)
3104 {
3105 return floor_log2 (x - 1) + 1;
3106 }
3107
3108 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3109 replace division by D, and put the least significant N bits of the result
3110 in *MULTIPLIER_PTR and return the most significant bit.
3111
3112 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3113 needed precision is in PRECISION (should be <= N).
3114
3115 PRECISION should be as small as possible so this function can choose
3116 multiplier more freely.
3117
3118 The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that
3119 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3120
3121 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3122 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
3123
3124 static
3125 unsigned HOST_WIDE_INT
3126 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3127 unsigned HOST_WIDE_INT *multiplier_ptr,
3128 int *post_shift_ptr, int *lgup_ptr)
3129 {
3130 HOST_WIDE_INT mhigh_hi, mlow_hi;
3131 unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
3132 int lgup, post_shift;
3133 int pow, pow2;
3134 unsigned HOST_WIDE_INT nl, dummy1;
3135 HOST_WIDE_INT nh, dummy2;
3136
3137 /* lgup = ceil(log2(divisor)); */
3138 lgup = ceil_log2 (d);
3139
3140 gcc_assert (lgup <= n);
3141
3142 pow = n + lgup;
3143 pow2 = n + lgup - precision;
3144
3145 /* We could handle this with some effort, but this case is much
3146 better handled directly with a scc insn, so rely on caller using
3147 that. */
3148 gcc_assert (pow != 2 * HOST_BITS_PER_WIDE_INT);
3149
3150 /* mlow = 2^(N + lgup)/d */
3151 if (pow >= HOST_BITS_PER_WIDE_INT)
3152 {
3153 nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
3154 nl = 0;
3155 }
3156 else
3157 {
3158 nh = 0;
3159 nl = (unsigned HOST_WIDE_INT) 1 << pow;
3160 }
3161 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3162 &mlow_lo, &mlow_hi, &dummy1, &dummy2);
3163
3164 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3165 if (pow2 >= HOST_BITS_PER_WIDE_INT)
3166 nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
3167 else
3168 nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
3169 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3170 &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
3171
3172 gcc_assert (!mhigh_hi || nh - d < d);
3173 gcc_assert (mhigh_hi <= 1 && mlow_hi <= 1);
3174 /* Assert that mlow < mhigh. */
3175 gcc_assert (mlow_hi < mhigh_hi
3176 || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo));
3177
3178 /* If precision == N, then mlow, mhigh exceed 2^N
3179 (but they do not exceed 2^(N+1)). */
3180
3181 /* Reduce to lowest terms. */
3182 for (post_shift = lgup; post_shift > 0; post_shift--)
3183 {
3184 unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
3185 unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
3186 if (ml_lo >= mh_lo)
3187 break;
3188
3189 mlow_hi = 0;
3190 mlow_lo = ml_lo;
3191 mhigh_hi = 0;
3192 mhigh_lo = mh_lo;
3193 }
3194
3195 *post_shift_ptr = post_shift;
3196 *lgup_ptr = lgup;
3197 if (n < HOST_BITS_PER_WIDE_INT)
3198 {
3199 unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3200 *multiplier_ptr = mhigh_lo & mask;
3201 return mhigh_lo >= mask;
3202 }
3203 else
3204 {
3205 *multiplier_ptr = mhigh_lo;
3206 return mhigh_hi;
3207 }
3208 }
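
/* [Editorial sketch -- not part of expmed.c.]  A concrete instance of the
   machinery above: for d == 5 with N == precision == 32 it yields the
   multiplier 0xCCCCCCCD and post_shift == 2, so an unsigned 32-bit
   division by 5 becomes one widening multiply plus a right shift
   (hypothetical helper):  */

#include <stdint.h>

static uint32_t
udiv5 (uint32_t x)
{
  /* High half of the 64-bit product, then the post shift of 2.  */
  return (uint32_t) (((uint64_t) x * 0xCCCCCCCDu) >> 34);
}

/* e.g. udiv5 (0xFFFFFFFFu) == 858993459 == 0xFFFFFFFF / 5.  */
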
3209
3210 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3211 congruent to 1 (mod 2**N). */
3212
3213 static unsigned HOST_WIDE_INT
3214 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3215 {
3216 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3217
3218 /* The algorithm notes that the choice y = x satisfies
3219 x*y == 1 mod 2^3, since x is assumed odd.
3220 Each iteration doubles the number of bits of significance in y. */
3221
3222 unsigned HOST_WIDE_INT mask;
3223 unsigned HOST_WIDE_INT y = x;
3224 int nbit = 3;
3225
3226 mask = (n == HOST_BITS_PER_WIDE_INT
3227 ? ~(unsigned HOST_WIDE_INT) 0
3228 : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3229
3230 while (nbit < n)
3231 {
3232 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3233 nbit *= 2;
3234 }
3235 return y;
3236 }
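
/* [Editorial sketch -- not part of expmed.c.]  The same Newton iteration
   on a 32-bit host word; each pass doubles the number of correct low
   bits.  For example the inverse of 3 is 0xAAAAAAAB, since
   3 * 0xAAAAAAAB == 0x200000001 == 1 (mod 2^32).  Name hypothetical.  */

#include <stdint.h>

static uint32_t
invert_mod2_32 (uint32_t x)
{
  /* X must be odd; y == x is already correct to 3 bits.  */
  uint32_t y = x;
  int nbit;

  for (nbit = 3; nbit < 32; nbit *= 2)
    y = y * (2 - x * y);          /* unsigned arithmetic is mod 2^32 */
  return y;
}
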
3237
3238 /* Emit code to adjust ADJ_OPERAND after multiplication of wrong signedness
3239 flavor of OP0 and OP1. ADJ_OPERAND is already the high half of the
3240 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3241 to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3242 become signed.
3243
3244 The result is put in TARGET if that is convenient.
3245
3246 MODE is the mode of operation. */
3247
3248 rtx
3249 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3250 rtx op1, rtx target, int unsignedp)
3251 {
3252 rtx tem;
3253 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3254
3255 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3256 build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3257 NULL_RTX, 0);
3258 tem = expand_and (mode, tem, op1, NULL_RTX);
3259 adj_operand
3260 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3261 adj_operand);
3262
3263 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3264 build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3265 NULL_RTX, 0);
3266 tem = expand_and (mode, tem, op0, NULL_RTX);
3267 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3268 target);
3269
3270 return target;
3271 }
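
/* [Editorial sketch -- not part of expmed.c.]  The adjustment above comes
   from the identity relating the high halves of the signed and unsigned
   N-bit products: writing sign(a) for "a < 0 when viewed as signed",
   high_signed (a, b) == high_unsigned (a, b) - (sign(a) ? b : 0)
   - (sign(b) ? a : 0)  (mod 2^N), and with additions instead of
   subtractions in the other direction.  A 32-bit host restatement
   (hypothetical helpers; arithmetic right shift assumed):  */

#include <stdint.h>

static uint32_t
high_unsigned (uint32_t a, uint32_t b)
{
  return (uint32_t) (((uint64_t) a * b) >> 32);
}

static uint32_t
high_signed (int32_t a, int32_t b)
{
  return (uint32_t) (((int64_t) a * (int64_t) b) >> 32);
}

static uint32_t
adjust_to_signed (uint32_t a, uint32_t b)
{
  uint32_t h = high_unsigned (a, b);
  h -= ((int32_t) a < 0 ? b : 0);   /* mask-and-subtract, as above */
  h -= ((int32_t) b < 0 ? a : 0);
  return h;                         /* == high_signed ((int32_t) a, (int32_t) b) */
}
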
3272
3273 /* Subroutine of expand_mult_highpart. Return the MODE high part of OP. */
3274
3275 static rtx
3276 extract_high_half (enum machine_mode mode, rtx op)
3277 {
3278 enum machine_mode wider_mode;
3279
3280 if (mode == word_mode)
3281 return gen_highpart (mode, op);
3282
3283 wider_mode = GET_MODE_WIDER_MODE (mode);
3284 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3285 build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode)), 0, 1);
3286 return convert_modes (mode, wider_mode, op, 0);
3287 }
3288
3289 /* Like expand_mult_highpart, but only consider using a multiplication
3290 optab. OP1 is an rtx for the constant operand. */
3291
3292 static rtx
3293 expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3294 rtx target, int unsignedp, int max_cost)
3295 {
3296 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3297 enum machine_mode wider_mode;
3298 optab moptab;
3299 rtx tem;
3300 int size;
3301
3302 wider_mode = GET_MODE_WIDER_MODE (mode);
3303 size = GET_MODE_BITSIZE (mode);
3304
3305 /* Firstly, try using a multiplication insn that only generates the needed
3306 high part of the product, and in the sign flavor of unsignedp. */
3307 if (mul_highpart_cost[mode] < max_cost)
3308 {
3309 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3310 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3311 unsignedp, OPTAB_DIRECT);
3312 if (tem)
3313 return tem;
3314 }
3315
3316 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3317 Need to adjust the result after the multiplication. */
3318 if (size - 1 < BITS_PER_WORD
3319 && (mul_highpart_cost[mode] + 2 * shift_cost[mode][size-1]
3320 + 4 * add_cost[mode] < max_cost))
3321 {
3322 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3323 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3324 unsignedp, OPTAB_DIRECT);
3325 if (tem)
3326 /* We used the wrong signedness. Adjust the result. */
3327 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3328 tem, unsignedp);
3329 }
3330
3331 /* Try widening multiplication. */
3332 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3333 if (moptab->handlers[wider_mode].insn_code != CODE_FOR_nothing
3334 && mul_widen_cost[wider_mode] < max_cost)
3335 {
3336 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3337 unsignedp, OPTAB_WIDEN);
3338 if (tem)
3339 return extract_high_half (mode, tem);
3340 }
3341
3342 /* Try widening the mode and perform a non-widening multiplication. */
3343 moptab = smul_optab;
3344 if (smul_optab->handlers[wider_mode].insn_code != CODE_FOR_nothing
3345 && size - 1 < BITS_PER_WORD
3346 && mul_cost[wider_mode] + shift_cost[mode][size-1] < max_cost)
3347 {
3348 tem = expand_binop (wider_mode, moptab, op0, op1, 0,
3349 unsignedp, OPTAB_WIDEN);
3350 if (tem)
3351 return extract_high_half (mode, tem);
3352 }
3353
3354 /* Try widening multiplication of opposite signedness, and adjust. */
3355 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3356 if (moptab->handlers[wider_mode].insn_code != CODE_FOR_nothing
3357 && size - 1 < BITS_PER_WORD
3358 && (mul_widen_cost[wider_mode] + 2 * shift_cost[mode][size-1]
3359 + 4 * add_cost[mode] < max_cost))
3360 {
3361 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3362 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3363 if (tem != 0)
3364 {
3365 tem = extract_high_half (mode, tem);
3366 /* We used the wrong signedness. Adjust the result. */
3367 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3368 target, unsignedp);
3369 }
3370 }
3371
3372 return 0;
3373 }
3374
3375 /* Emit code to multiply OP0 and CNST1, putting the high half of the result
3376 in TARGET if that is convenient, and return where the result is. If the
3377 operation cannot be performed, 0 is returned.
3378
3379 MODE is the mode of operation and result.
3380
3381 UNSIGNEDP nonzero means unsigned multiply.
3382
3383 MAX_COST is the total allowed cost for the expanded RTL. */
3384
3385 rtx
3386 expand_mult_highpart (enum machine_mode mode, rtx op0,
3387 unsigned HOST_WIDE_INT cnst1, rtx target,
3388 int unsignedp, int max_cost)
3389 {
3390 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3391 int extra_cost;
3392 bool sign_adjust = false;
3393 enum mult_variant variant;
3394 struct algorithm alg;
3395 rtx op1, tem;
3396
3397 /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */
3398 gcc_assert (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT);
3399
3400 op1 = gen_int_mode (cnst1, wider_mode);
3401 cnst1 &= GET_MODE_MASK (mode);
3402
3403 /* We can't optimize modes wider than BITS_PER_WORD.
3404 ??? We might be able to perform double-word arithmetic if
3405 mode == word_mode, however all the cost calculations in
3406 synth_mult etc. assume single-word operations. */
3407 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3408 return expand_mult_highpart_optab (mode, op0, op1, target,
3409 unsignedp, max_cost);
3410
3411 extra_cost = shift_cost[mode][GET_MODE_BITSIZE (mode) - 1];
3412
3413 /* Check whether we try to multiply by a negative constant. */
3414 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3415 {
3416 sign_adjust = true;
3417 extra_cost += add_cost[mode];
3418 }
3419
3420 /* See whether shift/add multiplication is cheap enough. */
3421 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3422 max_cost - extra_cost))
3423 {
3424 /* See whether the specialized multiplication optabs are
3425 cheaper than the shift/add version. */
3426 tem = expand_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3427 alg.cost.cost + extra_cost);
3428 if (tem)
3429 return tem;
3430
3431 tem = convert_to_mode (wider_mode, op0, unsignedp);
3432 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3433 tem = extract_high_half (mode, tem);
3434
3435 /* Adjust result for signedness. */
3436 if (sign_adjust)
3437 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3438
3439 return tem;
3440 }
3441 return expand_mult_highpart_optab (mode, op0, op1, target,
3442 unsignedp, max_cost);
3443 }
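/* Illustrative sketch, not part of the compiler (the helper name is
   made up): at the value level, expand_mult_highpart computes the
   upper N bits of the 2N-bit product of an N-bit operand and an N-bit
   constant.  For an unsigned 32-bit multiply on a 64-bit host type:  */
#if 0
static unsigned int
example_umul_highpart (unsigned int op0, unsigned int cnst1)
{
  return (unsigned int) (((unsigned long long) op0 * cnst1) >> 32);
}
#endif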
3444
3445
3446 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3447
3448 static rtx
3449 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3450 {
3451 unsigned HOST_WIDE_INT masklow, maskhigh;
3452 rtx result, temp, shift, label;
3453 int logd;
3454
3455 logd = floor_log2 (d);
3456 result = gen_reg_rtx (mode);
3457
3458 /* Avoid conditional branches when they're expensive. */
3459 if (BRANCH_COST >= 2
3460 && !optimize_size)
3461 {
3462 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3463 mode, 0, -1);
3464 if (signmask)
3465 {
3466 signmask = force_reg (mode, signmask);
3467 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3468 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3469
3470 /* Use the rtx_cost of a LSHIFTRT instruction to determine
3471 which instruction sequence to use. If logical right shifts
3472 are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
3473 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
3474
3475 temp = gen_rtx_LSHIFTRT (mode, result, shift);
3476 if (lshr_optab->handlers[mode].insn_code == CODE_FOR_nothing
3477 || rtx_cost (temp, SET) > COSTS_N_INSNS (2))
3478 {
3479 temp = expand_binop (mode, xor_optab, op0, signmask,
3480 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3481 temp = expand_binop (mode, sub_optab, temp, signmask,
3482 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3483 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3484 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3485 temp = expand_binop (mode, xor_optab, temp, signmask,
3486 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3487 temp = expand_binop (mode, sub_optab, temp, signmask,
3488 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3489 }
3490 else
3491 {
3492 signmask = expand_binop (mode, lshr_optab, signmask, shift,
3493 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3494 signmask = force_reg (mode, signmask);
3495
3496 temp = expand_binop (mode, add_optab, op0, signmask,
3497 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3498 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3499 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3500 temp = expand_binop (mode, sub_optab, temp, signmask,
3501 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3502 }
3503 return temp;
3504 }
3505 }
3506
3507 /* Mask contains the mode's signbit and the significant bits of the
3508 modulus. By including the signbit in the operation, many targets
3509 can avoid an explicit compare operation in the following comparison
3510 against zero. */
3511
3512 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3513 if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3514 {
3515 masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
3516 maskhigh = -1;
3517 }
3518 else
3519 maskhigh = (HOST_WIDE_INT) -1
3520 << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3521
3522 temp = expand_binop (mode, and_optab, op0,
3523 immed_double_const (masklow, maskhigh, mode),
3524 result, 1, OPTAB_LIB_WIDEN);
3525 if (temp != result)
3526 emit_move_insn (result, temp);
3527
3528 label = gen_label_rtx ();
3529 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3530
3531 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3532 0, OPTAB_LIB_WIDEN);
3533 masklow = (HOST_WIDE_INT) -1 << logd;
3534 maskhigh = -1;
3535 temp = expand_binop (mode, ior_optab, temp,
3536 immed_double_const (masklow, maskhigh, mode),
3537 result, 1, OPTAB_LIB_WIDEN);
3538 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3539 0, OPTAB_LIB_WIDEN);
3540 if (temp != result)
3541 emit_move_insn (result, temp);
3542 emit_label (label);
3543 return result;
3544 }
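/* Illustrative sketch, not part of the compiler (the helper name is
   made up): the branch-free 2-XOR/2-SUB/AND variant built above,
   assuming a 32-bit int and d == 1 << logd.  signmask is -1 when x is
   negative and 0 otherwise, exactly what emit_store_flag produces.  */
#if 0
static int
example_smod_pow2 (int x, int logd)
{
  int signmask = -(x < 0);				/* 0 or -1 */
  int masklow = (1 << logd) - 1;
  int t = ((x ^ signmask) - signmask) & masklow;	/* |x| & (d - 1) */
  return (t ^ signmask) - signmask;			/* restore x's sign */
}
#endif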
3545
3546 /* Expand signed division of OP0 by a power of two D in mode MODE.
3547 This routine is only called for positive values of D. */
3548
3549 static rtx
3550 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3551 {
3552 rtx temp, label;
3553 tree shift;
3554 int logd;
3555
3556 logd = floor_log2 (d);
3557 shift = build_int_cst (NULL_TREE, logd);
3558
3559 if (d == 2 && BRANCH_COST >= 1)
3560 {
3561 temp = gen_reg_rtx (mode);
3562 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3563 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3564 0, OPTAB_LIB_WIDEN);
3565 return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3566 }
3567
3568 #ifdef HAVE_conditional_move
3569 if (BRANCH_COST >= 2)
3570 {
3571 rtx temp2;
3572
3573 /* ??? emit_conditional_move forces a stack adjustment via
3574 compare_from_rtx, so if the sequence is discarded, it will
3575 be lost. Do it now instead. */
3576 do_pending_stack_adjust ();
3577
3578 start_sequence ();
3579 temp2 = copy_to_mode_reg (mode, op0);
3580 temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3581 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3582 temp = force_reg (mode, temp);
3583
3584 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3585 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3586 mode, temp, temp2, mode, 0);
3587 if (temp2)
3588 {
3589 rtx seq = get_insns ();
3590 end_sequence ();
3591 emit_insn (seq);
3592 return expand_shift (RSHIFT_EXPR, mode, temp2, shift, NULL_RTX, 0);
3593 }
3594 end_sequence ();
3595 }
3596 #endif
3597
3598 if (BRANCH_COST >= 2)
3599 {
3600 int ushift = GET_MODE_BITSIZE (mode) - logd;
3601
3602 temp = gen_reg_rtx (mode);
3603 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3604 if (shift_cost[mode][ushift] > COSTS_N_INSNS (1))
3605 temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3606 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3607 else
3608 temp = expand_shift (RSHIFT_EXPR, mode, temp,
3609 build_int_cst (NULL_TREE, ushift),
3610 NULL_RTX, 1);
3611 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3612 0, OPTAB_LIB_WIDEN);
3613 return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3614 }
3615
3616 label = gen_label_rtx ();
3617 temp = copy_to_mode_reg (mode, op0);
3618 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3619 expand_inc (temp, GEN_INT (d - 1));
3620 emit_label (label);
3621 return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3622 }
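/* Illustrative sketch, not part of the compiler (the helper name is
   made up): the branch-free form generated above for BRANCH_COST >= 2,
   assuming a 32-bit int, d == 1 << logd, and an arithmetic ">>" for
   negative values (true of typical hosts, though not guaranteed by the
   C standard).  */
#if 0
static int
example_sdiv_pow2 (int x, int logd)
{
  int d = 1 << logd;
  int bias = -(x < 0) & (d - 1);	/* d - 1 when x is negative, else 0 */
  return (x + bias) >> logd;		/* truncating signed division by d */
}
#endif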
3623 \f
3624 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3625 if that is convenient, and returning where the result is.
3626 You may request either the quotient or the remainder as the result;
3627 specify REM_FLAG nonzero to get the remainder.
3628
3629 CODE is the expression code for which kind of division this is;
3630 it controls how rounding is done. MODE is the machine mode to use.
3631 UNSIGNEDP nonzero means do unsigned division. */
3632
3633 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3634 and then correct it by or'ing in missing high bits
3635 if result of ANDI is nonzero.
3636 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3637 This could optimize to a bfexts instruction.
3638 But C doesn't use these operations, so their optimizations are
3639 left for later. */
3640 /* ??? For modulo, we don't actually need the highpart of the first product,
3641 the low part will do nicely. And for small divisors, the second multiply
3642 can also be a low-part only multiply or even be completely left out.
3643 E.g. to calculate the remainder of a division by 3 with a 32 bit
3644 multiply, multiply with 0x55555556 and extract the upper two bits;
3645 the result is exact for inputs up to 0x1fffffff.
3646 The input range can be reduced by using cross-sum rules.
3647 For odd divisors >= 3, the following table gives right shift counts
3648 so that if a number is shifted by an integer multiple of the given
3649 amount, the remainder stays the same:
3650 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3651 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3652 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3653 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3654 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3655
3656 Cross-sum rules for even numbers can be derived by leaving as many bits
3657 to the right alone as the divisor has zeros to the right.
3658 E.g. if x is an unsigned 32 bit number:
3659 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3660 */
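/* Illustrative sketch, not part of the compiler (the helper name is
   made up): the remainder-by-3 example from the comment above, using
   only a 32-bit low-part multiply.  0x55555556 is (2^32 + 2) / 3, so
   for n <= 0x1fffffff the top two bits of the truncated product are
   exactly n mod 3.  */
#if 0
static unsigned int
example_umod3 (unsigned int n)
{
  return (n * 0x55555556U) >> 30;	/* == n % 3 for n <= 0x1fffffff */
}
#endif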
3661
3662 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
3663
3664 rtx
3665 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3666 rtx op0, rtx op1, rtx target, int unsignedp)
3667 {
3668 enum machine_mode compute_mode;
3669 rtx tquotient;
3670 rtx quotient = 0, remainder = 0;
3671 rtx last;
3672 int size;
3673 rtx insn, set;
3674 optab optab1, optab2;
3675 int op1_is_constant, op1_is_pow2 = 0;
3676 int max_cost, extra_cost;
3677 static HOST_WIDE_INT last_div_const = 0;
3678 static HOST_WIDE_INT ext_op1;
3679
3680 op1_is_constant = GET_CODE (op1) == CONST_INT;
3681 if (op1_is_constant)
3682 {
3683 ext_op1 = INTVAL (op1);
3684 if (unsignedp)
3685 ext_op1 &= GET_MODE_MASK (mode);
3686 op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3687 || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3688 }
3689
3690 /*
3691 This is the structure of expand_divmod:
3692
3693 First comes code to fix up the operands so we can perform the operations
3694 correctly and efficiently.
3695
3696 Second comes a switch statement with code specific for each rounding mode.
3697 For some special operands this code emits all RTL for the desired
3698 operation, for other cases, it generates only a quotient and stores it in
3699 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
3700 to indicate that it has not done anything.
3701
3702 Last comes code that finishes the operation. If QUOTIENT is set and
3703 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
3704 QUOTIENT is not set, it is computed using trunc rounding.
3705
3706 We try to generate special code for division and remainder when OP1 is a
3707 constant. If |OP1| = 2**n we can use shifts and some other fast
3708 operations. For other values of OP1, we compute a carefully selected
3709 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3710 by m.
3711
3712 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3713 half of the product. Different strategies for generating the product are
3714 implemented in expand_mult_highpart.
3715
3716 If what we actually want is the remainder, we generate that by another
3717 by-constant multiplication and a subtraction. */
3718
3719 /* We shouldn't be called with OP1 == const1_rtx, but some of the
3720 code below will malfunction if we are, so check here and handle
3721 the special case if so. */
3722 if (op1 == const1_rtx)
3723 return rem_flag ? const0_rtx : op0;
3724
3725 /* When dividing by -1, we could get an overflow.
3726 negv_optab can handle overflows. */
3727 if (! unsignedp && op1 == constm1_rtx)
3728 {
3729 if (rem_flag)
3730 return const0_rtx;
3731 return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3732 ? negv_optab : neg_optab, op0, target, 0);
3733 }
3734
3735 if (target
3736 /* Don't use the function value register as a target
3737 since we have to read it as well as write it,
3738 and function-inlining gets confused by this. */
3739 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3740 /* Don't clobber an operand while doing a multi-step calculation. */
3741 || ((rem_flag || op1_is_constant)
3742 && (reg_mentioned_p (target, op0)
3743 || (MEM_P (op0) && MEM_P (target))))
3744 || reg_mentioned_p (target, op1)
3745 || (MEM_P (op1) && MEM_P (target))))
3746 target = 0;
3747
3748 /* Get the mode in which to perform this computation. Normally it will
3749 be MODE, but sometimes we can't do the desired operation in MODE.
3750 If so, pick a wider mode in which we can do the operation. Convert
3751 to that mode at the start to avoid repeated conversions.
3752
3753 First see what operations we need. These depend on the expression
3754 we are evaluating. (We assume that divxx3 insns exist under the
3755 same conditions as modxx3 insns, and that these insns don't normally
3756 fail. If these assumptions are not correct, we may generate less
3757 efficient code in some cases.)
3758
3759 Then see if we find a mode in which we can open-code that operation
3760 (either a division, modulus, or shift). Finally, check for the smallest
3761 mode for which we can do the operation with a library call. */
3762
3763 /* We might want to refine this now that we have division-by-constant
3764 optimization. Since expand_mult_highpart tries so many variants, it is
3765 not straightforward to generalize this. Maybe we should make an array
3766 of possible modes in init_expmed? Save this for GCC 2.7. */
3767
3768 optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3769 ? (unsignedp ? lshr_optab : ashr_optab)
3770 : (unsignedp ? udiv_optab : sdiv_optab));
3771 optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3772 ? optab1
3773 : (unsignedp ? udivmod_optab : sdivmod_optab));
3774
3775 for (compute_mode = mode; compute_mode != VOIDmode;
3776 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3777 if (optab1->handlers[compute_mode].insn_code != CODE_FOR_nothing
3778 || optab2->handlers[compute_mode].insn_code != CODE_FOR_nothing)
3779 break;
3780
3781 if (compute_mode == VOIDmode)
3782 for (compute_mode = mode; compute_mode != VOIDmode;
3783 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3784 if (optab1->handlers[compute_mode].libfunc
3785 || optab2->handlers[compute_mode].libfunc)
3786 break;
3787
3788 /* If we still couldn't find a mode, use MODE, but we'll probably abort
3789 in expand_binop. */
3790 if (compute_mode == VOIDmode)
3791 compute_mode = mode;
3792
3793 if (target && GET_MODE (target) == compute_mode)
3794 tquotient = target;
3795 else
3796 tquotient = gen_reg_rtx (compute_mode);
3797
3798 size = GET_MODE_BITSIZE (compute_mode);
3799 #if 0
3800 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3801 (mode), and thereby get better code when OP1 is a constant. Do that
3802 later. It will require going over all usages of SIZE below. */
3803 size = GET_MODE_BITSIZE (mode);
3804 #endif
3805
3806 /* Only deduct something for a REM if the last divide done was
3807 for a different constant. Then set the constant of the last
3808 divide. */
3809 max_cost = div_cost[compute_mode]
3810 - (rem_flag && ! (last_div_const != 0 && op1_is_constant
3811 && INTVAL (op1) == last_div_const)
3812 ? mul_cost[compute_mode] + add_cost[compute_mode]
3813 : 0);
3814
3815 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3816
3817 /* Now convert to the best mode to use. */
3818 if (compute_mode != mode)
3819 {
3820 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3821 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3822
3823 /* convert_modes may have placed op1 into a register, so we
3824 must recompute the following. */
3825 op1_is_constant = GET_CODE (op1) == CONST_INT;
3826 op1_is_pow2 = (op1_is_constant
3827 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3828 || (! unsignedp
3829 && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1))))));
3830 }
3831
3832 /* If one of the operands is a volatile MEM, copy it into a register. */
3833
3834 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
3835 op0 = force_reg (compute_mode, op0);
3836 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
3837 op1 = force_reg (compute_mode, op1);
3838
3839 /* If we need the remainder or if OP1 is constant, we need to
3840 put OP0 in a register in case it has any queued subexpressions. */
3841 if (rem_flag || op1_is_constant)
3842 op0 = force_reg (compute_mode, op0);
3843
3844 last = get_last_insn ();
3845
3846 /* Promote floor rounding to trunc rounding for unsigned operations. */
3847 if (unsignedp)
3848 {
3849 if (code == FLOOR_DIV_EXPR)
3850 code = TRUNC_DIV_EXPR;
3851 if (code == FLOOR_MOD_EXPR)
3852 code = TRUNC_MOD_EXPR;
3853 if (code == EXACT_DIV_EXPR && op1_is_pow2)
3854 code = TRUNC_DIV_EXPR;
3855 }
3856
3857 if (op1 != const0_rtx)
3858 switch (code)
3859 {
3860 case TRUNC_MOD_EXPR:
3861 case TRUNC_DIV_EXPR:
3862 if (op1_is_constant)
3863 {
3864 if (unsignedp)
3865 {
3866 unsigned HOST_WIDE_INT mh, ml;
3867 int pre_shift, post_shift;
3868 int dummy;
3869 unsigned HOST_WIDE_INT d = (INTVAL (op1)
3870 & GET_MODE_MASK (compute_mode));
3871
3872 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
3873 {
3874 pre_shift = floor_log2 (d);
3875 if (rem_flag)
3876 {
3877 remainder
3878 = expand_binop (compute_mode, and_optab, op0,
3879 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
3880 remainder, 1,
3881 OPTAB_LIB_WIDEN);
3882 if (remainder)
3883 return gen_lowpart (mode, remainder);
3884 }
3885 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
3886 build_int_cst (NULL_TREE,
3887 pre_shift),
3888 tquotient, 1);
3889 }
3890 else if (size <= HOST_BITS_PER_WIDE_INT)
3891 {
3892 if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
3893 {
3894 /* Most significant bit of divisor is set; emit an scc
3895 insn. */
3896 quotient = emit_store_flag (tquotient, GEU, op0, op1,
3897 compute_mode, 1, 1);
3898 if (quotient == 0)
3899 goto fail1;
3900 }
3901 else
3902 {
3903 /* Find a suitable multiplier and right shift count
3904 instead of multiplying with D. */
3905
3906 mh = choose_multiplier (d, size, size,
3907 &ml, &post_shift, &dummy);
3908
3909 /* If the suggested multiplier is more than SIZE bits,
3910 we can do better for even divisors, using an
3911 initial right shift. */
3912 if (mh != 0 && (d & 1) == 0)
3913 {
3914 pre_shift = floor_log2 (d & -d);
3915 mh = choose_multiplier (d >> pre_shift, size,
3916 size - pre_shift,
3917 &ml, &post_shift, &dummy);
3918 gcc_assert (!mh);
3919 }
3920 else
3921 pre_shift = 0;
3922
3923 if (mh != 0)
3924 {
3925 rtx t1, t2, t3, t4;
3926
3927 if (post_shift - 1 >= BITS_PER_WORD)
3928 goto fail1;
3929
3930 extra_cost
3931 = (shift_cost[compute_mode][post_shift - 1]
3932 + shift_cost[compute_mode][1]
3933 + 2 * add_cost[compute_mode]);
3934 t1 = expand_mult_highpart (compute_mode, op0, ml,
3935 NULL_RTX, 1,
3936 max_cost - extra_cost);
3937 if (t1 == 0)
3938 goto fail1;
3939 t2 = force_operand (gen_rtx_MINUS (compute_mode,
3940 op0, t1),
3941 NULL_RTX);
3942 t3 = expand_shift
3943 (RSHIFT_EXPR, compute_mode, t2,
3944 build_int_cst (NULL_TREE, 1),
3945 NULL_RTX, 1);
3946 t4 = force_operand (gen_rtx_PLUS (compute_mode,
3947 t1, t3),
3948 NULL_RTX);
3949 quotient = expand_shift
3950 (RSHIFT_EXPR, compute_mode, t4,
3951 build_int_cst (NULL_TREE, post_shift - 1),
3952 tquotient, 1);
3953 }
3954 else
3955 {
3956 rtx t1, t2;
3957
3958 if (pre_shift >= BITS_PER_WORD
3959 || post_shift >= BITS_PER_WORD)
3960 goto fail1;
3961
3962 t1 = expand_shift
3963 (RSHIFT_EXPR, compute_mode, op0,
3964 build_int_cst (NULL_TREE, pre_shift),
3965 NULL_RTX, 1);
3966 extra_cost
3967 = (shift_cost[compute_mode][pre_shift]
3968 + shift_cost[compute_mode][post_shift]);
3969 t2 = expand_mult_highpart (compute_mode, t1, ml,
3970 NULL_RTX, 1,
3971 max_cost - extra_cost);
3972 if (t2 == 0)
3973 goto fail1;
3974 quotient = expand_shift
3975 (RSHIFT_EXPR, compute_mode, t2,
3976 build_int_cst (NULL_TREE, post_shift),
3977 tquotient, 1);
3978 }
3979 }
3980 }
3981 else /* Too wide mode to use tricky code */
3982 break;
3983
3984 insn = get_last_insn ();
3985 if (insn != last
3986 && (set = single_set (insn)) != 0
3987 && SET_DEST (set) == quotient)
3988 set_unique_reg_note (insn,
3989 REG_EQUAL,
3990 gen_rtx_UDIV (compute_mode, op0, op1));
3991 }
3992 else /* TRUNC_DIV, signed */
3993 {
3994 unsigned HOST_WIDE_INT ml;
3995 int lgup, post_shift;
3996 HOST_WIDE_INT d = INTVAL (op1);
3997 unsigned HOST_WIDE_INT abs_d = d >= 0 ? d : -d;
3998
3999 /* n rem d = n rem -d */
4000 if (rem_flag && d < 0)
4001 {
4002 d = abs_d;
4003 op1 = gen_int_mode (abs_d, compute_mode);
4004 }
4005
4006 if (d == 1)
4007 quotient = op0;
4008 else if (d == -1)
4009 quotient = expand_unop (compute_mode, neg_optab, op0,
4010 tquotient, 0);
4011 else if (abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4012 {
4013 /* This case is not handled correctly below. */
4014 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4015 compute_mode, 1, 1);
4016 if (quotient == 0)
4017 goto fail1;
4018 }
4019 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4020 && (rem_flag ? smod_pow2_cheap[compute_mode]
4021 : sdiv_pow2_cheap[compute_mode])
4022 /* We assume that the cheap metric is true if the
4023 optab has an expander for this mode. */
4024 && (((rem_flag ? smod_optab : sdiv_optab)
4025 ->handlers[compute_mode].insn_code
4026 != CODE_FOR_nothing)
4027 || (sdivmod_optab->handlers[compute_mode]
4028 .insn_code != CODE_FOR_nothing)))
4029 ;
4030 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4031 {
4032 if (rem_flag)
4033 {
4034 remainder = expand_smod_pow2 (compute_mode, op0, d);
4035 if (remainder)
4036 return gen_lowpart (mode, remainder);
4037 }
4038
4039 if (sdiv_pow2_cheap[compute_mode]
4040 && ((sdiv_optab->handlers[compute_mode].insn_code
4041 != CODE_FOR_nothing)
4042 || (sdivmod_optab->handlers[compute_mode].insn_code
4043 != CODE_FOR_nothing)))
4044 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4045 compute_mode, op0,
4046 gen_int_mode (abs_d,
4047 compute_mode),
4048 NULL_RTX, 0);
4049 else
4050 quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4051
4052 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4053 negate the quotient. */
4054 if (d < 0)
4055 {
4056 insn = get_last_insn ();
4057 if (insn != last
4058 && (set = single_set (insn)) != 0
4059 && SET_DEST (set) == quotient
4060 && abs_d < ((unsigned HOST_WIDE_INT) 1
4061 << (HOST_BITS_PER_WIDE_INT - 1)))
4062 set_unique_reg_note (insn,
4063 REG_EQUAL,
4064 gen_rtx_DIV (compute_mode,
4065 op0,
4066 GEN_INT
4067 (trunc_int_for_mode
4068 (abs_d,
4069 compute_mode))));
4070
4071 quotient = expand_unop (compute_mode, neg_optab,
4072 quotient, quotient, 0);
4073 }
4074 }
4075 else if (size <= HOST_BITS_PER_WIDE_INT)
4076 {
4077 choose_multiplier (abs_d, size, size - 1,
4078 &ml, &post_shift, &lgup);
4079 if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4080 {
4081 rtx t1, t2, t3;
4082
4083 if (post_shift >= BITS_PER_WORD
4084 || size - 1 >= BITS_PER_WORD)
4085 goto fail1;
4086
4087 extra_cost = (shift_cost[compute_mode][post_shift]
4088 + shift_cost[compute_mode][size - 1]
4089 + add_cost[compute_mode]);
4090 t1 = expand_mult_highpart (compute_mode, op0, ml,
4091 NULL_RTX, 0,
4092 max_cost - extra_cost);
4093 if (t1 == 0)
4094 goto fail1;
4095 t2 = expand_shift
4096 (RSHIFT_EXPR, compute_mode, t1,
4097 build_int_cst (NULL_TREE, post_shift),
4098 NULL_RTX, 0);
4099 t3 = expand_shift
4100 (RSHIFT_EXPR, compute_mode, op0,
4101 build_int_cst (NULL_TREE, size - 1),
4102 NULL_RTX, 0);
4103 if (d < 0)
4104 quotient
4105 = force_operand (gen_rtx_MINUS (compute_mode,
4106 t3, t2),
4107 tquotient);
4108 else
4109 quotient
4110 = force_operand (gen_rtx_MINUS (compute_mode,
4111 t2, t3),
4112 tquotient);
4113 }
4114 else
4115 {
4116 rtx t1, t2, t3, t4;
4117
4118 if (post_shift >= BITS_PER_WORD
4119 || size - 1 >= BITS_PER_WORD)
4120 goto fail1;
4121
4122 ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4123 extra_cost = (shift_cost[compute_mode][post_shift]
4124 + shift_cost[compute_mode][size - 1]
4125 + 2 * add_cost[compute_mode]);
4126 t1 = expand_mult_highpart (compute_mode, op0, ml,
4127 NULL_RTX, 0,
4128 max_cost - extra_cost);
4129 if (t1 == 0)
4130 goto fail1;
4131 t2 = force_operand (gen_rtx_PLUS (compute_mode,
4132 t1, op0),
4133 NULL_RTX);
4134 t3 = expand_shift
4135 (RSHIFT_EXPR, compute_mode, t2,
4136 build_int_cst (NULL_TREE, post_shift),
4137 NULL_RTX, 0);
4138 t4 = expand_shift
4139 (RSHIFT_EXPR, compute_mode, op0,
4140 build_int_cst (NULL_TREE, size - 1),
4141 NULL_RTX, 0);
4142 if (d < 0)
4143 quotient
4144 = force_operand (gen_rtx_MINUS (compute_mode,
4145 t4, t3),
4146 tquotient);
4147 else
4148 quotient
4149 = force_operand (gen_rtx_MINUS (compute_mode,
4150 t3, t4),
4151 tquotient);
4152 }
4153 }
4154 else /* Too wide mode to use tricky code */
4155 break;
4156
4157 insn = get_last_insn ();
4158 if (insn != last
4159 && (set = single_set (insn)) != 0
4160 && SET_DEST (set) == quotient)
4161 set_unique_reg_note (insn,
4162 REG_EQUAL,
4163 gen_rtx_DIV (compute_mode, op0, op1));
4164 }
4165 break;
4166 }
4167 fail1:
4168 delete_insns_since (last);
4169 break;
4170
4171 case FLOOR_DIV_EXPR:
4172 case FLOOR_MOD_EXPR:
4173 /* We will come here only for signed operations. */
4174 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4175 {
4176 unsigned HOST_WIDE_INT mh, ml;
4177 int pre_shift, lgup, post_shift;
4178 HOST_WIDE_INT d = INTVAL (op1);
4179
4180 if (d > 0)
4181 {
4182 /* We could just as easily deal with negative constants here,
4183 but it does not seem worth the trouble for GCC 2.6. */
4184 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4185 {
4186 pre_shift = floor_log2 (d);
4187 if (rem_flag)
4188 {
4189 remainder = expand_binop (compute_mode, and_optab, op0,
4190 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4191 remainder, 0, OPTAB_LIB_WIDEN);
4192 if (remainder)
4193 return gen_lowpart (mode, remainder);
4194 }
4195 quotient = expand_shift
4196 (RSHIFT_EXPR, compute_mode, op0,
4197 build_int_cst (NULL_TREE, pre_shift),
4198 tquotient, 0);
4199 }
4200 else
4201 {
4202 rtx t1, t2, t3, t4;
4203
4204 mh = choose_multiplier (d, size, size - 1,
4205 &ml, &post_shift, &lgup);
4206 gcc_assert (!mh);
4207
4208 if (post_shift < BITS_PER_WORD
4209 && size - 1 < BITS_PER_WORD)
4210 {
4211 t1 = expand_shift
4212 (RSHIFT_EXPR, compute_mode, op0,
4213 build_int_cst (NULL_TREE, size - 1),
4214 NULL_RTX, 0);
4215 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4216 NULL_RTX, 0, OPTAB_WIDEN);
4217 extra_cost = (shift_cost[compute_mode][post_shift]
4218 + shift_cost[compute_mode][size - 1]
4219 + 2 * add_cost[compute_mode]);
4220 t3 = expand_mult_highpart (compute_mode, t2, ml,
4221 NULL_RTX, 1,
4222 max_cost - extra_cost);
4223 if (t3 != 0)
4224 {
4225 t4 = expand_shift
4226 (RSHIFT_EXPR, compute_mode, t3,
4227 build_int_cst (NULL_TREE, post_shift),
4228 NULL_RTX, 1);
4229 quotient = expand_binop (compute_mode, xor_optab,
4230 t4, t1, tquotient, 0,
4231 OPTAB_WIDEN);
4232 }
4233 }
4234 }
4235 }
4236 else
4237 {
4238 rtx nsign, t1, t2, t3, t4;
4239 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4240 op0, constm1_rtx), NULL_RTX);
4241 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4242 0, OPTAB_WIDEN);
4243 nsign = expand_shift
4244 (RSHIFT_EXPR, compute_mode, t2,
4245 build_int_cst (NULL_TREE, size - 1),
4246 NULL_RTX, 0);
4247 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4248 NULL_RTX);
4249 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4250 NULL_RTX, 0);
4251 if (t4)
4252 {
4253 rtx t5;
4254 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4255 NULL_RTX, 0);
4256 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4257 t4, t5),
4258 tquotient);
4259 }
4260 }
4261 }
4262
4263 if (quotient != 0)
4264 break;
4265 delete_insns_since (last);
4266
4267 /* Try using an instruction that produces both the quotient and
4268 remainder, using truncation. We can easily compensate the quotient
4269 or remainder to get floor rounding, once we have the remainder.
4270 Notice that we compute also the final remainder value here,
4271 and return the result right away. */
4272 if (target == 0 || GET_MODE (target) != compute_mode)
4273 target = gen_reg_rtx (compute_mode);
4274
4275 if (rem_flag)
4276 {
4277 remainder
4278 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4279 quotient = gen_reg_rtx (compute_mode);
4280 }
4281 else
4282 {
4283 quotient
4284 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4285 remainder = gen_reg_rtx (compute_mode);
4286 }
4287
4288 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4289 quotient, remainder, 0))
4290 {
4291 /* This could be computed with a branch-less sequence.
4292 Save that for later. */
4293 rtx tem;
4294 rtx label = gen_label_rtx ();
4295 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4296 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4297 NULL_RTX, 0, OPTAB_WIDEN);
4298 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4299 expand_dec (quotient, const1_rtx);
4300 expand_inc (remainder, op1);
4301 emit_label (label);
4302 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4303 }
4304
4305 /* No luck with division elimination or divmod. Have to do it
4306 by conditionally adjusting op0 *and* the result. */
4307 {
4308 rtx label1, label2, label3, label4, label5;
4309 rtx adjusted_op0;
4310 rtx tem;
4311
4312 quotient = gen_reg_rtx (compute_mode);
4313 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4314 label1 = gen_label_rtx ();
4315 label2 = gen_label_rtx ();
4316 label3 = gen_label_rtx ();
4317 label4 = gen_label_rtx ();
4318 label5 = gen_label_rtx ();
4319 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4320 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4321 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4322 quotient, 0, OPTAB_LIB_WIDEN);
4323 if (tem != quotient)
4324 emit_move_insn (quotient, tem);
4325 emit_jump_insn (gen_jump (label5));
4326 emit_barrier ();
4327 emit_label (label1);
4328 expand_inc (adjusted_op0, const1_rtx);
4329 emit_jump_insn (gen_jump (label4));
4330 emit_barrier ();
4331 emit_label (label2);
4332 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4333 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4334 quotient, 0, OPTAB_LIB_WIDEN);
4335 if (tem != quotient)
4336 emit_move_insn (quotient, tem);
4337 emit_jump_insn (gen_jump (label5));
4338 emit_barrier ();
4339 emit_label (label3);
4340 expand_dec (adjusted_op0, const1_rtx);
4341 emit_label (label4);
4342 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4343 quotient, 0, OPTAB_LIB_WIDEN);
4344 if (tem != quotient)
4345 emit_move_insn (quotient, tem);
4346 expand_dec (quotient, const1_rtx);
4347 emit_label (label5);
4348 }
4349 break;
4350
4351 case CEIL_DIV_EXPR:
4352 case CEIL_MOD_EXPR:
4353 if (unsignedp)
4354 {
4355 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4356 {
4357 rtx t1, t2, t3;
4358 unsigned HOST_WIDE_INT d = INTVAL (op1);
4359 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4360 build_int_cst (NULL_TREE, floor_log2 (d)),
4361 tquotient, 1);
4362 t2 = expand_binop (compute_mode, and_optab, op0,
4363 GEN_INT (d - 1),
4364 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4365 t3 = gen_reg_rtx (compute_mode);
4366 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4367 compute_mode, 1, 1);
4368 if (t3 == 0)
4369 {
4370 rtx lab;
4371 lab = gen_label_rtx ();
4372 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4373 expand_inc (t1, const1_rtx);
4374 emit_label (lab);
4375 quotient = t1;
4376 }
4377 else
4378 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4379 t1, t3),
4380 tquotient);
4381 break;
4382 }
4383
4384 /* Try using an instruction that produces both the quotient and
4385 remainder, using truncation. We can easily compensate the
4386 quotient or remainder to get ceiling rounding, once we have the
4387 remainder. Notice that we compute also the final remainder
4388 value here, and return the result right away. */
4389 if (target == 0 || GET_MODE (target) != compute_mode)
4390 target = gen_reg_rtx (compute_mode);
4391
4392 if (rem_flag)
4393 {
4394 remainder = (REG_P (target)
4395 ? target : gen_reg_rtx (compute_mode));
4396 quotient = gen_reg_rtx (compute_mode);
4397 }
4398 else
4399 {
4400 quotient = (REG_P (target)
4401 ? target : gen_reg_rtx (compute_mode));
4402 remainder = gen_reg_rtx (compute_mode);
4403 }
4404
4405 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4406 remainder, 1))
4407 {
4408 /* This could be computed with a branch-less sequence.
4409 Save that for later. */
4410 rtx label = gen_label_rtx ();
4411 do_cmp_and_jump (remainder, const0_rtx, EQ,
4412 compute_mode, label);
4413 expand_inc (quotient, const1_rtx);
4414 expand_dec (remainder, op1);
4415 emit_label (label);
4416 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4417 }
4418
4419 /* No luck with division elimination or divmod. Have to do it
4420 by conditionally adjusting op0 *and* the result. */
4421 {
4422 rtx label1, label2;
4423 rtx adjusted_op0, tem;
4424
4425 quotient = gen_reg_rtx (compute_mode);
4426 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4427 label1 = gen_label_rtx ();
4428 label2 = gen_label_rtx ();
4429 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4430 compute_mode, label1);
4431 emit_move_insn (quotient, const0_rtx);
4432 emit_jump_insn (gen_jump (label2));
4433 emit_barrier ();
4434 emit_label (label1);
4435 expand_dec (adjusted_op0, const1_rtx);
4436 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4437 quotient, 1, OPTAB_LIB_WIDEN);
4438 if (tem != quotient)
4439 emit_move_insn (quotient, tem);
4440 expand_inc (quotient, const1_rtx);
4441 emit_label (label2);
4442 }
4443 }
4444 else /* signed */
4445 {
4446 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4447 && INTVAL (op1) >= 0)
4448 {
4449 /* This is extremely similar to the code for the unsigned case
4450 above. For 2.7 we should merge these variants, but for
4451 2.6.1 I don't want to touch the code for unsigned since that
4452 gets used in C. The signed case will only be used by other
4453 languages (Ada). */
4454
4455 rtx t1, t2, t3;
4456 unsigned HOST_WIDE_INT d = INTVAL (op1);
4457 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4458 build_int_cst (NULL_TREE, floor_log2 (d)),
4459 tquotient, 0);
4460 t2 = expand_binop (compute_mode, and_optab, op0,
4461 GEN_INT (d - 1),
4462 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4463 t3 = gen_reg_rtx (compute_mode);
4464 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4465 compute_mode, 1, 1);
4466 if (t3 == 0)
4467 {
4468 rtx lab;
4469 lab = gen_label_rtx ();
4470 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4471 expand_inc (t1, const1_rtx);
4472 emit_label (lab);
4473 quotient = t1;
4474 }
4475 else
4476 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4477 t1, t3),
4478 tquotient);
4479 break;
4480 }
4481
4482 /* Try using an instruction that produces both the quotient and
4483 remainder, using truncation. We can easily compensate the
4484 quotient or remainder to get ceiling rounding, once we have the
4485 remainder. Notice that we compute also the final remainder
4486 value here, and return the result right away. */
4487 if (target == 0 || GET_MODE (target) != compute_mode)
4488 target = gen_reg_rtx (compute_mode);
4489 if (rem_flag)
4490 {
4491 remainder = (REG_P (target)
4492 ? target : gen_reg_rtx (compute_mode));
4493 quotient = gen_reg_rtx (compute_mode);
4494 }
4495 else
4496 {
4497 quotient = (REG_P (target)
4498 ? target : gen_reg_rtx (compute_mode));
4499 remainder = gen_reg_rtx (compute_mode);
4500 }
4501
4502 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4503 remainder, 0))
4504 {
4505 /* This could be computed with a branch-less sequence.
4506 Save that for later. */
4507 rtx tem;
4508 rtx label = gen_label_rtx ();
4509 do_cmp_and_jump (remainder, const0_rtx, EQ,
4510 compute_mode, label);
4511 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4512 NULL_RTX, 0, OPTAB_WIDEN);
4513 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4514 expand_inc (quotient, const1_rtx);
4515 expand_dec (remainder, op1);
4516 emit_label (label);
4517 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4518 }
4519
4520 /* No luck with division elimination or divmod. Have to do it
4521 by conditionally adjusting op0 *and* the result. */
4522 {
4523 rtx label1, label2, label3, label4, label5;
4524 rtx adjusted_op0;
4525 rtx tem;
4526
4527 quotient = gen_reg_rtx (compute_mode);
4528 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4529 label1 = gen_label_rtx ();
4530 label2 = gen_label_rtx ();
4531 label3 = gen_label_rtx ();
4532 label4 = gen_label_rtx ();
4533 label5 = gen_label_rtx ();
4534 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4535 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4536 compute_mode, label1);
4537 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4538 quotient, 0, OPTAB_LIB_WIDEN);
4539 if (tem != quotient)
4540 emit_move_insn (quotient, tem);
4541 emit_jump_insn (gen_jump (label5));
4542 emit_barrier ();
4543 emit_label (label1);
4544 expand_dec (adjusted_op0, const1_rtx);
4545 emit_jump_insn (gen_jump (label4));
4546 emit_barrier ();
4547 emit_label (label2);
4548 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4549 compute_mode, label3);
4550 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4551 quotient, 0, OPTAB_LIB_WIDEN);
4552 if (tem != quotient)
4553 emit_move_insn (quotient, tem);
4554 emit_jump_insn (gen_jump (label5));
4555 emit_barrier ();
4556 emit_label (label3);
4557 expand_inc (adjusted_op0, const1_rtx);
4558 emit_label (label4);
4559 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4560 quotient, 0, OPTAB_LIB_WIDEN);
4561 if (tem != quotient)
4562 emit_move_insn (quotient, tem);
4563 expand_inc (quotient, const1_rtx);
4564 emit_label (label5);
4565 }
4566 }
4567 break;
4568
4569 case EXACT_DIV_EXPR:
4570 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4571 {
4572 HOST_WIDE_INT d = INTVAL (op1);
4573 unsigned HOST_WIDE_INT ml;
4574 int pre_shift;
4575 rtx t1;
4576
4577 pre_shift = floor_log2 (d & -d);
4578 ml = invert_mod2n (d >> pre_shift, size);
4579 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4580 build_int_cst (NULL_TREE, pre_shift),
4581 NULL_RTX, unsignedp);
4582 quotient = expand_mult (compute_mode, t1,
4583 gen_int_mode (ml, compute_mode),
4584 NULL_RTX, 1);
4585
4586 insn = get_last_insn ();
4587 set_unique_reg_note (insn,
4588 REG_EQUAL,
4589 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4590 compute_mode,
4591 op0, op1));
4592 }
4593 break;
4594
4595 case ROUND_DIV_EXPR:
4596 case ROUND_MOD_EXPR:
4597 if (unsignedp)
4598 {
4599 rtx tem;
4600 rtx label;
4601 label = gen_label_rtx ();
4602 quotient = gen_reg_rtx (compute_mode);
4603 remainder = gen_reg_rtx (compute_mode);
4604 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4605 {
4606 rtx tem;
4607 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4608 quotient, 1, OPTAB_LIB_WIDEN);
4609 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4610 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4611 remainder, 1, OPTAB_LIB_WIDEN);
4612 }
4613 tem = plus_constant (op1, -1);
4614 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4615 build_int_cst (NULL_TREE, 1),
4616 NULL_RTX, 1);
4617 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4618 expand_inc (quotient, const1_rtx);
4619 expand_dec (remainder, op1);
4620 emit_label (label);
4621 }
4622 else
4623 {
4624 rtx abs_rem, abs_op1, tem, mask;
4625 rtx label;
4626 label = gen_label_rtx ();
4627 quotient = gen_reg_rtx (compute_mode);
4628 remainder = gen_reg_rtx (compute_mode);
4629 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4630 {
4631 rtx tem;
4632 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4633 quotient, 0, OPTAB_LIB_WIDEN);
4634 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4635 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4636 remainder, 0, OPTAB_LIB_WIDEN);
4637 }
4638 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4639 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4640 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4641 build_int_cst (NULL_TREE, 1),
4642 NULL_RTX, 1);
4643 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4644 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4645 NULL_RTX, 0, OPTAB_WIDEN);
4646 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4647 build_int_cst (NULL_TREE, size - 1),
4648 NULL_RTX, 0);
4649 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4650 NULL_RTX, 0, OPTAB_WIDEN);
4651 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4652 NULL_RTX, 0, OPTAB_WIDEN);
4653 expand_inc (quotient, tem);
4654 tem = expand_binop (compute_mode, xor_optab, mask, op1,
4655 NULL_RTX, 0, OPTAB_WIDEN);
4656 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4657 NULL_RTX, 0, OPTAB_WIDEN);
4658 expand_dec (remainder, tem);
4659 emit_label (label);
4660 }
4661 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4662
4663 default:
4664 gcc_unreachable ();
4665 }
4666
4667 if (quotient == 0)
4668 {
4669 if (target && GET_MODE (target) != compute_mode)
4670 target = 0;
4671
4672 if (rem_flag)
4673 {
4674 /* Try to produce the remainder without producing the quotient.
4675 If we seem to have a divmod pattern that does not require widening,
4676 don't try widening here. We should really have a WIDEN argument
4677 to expand_twoval_binop, since what we'd really like to do here is
4678 1) try a mod insn in compute_mode
4679 2) try a divmod insn in compute_mode
4680 3) try a div insn in compute_mode and multiply-subtract to get
4681 remainder
4682 4) try the same things with widening allowed. */
4683 remainder
4684 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4685 op0, op1, target,
4686 unsignedp,
4687 ((optab2->handlers[compute_mode].insn_code
4688 != CODE_FOR_nothing)
4689 ? OPTAB_DIRECT : OPTAB_WIDEN));
4690 if (remainder == 0)
4691 {
4692 /* No luck there. Can we do remainder and divide at once
4693 without a library call? */
4694 remainder = gen_reg_rtx (compute_mode);
4695 if (! expand_twoval_binop ((unsignedp
4696 ? udivmod_optab
4697 : sdivmod_optab),
4698 op0, op1,
4699 NULL_RTX, remainder, unsignedp))
4700 remainder = 0;
4701 }
4702
4703 if (remainder)
4704 return gen_lowpart (mode, remainder);
4705 }
4706
4707 /* Produce the quotient. Try a quotient insn, but not a library call.
4708 If we have a divmod in this mode, use it in preference to widening
4709 the div (for this test we assume it will not fail). Note that optab2
4710 is set to the one of the two optabs that the call below will use. */
4711 quotient
4712 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4713 op0, op1, rem_flag ? NULL_RTX : target,
4714 unsignedp,
4715 ((optab2->handlers[compute_mode].insn_code
4716 != CODE_FOR_nothing)
4717 ? OPTAB_DIRECT : OPTAB_WIDEN));
4718
4719 if (quotient == 0)
4720 {
4721 /* No luck there. Try a quotient-and-remainder insn,
4722 keeping the quotient alone. */
4723 quotient = gen_reg_rtx (compute_mode);
4724 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4725 op0, op1,
4726 quotient, NULL_RTX, unsignedp))
4727 {
4728 quotient = 0;
4729 if (! rem_flag)
4730 /* Still no luck. If we are not computing the remainder,
4731 use a library call for the quotient. */
4732 quotient = sign_expand_binop (compute_mode,
4733 udiv_optab, sdiv_optab,
4734 op0, op1, target,
4735 unsignedp, OPTAB_LIB_WIDEN);
4736 }
4737 }
4738 }
4739
4740 if (rem_flag)
4741 {
4742 if (target && GET_MODE (target) != compute_mode)
4743 target = 0;
4744
4745 if (quotient == 0)
4746 {
4747 /* No divide instruction either. Use library for remainder. */
4748 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4749 op0, op1, target,
4750 unsignedp, OPTAB_LIB_WIDEN);
4751 /* No remainder function. Try a quotient-and-remainder
4752 function, keeping the remainder. */
4753 if (!remainder)
4754 {
4755 remainder = gen_reg_rtx (compute_mode);
4756 if (!expand_twoval_binop_libfunc
4757 (unsignedp ? udivmod_optab : sdivmod_optab,
4758 op0, op1,
4759 NULL_RTX, remainder,
4760 unsignedp ? UMOD : MOD))
4761 remainder = NULL_RTX;
4762 }
4763 }
4764 else
4765 {
4766 /* We divided. Now finish doing X - Y * (X / Y). */
4767 remainder = expand_mult (compute_mode, quotient, op1,
4768 NULL_RTX, unsignedp);
4769 remainder = expand_binop (compute_mode, sub_optab, op0,
4770 remainder, target, unsignedp,
4771 OPTAB_LIB_WIDEN);
4772 }
4773 }
4774
4775 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4776 }
4777 \f
4778 /* Return a tree node with data type TYPE, describing the value of X.
4779 Usually this is a VAR_DECL, if there is no obvious better choice.
4780 X may be an expression; however, we only support those expressions
4781 generated by loop.c. */
4782
4783 tree
4784 make_tree (tree type, rtx x)
4785 {
4786 tree t;
4787
4788 switch (GET_CODE (x))
4789 {
4790 case CONST_INT:
4791 {
4792 HOST_WIDE_INT hi = 0;
4793
4794 if (INTVAL (x) < 0
4795 && !(TYPE_UNSIGNED (type)
4796 && (GET_MODE_BITSIZE (TYPE_MODE (type))
4797 < HOST_BITS_PER_WIDE_INT)))
4798 hi = -1;
4799
4800 t = build_int_cst_wide (type, INTVAL (x), hi);
4801
4802 return t;
4803 }
4804
4805 case CONST_DOUBLE:
4806 if (GET_MODE (x) == VOIDmode)
4807 t = build_int_cst_wide (type,
4808 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4809 else
4810 {
4811 REAL_VALUE_TYPE d;
4812
4813 REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4814 t = build_real (type, d);
4815 }
4816
4817 return t;
4818
4819 case CONST_VECTOR:
4820 {
4821 int i, units;
4822 rtx elt;
4823 tree t = NULL_TREE;
4824
4825 units = CONST_VECTOR_NUNITS (x);
4826
4827 /* Build a tree with vector elements. */
4828 for (i = units - 1; i >= 0; --i)
4829 {
4830 elt = CONST_VECTOR_ELT (x, i);
4831 t = tree_cons (NULL_TREE, make_tree (type, elt), t);
4832 }
4833
4834 return build_vector (type, t);
4835 }
4836
4837 case PLUS:
4838 return fold (build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4839 make_tree (type, XEXP (x, 1))));
4840
4841 case MINUS:
4842 return fold (build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4843 make_tree (type, XEXP (x, 1))));
4844
4845 case NEG:
4846 return fold (build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0))));
4847
4848 case MULT:
4849 return fold (build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
4850 make_tree (type, XEXP (x, 1))));
4851
4852 case ASHIFT:
4853 return fold (build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
4854 make_tree (type, XEXP (x, 1))));
4855
4856 case LSHIFTRT:
4857 t = lang_hooks.types.unsigned_type (type);
4858 return fold (convert (type,
4859 build2 (RSHIFT_EXPR, t,
4860 make_tree (t, XEXP (x, 0)),
4861 make_tree (type, XEXP (x, 1)))));
4862
4863 case ASHIFTRT:
4864 t = lang_hooks.types.signed_type (type);
4865 return fold (convert (type,
4866 build2 (RSHIFT_EXPR, t,
4867 make_tree (t, XEXP (x, 0)),
4868 make_tree (type, XEXP (x, 1)))));
4869
4870 case DIV:
4871 if (TREE_CODE (type) != REAL_TYPE)
4872 t = lang_hooks.types.signed_type (type);
4873 else
4874 t = type;
4875
4876 return fold (convert (type,
4877 build2 (TRUNC_DIV_EXPR, t,
4878 make_tree (t, XEXP (x, 0)),
4879 make_tree (t, XEXP (x, 1)))));
4880 case UDIV:
4881 t = lang_hooks.types.unsigned_type (type);
4882 return fold (convert (type,
4883 build2 (TRUNC_DIV_EXPR, t,
4884 make_tree (t, XEXP (x, 0)),
4885 make_tree (t, XEXP (x, 1)))));
4886
4887 case SIGN_EXTEND:
4888 case ZERO_EXTEND:
4889 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
4890 GET_CODE (x) == ZERO_EXTEND);
4891 return fold (convert (type, make_tree (t, XEXP (x, 0))));
4892
4893 default:
4894 t = build_decl (VAR_DECL, NULL_TREE, type);
4895
4896 /* If TYPE is a POINTER_TYPE, X might be Pmode with TYPE_MODE being
4897 ptr_mode. So convert. */
4898 if (POINTER_TYPE_P (type))
4899 x = convert_memory_address (TYPE_MODE (type), x);
4900
4901 /* Note that we do *not* use SET_DECL_RTL here, because we do not
4902 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
4903 t->decl.rtl = x;
4904
4905 return t;
4906 }
4907 }
4908
4909 /* Check whether the multiplication X * MULT + ADD overflows.
4910 X, MULT and ADD must be CONST_*.
4911 MODE is the machine mode for the computation.
4912 X and MULT must have mode MODE. ADD may have a different mode.
4913 So can X (defaults to same as MODE).
4914 UNSIGNEDP is nonzero to do unsigned multiplication. */
4915
4916 bool
4917 const_mult_add_overflow_p (rtx x, rtx mult, rtx add,
4918 enum machine_mode mode, int unsignedp)
4919 {
4920 tree type, mult_type, add_type, result;
4921
4922 type = lang_hooks.types.type_for_mode (mode, unsignedp);
4923
4924 /* In order to get a proper overflow indication from an unsigned
4925 type, we have to pretend that it's a sizetype. */
4926 mult_type = type;
4927 if (unsignedp)
4928 {
4929 /* FIXME: It would be nice if we could step directly from this
4930 type to its sizetype equivalent. */
4931 mult_type = build_distinct_type_copy (type);
4932 TYPE_IS_SIZETYPE (mult_type) = 1;
4933 }
4934
4935 add_type = (GET_MODE (add) == VOIDmode ? mult_type
4936 : lang_hooks.types.type_for_mode (GET_MODE (add), unsignedp));
4937
4938 result = fold (build2 (PLUS_EXPR, mult_type,
4939 fold (build2 (MULT_EXPR, mult_type,
4940 make_tree (mult_type, x),
4941 make_tree (mult_type, mult))),
4942 make_tree (add_type, add)));
4943
4944 return TREE_CONSTANT_OVERFLOW (result);
4945 }
4946
4947 /* Return an rtx representing the value of X * MULT + ADD.
4948 TARGET is a suggestion for where to store the result (an rtx).
4949 MODE is the machine mode for the computation.
4950 X and MULT must have mode MODE. ADD may have a different mode.
4951 So can X (defaults to same as MODE).
4952 UNSIGNEDP is nonzero to do unsigned multiplication.
4953 This may emit insns. */
4954
4955 rtx
4956 expand_mult_add (rtx x, rtx target, rtx mult, rtx add, enum machine_mode mode,
4957 int unsignedp)
4958 {
4959 tree type = lang_hooks.types.type_for_mode (mode, unsignedp);
4960 tree add_type = (GET_MODE (add) == VOIDmode
4961 ? type: lang_hooks.types.type_for_mode (GET_MODE (add),
4962 unsignedp));
4963 tree result = fold (build2 (PLUS_EXPR, type,
4964 fold (build2 (MULT_EXPR, type,
4965 make_tree (type, x),
4966 make_tree (type, mult))),
4967 make_tree (add_type, add)));
4968
4969 return expand_expr (result, target, VOIDmode, 0);
4970 }
4971 \f
4972 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
4973 and returning TARGET.
4974
4975 If TARGET is 0, a pseudo-register or constant is returned. */
4976
4977 rtx
4978 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
4979 {
4980 rtx tem = 0;
4981
4982 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
4983 tem = simplify_binary_operation (AND, mode, op0, op1);
4984 if (tem == 0)
4985 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
4986
4987 if (target == 0)
4988 target = tem;
4989 else if (tem != target)
4990 emit_move_insn (target, tem);
4991 return target;
4992 }
4993 \f
4994 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
4995 and storing in TARGET. Normally return TARGET.
4996 Return 0 if that cannot be done.
4997
4998 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
4999 it is VOIDmode, they cannot both be CONST_INT.
5000
5001 UNSIGNEDP is for the case where we have to widen the operands
5002 to perform the operation. It says to use zero-extension.
5003
5004 NORMALIZEP is 1 if we should convert the result to be either zero
5005 or one. NORMALIZEP is -1 if we should convert the result to be
5006 either zero or -1. If NORMALIZEP is zero, the result will be left
5007 "raw" out of the scc insn. */
5008
5009 rtx
5010 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5011 enum machine_mode mode, int unsignedp, int normalizep)
5012 {
5013 rtx subtarget;
5014 enum insn_code icode;
5015 enum machine_mode compare_mode;
5016 enum machine_mode target_mode = GET_MODE (target);
5017 rtx tem;
5018 rtx last = get_last_insn ();
5019 rtx pattern, comparison;
5020
5021 if (unsignedp)
5022 code = unsigned_condition (code);
5023
5024 /* If one operand is constant, make it the second one. Only do this
5025 if the other operand is not constant as well. */
5026
5027 if (swap_commutative_operands_p (op0, op1))
5028 {
5029 tem = op0;
5030 op0 = op1;
5031 op1 = tem;
5032 code = swap_condition (code);
5033 }
5034
5035 if (mode == VOIDmode)
5036 mode = GET_MODE (op0);
5037
5038 /* For some comparisons with 1 and -1, we can convert this to
5039 comparisons with zero. This will often produce more opportunities for
5040 store-flag insns. */
5041
5042 switch (code)
5043 {
5044 case LT:
5045 if (op1 == const1_rtx)
5046 op1 = const0_rtx, code = LE;
5047 break;
5048 case LE:
5049 if (op1 == constm1_rtx)
5050 op1 = const0_rtx, code = LT;
5051 break;
5052 case GE:
5053 if (op1 == const1_rtx)
5054 op1 = const0_rtx, code = GT;
5055 break;
5056 case GT:
5057 if (op1 == constm1_rtx)
5058 op1 = const0_rtx, code = GE;
5059 break;
5060 case GEU:
5061 if (op1 == const1_rtx)
5062 op1 = const0_rtx, code = NE;
5063 break;
5064 case LTU:
5065 if (op1 == const1_rtx)
5066 op1 = const0_rtx, code = EQ;
5067 break;
5068 default:
5069 break;
5070 }
5071
5072 /* If we are comparing a double-word integer with zero or -1, we can
5073 convert the comparison into one involving a single word. */
5074 if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5075 && GET_MODE_CLASS (mode) == MODE_INT
5076 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5077 {
5078 if ((code == EQ || code == NE)
5079 && (op1 == const0_rtx || op1 == constm1_rtx))
5080 {
5081 rtx op00, op01, op0both;
5082
5083 /* Do a logical OR or AND of the two words and compare the result. */
5084 op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5085 op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5086 op0both = expand_binop (word_mode,
5087 op1 == const0_rtx ? ior_optab : and_optab,
5088 op00, op01, NULL_RTX, unsignedp, OPTAB_DIRECT);
5089
5090 if (op0both != 0)
5091 return emit_store_flag (target, code, op0both, op1, word_mode,
5092 unsignedp, normalizep);
5093 }
5094 else if ((code == LT || code == GE) && op1 == const0_rtx)
5095 {
5096 rtx op0h;
5097
5098 /* If testing the sign bit, we can just test the high word. */
5099 op0h = simplify_gen_subreg (word_mode, op0, mode,
5100 subreg_highpart_offset (word_mode, mode));
5101 return emit_store_flag (target, code, op0h, op1, word_mode,
5102 unsignedp, normalizep);
5103 }
5104 }
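      /* A sketch of the word-level identities relied on above, assuming a
         hypothetical 32-bit word and a double-word value X split into
         X.low and X.high:
           X == 0    <=>  (X.low | X.high) == 0
           X == -1   <=>  (X.low & X.high) == -1
           X <  0    <=>  X.high < 0
         so a single-word IOR, AND, or sign test replaces the double-word
         comparison.  */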
5105
5106 /* From now on, we won't change CODE, so set ICODE now. */
5107 icode = setcc_gen_code[(int) code];
5108
5109 /* If this is A < 0 or A >= 0, we can do this by taking the one's
5110 complement of A (for GE) and shifting the sign bit to the low bit. */
5111 if (op1 == const0_rtx && (code == LT || code == GE)
5112 && GET_MODE_CLASS (mode) == MODE_INT
5113 && (normalizep || STORE_FLAG_VALUE == 1
5114 || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5115 && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5116 == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))))
5117 {
5118 subtarget = target;
5119
5120 /* If the result is to be wider than OP0, it is best to convert it
5121 first. If it is to be narrower, it is *incorrect* to convert it
5122 first. */
5123 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5124 {
5125 op0 = convert_modes (target_mode, mode, op0, 0);
5126 mode = target_mode;
5127 }
5128
5129 if (target_mode != mode)
5130 subtarget = 0;
5131
5132 if (code == GE)
5133 op0 = expand_unop (mode, one_cmpl_optab, op0,
5134 ((STORE_FLAG_VALUE == 1 || normalizep)
5135 ? 0 : subtarget), 0);
5136
5137 if (STORE_FLAG_VALUE == 1 || normalizep)
5138 /* If we are supposed to produce a 0/1 value, we want to do
5139 a logical shift from the sign bit to the low-order bit; for
5140 a -1/0 value, we do an arithmetic shift. */
5141 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5142 size_int (GET_MODE_BITSIZE (mode) - 1),
5143 subtarget, normalizep != -1);
5144
5145 if (mode != target_mode)
5146 op0 = convert_modes (target_mode, mode, op0, 0);
5147
5148 return op0;
5149 }
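      /* In C terms the transformation above amounts to the following,
         assuming a 32-bit mode and a 0/1 result (normalizep == 1):
           A <  0   ==>   (unsigned) A >> 31      (logical shift of A)
           A >= 0   ==>   (unsigned) ~A >> 31     (logical shift of ~A)
         For a 0/-1 result the shift is arithmetic instead, so the sign
         bit fills the whole word.  */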
5150
5151 if (icode != CODE_FOR_nothing)
5152 {
5153 insn_operand_predicate_fn pred;
5154
5155 /* We think we may be able to do this with a scc insn. Emit the
5156 comparison and then the scc insn. */
5157
5158 do_pending_stack_adjust ();
5159 last = get_last_insn ();
5160
5161 comparison
5162 = compare_from_rtx (op0, op1, code, unsignedp, mode, NULL_RTX);
5163 if (CONSTANT_P (comparison))
5164 {
5165 switch (GET_CODE (comparison))
5166 {
5167 case CONST_INT:
5168 if (comparison == const0_rtx)
5169 return const0_rtx;
5170 break;
5171
5172 #ifdef FLOAT_STORE_FLAG_VALUE
5173 case CONST_DOUBLE:
5174 if (comparison == CONST0_RTX (GET_MODE (comparison)))
5175 return const0_rtx;
5176 break;
5177 #endif
5178 default:
5179 gcc_unreachable ();
5180 }
5181
5182 if (normalizep == 1)
5183 return const1_rtx;
5184 if (normalizep == -1)
5185 return constm1_rtx;
5186 return const_true_rtx;
5187 }
5188
5189 /* The code of COMPARISON may not match CODE if compare_from_rtx
5190 decided to swap its operands and reverse the original code.
5191
5192 We know that compare_from_rtx returns either a CONST_INT or
5193 a new comparison code, so it is safe to just extract the
5194 code from COMPARISON. */
5195 code = GET_CODE (comparison);
5196
5197 /* Get a reference to the target in the proper mode for this insn. */
5198 compare_mode = insn_data[(int) icode].operand[0].mode;
5199 subtarget = target;
5200 pred = insn_data[(int) icode].operand[0].predicate;
5201 if (optimize || ! (*pred) (subtarget, compare_mode))
5202 subtarget = gen_reg_rtx (compare_mode);
5203
5204 pattern = GEN_FCN (icode) (subtarget);
5205 if (pattern)
5206 {
5207 emit_insn (pattern);
5208
5209 /* If we are converting to a wider mode, first convert to
5210 TARGET_MODE, then normalize. This produces better combining
5211 opportunities on machines that have a SIGN_EXTRACT when we are
5212 testing a single bit. This mostly benefits the 68k.
5213
5214 If STORE_FLAG_VALUE does not have the sign bit set when
5215 interpreted in COMPARE_MODE, we can do this conversion as
5216 unsigned, which is usually more efficient. */
5217 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (compare_mode))
5218 {
5219 convert_move (target, subtarget,
5220 (GET_MODE_BITSIZE (compare_mode)
5221 <= HOST_BITS_PER_WIDE_INT)
5222 && 0 == (STORE_FLAG_VALUE
5223 & ((HOST_WIDE_INT) 1
5224 << (GET_MODE_BITSIZE (compare_mode) -1))));
5225 op0 = target;
5226 compare_mode = target_mode;
5227 }
5228 else
5229 op0 = subtarget;
5230
5231 /* If we want to keep subexpressions around, don't reuse our
5232 last target. */
5233
5234 if (optimize)
5235 subtarget = 0;
5236
5237 /* Now normalize to the proper value in COMPARE_MODE. Sometimes
5238 we don't have to do anything. */
5239 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5240 ;
5241 /* STORE_FLAG_VALUE might be the most negative number, so write
5242 the comparison this way to avoid a compile-time warning. */
5243 else if (- normalizep == STORE_FLAG_VALUE)
5244 op0 = expand_unop (compare_mode, neg_optab, op0, subtarget, 0);
5245
5246 /* We don't want to use STORE_FLAG_VALUE < 0 below since this
5247 makes it hard to use a value of just the sign bit due to
5248 ANSI integer constant typing rules. */
5249 else if (GET_MODE_BITSIZE (compare_mode) <= HOST_BITS_PER_WIDE_INT
5250 && (STORE_FLAG_VALUE
5251 & ((HOST_WIDE_INT) 1
5252 << (GET_MODE_BITSIZE (compare_mode) - 1))))
5253 op0 = expand_shift (RSHIFT_EXPR, compare_mode, op0,
5254 size_int (GET_MODE_BITSIZE (compare_mode) - 1),
5255 subtarget, normalizep == 1);
5256 else
5257 {
5258 gcc_assert (STORE_FLAG_VALUE & 1);
5259
5260 op0 = expand_and (compare_mode, op0, const1_rtx, subtarget);
5261 if (normalizep == -1)
5262 op0 = expand_unop (compare_mode, neg_optab, op0, op0, 0);
5263 }
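              /* Some examples of the normalization above (hypothetical
                 values): with STORE_FLAG_VALUE == -1 and normalizep == 1,
                 the negation yields 0/1; with a STORE_FLAG_VALUE equal to
                 just the sign bit of a 32-bit COMPARE_MODE, the shift by 31
                 moves that bit to the bottom, logically for 0/1 and
                 arithmetically for 0/-1; for any other odd STORE_FLAG_VALUE
                 the AND with 1 keeps only the low bit, negated afterwards
                 if a 0/-1 result was requested.  */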
5264
5265 /* If we were converting to a smaller mode, do the
5266 conversion now. */
5267 if (target_mode != compare_mode)
5268 {
5269 convert_move (target, op0, 0);
5270 return target;
5271 }
5272 else
5273 return op0;
5274 }
5275 }
5276
5277 delete_insns_since (last);
5278
5279 /* If optimizing, use different pseudo registers for each insn, instead
5280 of reusing the same pseudo. This leads to better CSE, but slows
5281 down the compiler, since there are more pseudos. */
5282 subtarget = (!optimize
5283 && (target_mode == mode)) ? target : NULL_RTX;
5284
5285 /* If we reached here, we can't do this with a scc insn. However, there
5286 are some comparisons that can be done directly. For example, if
5287 this is an equality comparison of integers, we can try to exclusive-or
5288 (or subtract) the two operands and use a recursive call to try the
5289 comparison with zero. Don't do any of these cases if branches are
5290 very cheap. */
5291
5292 if (BRANCH_COST > 0
5293 && GET_MODE_CLASS (mode) == MODE_INT && (code == EQ || code == NE)
5294 && op1 != const0_rtx)
5295 {
5296 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5297 OPTAB_WIDEN);
5298
5299 if (tem == 0)
5300 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5301 OPTAB_WIDEN);
5302 if (tem != 0)
5303 tem = emit_store_flag (target, code, tem, const0_rtx,
5304 mode, unsignedp, normalizep);
5305 if (tem == 0)
5306 delete_insns_since (last);
5307 return tem;
5308 }
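      /* The identity used above: A == B exactly when (A ^ B) == 0, and,
         with wrap-around subtraction, exactly when (A - B) == 0, so either
         form reduces a general equality test to a comparison against
         zero.  */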
5309
5310 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5311 the constant zero. Reject all other comparisons at this point. Only
5312 do LE and GT if branches are expensive since they are expensive on
5313 2-operand machines. */
5314
5315 if (BRANCH_COST == 0
5316 || GET_MODE_CLASS (mode) != MODE_INT || op1 != const0_rtx
5317 || (code != EQ && code != NE
5318 && (BRANCH_COST <= 1 || (code != LE && code != GT))))
5319 return 0;
5320
5321 /* See what we need to return. We can only return a 1, -1, or the
5322 sign bit. */
5323
5324 if (normalizep == 0)
5325 {
5326 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5327 normalizep = STORE_FLAG_VALUE;
5328
5329 else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5330 && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5331 == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
5332 ;
5333 else
5334 return 0;
5335 }
5336
5337 /* Try to put the result of the comparison in the sign bit. Assume we can't
5338 do the necessary operation below. */
5339
5340 tem = 0;
5341
5342 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5343 the sign bit set. */
5344
5345 if (code == LE)
5346 {
5347 /* This is destructive, so SUBTARGET can't be OP0. */
5348 if (rtx_equal_p (subtarget, op0))
5349 subtarget = 0;
5350
5351 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5352 OPTAB_WIDEN);
5353 if (tem)
5354 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5355 OPTAB_WIDEN);
5356 }
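      /* A worked example of the (A | (A - 1)) identity, assuming an 8-bit
         mode:
           A =  5:  A - 1 =  4,  A | (A - 1) = 00000101, sign clear  (A > 0)
           A =  0:  A - 1 = -1,  A | (A - 1) = 11111111, sign set    (A <= 0)
           A = -3:  A - 1 = -4,  A | (A - 1) = 11111101, sign set    (A <= 0)  */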
5357
5358 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5359 number of bits in the mode of OP0, minus one. */
5360
5361 if (code == GT)
5362 {
5363 if (rtx_equal_p (subtarget, op0))
5364 subtarget = 0;
5365
5366 tem = expand_shift (RSHIFT_EXPR, mode, op0,
5367 size_int (GET_MODE_BITSIZE (mode) - 1),
5368 subtarget, 0);
5369 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5370 OPTAB_WIDEN);
5371 }
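      /* A worked example of ((signed) A >> BITS) - A, assuming an 8-bit
         mode (BITS = 7):
           A =  5:  (5 >> 7) - 5  =  0 - 5 = -5,  sign bit set    (A > 0)
           A =  0:  (0 >> 7) - 0  =  0,           sign bit clear
           A = -3:  (-3 >> 7) + 3 = -1 + 3 =  2,  sign bit clear  */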
5372
5373 if (code == EQ || code == NE)
5374 {
5375 /* For EQ or NE, one way to do the comparison is to apply an operation
5376 that converts the operand into a positive number if it is nonzero
5377 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5378 for NE we negate. This puts the result in the sign bit. Then we
5379 normalize with a shift, if needed.
5380
5381 Two operations that can do the above actions are ABS and FFS, so try
5382 them. If that doesn't work, and MODE is smaller than a full word,
5383 we can use zero-extension to the wider mode (an unsigned conversion)
5384 as the operation. */
5385
5386 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5387 that is compensated by the subsequent overflow when subtracting
5388 one / negating. */
5389
5390 if (abs_optab->handlers[mode].insn_code != CODE_FOR_nothing)
5391 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5392 else if (ffs_optab->handlers[mode].insn_code != CODE_FOR_nothing)
5393 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5394 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5395 {
5396 tem = convert_modes (word_mode, mode, op0, 1);
5397 mode = word_mode;
5398 }
5399
5400 if (tem != 0)
5401 {
5402 if (code == EQ)
5403 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5404 0, OPTAB_WIDEN);
5405 else
5406 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5407 }
5408
5409 /* If we couldn't do it that way, for NE we can "or" the two's complement
5410 of the value with itself. For EQ, we take the one's complement of
5411 that "or", which is an extra insn, so we only handle EQ if branches
5412 are expensive. */
5413
5414 if (tem == 0 && (code == NE || BRANCH_COST > 1))
5415 {
5416 if (rtx_equal_p (subtarget, op0))
5417 subtarget = 0;
5418
5419 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5420 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5421 OPTAB_WIDEN);
5422
5423 if (tem && code == EQ)
5424 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5425 }
5426 }
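      /* Worked examples of the EQ/NE tricks above, assuming an 8-bit mode:
           A =  0:  abs (A) = 0;  EQ: 0 - 1 = -1 (sign set)    NE: -0 = 0 (sign clear)
           A =  5:  abs (A) = 5;  EQ: 5 - 1 =  4 (sign clear)  NE: -5     (sign set)
           A = -3:  abs (A) = 3;  EQ: 3 - 1 =  2 (sign clear)  NE: -3     (sign set)
         The INT_MIN case works out too: abs (-128) wraps to -128, so the
         EQ subtraction wraps to 127 (sign clear) and the NE negation stays
         -128 (sign set).  In the fallback, (-A) | A likewise has the sign
         bit set exactly when A is nonzero.  */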
5427
5428 if (tem && normalizep)
5429 tem = expand_shift (RSHIFT_EXPR, mode, tem,
5430 size_int (GET_MODE_BITSIZE (mode) - 1),
5431 subtarget, normalizep == 1);
5432
5433 if (tem)
5434 {
5435 if (GET_MODE (tem) != target_mode)
5436 {
5437 convert_move (target, tem, 0);
5438 tem = target;
5439 }
5440 else if (!subtarget)
5441 {
5442 emit_move_insn (target, tem);
5443 tem = target;
5444 }
5445 }
5446 else
5447 delete_insns_since (last);
5448
5449 return tem;
5450 }
5451
5452 /* Like emit_store_flag, but always succeeds. */
5453
5454 rtx
5455 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5456 enum machine_mode mode, int unsignedp, int normalizep)
5457 {
5458 rtx tem, label;
5459
5460 /* First see if emit_store_flag can do the job. */
5461 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5462 if (tem != 0)
5463 return tem;
5464
5465 if (normalizep == 0)
5466 normalizep = 1;
5467
5468 /* If this failed, we have to do this with set/compare/jump/set code. */
5469
5470 if (!REG_P (target)
5471 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5472 target = gen_reg_rtx (GET_MODE (target));
5473
5474 emit_move_insn (target, const1_rtx);
5475 label = gen_label_rtx ();
5476 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5477 NULL_RTX, label);
5478
5479 emit_move_insn (target, const0_rtx);
5480 emit_label (label);
5481
5482 return target;
5483 }
5484 \f
5485 /* Perform possibly multi-word comparison and conditional jump to LABEL
5486 if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE.
5487
5488 The algorithm is based on the code in expr.c:do_jump.
5489
5490 Note that this does not perform a general comparison. Only variants
5491 generated within expmed.c are correctly handled; others abort (but could
5492 be handled if needed). */
5493
5494 static void
5495 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5496 rtx label)
5497 {
5498 /* If this mode is an integer too wide to compare properly,
5499 compare word by word. Rely on cse to optimize constant cases. */
5500
5501 if (GET_MODE_CLASS (mode) == MODE_INT
5502 && ! can_compare_p (op, mode, ccp_jump))
5503 {
5504 rtx label2 = gen_label_rtx ();
5505
5506 switch (op)
5507 {
5508 case LTU:
5509 do_jump_by_parts_greater_rtx (mode, 1, arg2, arg1, label2, label);
5510 break;
5511
5512 case LEU:
5513 do_jump_by_parts_greater_rtx (mode, 1, arg1, arg2, label, label2);
5514 break;
5515
5516 case LT:
5517 do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label2, label);
5518 break;
5519
5520 case GT:
5521 do_jump_by_parts_greater_rtx (mode, 0, arg1, arg2, label2, label);
5522 break;
5523
5524 case GE:
5525 do_jump_by_parts_greater_rtx (mode, 0, arg2, arg1, label, label2);
5526 break;
5527
5528 /* do_jump_by_parts_equality_rtx compares with zero. Luckily
5529 those are the only equality operations we do. */
5530 case EQ:
5531 gcc_assert (arg2 == const0_rtx && mode == GET_MODE(arg1));
5532 do_jump_by_parts_equality_rtx (arg1, label2, label);
5533 break;
5534
5535 case NE:
5536 gcc_assert (arg2 == const0_rtx && mode == GET_MODE(arg1));
5537 do_jump_by_parts_equality_rtx (arg1, label, label2);
5538 break;
5539
5540 default:
5541 gcc_unreachable ();
5542 }
5543
5544 emit_label (label2);
5545 }
5546 else
5547 emit_cmp_and_jump_insns (arg1, arg2, op, NULL_RTX, mode, 0, label);
5548 }
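/* A sketch of the word-by-word strategy delegated to
   do_jump_by_parts_greater_rtx for a two-word unsigned "A < B", assuming
   hypothetical 32-bit words (a sketch only, not the literal
   implementation):
       if (A.high < B.high) goto label;      -- high words decide it
       if (A.high > B.high) goto label2;     -- definitely not less
       if (A.low  < B.low)  goto label;      -- high words equal: use low
     label2:                                 -- fall through: not taken  */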