gcc.gnu.org Git - gcc.git/blob - gcc/expmed.c
blob 4101f613f82ea29bfc919f09d2831974da1fbb91
1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
4 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
5 2011, 2012
6 Free Software Foundation, Inc.
7
8 This file is part of GCC.
9
10 GCC is free software; you can redistribute it and/or modify it under
11 the terms of the GNU General Public License as published by the Free
12 Software Foundation; either version 3, or (at your option) any later
13 version.
14
15 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING3. If not see
22 <http://www.gnu.org/licenses/>. */
23
24
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "tm.h"
29 #include "diagnostic-core.h"
30 #include "rtl.h"
31 #include "tree.h"
32 #include "tm_p.h"
33 #include "flags.h"
34 #include "insn-config.h"
35 #include "expr.h"
36 #include "optabs.h"
37 #include "recog.h"
38 #include "langhooks.h"
39 #include "df.h"
40 #include "target.h"
41 #include "expmed.h"
42
43 struct target_expmed default_target_expmed;
44 #if SWITCHABLE_TARGET
45 struct target_expmed *this_target_expmed = &default_target_expmed;
46 #endif
47
48 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
49 unsigned HOST_WIDE_INT,
50 unsigned HOST_WIDE_INT,
51 unsigned HOST_WIDE_INT,
52 unsigned HOST_WIDE_INT,
53 rtx);
54 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
55 unsigned HOST_WIDE_INT,
56 unsigned HOST_WIDE_INT,
57 unsigned HOST_WIDE_INT,
58 rtx);
59 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
60 unsigned HOST_WIDE_INT,
61 unsigned HOST_WIDE_INT,
62 unsigned HOST_WIDE_INT, rtx, int, bool);
63 static rtx mask_rtx (enum machine_mode, int, int, int);
64 static rtx lshift_value (enum machine_mode, rtx, int, int);
65 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
66 unsigned HOST_WIDE_INT, int);
67 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
68 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
69 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
70
 71 /* Test whether a value is zero or a power of two.  */
72 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
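/* For instance, (8 & 7) == 0 and (0 & -1) == 0, so 8 and 0 satisfy the
   test, while (6 & 5) == 4, so 6 does not.  */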
73
74 #ifndef SLOW_UNALIGNED_ACCESS
75 #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
76 #endif
77
78
79 /* Reduce conditional compilation elsewhere. */
80 #ifndef HAVE_insv
81 #define HAVE_insv 0
82 #define CODE_FOR_insv CODE_FOR_nothing
83 #define gen_insv(a,b,c,d) NULL_RTX
84 #endif
85 #ifndef HAVE_extv
86 #define HAVE_extv 0
87 #define CODE_FOR_extv CODE_FOR_nothing
88 #define gen_extv(a,b,c,d) NULL_RTX
89 #endif
90 #ifndef HAVE_extzv
91 #define HAVE_extzv 0
92 #define CODE_FOR_extzv CODE_FOR_nothing
93 #define gen_extzv(a,b,c,d) NULL_RTX
94 #endif
95
96 struct init_expmed_rtl
97 {
98 struct rtx_def reg; rtunion reg_fld[2];
99 struct rtx_def plus; rtunion plus_fld1;
100 struct rtx_def neg;
101 struct rtx_def mult; rtunion mult_fld1;
102 struct rtx_def sdiv; rtunion sdiv_fld1;
103 struct rtx_def udiv; rtunion udiv_fld1;
104 struct rtx_def zext;
105 struct rtx_def sdiv_32; rtunion sdiv_32_fld1;
106 struct rtx_def smod_32; rtunion smod_32_fld1;
107 struct rtx_def wide_mult; rtunion wide_mult_fld1;
108 struct rtx_def wide_lshr; rtunion wide_lshr_fld1;
109 struct rtx_def wide_trunc;
110 struct rtx_def shift; rtunion shift_fld1;
111 struct rtx_def shift_mult; rtunion shift_mult_fld1;
112 struct rtx_def shift_add; rtunion shift_add_fld1;
113 struct rtx_def shift_sub0; rtunion shift_sub0_fld1;
114 struct rtx_def shift_sub1; rtunion shift_sub1_fld1;
115
116 rtx pow2[MAX_BITS_PER_WORD];
117 rtx cint[MAX_BITS_PER_WORD];
118 };
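/* The rtx fields above are statically laid-out skeletons: init_expmed fills
   in their codes and operands once, and init_expmed_one_mode then only
   re-points their modes (and a few constant operands) before asking for rtx
   costs, so no fresh rtl has to be allocated for each query.  */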
119
120 static void
121 init_expmed_one_mode (struct init_expmed_rtl *all,
122 enum machine_mode mode, int speed)
123 {
124 int m, n, mode_bitsize;
125
126 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
127
128 PUT_MODE (&all->reg, mode);
129 PUT_MODE (&all->plus, mode);
130 PUT_MODE (&all->neg, mode);
131 PUT_MODE (&all->mult, mode);
132 PUT_MODE (&all->sdiv, mode);
133 PUT_MODE (&all->udiv, mode);
134 PUT_MODE (&all->sdiv_32, mode);
135 PUT_MODE (&all->smod_32, mode);
136 PUT_MODE (&all->wide_trunc, mode);
137 PUT_MODE (&all->shift, mode);
138 PUT_MODE (&all->shift_mult, mode);
139 PUT_MODE (&all->shift_add, mode);
140 PUT_MODE (&all->shift_sub0, mode);
141 PUT_MODE (&all->shift_sub1, mode);
142
143 add_cost[speed][mode] = set_src_cost (&all->plus, speed);
144 neg_cost[speed][mode] = set_src_cost (&all->neg, speed);
145 mul_cost[speed][mode] = set_src_cost (&all->mult, speed);
146 sdiv_cost[speed][mode] = set_src_cost (&all->sdiv, speed);
147 udiv_cost[speed][mode] = set_src_cost (&all->udiv, speed);
148
149 sdiv_pow2_cheap[speed][mode] = (set_src_cost (&all->sdiv_32, speed)
150 <= 2 * add_cost[speed][mode]);
151 smod_pow2_cheap[speed][mode] = (set_src_cost (&all->smod_32, speed)
152 <= 4 * add_cost[speed][mode]);
153
154 shift_cost[speed][mode][0] = 0;
155 shiftadd_cost[speed][mode][0] = shiftsub0_cost[speed][mode][0]
156 = shiftsub1_cost[speed][mode][0] = add_cost[speed][mode];
157
158 n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
159 for (m = 1; m < n; m++)
160 {
161 XEXP (&all->shift, 1) = all->cint[m];
162 XEXP (&all->shift_mult, 1) = all->pow2[m];
163
164 shift_cost[speed][mode][m] = set_src_cost (&all->shift, speed);
165 shiftadd_cost[speed][mode][m] = set_src_cost (&all->shift_add, speed);
166 shiftsub0_cost[speed][mode][m] = set_src_cost (&all->shift_sub0, speed);
167 shiftsub1_cost[speed][mode][m] = set_src_cost (&all->shift_sub1, speed);
168 }
169
170 if (SCALAR_INT_MODE_P (mode))
171 {
172 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
173
174 if (wider_mode != VOIDmode)
175 {
176 PUT_MODE (&all->zext, wider_mode);
177 PUT_MODE (&all->wide_mult, wider_mode);
178 PUT_MODE (&all->wide_lshr, wider_mode);
179 XEXP (&all->wide_lshr, 1) = GEN_INT (mode_bitsize);
180
181 mul_widen_cost[speed][wider_mode]
182 = set_src_cost (&all->wide_mult, speed);
183 mul_highpart_cost[speed][mode]
184 = set_src_cost (&all->wide_trunc, speed);
185 }
186 }
187 }
188
189 void
190 init_expmed (void)
191 {
192 struct init_expmed_rtl all;
193 enum machine_mode mode;
194 int m, speed;
195
196 memset (&all, 0, sizeof all);
197 for (m = 1; m < MAX_BITS_PER_WORD; m++)
198 {
199 all.pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
200 all.cint[m] = GEN_INT (m);
201 }
202
203 PUT_CODE (&all.reg, REG);
204 /* Avoid using hard regs in ways which may be unsupported. */
205 SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1);
206
207 PUT_CODE (&all.plus, PLUS);
208 XEXP (&all.plus, 0) = &all.reg;
209 XEXP (&all.plus, 1) = &all.reg;
210
211 PUT_CODE (&all.neg, NEG);
212 XEXP (&all.neg, 0) = &all.reg;
213
214 PUT_CODE (&all.mult, MULT);
215 XEXP (&all.mult, 0) = &all.reg;
216 XEXP (&all.mult, 1) = &all.reg;
217
218 PUT_CODE (&all.sdiv, DIV);
219 XEXP (&all.sdiv, 0) = &all.reg;
220 XEXP (&all.sdiv, 1) = &all.reg;
221
222 PUT_CODE (&all.udiv, UDIV);
223 XEXP (&all.udiv, 0) = &all.reg;
224 XEXP (&all.udiv, 1) = &all.reg;
225
226 PUT_CODE (&all.sdiv_32, DIV);
227 XEXP (&all.sdiv_32, 0) = &all.reg;
228 XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? all.cint[32] : GEN_INT (32);
229
230 PUT_CODE (&all.smod_32, MOD);
231 XEXP (&all.smod_32, 0) = &all.reg;
232 XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);
233
234 PUT_CODE (&all.zext, ZERO_EXTEND);
235 XEXP (&all.zext, 0) = &all.reg;
236
237 PUT_CODE (&all.wide_mult, MULT);
238 XEXP (&all.wide_mult, 0) = &all.zext;
239 XEXP (&all.wide_mult, 1) = &all.zext;
240
241 PUT_CODE (&all.wide_lshr, LSHIFTRT);
242 XEXP (&all.wide_lshr, 0) = &all.wide_mult;
243
244 PUT_CODE (&all.wide_trunc, TRUNCATE);
245 XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
246
247 PUT_CODE (&all.shift, ASHIFT);
248 XEXP (&all.shift, 0) = &all.reg;
249
250 PUT_CODE (&all.shift_mult, MULT);
251 XEXP (&all.shift_mult, 0) = &all.reg;
252
253 PUT_CODE (&all.shift_add, PLUS);
254 XEXP (&all.shift_add, 0) = &all.shift_mult;
255 XEXP (&all.shift_add, 1) = &all.reg;
256
257 PUT_CODE (&all.shift_sub0, MINUS);
258 XEXP (&all.shift_sub0, 0) = &all.shift_mult;
259 XEXP (&all.shift_sub0, 1) = &all.reg;
260
261 PUT_CODE (&all.shift_sub1, MINUS);
262 XEXP (&all.shift_sub1, 0) = &all.reg;
263 XEXP (&all.shift_sub1, 1) = &all.shift_mult;
264
265 for (speed = 0; speed < 2; speed++)
266 {
267 crtl->maybe_hot_insn_p = speed;
268 zero_cost[speed] = set_src_cost (const0_rtx, speed);
269
270 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
271 mode != VOIDmode;
272 mode = GET_MODE_WIDER_MODE (mode))
273 init_expmed_one_mode (&all, mode, speed);
274
275 for (mode = GET_CLASS_NARROWEST_MODE (MODE_VECTOR_INT);
276 mode != VOIDmode;
277 mode = GET_MODE_WIDER_MODE (mode))
278 init_expmed_one_mode (&all, mode, speed);
279 }
280
281 if (alg_hash_used_p)
282 memset (alg_hash, 0, sizeof (alg_hash));
283 else
284 alg_hash_used_p = true;
285 default_rtl_profile ();
286 }
287
288 /* Return an rtx representing minus the value of X.
289 MODE is the intended mode of the result,
290 useful if X is a CONST_INT. */
291
292 rtx
293 negate_rtx (enum machine_mode mode, rtx x)
294 {
295 rtx result = simplify_unary_operation (NEG, mode, x, mode);
296
297 if (result == 0)
298 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
299
300 return result;
301 }
302
303 /* Report on the availability of insv/extv/extzv and the desired mode
304 of each of their operands. Returns MAX_MACHINE_MODE if HAVE_foo
305 is false; else the mode of the specified operand. If OPNO is -1,
306 all the caller cares about is whether the insn is available. */
307 enum machine_mode
308 mode_for_extraction (enum extraction_pattern pattern, int opno)
309 {
310 const struct insn_data_d *data;
311
312 switch (pattern)
313 {
314 case EP_insv:
315 if (HAVE_insv)
316 {
317 data = &insn_data[CODE_FOR_insv];
318 break;
319 }
320 return MAX_MACHINE_MODE;
321
322 case EP_extv:
323 if (HAVE_extv)
324 {
325 data = &insn_data[CODE_FOR_extv];
326 break;
327 }
328 return MAX_MACHINE_MODE;
329
330 case EP_extzv:
331 if (HAVE_extzv)
332 {
333 data = &insn_data[CODE_FOR_extzv];
334 break;
335 }
336 return MAX_MACHINE_MODE;
337
338 default:
339 gcc_unreachable ();
340 }
341
342 if (opno == -1)
343 return VOIDmode;
344
345 /* Everyone who uses this function used to follow it with
346 if (result == VOIDmode) result = word_mode; */
347 if (data->operand[opno].mode == VOIDmode)
348 return word_mode;
349 return data->operand[opno].mode;
350 }
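/* For example, mode_for_extraction (EP_insv, 3) yields the mode the target's
   insv pattern wants for its value operand (operand 3), often word_mode, or
   MAX_MACHINE_MODE when the target provides no insv pattern at all.  */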
351 \f
352 /* A subroutine of store_bit_field, with the same arguments. Return true
353 if the operation could be implemented.
354
355 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
356 no other way of implementing the operation. If FALLBACK_P is false,
357 return false instead. */
358
359 static bool
360 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
361 unsigned HOST_WIDE_INT bitnum,
362 unsigned HOST_WIDE_INT bitregion_start,
363 unsigned HOST_WIDE_INT bitregion_end,
364 enum machine_mode fieldmode,
365 rtx value, bool fallback_p)
366 {
367 unsigned int unit
368 = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
369 unsigned HOST_WIDE_INT offset, bitpos;
370 rtx op0 = str_rtx;
371 int byte_offset;
372 rtx orig_value;
373
374 enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
375
376 while (GET_CODE (op0) == SUBREG)
377 {
378 /* The following line once was done only if WORDS_BIG_ENDIAN,
379 but I think that is a mistake. WORDS_BIG_ENDIAN is
380 meaningful at a much higher level; when structures are copied
381 between memory and regs, the higher-numbered regs
382 always get higher addresses. */
383 int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
384 int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
385
386 byte_offset = 0;
387
388 /* Paradoxical subregs need special handling on big endian machines. */
389 if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
390 {
391 int difference = inner_mode_size - outer_mode_size;
392
393 if (WORDS_BIG_ENDIAN)
394 byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
395 if (BYTES_BIG_ENDIAN)
396 byte_offset += difference % UNITS_PER_WORD;
397 }
398 else
399 byte_offset = SUBREG_BYTE (op0);
400
401 bitnum += byte_offset * BITS_PER_UNIT;
402 op0 = SUBREG_REG (op0);
403 }
404
405 /* No action is needed if the target is a register and if the field
406 lies completely outside that register. This can occur if the source
407 code contains an out-of-bounds access to a small array. */
408 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
409 return true;
410
411 /* Use vec_set patterns for inserting parts of vectors whenever
412 available. */
413 if (VECTOR_MODE_P (GET_MODE (op0))
414 && !MEM_P (op0)
415 && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
416 && fieldmode == GET_MODE_INNER (GET_MODE (op0))
417 && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
418 && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
419 {
420 struct expand_operand ops[3];
421 enum machine_mode outermode = GET_MODE (op0);
422 enum machine_mode innermode = GET_MODE_INNER (outermode);
423 enum insn_code icode = optab_handler (vec_set_optab, outermode);
424 int pos = bitnum / GET_MODE_BITSIZE (innermode);
425
426 create_fixed_operand (&ops[0], op0);
427 create_input_operand (&ops[1], value, innermode);
428 create_integer_operand (&ops[2], pos);
429 if (maybe_expand_insn (icode, 3, ops))
430 return true;
431 }
432
433 /* If the target is a register, overwriting the entire object, or storing
434 a full-word or multi-word field can be done with just a SUBREG.
435
436 If the target is memory, storing any naturally aligned field can be
437 done with a simple store. For targets that support fast unaligned
438 memory, any naturally sized, unit aligned field can be done directly. */
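/* For instance, storing a 32-bit SImode value at bit 0 of an SImode register
   is a plain register move, and storing it at a naturally aligned offset in
   memory is a single SImode store.  */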
439
440 offset = bitnum / unit;
441 bitpos = bitnum % unit;
442 byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
443 + (offset * UNITS_PER_WORD);
444
445 if (bitpos == 0
446 && bitsize == GET_MODE_BITSIZE (fieldmode)
447 && (!MEM_P (op0)
448 ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
449 || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
450 && ((GET_MODE (op0) == fieldmode && byte_offset == 0)
451 || validate_subreg (fieldmode, GET_MODE (op0), op0,
452 byte_offset)))
453 : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
454 || (offset * BITS_PER_UNIT % bitsize == 0
455 && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
456 {
457 if (MEM_P (op0))
458 op0 = adjust_address (op0, fieldmode, offset);
459 else if (GET_MODE (op0) != fieldmode)
460 op0 = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
461 byte_offset);
462 emit_move_insn (op0, value);
463 return true;
464 }
465
466 /* Make sure we are playing with integral modes. Pun with subregs
467 if we aren't. This must come after the entire register case above,
468 since that case is valid for any mode. The following cases are only
469 valid for integral modes. */
470 {
471 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
472 if (imode != GET_MODE (op0))
473 {
474 if (MEM_P (op0))
475 op0 = adjust_address (op0, imode, 0);
476 else
477 {
478 gcc_assert (imode != BLKmode);
479 op0 = gen_lowpart (imode, op0);
480 }
481 }
482 }
483
484 /* We may be accessing data outside the field, which means
485 we can alias adjacent data. */
486 /* ?? not always for C++0x memory model ?? */
487 if (MEM_P (op0))
488 {
489 op0 = shallow_copy_rtx (op0);
490 set_mem_alias_set (op0, 0);
491 set_mem_expr (op0, 0);
492 }
493
494 /* If OP0 is a register, BITPOS must count within a word.
495 But as we have it, it counts within whatever size OP0 now has.
496 On a bigendian machine, these are not the same, so convert. */
497 if (BYTES_BIG_ENDIAN
498 && !MEM_P (op0)
499 && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
500 bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
501
502 /* Storing an lsb-aligned field in a register
503 can be done with a movestrict instruction. */
504
505 if (!MEM_P (op0)
506 && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
507 && bitsize == GET_MODE_BITSIZE (fieldmode)
508 && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
509 {
510 struct expand_operand ops[2];
511 enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
512 rtx arg0 = op0;
513 unsigned HOST_WIDE_INT subreg_off;
514
515 if (GET_CODE (arg0) == SUBREG)
516 {
517 /* Else we've got some float mode source being extracted into
518 a different float mode destination -- this combination of
519 subregs results in Severe Tire Damage. */
520 gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
521 || GET_MODE_CLASS (fieldmode) == MODE_INT
522 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
523 arg0 = SUBREG_REG (arg0);
524 }
525
526 subreg_off = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
527 + (offset * UNITS_PER_WORD);
528 if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
529 {
530 arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
531
532 create_fixed_operand (&ops[0], arg0);
533 /* Shrink the source operand to FIELDMODE. */
534 create_convert_operand_to (&ops[1], value, fieldmode, false);
535 if (maybe_expand_insn (icode, 2, ops))
536 return true;
537 }
538 }
539
540 /* Handle fields bigger than a word. */
541
542 if (bitsize > BITS_PER_WORD)
543 {
544 /* Here we transfer the words of the field
545 in the order least significant first.
546 This is because the most significant word is the one which may
547 be less than full.
548 However, only do that if the value is not BLKmode. */
549
550 unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
551 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
552 unsigned int i;
553 rtx last;
554
555 /* This is the mode we must force value to, so that there will be enough
556 subwords to extract. Note that fieldmode will often (always?) be
557 VOIDmode, because that is what store_field uses to indicate that this
558 is a bit field, but passing VOIDmode to operand_subword_force
559 is not allowed. */
560 fieldmode = GET_MODE (value);
561 if (fieldmode == VOIDmode)
562 fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
563
564 last = get_last_insn ();
565 for (i = 0; i < nwords; i++)
566 {
567 /* If I is 0, use the low-order word in both field and target;
568 if I is 1, use the next to lowest word; and so on. */
569 unsigned int wordnum = (backwards
570 ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
571 - i - 1
572 : i);
573 unsigned int bit_offset = (backwards
574 ? MAX ((int) bitsize - ((int) i + 1)
575 * BITS_PER_WORD,
576 0)
577 : (int) i * BITS_PER_WORD);
578 rtx value_word = operand_subword_force (value, wordnum, fieldmode);
579 unsigned HOST_WIDE_INT new_bitsize =
580 MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
581
582 /* If the remaining chunk doesn't have full wordsize we have
583 to make sure that for big endian machines the higher order
584 bits are used. */
585 if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
586 value_word = simplify_expand_binop (word_mode, lshr_optab,
587 value_word,
588 GEN_INT (BITS_PER_WORD
589 - new_bitsize),
590 NULL_RTX, true,
591 OPTAB_LIB_WIDEN);
592
593 if (!store_bit_field_1 (op0, new_bitsize,
594 bitnum + bit_offset,
595 bitregion_start, bitregion_end,
596 word_mode,
597 value_word, fallback_p))
598 {
599 delete_insns_since (last);
600 return false;
601 }
602 }
603 return true;
604 }
605
 606 /* From here on we can assume that the field to be stored in fits within
 607 a single word (whatever type that is), since it is narrower than a word. */
608
609 /* OFFSET is the number of words or bytes (UNIT says which)
610 from STR_RTX to the first word or byte containing part of the field. */
611
612 if (!MEM_P (op0))
613 {
614 if (offset != 0
615 || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
616 {
617 if (!REG_P (op0))
618 {
619 /* Since this is a destination (lvalue), we can't copy
620 it to a pseudo. We can remove a SUBREG that does not
621 change the size of the operand. Such a SUBREG may
622 have been added above. */
623 gcc_assert (GET_CODE (op0) == SUBREG
624 && (GET_MODE_SIZE (GET_MODE (op0))
625 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))));
626 op0 = SUBREG_REG (op0);
627 }
628 op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
629 op0, (offset * UNITS_PER_WORD));
630 }
631 offset = 0;
632 }
633
634 /* If VALUE has a floating-point or complex mode, access it as an
635 integer of the corresponding size. This can occur on a machine
636 with 64 bit registers that uses SFmode for float. It can also
637 occur for unaligned float or complex fields. */
638 orig_value = value;
639 if (GET_MODE (value) != VOIDmode
640 && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
641 && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
642 {
643 value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
644 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
645 }
646
647 /* Now OFFSET is nonzero only if OP0 is memory
648 and is therefore always measured in bytes. */
649
650 if (HAVE_insv
651 && GET_MODE (value) != BLKmode
652 && bitsize > 0
653 && GET_MODE_BITSIZE (op_mode) >= bitsize
654 /* Do not use insv for volatile bitfields when
655 -fstrict-volatile-bitfields is in effect. */
656 && !(MEM_P (op0) && MEM_VOLATILE_P (op0)
657 && flag_strict_volatile_bitfields > 0)
658 && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
659 && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode)))
660 /* Do not use insv if the bit region is restricted and
661 op_mode integer at offset doesn't fit into the
662 restricted region. */
663 && !(MEM_P (op0) && bitregion_end
664 && bitnum - bitpos + GET_MODE_BITSIZE (op_mode)
665 > bitregion_end + 1))
666 {
667 struct expand_operand ops[4];
668 int xbitpos = bitpos;
669 rtx value1;
670 rtx xop0 = op0;
671 rtx last = get_last_insn ();
672 bool copy_back = false;
673
674 /* Add OFFSET into OP0's address. */
675 if (MEM_P (xop0))
676 xop0 = adjust_address (xop0, byte_mode, offset);
677
678 /* If xop0 is a register, we need it in OP_MODE
679 to make it acceptable to the format of insv. */
680 if (GET_CODE (xop0) == SUBREG)
681 /* We can't just change the mode, because this might clobber op0,
682 and we will need the original value of op0 if insv fails. */
683 xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
684 if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
685 xop0 = gen_lowpart_SUBREG (op_mode, xop0);
686
687 /* If the destination is a paradoxical subreg such that we need a
688 truncate to the inner mode, perform the insertion on a temporary and
689 truncate the result to the original destination. Note that we can't
690 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
691 X) 0)) is (reg:N X). */
692 if (GET_CODE (xop0) == SUBREG
693 && REG_P (SUBREG_REG (xop0))
694 && (!TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
695 op_mode)))
696 {
697 rtx tem = gen_reg_rtx (op_mode);
698 emit_move_insn (tem, xop0);
699 xop0 = tem;
700 copy_back = true;
701 }
702
703 /* We have been counting XBITPOS within UNIT.
704 Count instead within the size of the register. */
705 if (BYTES_BIG_ENDIAN && !MEM_P (xop0))
706 xbitpos += GET_MODE_BITSIZE (op_mode) - unit;
707
708 unit = GET_MODE_BITSIZE (op_mode);
709
710 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
711 "backwards" from the size of the unit we are inserting into.
712 Otherwise, we count bits from the most significant on a
713 BYTES/BITS_BIG_ENDIAN machine. */
714
715 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
716 xbitpos = unit - bitsize - xbitpos;
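/* E.g. with unit == 32, bitsize == 8 and xbitpos == 4, the position handed
   to the pattern becomes 32 - 8 - 4 == 20.  */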
717
718 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
719 value1 = value;
720 if (GET_MODE (value) != op_mode)
721 {
722 if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
723 {
724 /* Optimization: Don't bother really extending VALUE
725 if it has all the bits we will actually use. However,
726 if we must narrow it, be sure we do it correctly. */
727
728 if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
729 {
730 rtx tmp;
731
732 tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
733 if (! tmp)
734 tmp = simplify_gen_subreg (op_mode,
735 force_reg (GET_MODE (value),
736 value1),
737 GET_MODE (value), 0);
738 value1 = tmp;
739 }
740 else
741 value1 = gen_lowpart (op_mode, value1);
742 }
743 else if (CONST_INT_P (value))
744 value1 = gen_int_mode (INTVAL (value), op_mode);
745 else
746 /* Parse phase is supposed to make VALUE's data type
747 match that of the component reference, which is a type
748 at least as wide as the field; so VALUE should have
749 a mode that corresponds to that type. */
750 gcc_assert (CONSTANT_P (value));
751 }
752
753 create_fixed_operand (&ops[0], xop0);
754 create_integer_operand (&ops[1], bitsize);
755 create_integer_operand (&ops[2], xbitpos);
756 create_input_operand (&ops[3], value1, op_mode);
757 if (maybe_expand_insn (CODE_FOR_insv, 4, ops))
758 {
759 if (copy_back)
760 convert_move (op0, xop0, true);
761 return true;
762 }
763 delete_insns_since (last);
764 }
765
766 /* If OP0 is a memory, try copying it to a register and seeing if a
767 cheap register alternative is available. */
768 if (HAVE_insv && MEM_P (op0))
769 {
770 enum machine_mode bestmode;
771 unsigned HOST_WIDE_INT maxbits = MAX_FIXED_MODE_SIZE;
772
773 if (bitregion_end)
774 maxbits = bitregion_end - bitregion_start + 1;
775
776 /* Get the mode to use for inserting into this field. If OP0 is
777 BLKmode, get the smallest mode consistent with the alignment. If
778 OP0 is a non-BLKmode object that is no wider than OP_MODE, use its
779 mode. Otherwise, use the smallest mode containing the field. */
780
781 if (GET_MODE (op0) == BLKmode
782 || GET_MODE_BITSIZE (GET_MODE (op0)) > maxbits
783 || (op_mode != MAX_MACHINE_MODE
784 && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (op_mode)))
785 bestmode = get_best_mode (bitsize, bitnum,
786 bitregion_start, bitregion_end,
787 MEM_ALIGN (op0),
788 (op_mode == MAX_MACHINE_MODE
789 ? VOIDmode : op_mode),
790 MEM_VOLATILE_P (op0));
791 else
792 bestmode = GET_MODE (op0);
793
794 if (bestmode != VOIDmode
795 && GET_MODE_SIZE (bestmode) >= GET_MODE_SIZE (fieldmode)
796 && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
797 && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
798 {
799 rtx last, tempreg, xop0;
800 unsigned HOST_WIDE_INT xoffset, xbitpos;
801
802 last = get_last_insn ();
803
804 /* Adjust address to point to the containing unit of
805 that mode. Compute the offset as a multiple of this unit,
806 counting in bytes. */
807 unit = GET_MODE_BITSIZE (bestmode);
808 xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
809 xbitpos = bitnum % unit;
810 xop0 = adjust_address (op0, bestmode, xoffset);
811
812 /* Fetch that unit, store the bitfield in it, then store
813 the unit. */
814 tempreg = copy_to_reg (xop0);
815 if (store_bit_field_1 (tempreg, bitsize, xbitpos,
816 bitregion_start, bitregion_end,
817 fieldmode, orig_value, false))
818 {
819 emit_move_insn (xop0, tempreg);
820 return true;
821 }
822 delete_insns_since (last);
823 }
824 }
825
826 if (!fallback_p)
827 return false;
828
829 store_fixed_bit_field (op0, offset, bitsize, bitpos,
830 bitregion_start, bitregion_end, value);
831 return true;
832 }
833
834 /* Generate code to store value from rtx VALUE
835 into a bit-field within structure STR_RTX
836 containing BITSIZE bits starting at bit BITNUM.
837
838 BITREGION_START is bitpos of the first bitfield in this region.
839 BITREGION_END is the bitpos of the ending bitfield in this region.
840 These two fields are 0, if the C++ memory model does not apply,
841 or we are not interested in keeping track of bitfield regions.
842
843 FIELDMODE is the machine-mode of the FIELD_DECL node for this field. */
844
845 void
846 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
847 unsigned HOST_WIDE_INT bitnum,
848 unsigned HOST_WIDE_INT bitregion_start,
849 unsigned HOST_WIDE_INT bitregion_end,
850 enum machine_mode fieldmode,
851 rtx value)
852 {
853 /* Under the C++0x memory model, we must not touch bits outside the
854 bit region. Adjust the address to start at the beginning of the
855 bit region. */
856 if (MEM_P (str_rtx) && bitregion_start > 0)
857 {
858 enum machine_mode bestmode;
859 enum machine_mode op_mode;
860 unsigned HOST_WIDE_INT offset;
861
862 op_mode = mode_for_extraction (EP_insv, 3);
863 if (op_mode == MAX_MACHINE_MODE)
864 op_mode = VOIDmode;
865
866 gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
867
868 offset = bitregion_start / BITS_PER_UNIT;
869 bitnum -= bitregion_start;
870 bitregion_end -= bitregion_start;
871 bitregion_start = 0;
872 bestmode = get_best_mode (bitsize, bitnum,
873 bitregion_start, bitregion_end,
874 MEM_ALIGN (str_rtx),
875 op_mode,
876 MEM_VOLATILE_P (str_rtx));
877 str_rtx = adjust_address (str_rtx, bestmode, offset);
878 }
879
880 if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
881 bitregion_start, bitregion_end,
882 fieldmode, value, true))
883 gcc_unreachable ();
884 }
885 \f
886 /* Use shifts and boolean operations to store VALUE
887 into a bit field of width BITSIZE
888 in a memory location specified by OP0 except offset by OFFSET bytes.
889 (OFFSET must be 0 if OP0 is a register.)
890 The field starts at position BITPOS within the byte.
891 (If OP0 is a register, it may be a full word or a narrower mode,
892 but BITPOS still counts within a full word,
893 which is significant on bigendian machines.) */
894
895 static void
896 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
897 unsigned HOST_WIDE_INT bitsize,
898 unsigned HOST_WIDE_INT bitpos,
899 unsigned HOST_WIDE_INT bitregion_start,
900 unsigned HOST_WIDE_INT bitregion_end,
901 rtx value)
902 {
903 enum machine_mode mode;
904 unsigned int total_bits = BITS_PER_WORD;
905 rtx temp;
906 int all_zero = 0;
907 int all_one = 0;
908
909 /* There is a case not handled here:
910 a structure with a known alignment of just a halfword
911 and a field split across two aligned halfwords within the structure.
912 Or likewise a structure with a known alignment of just a byte
913 and a field split across two bytes.
914 Such cases are not supposed to be able to occur. */
915
916 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
917 {
918 gcc_assert (!offset);
919 /* Special treatment for a bit field split across two registers. */
920 if (bitsize + bitpos > BITS_PER_WORD)
921 {
922 store_split_bit_field (op0, bitsize, bitpos,
923 bitregion_start, bitregion_end,
924 value);
925 return;
926 }
927 }
928 else
929 {
930 unsigned HOST_WIDE_INT maxbits = MAX_FIXED_MODE_SIZE;
931
932 if (bitregion_end)
933 maxbits = bitregion_end - bitregion_start + 1;
934
935 /* Get the proper mode to use for this field. We want a mode that
936 includes the entire field. If such a mode would be larger than
937 a word, we won't be doing the extraction the normal way.
938 We don't want a mode bigger than the destination. */
939
940 mode = GET_MODE (op0);
941 if (GET_MODE_BITSIZE (mode) == 0
942 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
943 mode = word_mode;
944
945 if (MEM_VOLATILE_P (op0)
946 && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
947 && GET_MODE_BITSIZE (GET_MODE (op0)) <= maxbits
948 && flag_strict_volatile_bitfields > 0)
949 mode = GET_MODE (op0);
950 else
951 mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
952 bitregion_start, bitregion_end,
953 MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
954
955 if (mode == VOIDmode)
956 {
957 /* The only way this should occur is if the field spans word
958 boundaries. */
959 store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
960 bitregion_start, bitregion_end, value);
961 return;
962 }
963
964 total_bits = GET_MODE_BITSIZE (mode);
965
966 /* Make sure bitpos is valid for the chosen mode. Adjust BITPOS to
967 be in the range 0 to total_bits-1, and put any excess bytes in
968 OFFSET. */
969 if (bitpos >= total_bits)
970 {
971 offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
972 bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
973 * BITS_PER_UNIT);
974 }
975
976 /* Get ref to an aligned byte, halfword, or word containing the field.
977 Adjust BITPOS to be position within a word,
978 and OFFSET to be the offset of that word.
979 Then alter OP0 to refer to that word. */
980 bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
981 offset -= (offset % (total_bits / BITS_PER_UNIT));
982 op0 = adjust_address (op0, mode, offset);
983 }
984
985 mode = GET_MODE (op0);
986
987 /* Now MODE is either some integral mode for a MEM as OP0,
988 or is a full-word for a REG as OP0. TOTAL_BITS corresponds.
989 The bit field is contained entirely within OP0.
990 BITPOS is the starting bit number within OP0.
991 (OP0's mode may actually be narrower than MODE.) */
992
993 if (BYTES_BIG_ENDIAN)
994 /* BITPOS is the distance between our msb
995 and that of the containing datum.
996 Convert it to the distance from the lsb. */
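/* E.g. an 8-bit field whose msb lies 4 bits below that of a 32-bit datum
   ends up with BITPOS == 32 - 8 - 4 == 20, counted from the lsb.  */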
997 bitpos = total_bits - bitsize - bitpos;
998
999 /* Now BITPOS is always the distance between our lsb
1000 and that of OP0. */
1001
1002 /* Shift VALUE left by BITPOS bits. If VALUE is not constant,
1003 we must first convert its mode to MODE. */
1004
1005 if (CONST_INT_P (value))
1006 {
1007 HOST_WIDE_INT v = INTVAL (value);
1008
1009 if (bitsize < HOST_BITS_PER_WIDE_INT)
1010 v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
1011
1012 if (v == 0)
1013 all_zero = 1;
1014 else if ((bitsize < HOST_BITS_PER_WIDE_INT
1015 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
1016 || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
1017 all_one = 1;
1018
1019 value = lshift_value (mode, value, bitpos, bitsize);
1020 }
1021 else
1022 {
1023 int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
1024 && bitpos + bitsize != GET_MODE_BITSIZE (mode));
1025
1026 if (GET_MODE (value) != mode)
1027 value = convert_to_mode (mode, value, 1);
1028
1029 if (must_and)
1030 value = expand_binop (mode, and_optab, value,
1031 mask_rtx (mode, 0, bitsize, 0),
1032 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1033 if (bitpos > 0)
1034 value = expand_shift (LSHIFT_EXPR, mode, value,
1035 bitpos, NULL_RTX, 1);
1036 }
1037
1038 /* Now clear the chosen bits in OP0,
1039 except that if VALUE is -1 we need not bother. */
1040 /* We keep the intermediates in registers to allow CSE to combine
1041 consecutive bitfield assignments. */
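/* In other words, for a BITSIZE-bit field at BITPOS the store amounts to
   op0 = (op0 & ~(mask << bitpos)) | (value << bitpos), where MASK has the
   low BITSIZE bits set; the AND is skipped when VALUE is a constant of all
   ones and the IOR when it is the constant zero.  */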
1042
1043 temp = force_reg (mode, op0);
1044
1045 if (! all_one)
1046 {
1047 temp = expand_binop (mode, and_optab, temp,
1048 mask_rtx (mode, bitpos, bitsize, 1),
1049 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1050 temp = force_reg (mode, temp);
1051 }
1052
1053 /* Now logical-or VALUE into OP0, unless it is zero. */
1054
1055 if (! all_zero)
1056 {
1057 temp = expand_binop (mode, ior_optab, temp, value,
1058 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1059 temp = force_reg (mode, temp);
1060 }
1061
1062 if (op0 != temp)
1063 {
1064 op0 = copy_rtx (op0);
1065 emit_move_insn (op0, temp);
1066 }
1067 }
1068 \f
1069 /* Store a bit field that is split across multiple accessible memory objects.
1070
1071 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1072 BITSIZE is the field width; BITPOS the position of its first bit
1073 (within the word).
1074 VALUE is the value to store.
1075
1076 This does not yet handle fields wider than BITS_PER_WORD. */
1077
1078 static void
1079 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1080 unsigned HOST_WIDE_INT bitpos,
1081 unsigned HOST_WIDE_INT bitregion_start,
1082 unsigned HOST_WIDE_INT bitregion_end,
1083 rtx value)
1084 {
1085 unsigned int unit;
1086 unsigned int bitsdone = 0;
1087
1088 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1089 much at a time. */
1090 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1091 unit = BITS_PER_WORD;
1092 else
1093 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1094
1095 /* If VALUE is a constant other than a CONST_INT, get it into a register in
1096 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1097 that VALUE might be a floating-point constant. */
1098 if (CONSTANT_P (value) && !CONST_INT_P (value))
1099 {
1100 rtx word = gen_lowpart_common (word_mode, value);
1101
1102 if (word && (value != word))
1103 value = word;
1104 else
1105 value = gen_lowpart_common (word_mode,
1106 force_reg (GET_MODE (value) != VOIDmode
1107 ? GET_MODE (value)
1108 : word_mode, value));
1109 }
1110
1111 while (bitsdone < bitsize)
1112 {
1113 unsigned HOST_WIDE_INT thissize;
1114 rtx part, word;
1115 unsigned HOST_WIDE_INT thispos;
1116 unsigned HOST_WIDE_INT offset;
1117
1118 offset = (bitpos + bitsdone) / unit;
1119 thispos = (bitpos + bitsdone) % unit;
1120
1121 /* When region of bytes we can touch is restricted, decrease
1122 UNIT close to the end of the region as needed. */
1123 if (bitregion_end
1124 && unit > BITS_PER_UNIT
1125 && bitpos + bitsdone - thispos + unit > bitregion_end + 1)
1126 {
1127 unit = unit / 2;
1128 continue;
1129 }
1130
1131 /* THISSIZE must not overrun a word boundary. Otherwise,
1132 store_fixed_bit_field will call us again, and we will mutually
1133 recurse forever. */
1134 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1135 thissize = MIN (thissize, unit - thispos);
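/* For example, a 20-bit field starting at bit 6 of a byte-aligned MEM
   (unit == 8) is stored as successive pieces of 2, 8, 8 and 2 bits.  */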
1136
1137 if (BYTES_BIG_ENDIAN)
1138 {
1139 int total_bits;
1140
1141 /* We must do an endian conversion exactly the same way as it is
1142 done in extract_bit_field, so that the two calls to
1143 extract_fixed_bit_field will have comparable arguments. */
1144 if (!MEM_P (value) || GET_MODE (value) == BLKmode)
1145 total_bits = BITS_PER_WORD;
1146 else
1147 total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1148
1149 /* Fetch successively less significant portions. */
1150 if (CONST_INT_P (value))
1151 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1152 >> (bitsize - bitsdone - thissize))
1153 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1154 else
1155 /* The args are chosen so that the last part includes the
1156 lsb. Give extract_bit_field the value it needs (with
1157 endianness compensation) to fetch the piece we want. */
1158 part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1159 total_bits - bitsize + bitsdone,
1160 NULL_RTX, 1, false);
1161 }
1162 else
1163 {
1164 /* Fetch successively more significant portions. */
1165 if (CONST_INT_P (value))
1166 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1167 >> bitsdone)
1168 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1169 else
1170 part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1171 bitsdone, NULL_RTX, 1, false);
1172 }
1173
1174 /* If OP0 is a register, then handle OFFSET here.
1175
1176 When handling multiword bitfields, extract_bit_field may pass
1177 down a word_mode SUBREG of a larger REG for a bitfield that actually
1178 crosses a word boundary. Thus, for a SUBREG, we must find
1179 the current word starting from the base register. */
1180 if (GET_CODE (op0) == SUBREG)
1181 {
1182 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1183 enum machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
1184 if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
1185 word = word_offset ? const0_rtx : op0;
1186 else
1187 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1188 GET_MODE (SUBREG_REG (op0)));
1189 offset = 0;
1190 }
1191 else if (REG_P (op0))
1192 {
1193 enum machine_mode op0_mode = GET_MODE (op0);
1194 if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1195 word = offset ? const0_rtx : op0;
1196 else
1197 word = operand_subword_force (op0, offset, GET_MODE (op0));
1198 offset = 0;
1199 }
1200 else
1201 word = op0;
1202
1203 /* OFFSET is in UNITs, and UNIT is in bits.
1204 store_fixed_bit_field wants offset in bytes. If WORD is const0_rtx,
1205 it is just an out-of-bounds access. Ignore it. */
1206 if (word != const0_rtx)
1207 store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
1208 thispos, bitregion_start, bitregion_end, part);
1209 bitsdone += thissize;
1210 }
1211 }
1212 \f
1213 /* A subroutine of extract_bit_field_1 that converts return value X
1214 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1215 to extract_bit_field. */
1216
1217 static rtx
1218 convert_extracted_bit_field (rtx x, enum machine_mode mode,
1219 enum machine_mode tmode, bool unsignedp)
1220 {
1221 if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1222 return x;
1223
1224 /* If the x mode is not a scalar integral, first convert to the
1225 integer mode of that size and then access it as a floating-point
1226 value via a SUBREG. */
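/* E.g. on a target where SFmode is 32 bits wide, a request for SFmode goes
   via SImode and a lowpart SUBREG of the resulting register.  */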
1227 if (!SCALAR_INT_MODE_P (tmode))
1228 {
1229 enum machine_mode smode;
1230
1231 smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1232 x = convert_to_mode (smode, x, unsignedp);
1233 x = force_reg (smode, x);
1234 return gen_lowpart (tmode, x);
1235 }
1236
1237 return convert_to_mode (tmode, x, unsignedp);
1238 }
1239
1240 /* A subroutine of extract_bit_field, with the same arguments.
1241 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1242 if we can find no other means of implementing the operation.
 1243 If FALLBACK_P is false, return NULL instead. */
1244
1245 static rtx
1246 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1247 unsigned HOST_WIDE_INT bitnum,
1248 int unsignedp, bool packedp, rtx target,
1249 enum machine_mode mode, enum machine_mode tmode,
1250 bool fallback_p)
1251 {
1252 unsigned int unit
1253 = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
1254 unsigned HOST_WIDE_INT offset, bitpos;
1255 rtx op0 = str_rtx;
1256 enum machine_mode int_mode;
1257 enum machine_mode ext_mode;
1258 enum machine_mode mode1;
1259 int byte_offset;
1260
1261 if (tmode == VOIDmode)
1262 tmode = mode;
1263
1264 while (GET_CODE (op0) == SUBREG)
1265 {
1266 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1267 op0 = SUBREG_REG (op0);
1268 }
1269
1270 /* If we have an out-of-bounds access to a register, just return an
1271 uninitialized register of the required mode. This can occur if the
1272 source code contains an out-of-bounds access to a small array. */
1273 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1274 return gen_reg_rtx (tmode);
1275
1276 if (REG_P (op0)
1277 && mode == GET_MODE (op0)
1278 && bitnum == 0
1279 && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1280 {
1281 /* We're trying to extract a full register from itself. */
1282 return op0;
1283 }
1284
1285 /* See if we can get a better vector mode before extracting. */
1286 if (VECTOR_MODE_P (GET_MODE (op0))
1287 && !MEM_P (op0)
1288 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1289 {
1290 enum machine_mode new_mode;
1291
1292 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1293 new_mode = MIN_MODE_VECTOR_FLOAT;
1294 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1295 new_mode = MIN_MODE_VECTOR_FRACT;
1296 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1297 new_mode = MIN_MODE_VECTOR_UFRACT;
1298 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1299 new_mode = MIN_MODE_VECTOR_ACCUM;
1300 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1301 new_mode = MIN_MODE_VECTOR_UACCUM;
1302 else
1303 new_mode = MIN_MODE_VECTOR_INT;
1304
1305 for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1306 if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1307 && targetm.vector_mode_supported_p (new_mode))
1308 break;
1309 if (new_mode != VOIDmode)
1310 op0 = gen_lowpart (new_mode, op0);
1311 }
1312
1313 /* Use vec_extract patterns for extracting parts of vectors whenever
1314 available. */
1315 if (VECTOR_MODE_P (GET_MODE (op0))
1316 && !MEM_P (op0)
1317 && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1318 && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1319 == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1320 {
1321 struct expand_operand ops[3];
1322 enum machine_mode outermode = GET_MODE (op0);
1323 enum machine_mode innermode = GET_MODE_INNER (outermode);
1324 enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1325 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1326
1327 create_output_operand (&ops[0], target, innermode);
1328 create_input_operand (&ops[1], op0, outermode);
1329 create_integer_operand (&ops[2], pos);
1330 if (maybe_expand_insn (icode, 3, ops))
1331 {
1332 target = ops[0].value;
1333 if (GET_MODE (target) != mode)
1334 return gen_lowpart (tmode, target);
1335 return target;
1336 }
1337 }
1338
1339 /* Make sure we are playing with integral modes. Pun with subregs
1340 if we aren't. */
1341 {
1342 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1343 if (imode != GET_MODE (op0))
1344 {
1345 if (MEM_P (op0))
1346 op0 = adjust_address (op0, imode, 0);
1347 else if (imode != BLKmode)
1348 {
1349 op0 = gen_lowpart (imode, op0);
1350
1351 /* If we got a SUBREG, force it into a register since we
1352 aren't going to be able to do another SUBREG on it. */
1353 if (GET_CODE (op0) == SUBREG)
1354 op0 = force_reg (imode, op0);
1355 }
1356 else if (REG_P (op0))
1357 {
1358 rtx reg, subreg;
1359 imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1360 MODE_INT);
1361 reg = gen_reg_rtx (imode);
1362 subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1363 emit_move_insn (subreg, op0);
1364 op0 = reg;
1365 bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1366 }
1367 else
1368 {
1369 rtx mem = assign_stack_temp (GET_MODE (op0),
1370 GET_MODE_SIZE (GET_MODE (op0)));
1371 emit_move_insn (mem, op0);
1372 op0 = adjust_address (mem, BLKmode, 0);
1373 }
1374 }
1375 }
1376
1377 /* We may be accessing data outside the field, which means
1378 we can alias adjacent data. */
1379 if (MEM_P (op0))
1380 {
1381 op0 = shallow_copy_rtx (op0);
1382 set_mem_alias_set (op0, 0);
1383 set_mem_expr (op0, 0);
1384 }
1385
1386 /* Extraction of a full-word or multi-word value from a structure
1387 in a register or aligned memory can be done with just a SUBREG.
1388 A subword value in the least significant part of a register
1389 can also be extracted with a SUBREG. For this, we need the
1390 byte offset of the value in op0. */
1391
1392 bitpos = bitnum % unit;
1393 offset = bitnum / unit;
1394 byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;
1395
1396 /* If OP0 is a register, BITPOS must count within a word.
1397 But as we have it, it counts within whatever size OP0 now has.
1398 On a bigendian machine, these are not the same, so convert. */
1399 if (BYTES_BIG_ENDIAN
1400 && !MEM_P (op0)
1401 && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
1402 bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1403
1404 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1405 If that's wrong, the solution is to test for it and set TARGET to 0
1406 if needed. */
1407
1408 /* Only scalar integer modes can be converted via subregs. There is an
1409 additional problem for FP modes here in that they can have a precision
1410 which is different from the size. mode_for_size uses precision, but
1411 we want a mode based on the size, so we must avoid calling it for FP
1412 modes. */
1413 mode1 = (SCALAR_INT_MODE_P (tmode)
1414 ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
1415 : mode);
1416
1417 /* If the bitfield is volatile, we need to make sure the access
1418 remains on a type-aligned boundary. */
1419 if (GET_CODE (op0) == MEM
1420 && MEM_VOLATILE_P (op0)
1421 && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
1422 && flag_strict_volatile_bitfields > 0)
1423 goto no_subreg_mode_swap;
1424
1425 if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
1426 && bitpos % BITS_PER_WORD == 0)
1427 || (mode1 != BLKmode
1428 /* ??? The big endian test here is wrong. This is correct
1429 if the value is in a register, and if mode_for_size is not
1430 the same mode as op0. This causes us to get unnecessarily
1431 inefficient code from the Thumb port when -mbig-endian. */
1432 && (BYTES_BIG_ENDIAN
1433 ? bitpos + bitsize == BITS_PER_WORD
1434 : bitpos == 0)))
1435 && ((!MEM_P (op0)
1436 && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0))
1437 && GET_MODE_SIZE (mode1) != 0
1438 && byte_offset % GET_MODE_SIZE (mode1) == 0)
1439 || (MEM_P (op0)
1440 && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
1441 || (offset * BITS_PER_UNIT % bitsize == 0
1442 && MEM_ALIGN (op0) % bitsize == 0)))))
1443 {
1444 if (MEM_P (op0))
1445 op0 = adjust_address (op0, mode1, offset);
1446 else if (mode1 != GET_MODE (op0))
1447 {
1448 rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1449 byte_offset);
1450 if (sub == NULL)
1451 goto no_subreg_mode_swap;
1452 op0 = sub;
1453 }
1454 if (mode1 != mode)
1455 return convert_to_mode (tmode, op0, unsignedp);
1456 return op0;
1457 }
1458 no_subreg_mode_swap:
1459
1460 /* Handle fields bigger than a word. */
1461
1462 if (bitsize > BITS_PER_WORD)
1463 {
1464 /* Here we transfer the words of the field
1465 in the order least significant first.
1466 This is because the most significant word is the one which may
1467 be less than full. */
1468
1469 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1470 unsigned int i;
1471
1472 if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1473 target = gen_reg_rtx (mode);
1474
1475 /* Indicate for flow that the entire target reg is being set. */
1476 emit_clobber (target);
1477
1478 for (i = 0; i < nwords; i++)
1479 {
1480 /* If I is 0, use the low-order word in both field and target;
1481 if I is 1, use the next to lowest word; and so on. */
1482 /* Word number in TARGET to use. */
1483 unsigned int wordnum
1484 = (WORDS_BIG_ENDIAN
1485 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1486 : i);
1487 /* Offset from start of field in OP0. */
1488 unsigned int bit_offset = (WORDS_BIG_ENDIAN
1489 ? MAX (0, ((int) bitsize - ((int) i + 1)
1490 * (int) BITS_PER_WORD))
1491 : (int) i * BITS_PER_WORD);
1492 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1493 rtx result_part
1494 = extract_bit_field (op0, MIN (BITS_PER_WORD,
1495 bitsize - i * BITS_PER_WORD),
1496 bitnum + bit_offset, 1, false, target_part, mode,
1497 word_mode);
1498
1499 gcc_assert (target_part);
1500
1501 if (result_part != target_part)
1502 emit_move_insn (target_part, result_part);
1503 }
1504
1505 if (unsignedp)
1506 {
1507 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1508 need to be zero'd out. */
1509 if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1510 {
1511 unsigned int i, total_words;
1512
1513 total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1514 for (i = nwords; i < total_words; i++)
1515 emit_move_insn
1516 (operand_subword (target,
1517 WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1518 1, VOIDmode),
1519 const0_rtx);
1520 }
1521 return target;
1522 }
1523
1524 /* Signed bit field: sign-extend with two arithmetic shifts. */
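/* E.g. a 12-bit field extracted into a 32-bit mode is shifted left by 20 and
   then arithmetically right by 20, replicating the field's sign bit across
   the upper bits.  */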
1525 target = expand_shift (LSHIFT_EXPR, mode, target,
1526 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1527 return expand_shift (RSHIFT_EXPR, mode, target,
1528 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1529 }
1530
1531 /* From here on we know the desired field is smaller than a word. */
1532
1533 /* Check if there is a correspondingly-sized integer field, so we can
1534 safely extract it as one size of integer, if necessary; then
1535 truncate or extend to the size that is wanted; then use SUBREGs or
1536 convert_to_mode to get one of the modes we really wanted. */
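/* E.g. for an SFmode result (32 bits wide on most targets) the extraction
   itself is done in SImode, and convert_extracted_bit_field produces the
   SFmode view afterwards.  */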
1537
1538 int_mode = int_mode_for_mode (tmode);
1539 if (int_mode == BLKmode)
1540 int_mode = int_mode_for_mode (mode);
1541 /* Should probably push op0 out to memory and then do a load. */
1542 gcc_assert (int_mode != BLKmode);
1543
1544 /* OFFSET is the number of words or bytes (UNIT says which)
1545 from STR_RTX to the first word or byte containing part of the field. */
1546 if (!MEM_P (op0))
1547 {
1548 if (offset != 0
1549 || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1550 {
1551 if (!REG_P (op0))
1552 op0 = copy_to_reg (op0);
1553 op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
1554 op0, (offset * UNITS_PER_WORD));
1555 }
1556 offset = 0;
1557 }
1558
1559 /* Now OFFSET is nonzero only for memory operands. */
1560 ext_mode = mode_for_extraction (unsignedp ? EP_extzv : EP_extv, 0);
1561 if (ext_mode != MAX_MACHINE_MODE
1562 && bitsize > 0
1563 && GET_MODE_BITSIZE (ext_mode) >= bitsize
1564 /* Do not use extv/extzv for volatile bitfields when
1565 -fstrict-volatile-bitfields is in effect. */
1566 && !(MEM_P (op0) && MEM_VOLATILE_P (op0)
1567 && flag_strict_volatile_bitfields > 0)
1568 /* If op0 is a register, we need it in EXT_MODE to make it
1569 acceptable to the format of ext(z)v. */
1570 && !(GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1571 && !((REG_P (op0) || GET_CODE (op0) == SUBREG)
1572 && (bitsize + bitpos > GET_MODE_BITSIZE (ext_mode))))
1573 {
1574 struct expand_operand ops[4];
1575 unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
1576 rtx xop0 = op0;
1577 rtx xtarget = target;
1578 rtx xspec_target = target;
1579 rtx xspec_target_subreg = 0;
1580
1581 /* If op0 is a register, we need it in EXT_MODE to make it
1582 acceptable to the format of ext(z)v. */
1583 if (REG_P (xop0) && GET_MODE (xop0) != ext_mode)
1584 xop0 = gen_lowpart_SUBREG (ext_mode, xop0);
1585 if (MEM_P (xop0))
1586 /* Get ref to first byte containing part of the field. */
1587 xop0 = adjust_address (xop0, byte_mode, xoffset);
1588
1589 /* Now convert from counting within UNIT to counting in EXT_MODE. */
1590 if (BYTES_BIG_ENDIAN && !MEM_P (xop0))
1591 xbitpos += GET_MODE_BITSIZE (ext_mode) - unit;
1592
1593 unit = GET_MODE_BITSIZE (ext_mode);
1594
1595 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1596 "backwards" from the size of the unit we are extracting from.
1597 Otherwise, we count bits from the most significant on a
1598 BYTES/BITS_BIG_ENDIAN machine. */
1599
1600 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1601 xbitpos = unit - bitsize - xbitpos;
1602
1603 if (xtarget == 0)
1604 xtarget = xspec_target = gen_reg_rtx (tmode);
1605
1606 if (GET_MODE (xtarget) != ext_mode)
1607 {
1608 /* Don't use LHS paradoxical subreg if explicit truncation is needed
1609 between the mode of the extraction (word_mode) and the target
1610 mode. Instead, create a temporary and use convert_move to set
1611 the target. */
1612 if (REG_P (xtarget)
1613 && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (xtarget), ext_mode))
1614 {
1615 xtarget = gen_lowpart (ext_mode, xtarget);
1616 if (GET_MODE_PRECISION (ext_mode)
1617 > GET_MODE_PRECISION (GET_MODE (xspec_target)))
1618 xspec_target_subreg = xtarget;
1619 }
1620 else
1621 xtarget = gen_reg_rtx (ext_mode);
1622 }
1623
1624 create_output_operand (&ops[0], xtarget, ext_mode);
1625 create_fixed_operand (&ops[1], xop0);
1626 create_integer_operand (&ops[2], bitsize);
1627 create_integer_operand (&ops[3], xbitpos);
1628 if (maybe_expand_insn (unsignedp ? CODE_FOR_extzv : CODE_FOR_extv,
1629 4, ops))
1630 {
1631 xtarget = ops[0].value;
1632 if (xtarget == xspec_target)
1633 return xtarget;
1634 if (xtarget == xspec_target_subreg)
1635 return xspec_target;
1636 return convert_extracted_bit_field (xtarget, mode, tmode, unsignedp);
1637 }
1638 }
1639
1640 /* If OP0 is a memory, try copying it to a register and seeing if a
1641 cheap register alternative is available. */
1642 if (ext_mode != MAX_MACHINE_MODE && MEM_P (op0))
1643 {
1644 enum machine_mode bestmode;
1645
1646 /* Get the mode to use for inserting into this field. If
1647 OP0 is BLKmode, get the smallest mode consistent with the
1648 alignment. If OP0 is a non-BLKmode object that is no
1649 wider than EXT_MODE, use its mode. Otherwise, use the
1650 smallest mode containing the field. */
1651
1652 if (GET_MODE (op0) == BLKmode
1653 || (ext_mode != MAX_MACHINE_MODE
1654 && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (ext_mode)))
1655 bestmode = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0),
1656 (ext_mode == MAX_MACHINE_MODE
1657 ? VOIDmode : ext_mode),
1658 MEM_VOLATILE_P (op0));
1659 else
1660 bestmode = GET_MODE (op0);
1661
1662 if (bestmode != VOIDmode
1663 && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
1664 && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
1665 {
1666 unsigned HOST_WIDE_INT xoffset, xbitpos;
1667
1668 /* Compute the offset as a multiple of this unit,
1669 counting in bytes. */
1670 unit = GET_MODE_BITSIZE (bestmode);
1671 xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1672 xbitpos = bitnum % unit;
1673
1674 /* Make sure the register is big enough for the whole field. */
1675 if (xoffset * BITS_PER_UNIT + unit
1676 >= offset * BITS_PER_UNIT + bitsize)
1677 {
1678 rtx last, result, xop0;
1679
1680 last = get_last_insn ();
1681
1682 /* Fetch it to a register in that size. */
1683 xop0 = adjust_address (op0, bestmode, xoffset);
1684 xop0 = force_reg (bestmode, xop0);
1685 result = extract_bit_field_1 (xop0, bitsize, xbitpos,
1686 unsignedp, packedp, target,
1687 mode, tmode, false);
1688 if (result)
1689 return result;
1690
1691 delete_insns_since (last);
1692 }
1693 }
1694 }
1695
1696 if (!fallback_p)
1697 return NULL;
1698
1699 target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1700 bitpos, target, unsignedp, packedp);
1701 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1702 }
1703
1704 /* Generate code to extract a byte-field from STR_RTX
1705 containing BITSIZE bits, starting at BITNUM,
1706 and put it in TARGET if possible (if TARGET is nonzero).
1707 Regardless of TARGET, we return the rtx for where the value is placed.
1708
1709 STR_RTX is the structure containing the byte (a REG or MEM).
1710 UNSIGNEDP is nonzero if this is an unsigned bit field.
1711 PACKEDP is nonzero if the field has the packed attribute.
1712 MODE is the natural mode of the field value once extracted.
1713 TMODE is the mode the caller would like the value to have;
1714 but the value may be returned with type MODE instead.
1715
1716 If a TARGET is specified and we can store in it at no extra cost,
1717 we do so, and return TARGET.
1718 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1719 if they are equally easy. */
1720
1721 rtx
1722 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1723 unsigned HOST_WIDE_INT bitnum, int unsignedp, bool packedp,
1724 rtx target, enum machine_mode mode, enum machine_mode tmode)
1725 {
1726 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp, packedp,
1727 target, mode, tmode, true);
1728 }
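/* As an illustrative sketch only (this call does not appear in expmed.c),
 a caller wanting the unsigned 8-bit field that starts at bit 4 of a
 SImode register REG could use the entry point above roughly as

 rtx val = extract_bit_field (reg, 8, 4, 1, false,
 NULL_RTX, QImode, QImode);

 and extract_bit_field_1 then chooses between an ext(z)v pattern, a
 shift-and-mask sequence, or a split extraction. */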
1729 \f
1730 /* Extract a bit field using shifts and boolean operations.
1731 Returns an rtx to represent the value.
1732 OP0 addresses a register (word) or memory (byte).
1733 BITPOS says which bit within the word or byte the bit field starts in.
1734 OFFSET says how many bytes farther the bit field starts;
1735 it is 0 if OP0 is a register.
1736 BITSIZE says how many bits long the bit field is.
1737 (If OP0 is a register, it may be narrower than a full word,
1738 but BITPOS still counts within a full word,
1739 which is significant on bigendian machines.)
1740
1741 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1742 PACKEDP is true if the field has the packed attribute.
1743
1744 If TARGET is nonzero, attempts to store the value there
1745 and return TARGET, but this is not guaranteed.
1746 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
1747
1748 static rtx
1749 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1750 unsigned HOST_WIDE_INT offset,
1751 unsigned HOST_WIDE_INT bitsize,
1752 unsigned HOST_WIDE_INT bitpos, rtx target,
1753 int unsignedp, bool packedp)
1754 {
1755 unsigned int total_bits = BITS_PER_WORD;
1756 enum machine_mode mode;
1757
1758 if (GET_CODE (op0) == SUBREG || REG_P (op0))
1759 {
1760 /* Special treatment for a bit field split across two registers. */
1761 if (bitsize + bitpos > BITS_PER_WORD)
1762 return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
1763 }
1764 else
1765 {
1766 /* Get the proper mode to use for this field. We want a mode that
1767 includes the entire field. If such a mode would be larger than
1768 a word, we won't be doing the extraction the normal way. */
1769
1770 if (MEM_VOLATILE_P (op0)
1771 && flag_strict_volatile_bitfields > 0)
1772 {
1773 if (GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1774 mode = GET_MODE (op0);
1775 else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1776 mode = GET_MODE (target);
1777 else
1778 mode = tmode;
1779 }
1780 else
1781 mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT, 0, 0,
1782 MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1783
1784 if (mode == VOIDmode)
1785 /* The only way this should occur is if the field spans word
1786 boundaries. */
1787 return extract_split_bit_field (op0, bitsize,
1788 bitpos + offset * BITS_PER_UNIT,
1789 unsignedp);
1790
1791 total_bits = GET_MODE_BITSIZE (mode);
1792
1793 /* Make sure bitpos is valid for the chosen mode. Adjust BITPOS to
1794 be in the range 0 to total_bits-1, and put any excess bytes in
1795 OFFSET. */
1796 if (bitpos >= total_bits)
1797 {
1798 offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1799 bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1800 * BITS_PER_UNIT);
1801 }
1802
1803 /* If we're accessing a volatile MEM, we can't do the next
1804 alignment step if it results in a multi-word access where we
1805 otherwise wouldn't have one. So, check for that case
1806 here. */
1807 if (MEM_P (op0)
1808 && MEM_VOLATILE_P (op0)
1809 && flag_strict_volatile_bitfields > 0
1810 && bitpos + bitsize <= total_bits
1811 && bitpos + bitsize + (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT > total_bits)
1812 {
1813 if (STRICT_ALIGNMENT)
1814 {
1815 static bool informed_about_misalignment = false;
1816 bool warned;
1817
1818 if (packedp)
1819 {
1820 if (bitsize == total_bits)
1821 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1822 "multiple accesses to volatile structure member"
1823 " because of packed attribute");
1824 else
1825 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1826 "multiple accesses to volatile structure bitfield"
1827 " because of packed attribute");
1828
1829 return extract_split_bit_field (op0, bitsize,
1830 bitpos + offset * BITS_PER_UNIT,
1831 unsignedp);
1832 }
1833
1834 if (bitsize == total_bits)
1835 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1836 "mis-aligned access used for structure member");
1837 else
1838 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1839 "mis-aligned access used for structure bitfield");
1840
1841 if (! informed_about_misalignment && warned)
1842 {
1843 informed_about_misalignment = true;
1844 inform (input_location,
1845 "when a volatile object spans multiple type-sized locations,"
1846 " the compiler must choose between using a single mis-aligned access to"
1847 " preserve the volatility, or using multiple aligned accesses to avoid"
1848 " runtime faults; this code may fail at runtime if the hardware does"
1849 " not allow this access");
1850 }
1851 }
1852 }
1853 else
1854 {
1855
1856 /* Get ref to an aligned byte, halfword, or word containing the field.
1857 Adjust BITPOS to be position within a word,
1858 and OFFSET to be the offset of that word.
1859 Then alter OP0 to refer to that word. */
1860 bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1861 offset -= (offset % (total_bits / BITS_PER_UNIT));
1862 }
1863
1864 op0 = adjust_address (op0, mode, offset);
1865 }
1866
1867 mode = GET_MODE (op0);
1868
1869 if (BYTES_BIG_ENDIAN)
1870 /* BITPOS is the distance between our msb and that of OP0.
1871 Convert it to the distance from the lsb. */
1872 bitpos = total_bits - bitsize - bitpos;
1873
1874 /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1875 We have reduced the big-endian case to the little-endian case. */
1876
1877 if (unsignedp)
1878 {
1879 if (bitpos)
1880 {
1881 /* If the field does not already start at the lsb,
1882 shift it so it does. */
1883 /* Maybe propagate the target for the shift. */
1884 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1885 if (tmode != mode)
1886 subtarget = 0;
1887 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitpos, subtarget, 1);
1888 }
1889 /* Convert the value to the desired mode. */
1890 if (mode != tmode)
1891 op0 = convert_to_mode (tmode, op0, 1);
1892
1893 /* Unless the msb of the field used to be the msb when we shifted,
1894 mask out the upper bits. */
1895
1896 if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
1897 return expand_binop (GET_MODE (op0), and_optab, op0,
1898 mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1899 target, 1, OPTAB_LIB_WIDEN);
1900 return op0;
1901 }
1902
1903 /* To extract a signed bit-field, first shift its msb to the msb of the word,
1904 then arithmetic-shift its lsb to the lsb of the word. */
1905 op0 = force_reg (mode, op0);
1906
1907 /* Find the narrowest integer mode that contains the field. */
1908
1909 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1910 mode = GET_MODE_WIDER_MODE (mode))
1911 if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1912 {
1913 op0 = convert_to_mode (mode, op0, 0);
1914 break;
1915 }
1916
1917 if (mode != tmode)
1918 target = 0;
1919
1920 if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1921 {
1922 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitpos);
1923 /* Maybe propagate the target for the shift. */
1924 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1925 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1926 }
1927
1928 return expand_shift (RSHIFT_EXPR, mode, op0,
1929 GET_MODE_BITSIZE (mode) - bitsize, target, 0);
1930 }
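/* Host-side model (an illustration, not part of the compiler) of the
 shift-and-mask strategy implemented above, for a field of BITSIZE bits
 at little-endian bit position BITPOS of a 32-bit word, with
 0 < BITSIZE < 32 and assuming arithmetic right shifts of signed ints:

 unsigned int
 extract_unsigned (unsigned int word, int bitpos, int bitsize)
 {
 return (word >> bitpos) & ((1u << bitsize) - 1);
 }

 int
 extract_signed (unsigned int word, int bitpos, int bitsize)
 {
 return (int) (word << (32 - bitsize - bitpos)) >> (32 - bitsize);
 }

 For example, extract_unsigned (0x12345678, 8, 8) is 0x56; the signed
 variant gives the same value here because bit 15 of the word is clear. */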
1931 \f
1932 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1933 of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1934 complement of that if COMPLEMENT. The mask is truncated if
1935 necessary to the width of mode MODE. The mask is zero-extended if
1936 BITSIZE+BITPOS is too small for MODE. */
1937
1938 static rtx
1939 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1940 {
1941 double_int mask;
1942
1943 mask = double_int_mask (bitsize);
1944 mask = double_int_lshift (mask, bitpos, HOST_BITS_PER_DOUBLE_INT, false);
1945
1946 if (complement)
1947 mask = double_int_not (mask);
1948
1949 return immed_double_int_const (mask, mode);
1950 }
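/* Two concrete values, purely for illustration: mask_rtx (SImode, 4, 8, 0)
 yields a CONST_INT whose value is 0x00000ff0 (eight ones shifted left by
 four), and mask_rtx (SImode, 4, 8, 1) yields the complement 0xfffff00f
 after truncation to SImode. */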
1951
1952 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1953 VALUE truncated to BITSIZE bits and then shifted left BITPOS bits. */
1954
1955 static rtx
1956 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1957 {
1958 double_int val;
1959
1960 val = double_int_zext (uhwi_to_double_int (INTVAL (value)), bitsize);
1961 val = double_int_lshift (val, bitpos, HOST_BITS_PER_DOUBLE_INT, false);
1962
1963 return immed_double_int_const (val, mode);
1964 }
1965 \f
1966 /* Extract a bit field that is split across two words
1967 and return an RTX for the result.
1968
1969 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1970 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1971 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend. */
1972
1973 static rtx
1974 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1975 unsigned HOST_WIDE_INT bitpos, int unsignedp)
1976 {
1977 unsigned int unit;
1978 unsigned int bitsdone = 0;
1979 rtx result = NULL_RTX;
1980 int first = 1;
1981
1982 /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1983 much at a time. */
1984 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1985 unit = BITS_PER_WORD;
1986 else
1987 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1988
1989 while (bitsdone < bitsize)
1990 {
1991 unsigned HOST_WIDE_INT thissize;
1992 rtx part, word;
1993 unsigned HOST_WIDE_INT thispos;
1994 unsigned HOST_WIDE_INT offset;
1995
1996 offset = (bitpos + bitsdone) / unit;
1997 thispos = (bitpos + bitsdone) % unit;
1998
1999 /* THISSIZE must not overrun a word boundary. Otherwise,
2000 extract_fixed_bit_field will call us again, and we will mutually
2001 recurse forever. */
2002 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
2003 thissize = MIN (thissize, unit - thispos);
2004
2005 /* If OP0 is a register, then handle OFFSET here.
2006
2007 When handling multiword bitfields, extract_bit_field may pass
2008 down a word_mode SUBREG of a larger REG for a bitfield that actually
2009 crosses a word boundary. Thus, for a SUBREG, we must find
2010 the current word starting from the base register. */
2011 if (GET_CODE (op0) == SUBREG)
2012 {
2013 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
2014 word = operand_subword_force (SUBREG_REG (op0), word_offset,
2015 GET_MODE (SUBREG_REG (op0)));
2016 offset = 0;
2017 }
2018 else if (REG_P (op0))
2019 {
2020 word = operand_subword_force (op0, offset, GET_MODE (op0));
2021 offset = 0;
2022 }
2023 else
2024 word = op0;
2025
2026 /* Extract the parts in bit-counting order,
2027 whose meaning is determined by BYTES_BIG_ENDIAN.
2028 OFFSET is in UNITs, and UNIT is in bits.
2029 extract_fixed_bit_field wants offset in bytes. */
2030 part = extract_fixed_bit_field (word_mode, word,
2031 offset * unit / BITS_PER_UNIT,
2032 thissize, thispos, 0, 1, false);
2033 bitsdone += thissize;
2034
2035 /* Shift this part into place for the result. */
2036 if (BYTES_BIG_ENDIAN)
2037 {
2038 if (bitsize != bitsdone)
2039 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2040 bitsize - bitsdone, 0, 1);
2041 }
2042 else
2043 {
2044 if (bitsdone != thissize)
2045 part = expand_shift (LSHIFT_EXPR, word_mode, part,
2046 bitsdone - thissize, 0, 1);
2047 }
2048
2049 if (first)
2050 result = part;
2051 else
2052 /* Combine the parts with bitwise or. This works
2053 because we extracted each part as an unsigned bit field. */
2054 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2055 OPTAB_LIB_WIDEN);
2056
2057 first = 0;
2058 }
2059
2060 /* Unsigned bit field: we are done. */
2061 if (unsignedp)
2062 return result;
2063 /* Signed bit field: sign-extend with two arithmetic shifts. */
2064 result = expand_shift (LSHIFT_EXPR, word_mode, result,
2065 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2066 return expand_shift (RSHIFT_EXPR, word_mode, result,
2067 BITS_PER_WORD - bitsize, NULL_RTX, 0);
2068 }
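/* Host-side illustration (not part of expmed.c) of the split extraction,
 for an unsigned 16-bit field that starts at bit 24 of a little-endian
 pair of 32-bit words:

 unsigned int
 extract_split_16_at_24 (unsigned int word0, unsigned int word1)
 {
 unsigned int lo = (word0 >> 24) & 0xff;
 unsigned int hi = word1 & 0xff;
 return lo | (hi << 8);
 }

 This mirrors the loop above: each part is fetched as an unsigned field
 with extract_fixed_bit_field, shifted into place, and IORed into the
 result; the two trailing arithmetic shifts then sign-extend the whole
 field when UNSIGNEDP is zero. */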
2069 \f
2070 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2071 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
2072 MODE, fill the upper bits with zeros. Fail if the layout of either
2073 mode is unknown (as for CC modes) or if the extraction would involve
2074 unprofitable mode punning. Return the value on success, otherwise
2075 return null.
2076
2077 This is different from gen_lowpart* in these respects:
2078
2079 - the returned value must always be considered an rvalue
2080
2081 - when MODE is wider than SRC_MODE, the extraction involves
2082 a zero extension
2083
2084 - when MODE is smaller than SRC_MODE, the extraction involves
2085 a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2086
2087 In other words, this routine performs a computation, whereas the
2088 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2089 operations. */
2090
2091 rtx
2092 extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src)
2093 {
2094 enum machine_mode int_mode, src_int_mode;
2095
2096 if (mode == src_mode)
2097 return src;
2098
2099 if (CONSTANT_P (src))
2100 {
2101 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2102 fails, it will happily create (subreg (symbol_ref)) or similar
2103 invalid SUBREGs. */
2104 unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2105 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2106 if (ret)
2107 return ret;
2108
2109 if (GET_MODE (src) == VOIDmode
2110 || !validate_subreg (mode, src_mode, src, byte))
2111 return NULL_RTX;
2112
2113 src = force_reg (GET_MODE (src), src);
2114 return gen_rtx_SUBREG (mode, src, byte);
2115 }
2116
2117 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2118 return NULL_RTX;
2119
2120 if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2121 && MODES_TIEABLE_P (mode, src_mode))
2122 {
2123 rtx x = gen_lowpart_common (mode, src);
2124 if (x)
2125 return x;
2126 }
2127
2128 src_int_mode = int_mode_for_mode (src_mode);
2129 int_mode = int_mode_for_mode (mode);
2130 if (src_int_mode == BLKmode || int_mode == BLKmode)
2131 return NULL_RTX;
2132
2133 if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2134 return NULL_RTX;
2135 if (!MODES_TIEABLE_P (int_mode, mode))
2136 return NULL_RTX;
2137
2138 src = gen_lowpart (src_int_mode, src);
2139 src = convert_modes (int_mode, src_int_mode, src, true);
2140 src = gen_lowpart (mode, src);
2141 return src;
2142 }
2143 \f
2144 /* Add INC into TARGET. */
2145
2146 void
2147 expand_inc (rtx target, rtx inc)
2148 {
2149 rtx value = expand_binop (GET_MODE (target), add_optab,
2150 target, inc,
2151 target, 0, OPTAB_LIB_WIDEN);
2152 if (value != target)
2153 emit_move_insn (target, value);
2154 }
2155
2156 /* Subtract DEC from TARGET. */
2157
2158 void
2159 expand_dec (rtx target, rtx dec)
2160 {
2161 rtx value = expand_binop (GET_MODE (target), sub_optab,
2162 target, dec,
2163 target, 0, OPTAB_LIB_WIDEN);
2164 if (value != target)
2165 emit_move_insn (target, value);
2166 }
2167 \f
2168 /* Output a shift instruction for expression code CODE,
2169 with SHIFTED being the rtx for the value to shift,
2170 and AMOUNT the rtx for the amount to shift by.
2171 Store the result in the rtx TARGET, if that is convenient.
2172 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2173 Return the rtx for where the value is. */
2174
2175 static rtx
2176 expand_shift_1 (enum tree_code code, enum machine_mode mode, rtx shifted,
2177 rtx amount, rtx target, int unsignedp)
2178 {
2179 rtx op1, temp = 0;
2180 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2181 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2182 optab lshift_optab = ashl_optab;
2183 optab rshift_arith_optab = ashr_optab;
2184 optab rshift_uns_optab = lshr_optab;
2185 optab lrotate_optab = rotl_optab;
2186 optab rrotate_optab = rotr_optab;
2187 enum machine_mode op1_mode;
2188 int attempt;
2189 bool speed = optimize_insn_for_speed_p ();
2190
2191 op1 = amount;
2192 op1_mode = GET_MODE (op1);
2193
2194 /* Determine whether the shift/rotate amount is a vector or a scalar. If the
2195 shift amount is a vector, use the vector/vector shift patterns. */
2196 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2197 {
2198 lshift_optab = vashl_optab;
2199 rshift_arith_optab = vashr_optab;
2200 rshift_uns_optab = vlshr_optab;
2201 lrotate_optab = vrotl_optab;
2202 rrotate_optab = vrotr_optab;
2203 }
2204
2205 /* We used to detect shift counts computed by NEGATE_EXPR
2206 and shift in the other direction, but that does not work
2207 on all machines. */
2208
2209 if (SHIFT_COUNT_TRUNCATED)
2210 {
2211 if (CONST_INT_P (op1)
2212 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2213 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2214 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2215 % GET_MODE_BITSIZE (mode));
2216 else if (GET_CODE (op1) == SUBREG
2217 && subreg_lowpart_p (op1)
2218 && INTEGRAL_MODE_P (GET_MODE (SUBREG_REG (op1))))
2219 op1 = SUBREG_REG (op1);
2220 }
2221
2222 if (op1 == const0_rtx)
2223 return shifted;
2224
2225 /* Check whether it's cheaper to implement a left shift by a constant
2226 bit count by a sequence of additions. */
2227 if (code == LSHIFT_EXPR
2228 && CONST_INT_P (op1)
2229 && INTVAL (op1) > 0
2230 && INTVAL (op1) < GET_MODE_PRECISION (mode)
2231 && INTVAL (op1) < MAX_BITS_PER_WORD
2232 && shift_cost[speed][mode][INTVAL (op1)] > INTVAL (op1) * add_cost[speed][mode]
2233 && shift_cost[speed][mode][INTVAL (op1)] != MAX_COST)
2234 {
2235 int i;
2236 for (i = 0; i < INTVAL (op1); i++)
2237 {
2238 temp = force_reg (mode, shifted);
2239 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2240 unsignedp, OPTAB_LIB_WIDEN);
2241 }
2242 return shifted;
2243 }
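/* Purely as an illustration: on a target where the cost test above
 succeeds, X << 2 is expanded as two doublings,

 t1 = x + x;
 t2 = t1 + t1;

 i.e. one addition per bit of the shift count. */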
2244
2245 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2246 {
2247 enum optab_methods methods;
2248
2249 if (attempt == 0)
2250 methods = OPTAB_DIRECT;
2251 else if (attempt == 1)
2252 methods = OPTAB_WIDEN;
2253 else
2254 methods = OPTAB_LIB_WIDEN;
2255
2256 if (rotate)
2257 {
2258 /* Widening does not work for rotation. */
2259 if (methods == OPTAB_WIDEN)
2260 continue;
2261 else if (methods == OPTAB_LIB_WIDEN)
2262 {
2263 /* If we have been unable to open-code this by a rotation,
2264 do it as the IOR of two shifts. I.e., to rotate A
2265 by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2266 where C is the bitsize of A.
2267
2268 It is theoretically possible that the target machine might
2269 not be able to perform either shift and hence we would
2270 be making two libcalls rather than just the one for the
2271 shift (similarly if IOR could not be done). We will allow
2272 this extremely unlikely lossage to avoid complicating the
2273 code below. */
2274
2275 rtx subtarget = target == shifted ? 0 : target;
2276 rtx new_amount, other_amount;
2277 rtx temp1;
2278
2279 new_amount = op1;
2280 if (CONST_INT_P (op1))
2281 other_amount = GEN_INT (GET_MODE_BITSIZE (mode)
2282 - INTVAL (op1));
2283 else
2284 other_amount
2285 = simplify_gen_binary (MINUS, GET_MODE (op1),
2286 GEN_INT (GET_MODE_PRECISION (mode)),
2287 op1);
2288
2289 shifted = force_reg (mode, shifted);
2290
2291 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2292 mode, shifted, new_amount, 0, 1);
2293 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2294 mode, shifted, other_amount,
2295 subtarget, 1);
2296 return expand_binop (mode, ior_optab, temp, temp1, target,
2297 unsignedp, methods);
2298 }
2299
2300 temp = expand_binop (mode,
2301 left ? lrotate_optab : rrotate_optab,
2302 shifted, op1, target, unsignedp, methods);
2303 }
2304 else if (unsignedp)
2305 temp = expand_binop (mode,
2306 left ? lshift_optab : rshift_uns_optab,
2307 shifted, op1, target, unsignedp, methods);
2308
2309 /* Do arithmetic shifts.
2310 Also, if we are going to widen the operand, we can just as well
2311 use an arithmetic right-shift instead of a logical one. */
2312 if (temp == 0 && ! rotate
2313 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2314 {
2315 enum optab_methods methods1 = methods;
2316
2317 /* If trying to widen a log shift to an arithmetic shift,
2318 don't accept an arithmetic shift of the same size. */
2319 if (unsignedp)
2320 methods1 = OPTAB_MUST_WIDEN;
2321
2322 /* Arithmetic shift */
2323
2324 temp = expand_binop (mode,
2325 left ? lshift_optab : rshift_arith_optab,
2326 shifted, op1, target, unsignedp, methods1);
2327 }
2328
2329 /* We used to try extzv here for logical right shifts, but that was
2330 only useful for one machine, the VAX, and caused poor code
2331 generation there for lshrdi3, so the code was deleted and a
2332 define_expand for lshrsi3 was added to vax.md. */
2333 }
2334
2335 gcc_assert (temp);
2336 return temp;
2337 }
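/* An illustrative sketch (not part of expmed.c) of the IOR-of-two-shifts
 fallback used above when no rotate pattern can be open-coded; for a
 32-bit left rotation by N with 0 < N < 32:

 unsigned int
 rotl32 (unsigned int a, int n)
 {
 return (a << n) | (a >> (32 - n));
 }

 At RTL level the second shift count is formed as
 GET_MODE_PRECISION (mode) - N when N is not a constant. */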
2338
2339 /* Output a shift instruction for expression code CODE,
2340 with SHIFTED being the rtx for the value to shift,
2341 and AMOUNT the amount to shift by.
2342 Store the result in the rtx TARGET, if that is convenient.
2343 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2344 Return the rtx for where the value is. */
2345
2346 rtx
2347 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2348 int amount, rtx target, int unsignedp)
2349 {
2350 return expand_shift_1 (code, mode,
2351 shifted, GEN_INT (amount), target, unsignedp);
2352 }
2353
2354 /* Output a shift instruction for expression code CODE,
2355 with SHIFTED being the rtx for the value to shift,
2356 and AMOUNT the tree for the amount to shift by.
2357 Store the result in the rtx TARGET, if that is convenient.
2358 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2359 Return the rtx for where the value is. */
2360
2361 rtx
2362 expand_variable_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2363 tree amount, rtx target, int unsignedp)
2364 {
2365 return expand_shift_1 (code, mode,
2366 shifted, expand_normal (amount), target, unsignedp);
2367 }
2368
2369 \f
2370 /* Indicates the type of fixup needed after a constant multiplication.
2371 BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2372 the result should be negated, and ADD_VARIANT means that the
2373 multiplicand should be added to the result. */
2374 enum mult_variant {basic_variant, negate_variant, add_variant};
2375
2376 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2377 const struct mult_cost *, enum machine_mode mode);
2378 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2379 struct algorithm *, enum mult_variant *, int);
2380 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2381 const struct algorithm *, enum mult_variant);
2382 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2383 static rtx extract_high_half (enum machine_mode, rtx);
2384 static rtx expmed_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2385 static rtx expmed_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2386 int, int);
2387 /* Compute and return the best algorithm for multiplying by T.
2388 The algorithm must cost less than COST_LIMIT.
2389 If retval.cost >= COST_LIMIT, no algorithm was found and all
2390 other fields of the returned struct are undefined.
2391 MODE is the machine mode of the multiplication. */
2392
2393 static void
2394 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2395 const struct mult_cost *cost_limit, enum machine_mode mode)
2396 {
2397 int m;
2398 struct algorithm *alg_in, *best_alg;
2399 struct mult_cost best_cost;
2400 struct mult_cost new_limit;
2401 int op_cost, op_latency;
2402 unsigned HOST_WIDE_INT orig_t = t;
2403 unsigned HOST_WIDE_INT q;
2404 int maxm, hash_index;
2405 bool cache_hit = false;
2406 enum alg_code cache_alg = alg_zero;
2407 bool speed = optimize_insn_for_speed_p ();
2408 enum machine_mode imode;
2409
2410 /* Indicate that no algorithm is yet found. If no algorithm
2411 is found, this value will be returned and indicate failure. */
2412 alg_out->cost.cost = cost_limit->cost + 1;
2413 alg_out->cost.latency = cost_limit->latency + 1;
2414
2415 if (cost_limit->cost < 0
2416 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2417 return;
2418
2419 /* Be prepared for vector modes. */
2420 imode = GET_MODE_INNER (mode);
2421 if (imode == VOIDmode)
2422 imode = mode;
2423
2424 maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2425
2426 /* Restrict the bits of "t" to the multiplication's mode. */
2427 t &= GET_MODE_MASK (imode);
2428
2429 /* t == 1 can be done at zero cost. */
2430 if (t == 1)
2431 {
2432 alg_out->ops = 1;
2433 alg_out->cost.cost = 0;
2434 alg_out->cost.latency = 0;
2435 alg_out->op[0] = alg_m;
2436 return;
2437 }
2438
2439 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2440 fail now. */
2441 if (t == 0)
2442 {
2443 if (MULT_COST_LESS (cost_limit, zero_cost[speed]))
2444 return;
2445 else
2446 {
2447 alg_out->ops = 1;
2448 alg_out->cost.cost = zero_cost[speed];
2449 alg_out->cost.latency = zero_cost[speed];
2450 alg_out->op[0] = alg_zero;
2451 return;
2452 }
2453 }
2454
2455 /* We'll be needing a couple extra algorithm structures now. */
2456
2457 alg_in = XALLOCA (struct algorithm);
2458 best_alg = XALLOCA (struct algorithm);
2459 best_cost = *cost_limit;
2460
2461 /* Compute the hash index. */
2462 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2463
2464 /* See if we already know what to do for T. */
2465 if (alg_hash[hash_index].t == t
2466 && alg_hash[hash_index].mode == mode
2468 && alg_hash[hash_index].speed == speed
2469 && alg_hash[hash_index].alg != alg_unknown)
2470 {
2471 cache_alg = alg_hash[hash_index].alg;
2472
2473 if (cache_alg == alg_impossible)
2474 {
2475 /* The cache tells us that it's impossible to synthesize
2476 multiplication by T within alg_hash[hash_index].cost. */
2477 if (!CHEAPER_MULT_COST (&alg_hash[hash_index].cost, cost_limit))
2478 /* COST_LIMIT is at least as restrictive as the one
2479 recorded in the hash table, in which case we have no
2480 hope of synthesizing a multiplication. Just
2481 return. */
2482 return;
2483
2484 /* If we get here, COST_LIMIT is less restrictive than the
2485 one recorded in the hash table, so we may be able to
2486 synthesize a multiplication. Proceed as if we didn't
2487 have the cache entry. */
2488 }
2489 else
2490 {
2491 if (CHEAPER_MULT_COST (cost_limit, &alg_hash[hash_index].cost))
2492 /* The cached algorithm shows that this multiplication
2493 requires more cost than COST_LIMIT. Just return. This
2494 way, we don't clobber this cache entry with
2495 alg_impossible but retain useful information. */
2496 return;
2497
2498 cache_hit = true;
2499
2500 switch (cache_alg)
2501 {
2502 case alg_shift:
2503 goto do_alg_shift;
2504
2505 case alg_add_t_m2:
2506 case alg_sub_t_m2:
2507 goto do_alg_addsub_t_m2;
2508
2509 case alg_add_factor:
2510 case alg_sub_factor:
2511 goto do_alg_addsub_factor;
2512
2513 case alg_add_t2_m:
2514 goto do_alg_add_t2_m;
2515
2516 case alg_sub_t2_m:
2517 goto do_alg_sub_t2_m;
2518
2519 default:
2520 gcc_unreachable ();
2521 }
2522 }
2523 }
2524
2525 /* If we have a group of zero bits at the low-order part of T, try
2526 multiplying by the remaining bits and then doing a shift. */
2527
2528 if ((t & 1) == 0)
2529 {
2530 do_alg_shift:
2531 m = floor_log2 (t & -t); /* m = number of low zero bits */
2532 if (m < maxm)
2533 {
2534 q = t >> m;
2535 /* The function expand_shift will choose between a shift and
2536 a sequence of additions, so the observed cost is given as
2537 MIN (m * add_cost[speed][mode], shift_cost[speed][mode][m]). */
2538 op_cost = m * add_cost[speed][mode];
2539 if (shift_cost[speed][mode][m] < op_cost)
2540 op_cost = shift_cost[speed][mode][m];
2541 new_limit.cost = best_cost.cost - op_cost;
2542 new_limit.latency = best_cost.latency - op_cost;
2543 synth_mult (alg_in, q, &new_limit, mode);
2544
2545 alg_in->cost.cost += op_cost;
2546 alg_in->cost.latency += op_cost;
2547 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2548 {
2549 struct algorithm *x;
2550 best_cost = alg_in->cost;
2551 x = alg_in, alg_in = best_alg, best_alg = x;
2552 best_alg->log[best_alg->ops] = m;
2553 best_alg->op[best_alg->ops] = alg_shift;
2554 }
2555
2556 /* See if treating ORIG_T as a signed number yields a better
2557 sequence. Try this sequence only for a negative ORIG_T
2558 as it would be useless for a non-negative ORIG_T. */
2559 if ((HOST_WIDE_INT) orig_t < 0)
2560 {
2561 /* Shift ORIG_T as follows because a right shift of a
2562 negative-valued signed type is implementation
2563 defined. */
2564 q = ~(~orig_t >> m);
2565 /* The function expand_shift will choose between a shift
2566 and a sequence of additions, so the observed cost is
2567 given as MIN (m * add_cost[speed][mode],
2568 shift_cost[speed][mode][m]). */
2569 op_cost = m * add_cost[speed][mode];
2570 if (shift_cost[speed][mode][m] < op_cost)
2571 op_cost = shift_cost[speed][mode][m];
2572 new_limit.cost = best_cost.cost - op_cost;
2573 new_limit.latency = best_cost.latency - op_cost;
2574 synth_mult (alg_in, q, &new_limit, mode);
2575
2576 alg_in->cost.cost += op_cost;
2577 alg_in->cost.latency += op_cost;
2578 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2579 {
2580 struct algorithm *x;
2581 best_cost = alg_in->cost;
2582 x = alg_in, alg_in = best_alg, best_alg = x;
2583 best_alg->log[best_alg->ops] = m;
2584 best_alg->op[best_alg->ops] = alg_shift;
2585 }
2586 }
2587 }
2588 if (cache_hit)
2589 goto done;
2590 }
2591
2592 /* If we have an odd number, add or subtract one. */
2593 if ((t & 1) != 0)
2594 {
2595 unsigned HOST_WIDE_INT w;
2596
2597 do_alg_addsub_t_m2:
2598 for (w = 1; (w & t) != 0; w <<= 1)
2599 ;
2600 /* If T was -1, then W will be zero after the loop. This is another
2601 case where T ends with ...111. Handling this by multiplying by
2602 (T + 1) and subtracting 1 produces slightly better code and makes
2603 algorithm selection much faster than treating it like the ...0111 case
2604 below. */
2605 if (w == 0
2606 || (w > 2
2607 /* Reject the case where t is 3.
2608 Thus we prefer addition in that case. */
2609 && t != 3))
2610 {
2611 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
2612
2613 op_cost = add_cost[speed][mode];
2614 new_limit.cost = best_cost.cost - op_cost;
2615 new_limit.latency = best_cost.latency - op_cost;
2616 synth_mult (alg_in, t + 1, &new_limit, mode);
2617
2618 alg_in->cost.cost += op_cost;
2619 alg_in->cost.latency += op_cost;
2620 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2621 {
2622 struct algorithm *x;
2623 best_cost = alg_in->cost;
2624 x = alg_in, alg_in = best_alg, best_alg = x;
2625 best_alg->log[best_alg->ops] = 0;
2626 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2627 }
2628 }
2629 else
2630 {
2631 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
2632
2633 op_cost = add_cost[speed][mode];
2634 new_limit.cost = best_cost.cost - op_cost;
2635 new_limit.latency = best_cost.latency - op_cost;
2636 synth_mult (alg_in, t - 1, &new_limit, mode);
2637
2638 alg_in->cost.cost += op_cost;
2639 alg_in->cost.latency += op_cost;
2640 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2641 {
2642 struct algorithm *x;
2643 best_cost = alg_in->cost;
2644 x = alg_in, alg_in = best_alg, best_alg = x;
2645 best_alg->log[best_alg->ops] = 0;
2646 best_alg->op[best_alg->ops] = alg_add_t_m2;
2647 }
2648 }
2649
2650 /* We may be able to calculate a * -7, a * -15, a * -31, etc
2651 quickly with a - a * n for some appropriate constant n. */
2652 m = exact_log2 (-orig_t + 1);
2653 if (m >= 0 && m < maxm)
2654 {
2655 op_cost = shiftsub1_cost[speed][mode][m];
2656 new_limit.cost = best_cost.cost - op_cost;
2657 new_limit.latency = best_cost.latency - op_cost;
2658 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2659 &new_limit, mode);
2660
2661 alg_in->cost.cost += op_cost;
2662 alg_in->cost.latency += op_cost;
2663 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2664 {
2665 struct algorithm *x;
2666 best_cost = alg_in->cost;
2667 x = alg_in, alg_in = best_alg, best_alg = x;
2668 best_alg->log[best_alg->ops] = m;
2669 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2670 }
2671 }
2672
2673 if (cache_hit)
2674 goto done;
2675 }
2676
2677 /* Look for factors of t of the form
2678 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2679 If we find such a factor, we can multiply by t using an algorithm that
2680 multiplies by q, shift the result by m and add/subtract it to itself.
2681
2682 We search for large factors first and loop down, even if large factors
2683 are less probable than small; if we find a large factor we will find a
2684 good sequence quickly, and therefore be able to prune (by decreasing
2685 COST_LIMIT) the search. */
2686
2687 do_alg_addsub_factor:
2688 for (m = floor_log2 (t - 1); m >= 2; m--)
2689 {
2690 unsigned HOST_WIDE_INT d;
2691
2692 d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2693 if (t % d == 0 && t > d && m < maxm
2694 && (!cache_hit || cache_alg == alg_add_factor))
2695 {
2696 /* If the target has a cheap shift-and-add instruction use
2697 that in preference to a shift insn followed by an add insn.
2698 Assume that the shift-and-add is "atomic" with a latency
2699 equal to its cost, otherwise assume that on superscalar
2700 hardware the shift may be executed concurrently with the
2701 earlier steps in the algorithm. */
2702 op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
2703 if (shiftadd_cost[speed][mode][m] < op_cost)
2704 {
2705 op_cost = shiftadd_cost[speed][mode][m];
2706 op_latency = op_cost;
2707 }
2708 else
2709 op_latency = add_cost[speed][mode];
2710
2711 new_limit.cost = best_cost.cost - op_cost;
2712 new_limit.latency = best_cost.latency - op_latency;
2713 synth_mult (alg_in, t / d, &new_limit, mode);
2714
2715 alg_in->cost.cost += op_cost;
2716 alg_in->cost.latency += op_latency;
2717 if (alg_in->cost.latency < op_cost)
2718 alg_in->cost.latency = op_cost;
2719 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2720 {
2721 struct algorithm *x;
2722 best_cost = alg_in->cost;
2723 x = alg_in, alg_in = best_alg, best_alg = x;
2724 best_alg->log[best_alg->ops] = m;
2725 best_alg->op[best_alg->ops] = alg_add_factor;
2726 }
2727 /* Other factors will have been taken care of in the recursion. */
2728 break;
2729 }
2730
2731 d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2732 if (t % d == 0 && t > d && m < maxm
2733 && (!cache_hit || cache_alg == alg_sub_factor))
2734 {
2735 /* If the target has a cheap shift-and-subtract insn use
2736 that in preference to a shift insn followed by a sub insn.
2737 Assume that the shift-and-sub is "atomic" with a latency
2738 equal to its cost, otherwise assume that on superscalar
2739 hardware the shift may be executed concurrently with the
2740 earlier steps in the algorithm. */
2741 op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
2742 if (shiftsub0_cost[speed][mode][m] < op_cost)
2743 {
2744 op_cost = shiftsub0_cost[speed][mode][m];
2745 op_latency = op_cost;
2746 }
2747 else
2748 op_latency = add_cost[speed][mode];
2749
2750 new_limit.cost = best_cost.cost - op_cost;
2751 new_limit.latency = best_cost.latency - op_latency;
2752 synth_mult (alg_in, t / d, &new_limit, mode);
2753
2754 alg_in->cost.cost += op_cost;
2755 alg_in->cost.latency += op_latency;
2756 if (alg_in->cost.latency < op_cost)
2757 alg_in->cost.latency = op_cost;
2758 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2759 {
2760 struct algorithm *x;
2761 best_cost = alg_in->cost;
2762 x = alg_in, alg_in = best_alg, best_alg = x;
2763 best_alg->log[best_alg->ops] = m;
2764 best_alg->op[best_alg->ops] = alg_sub_factor;
2765 }
2766 break;
2767 }
2768 }
2769 if (cache_hit)
2770 goto done;
2771
2772 /* Try shift-and-add (load effective address) instructions,
2773 i.e. do a*3, a*5, a*9. */
2774 if ((t & 1) != 0)
2775 {
2776 do_alg_add_t2_m:
2777 q = t - 1;
2778 q = q & -q;
2779 m = exact_log2 (q);
2780 if (m >= 0 && m < maxm)
2781 {
2782 op_cost = shiftadd_cost[speed][mode][m];
2783 new_limit.cost = best_cost.cost - op_cost;
2784 new_limit.latency = best_cost.latency - op_cost;
2785 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2786
2787 alg_in->cost.cost += op_cost;
2788 alg_in->cost.latency += op_cost;
2789 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2790 {
2791 struct algorithm *x;
2792 best_cost = alg_in->cost;
2793 x = alg_in, alg_in = best_alg, best_alg = x;
2794 best_alg->log[best_alg->ops] = m;
2795 best_alg->op[best_alg->ops] = alg_add_t2_m;
2796 }
2797 }
2798 if (cache_hit)
2799 goto done;
2800
2801 do_alg_sub_t2_m:
2802 q = t + 1;
2803 q = q & -q;
2804 m = exact_log2 (q);
2805 if (m >= 0 && m < maxm)
2806 {
2807 op_cost = shiftsub0_cost[speed][mode][m];
2808 new_limit.cost = best_cost.cost - op_cost;
2809 new_limit.latency = best_cost.latency - op_cost;
2810 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2811
2812 alg_in->cost.cost += op_cost;
2813 alg_in->cost.latency += op_cost;
2814 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2815 {
2816 struct algorithm *x;
2817 best_cost = alg_in->cost;
2818 x = alg_in, alg_in = best_alg, best_alg = x;
2819 best_alg->log[best_alg->ops] = m;
2820 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2821 }
2822 }
2823 if (cache_hit)
2824 goto done;
2825 }
2826
2827 done:
2828 /* If best_cost has not decreased, we have not found any algorithm. */
2829 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2830 {
2831 /* We failed to find an algorithm. Record alg_impossible for
2832 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2833 we are asked to find an algorithm for T within the same or
2834 lower COST_LIMIT, we can immediately return to the
2835 caller. */
2836 alg_hash[hash_index].t = t;
2837 alg_hash[hash_index].mode = mode;
2838 alg_hash[hash_index].speed = speed;
2839 alg_hash[hash_index].alg = alg_impossible;
2840 alg_hash[hash_index].cost = *cost_limit;
2841 return;
2842 }
2843
2844 /* Cache the result. */
2845 if (!cache_hit)
2846 {
2847 alg_hash[hash_index].t = t;
2848 alg_hash[hash_index].mode = mode;
2849 alg_hash[hash_index].speed = speed;
2850 alg_hash[hash_index].alg = best_alg->op[best_alg->ops];
2851 alg_hash[hash_index].cost.cost = best_cost.cost;
2852 alg_hash[hash_index].cost.latency = best_cost.latency;
2853 }
2854
2855 /* If we are getting too long a sequence for `struct algorithm'
2856 to record, make this search fail. */
2857 if (best_alg->ops == MAX_BITS_PER_WORD)
2858 return;
2859
2860 /* Copy the algorithm from temporary space to the space at alg_out.
2861 We avoid using structure assignment because the majority of
2862 best_alg is normally undefined, and this is a critical function. */
2863 alg_out->ops = best_alg->ops + 1;
2864 alg_out->cost = best_cost;
2865 memcpy (alg_out->op, best_alg->op,
2866 alg_out->ops * sizeof *alg_out->op);
2867 memcpy (alg_out->log, best_alg->log,
2868 alg_out->ops * sizeof *alg_out->log);
2869 }
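/* Worked example (one possible outcome; the winner depends on the cost
 tables): for t = 10 on a target with a cheap shift-and-add instruction,
 synth_mult can return the three-operation algorithm

 op[0] = alg_m          accum = x                  val_so_far = 1
 op[1] = alg_add_t2_m   accum = (accum << 2) + x   val_so_far = 5
 op[2] = alg_shift      accum = accum << 1         val_so_far = 10

 which expand_mult_const below turns into RTL, provided its total cost
 beats the target's multiply cost. */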
2870 \f
2871 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2872 Try three variations:
2873
2874 - a shift/add sequence based on VAL itself
2875 - a shift/add sequence based on -VAL, followed by a negation
2876 - a shift/add sequence based on VAL - 1, followed by an addition.
2877
2878 Return true if the cheapest of these cost less than MULT_COST,
2879 describing the algorithm in *ALG and final fixup in *VARIANT. */
2880
2881 static bool
2882 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2883 struct algorithm *alg, enum mult_variant *variant,
2884 int mult_cost)
2885 {
2886 struct algorithm alg2;
2887 struct mult_cost limit;
2888 int op_cost;
2889 bool speed = optimize_insn_for_speed_p ();
2890
2891 /* Fail quickly for impossible bounds. */
2892 if (mult_cost < 0)
2893 return false;
2894
2895 /* Ensure that mult_cost provides a reasonable upper bound.
2896 Any constant multiplication can be performed with fewer
2897 than 2 * bits additions. */
2898 op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost[speed][mode];
2899 if (mult_cost > op_cost)
2900 mult_cost = op_cost;
2901
2902 *variant = basic_variant;
2903 limit.cost = mult_cost;
2904 limit.latency = mult_cost;
2905 synth_mult (alg, val, &limit, mode);
2906
2907 /* This works only if the inverted value actually fits in an
2908 `unsigned int'. */
2909 if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
2910 {
2911 op_cost = neg_cost[speed][mode];
2912 if (MULT_COST_LESS (&alg->cost, mult_cost))
2913 {
2914 limit.cost = alg->cost.cost - op_cost;
2915 limit.latency = alg->cost.latency - op_cost;
2916 }
2917 else
2918 {
2919 limit.cost = mult_cost - op_cost;
2920 limit.latency = mult_cost - op_cost;
2921 }
2922
2923 synth_mult (&alg2, -val, &limit, mode);
2924 alg2.cost.cost += op_cost;
2925 alg2.cost.latency += op_cost;
2926 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2927 *alg = alg2, *variant = negate_variant;
2928 }
2929
2930 /* This proves very useful for division-by-constant. */
2931 op_cost = add_cost[speed][mode];
2932 if (MULT_COST_LESS (&alg->cost, mult_cost))
2933 {
2934 limit.cost = alg->cost.cost - op_cost;
2935 limit.latency = alg->cost.latency - op_cost;
2936 }
2937 else
2938 {
2939 limit.cost = mult_cost - op_cost;
2940 limit.latency = mult_cost - op_cost;
2941 }
2942
2943 synth_mult (&alg2, val - 1, &limit, mode);
2944 alg2.cost.cost += op_cost;
2945 alg2.cost.latency += op_cost;
2946 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2947 *alg = alg2, *variant = add_variant;
2948
2949 return MULT_COST_LESS (&alg->cost, mult_cost);
2950 }
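/* Illustration of the three variants tried above: for val = 7 the basic
 variant gives (x << 3) - x; for val = -7 the negate variant synthesizes
 x * 7 and then negates the result; and the add variant would handle
 val = 7 as ((x << 1) + x) << 1 followed by one more addition of x,
 whichever of these the cost comparison happens to prefer. */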
2951
2952 /* A subroutine of expand_mult, used for constant multiplications.
2953 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2954 convenient. Use the shift/add sequence described by ALG and apply
2955 the final fixup specified by VARIANT. */
2956
2957 static rtx
2958 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2959 rtx target, const struct algorithm *alg,
2960 enum mult_variant variant)
2961 {
2962 HOST_WIDE_INT val_so_far;
2963 rtx insn, accum, tem;
2964 int opno;
2965 enum machine_mode nmode;
2966
2967 /* Avoid referencing memory over and over and invalid sharing
2968 on SUBREGs. */
2969 op0 = force_reg (mode, op0);
2970
2971 /* ACCUM starts out either as OP0 or as a zero, depending on
2972 the first operation. */
2973
2974 if (alg->op[0] == alg_zero)
2975 {
2976 accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
2977 val_so_far = 0;
2978 }
2979 else if (alg->op[0] == alg_m)
2980 {
2981 accum = copy_to_mode_reg (mode, op0);
2982 val_so_far = 1;
2983 }
2984 else
2985 gcc_unreachable ();
2986
2987 for (opno = 1; opno < alg->ops; opno++)
2988 {
2989 int log = alg->log[opno];
2990 rtx shift_subtarget = optimize ? 0 : accum;
2991 rtx add_target
2992 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2993 && !optimize)
2994 ? target : 0;
2995 rtx accum_target = optimize ? 0 : accum;
2996 rtx accum_inner;
2997
2998 switch (alg->op[opno])
2999 {
3000 case alg_shift:
3001 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3002 /* REG_EQUAL note will be attached to the following insn. */
3003 emit_move_insn (accum, tem);
3004 val_so_far <<= log;
3005 break;
3006
3007 case alg_add_t_m2:
3008 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3009 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3010 add_target ? add_target : accum_target);
3011 val_so_far += (HOST_WIDE_INT) 1 << log;
3012 break;
3013
3014 case alg_sub_t_m2:
3015 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3016 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3017 add_target ? add_target : accum_target);
3018 val_so_far -= (HOST_WIDE_INT) 1 << log;
3019 break;
3020
3021 case alg_add_t2_m:
3022 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3023 log, shift_subtarget, 0);
3024 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3025 add_target ? add_target : accum_target);
3026 val_so_far = (val_so_far << log) + 1;
3027 break;
3028
3029 case alg_sub_t2_m:
3030 accum = expand_shift (LSHIFT_EXPR, mode, accum,
3031 log, shift_subtarget, 0);
3032 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3033 add_target ? add_target : accum_target);
3034 val_so_far = (val_so_far << log) - 1;
3035 break;
3036
3037 case alg_add_factor:
3038 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3039 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3040 add_target ? add_target : accum_target);
3041 val_so_far += val_so_far << log;
3042 break;
3043
3044 case alg_sub_factor:
3045 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3046 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3047 (add_target
3048 ? add_target : (optimize ? 0 : tem)));
3049 val_so_far = (val_so_far << log) - val_so_far;
3050 break;
3051
3052 default:
3053 gcc_unreachable ();
3054 }
3055
3056 if (SCALAR_INT_MODE_P (mode))
3057 {
3058 /* Write a REG_EQUAL note on the last insn so that we can cse
3059 multiplication sequences. Note that if ACCUM is a SUBREG,
3060 we've set the inner register and must properly indicate that. */
3061 tem = op0, nmode = mode;
3062 accum_inner = accum;
3063 if (GET_CODE (accum) == SUBREG)
3064 {
3065 accum_inner = SUBREG_REG (accum);
3066 nmode = GET_MODE (accum_inner);
3067 tem = gen_lowpart (nmode, op0);
3068 }
3069
3070 insn = get_last_insn ();
3071 set_dst_reg_note (insn, REG_EQUAL,
3072 gen_rtx_MULT (nmode, tem, GEN_INT (val_so_far)),
3073 accum_inner);
3074 }
3075 }
3076
3077 if (variant == negate_variant)
3078 {
3079 val_so_far = -val_so_far;
3080 accum = expand_unop (mode, neg_optab, accum, target, 0);
3081 }
3082 else if (variant == add_variant)
3083 {
3084 val_so_far = val_so_far + 1;
3085 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3086 }
3087
3088 /* Compare only the bits of val and val_so_far that are significant
3089 in the result mode, to avoid sign-/zero-extension confusion. */
3090 nmode = GET_MODE_INNER (mode);
3091 if (nmode == VOIDmode)
3092 nmode = mode;
3093 val &= GET_MODE_MASK (nmode);
3094 val_so_far &= GET_MODE_MASK (nmode);
3095 gcc_assert (val == val_so_far);
3096
3097 return accum;
3098 }
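/* Illustrative trace (one possible costing outcome): with val = 7 and the
 algorithm { alg_m; alg_shift log 3; alg_sub_t_m2 log 0 } the loop above
 emits, in effect,

 accum = x;               val_so_far = 1
 accum = accum << 3;      val_so_far = 8
 accum = accum - x;       val_so_far = 7

 attaches a REG_EQUAL note of (mult x 7) to the last insn, and finally
 asserts that val_so_far equals VAL within the mask of the result mode. */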
3099
3100 /* Perform a multiplication and return an rtx for the result.
3101 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3102 TARGET is a suggestion for where to store the result (an rtx).
3103
3104 We check specially for a constant integer as OP1.
3105 If you want this check for OP0 as well, then before calling
3106 you should swap the two operands if OP0 would be constant. */
3107
3108 rtx
3109 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3110 int unsignedp)
3111 {
3112 enum mult_variant variant;
3113 struct algorithm algorithm;
3114 rtx scalar_op1;
3115 int max_cost;
3116 bool speed = optimize_insn_for_speed_p ();
3117 bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3118
3119 if (CONSTANT_P (op0))
3120 {
3121 rtx temp = op0;
3122 op0 = op1;
3123 op1 = temp;
3124 }
3125
3126 /* For vectors, there are several simplifications that can be made if
3127 all elements of the vector constant are identical. */
3128 scalar_op1 = op1;
3129 if (GET_CODE (op1) == CONST_VECTOR)
3130 {
3131 int i, n = CONST_VECTOR_NUNITS (op1);
3132 scalar_op1 = CONST_VECTOR_ELT (op1, 0);
3133 for (i = 1; i < n; ++i)
3134 if (!rtx_equal_p (scalar_op1, CONST_VECTOR_ELT (op1, i)))
3135 goto skip_scalar;
3136 }
3137
3138 if (INTEGRAL_MODE_P (mode))
3139 {
3140 rtx fake_reg;
3141 HOST_WIDE_INT coeff = 0;
3142 bool is_neg = false;
3143 int mode_bitsize;
3144
3145 if (op1 == CONST0_RTX (mode))
3146 return op1;
3147 if (op1 == CONST1_RTX (mode))
3148 return op0;
3149 if (op1 == CONSTM1_RTX (mode))
3150 return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3151 op0, target, 0);
3152
3153 if (do_trapv)
3154 goto skip_synth;
3155
3156 /* These are the operations that are potentially turned into
3157 a sequence of shifts and additions. */
3158 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3159
3160 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3161 less than or equal in size to `unsigned int' this doesn't matter.
3162 If the mode is larger than `unsigned int', then synth_mult works
3163 only if the constant value exactly fits in an `unsigned int' without
3164 any truncation. This means that multiplying by negative values does
3165 not work; results are off by 2^32 on a 32 bit machine. */
3166
3167 if (CONST_INT_P (scalar_op1))
3168 {
3169 coeff = INTVAL (scalar_op1);
3170 is_neg = coeff < 0;
3171 }
3172 else if (CONST_DOUBLE_P (scalar_op1))
3173 {
3174 /* If we are multiplying in DImode, it may still be a win
3175 to try to work with shifts and adds. */
3176 if (CONST_DOUBLE_HIGH (scalar_op1) == 0
3177 && CONST_DOUBLE_LOW (scalar_op1) > 0)
3178 {
3179 coeff = CONST_DOUBLE_LOW (scalar_op1);
3180 is_neg = false;
3181 }
3182 else if (CONST_DOUBLE_LOW (scalar_op1) == 0)
3183 {
3184 coeff = CONST_DOUBLE_HIGH (scalar_op1);
3185 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3186 {
3187 int shift = floor_log2 (coeff) + HOST_BITS_PER_WIDE_INT;
3188 if (shift < HOST_BITS_PER_DOUBLE_INT - 1
3189 || mode_bitsize <= HOST_BITS_PER_DOUBLE_INT)
3190 return expand_shift (LSHIFT_EXPR, mode, op0,
3191 shift, target, unsignedp);
3192 }
3193 goto skip_synth;
3194 }
3195 }
3196 else
3197 goto skip_synth;
3198
3199 /* We used to test optimize here, on the grounds that it's better to
3200 produce a smaller program when -O is not used. But this causes
3201 such a terrible slowdown sometimes that it seems better to always
3202 use synth_mult. */
3203
3204 /* Special case powers of two. */
3205 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3206 return expand_shift (LSHIFT_EXPR, mode, op0,
3207 floor_log2 (coeff), target, unsignedp);
3208
3209 fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3210
3211 /* Attempt to handle multiplication of DImode values by negative
3212 coefficients, by performing the multiplication by a positive
3213 multiplier and then inverting the result. */
3214 /* ??? How is this not slightly redundant with the neg variant? */
3215 if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3216 {
3217 /* It's safe to use -coeff even for INT_MIN, as the
3218 result is interpreted as an unsigned coefficient.
3219 Exclude cost of op0 from max_cost to match the cost
3220 calculation of the synth_mult. */
3221 max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed)
3222 - neg_cost[speed][mode]);
3223 if (max_cost > 0
3224 && choose_mult_variant (mode, -coeff, &algorithm,
3225 &variant, max_cost))
3226 {
3227 rtx temp = expand_mult_const (mode, op0, -coeff, NULL_RTX,
3228 &algorithm, variant);
3229 return expand_unop (mode, neg_optab, temp, target, 0);
3230 }
3231 }
3232
3233 /* Exclude cost of op0 from max_cost to match the cost
3234 calculation of the synth_mult. */
3235 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed);
3236 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3237 return expand_mult_const (mode, op0, coeff, target,
3238 &algorithm, variant);
3239 }
3240 skip_synth:
3241
3242 /* Expand x*2.0 as x+x. */
3243 if (GET_CODE (scalar_op1) == CONST_DOUBLE && FLOAT_MODE_P (mode))
3244 {
3245 REAL_VALUE_TYPE d;
3246 REAL_VALUE_FROM_CONST_DOUBLE (d, scalar_op1);
3247
3248 if (REAL_VALUES_EQUAL (d, dconst2))
3249 {
3250 op0 = force_reg (GET_MODE (op0), op0);
3251 return expand_binop (mode, add_optab, op0, op0,
3252 target, unsignedp, OPTAB_LIB_WIDEN);
3253 }
3254 }
3255 skip_scalar:
3256
3257 /* This used to use umul_optab if unsigned, but for non-widening multiply
3258 there is no difference between signed and unsigned. */
3259 op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3260 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3261 gcc_assert (op0);
3262 return op0;
3263 }
3264
3265 /* Perform a widening multiplication and return an rtx for the result.
3266 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3267 TARGET is a suggestion for where to store the result (an rtx).
3268 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3269 or smul_widen_optab.
3270
3271 We check specially for a constant integer as OP1, comparing the
3272 cost of a widening multiply against the cost of a sequence of shifts
3273 and adds. */
3274
3275 rtx
3276 expand_widening_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3277 int unsignedp, optab this_optab)
3278 {
3279 bool speed = optimize_insn_for_speed_p ();
3280 rtx cop1;
3281
3282 if (CONST_INT_P (op1)
3283 && GET_MODE (op0) != VOIDmode
3284 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3285 this_optab == umul_widen_optab))
3286 && CONST_INT_P (cop1)
3287 && (INTVAL (cop1) >= 0
3288 || HWI_COMPUTABLE_MODE_P (mode)))
3289 {
3290 HOST_WIDE_INT coeff = INTVAL (cop1);
3291 int max_cost;
3292 enum mult_variant variant;
3293 struct algorithm algorithm;
3294
3295 /* Special case powers of two. */
3296 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3297 {
3298 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3299 return expand_shift (LSHIFT_EXPR, mode, op0,
3300 floor_log2 (coeff), target, unsignedp);
3301 }
3302
3303 /* Exclude cost of op0 from max_cost to match the cost
3304 calculation of the synth_mult. */
3305 max_cost = mul_widen_cost[speed][mode];
3306 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3307 max_cost))
3308 {
3309 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3310 return expand_mult_const (mode, op0, coeff, target,
3311 &algorithm, variant);
3312 }
3313 }
3314 return expand_binop (mode, this_optab, op0, op1, target,
3315 unsignedp, OPTAB_LIB_WIDEN);
3316 }
3317 \f
3318 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3319 replace division by D, and put the least significant N bits of the result
3320 in *MULTIPLIER_PTR and return the most significant bit.
3321
3322 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3323 needed precision is in PRECISION (should be <= N).
3324
3325 PRECISION should be as small as possible so this function can choose
3326 the multiplier more freely.
3327
3328 The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that
3329 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3330
3331 Using this function, x/D will be equal to (x * m) >> (N + *POST_SHIFT_PTR),
3332 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
3333
3334 unsigned HOST_WIDE_INT
3335 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3336 unsigned HOST_WIDE_INT *multiplier_ptr,
3337 int *post_shift_ptr, int *lgup_ptr)
3338 {
3339 HOST_WIDE_INT mhigh_hi, mlow_hi;
3340 unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
3341 int lgup, post_shift;
3342 int pow, pow2;
3343 unsigned HOST_WIDE_INT nl, dummy1;
3344 HOST_WIDE_INT nh, dummy2;
3345
3346 /* lgup = ceil(log2(divisor)); */
3347 lgup = ceil_log2 (d);
3348
3349 gcc_assert (lgup <= n);
3350
3351 pow = n + lgup;
3352 pow2 = n + lgup - precision;
3353
3354 /* We could handle this with some effort, but this case is much
3355 better handled directly with a scc insn, so rely on caller using
3356 that. */
3357 gcc_assert (pow != HOST_BITS_PER_DOUBLE_INT);
3358
3359 /* mlow = 2^(N + lgup)/d */
3360 if (pow >= HOST_BITS_PER_WIDE_INT)
3361 {
3362 nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
3363 nl = 0;
3364 }
3365 else
3366 {
3367 nh = 0;
3368 nl = (unsigned HOST_WIDE_INT) 1 << pow;
3369 }
3370 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3371 &mlow_lo, &mlow_hi, &dummy1, &dummy2);
3372
3373 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3374 if (pow2 >= HOST_BITS_PER_WIDE_INT)
3375 nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
3376 else
3377 nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
3378 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3379 &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
3380
3381 gcc_assert (!mhigh_hi || nh - d < d);
3382 gcc_assert (mhigh_hi <= 1 && mlow_hi <= 1);
3383 /* Assert that mlow < mhigh. */
3384 gcc_assert (mlow_hi < mhigh_hi
3385 || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo));
3386
3387 /* If precision == N, then mlow, mhigh exceed 2^N
3388 (but they do not exceed 2^(N+1)). */
3389
3390 /* Reduce to lowest terms. */
3391 for (post_shift = lgup; post_shift > 0; post_shift--)
3392 {
3393 unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
3394 unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
3395 if (ml_lo >= mh_lo)
3396 break;
3397
3398 mlow_hi = 0;
3399 mlow_lo = ml_lo;
3400 mhigh_hi = 0;
3401 mhigh_lo = mh_lo;
3402 }
3403
3404 *post_shift_ptr = post_shift;
3405 *lgup_ptr = lgup;
3406 if (n < HOST_BITS_PER_WIDE_INT)
3407 {
3408 unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3409 *multiplier_ptr = mhigh_lo & mask;
3410 return mhigh_lo >= mask;
3411 }
3412 else
3413 {
3414 *multiplier_ptr = mhigh_lo;
3415 return mhigh_hi;
3416 }
3417 }
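
/* Editor's worked example (standalone sketch, worked out by hand from the
   description above rather than produced by this code): for D = 7 and
   N = PRECISION = 32, the N + 1 bit multiplier is (2^35 + 2^3)/7 =
   0x124924925 with *POST_SHIFT_PTR = 3; *MULTIPLIER_PTR gets the low 32
   bits 0x24924925 and the return value is 1 (the 33rd bit).  The
   TRUNC_DIV_EXPR code in expand_divmod below then emits the following
   branch-free unsigned division by 7 (assumes a 32-bit unsigned int and a
   64-bit unsigned long long).  */

static unsigned int
example_udiv_by_7 (unsigned int x)
{
  unsigned int t1
    = (unsigned int) (((unsigned long long) x * 0x24924925u) >> 32);
  unsigned int t2 = (x - t1) >> 1;      /* t1 <= x, so no wraparound */
  return (t1 + t2) >> 2;                /* shift by post_shift - 1 */
}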
3418
3419 /* Compute the inverse of X mod 2**N, i.e., find Y such that X * Y is
3420 congruent to 1 (mod 2**N). */
3421
3422 static unsigned HOST_WIDE_INT
3423 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3424 {
3425 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3426
3427 /* The algorithm notes that the choice y = x satisfies
3428 x*y == 1 mod 2^3, since x is assumed odd.
3429 Each iteration doubles the number of bits of significance in y. */
3430
3431 unsigned HOST_WIDE_INT mask;
3432 unsigned HOST_WIDE_INT y = x;
3433 int nbit = 3;
3434
3435 mask = (n == HOST_BITS_PER_WIDE_INT
3436 ? ~(unsigned HOST_WIDE_INT) 0
3437 : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3438
3439 while (nbit < n)
3440 {
3441 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3442 nbit *= 2;
3443 }
3444 return y;
3445 }
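
/* Editor's illustrative sketch (standalone, not used by this file): the
   same Newton iteration written for a fixed 32-bit width, together with
   the use the EXACT_DIV_EXPR case below makes of the inverse.  Assumes a
   32-bit unsigned int; X must be odd.  */

static unsigned int
example_invert_mod2_32 (unsigned int x)
{
  unsigned int y = x;                   /* correct to 3 bits for odd x */
  int nbit;

  for (nbit = 3; nbit < 32; nbit *= 2)
    y = y * (2 - x * y);                /* each step doubles the correct bits */
  return y;
}

/* For instance example_invert_mod2_32 (3) == 0xAAAAAAAB, and for any X that
   is an exact multiple of 3, X * 0xAAAAAAAB (mod 2^32) == X / 3; e.g.
   9 * 0xAAAAAAAB == 0x600000003, whose low 32 bits are 3.  */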
3446
3447 /* Emit code to adjust ADJ_OPERAND after a multiplication of OP0 and OP1
3448 that used the wrong signedness flavor. ADJ_OPERAND is already the high
3449 half of the product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed
3450 product to become unsigned; if UNSIGNEDP is zero, adjust the unsigned product to
3451 become signed.
3452
3453 The result is put in TARGET if that is convenient.
3454
3455 MODE is the mode of operation. */
3456
3457 rtx
3458 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3459 rtx op1, rtx target, int unsignedp)
3460 {
3461 rtx tem;
3462 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3463
3464 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3465 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3466 tem = expand_and (mode, tem, op1, NULL_RTX);
3467 adj_operand
3468 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3469 adj_operand);
3470
3471 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3472 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3473 tem = expand_and (mode, tem, op0, NULL_RTX);
3474 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3475 target);
3476
3477 return target;
3478 }
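
/* Editor's note on the identity behind the adjustment above (standalone
   sketch for a 32-bit mode; assumes 32-bit int, 64-bit long long, two's
   complement and an arithmetic right shift for signed values): modulo 2^32,
   the unsigned high-part product equals the signed high-part product plus
   OP1 when OP0 is negative and plus OP0 when OP1 is negative; with MINUS
   instead of PLUS the correction runs the other way.  */

static unsigned int
example_umulhi_from_smulhi (int a, int b)
{
  unsigned int shigh = (unsigned int) (((long long) a * b) >> 32);
  unsigned int adj = shigh + (a < 0 ? (unsigned int) b : 0);
  return adj + (b < 0 ? (unsigned int) a : 0);
  /* Equals the unsigned high part:
     (unsigned int) (((unsigned long long) (unsigned int) a
                      * (unsigned int) b) >> 32).  */
}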
3479
3480 /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */
3481
3482 static rtx
3483 extract_high_half (enum machine_mode mode, rtx op)
3484 {
3485 enum machine_mode wider_mode;
3486
3487 if (mode == word_mode)
3488 return gen_highpart (mode, op);
3489
3490 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3491
3492 wider_mode = GET_MODE_WIDER_MODE (mode);
3493 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3494 GET_MODE_BITSIZE (mode), 0, 1);
3495 return convert_modes (mode, wider_mode, op, 0);
3496 }
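
/* Editor's sketch (standalone): the callers below pair a widening multiply
   with this helper; in C terms that combination is just a multiply in the
   wider type followed by a shift by the narrow width, here for a 32-bit
   mode (assumes 32-bit unsigned int, 64-bit unsigned long long).  */

static unsigned int
example_high_half (unsigned int a, unsigned int b)
{
  unsigned long long wide = (unsigned long long) a * b;  /* widening multiply */
  return (unsigned int) (wide >> 32);                    /* keep the top half */
}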
3497
3498 /* Like expmed_mult_highpart, but only consider using a multiplication
3499 optab. OP1 is an rtx for the constant operand. */
3500
3501 static rtx
3502 expmed_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3503 rtx target, int unsignedp, int max_cost)
3504 {
3505 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3506 enum machine_mode wider_mode;
3507 optab moptab;
3508 rtx tem;
3509 int size;
3510 bool speed = optimize_insn_for_speed_p ();
3511
3512 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3513
3514 wider_mode = GET_MODE_WIDER_MODE (mode);
3515 size = GET_MODE_BITSIZE (mode);
3516
3517 /* Firstly, try using a multiplication insn that only generates the needed
3518 high part of the product, and in the sign flavor of unsignedp. */
3519 if (mul_highpart_cost[speed][mode] < max_cost)
3520 {
3521 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3522 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3523 unsignedp, OPTAB_DIRECT);
3524 if (tem)
3525 return tem;
3526 }
3527
3528 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3529 Need to adjust the result after the multiplication. */
3530 if (size - 1 < BITS_PER_WORD
3531 && (mul_highpart_cost[speed][mode] + 2 * shift_cost[speed][mode][size-1]
3532 + 4 * add_cost[speed][mode] < max_cost))
3533 {
3534 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3535 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3536 unsignedp, OPTAB_DIRECT);
3537 if (tem)
3538 /* We used the wrong signedness. Adjust the result. */
3539 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3540 tem, unsignedp);
3541 }
3542
3543 /* Try widening multiplication. */
3544 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3545 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3546 && mul_widen_cost[speed][wider_mode] < max_cost)
3547 {
3548 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3549 unsignedp, OPTAB_WIDEN);
3550 if (tem)
3551 return extract_high_half (mode, tem);
3552 }
3553
3554 /* Try widening the mode and performing a non-widening multiplication. */
3555 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3556 && size - 1 < BITS_PER_WORD
3557 && mul_cost[speed][wider_mode] + shift_cost[speed][mode][size-1] < max_cost)
3558 {
3559 rtx insns, wop0, wop1;
3560
3561 /* We need to widen the operands, for example to ensure the
3562 constant multiplier is correctly sign or zero extended.
3563 Use a sequence to clean up any instructions emitted by
3564 the conversions if things don't work out. */
3565 start_sequence ();
3566 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3567 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3568 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3569 unsignedp, OPTAB_WIDEN);
3570 insns = get_insns ();
3571 end_sequence ();
3572
3573 if (tem)
3574 {
3575 emit_insn (insns);
3576 return extract_high_half (mode, tem);
3577 }
3578 }
3579
3580 /* Try widening multiplication of opposite signedness, and adjust. */
3581 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3582 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3583 && size - 1 < BITS_PER_WORD
3584 && (mul_widen_cost[speed][wider_mode] + 2 * shift_cost[speed][mode][size-1]
3585 + 4 * add_cost[speed][mode] < max_cost))
3586 {
3587 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3588 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3589 if (tem != 0)
3590 {
3591 tem = extract_high_half (mode, tem);
3592 /* We used the wrong signedness. Adjust the result. */
3593 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3594 target, unsignedp);
3595 }
3596 }
3597
3598 return 0;
3599 }
3600
3601 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3602 putting the high half of the result in TARGET if that is convenient,
3603 and returning where the result is. If the operation cannot be performed,
3604 0 is returned.
3605
3606 MODE is the mode of operation and result.
3607
3608 UNSIGNEDP nonzero means unsigned multiply.
3609
3610 MAX_COST is the total allowed cost for the expanded RTL. */
3611
3612 static rtx
3613 expmed_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3614 rtx target, int unsignedp, int max_cost)
3615 {
3616 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3617 unsigned HOST_WIDE_INT cnst1;
3618 int extra_cost;
3619 bool sign_adjust = false;
3620 enum mult_variant variant;
3621 struct algorithm alg;
3622 rtx tem;
3623 bool speed = optimize_insn_for_speed_p ();
3624
3625 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3626 /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */
3627 gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3628
3629 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3630
3631 /* We can't optimize modes wider than BITS_PER_WORD.
3632 ??? We might be able to perform double-word arithmetic if
3633 mode == word_mode, however all the cost calculations in
3634 synth_mult etc. assume single-word operations. */
3635 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3636 return expmed_mult_highpart_optab (mode, op0, op1, target,
3637 unsignedp, max_cost);
3638
3639 extra_cost = shift_cost[speed][mode][GET_MODE_BITSIZE (mode) - 1];
3640
3641 /* Check whether we try to multiply by a negative constant. */
3642 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3643 {
3644 sign_adjust = true;
3645 extra_cost += add_cost[speed][mode];
3646 }
3647
3648 /* See whether shift/add multiplication is cheap enough. */
3649 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3650 max_cost - extra_cost))
3651 {
3652 /* See whether the specialized multiplication optabs are
3653 cheaper than the shift/add version. */
3654 tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3655 alg.cost.cost + extra_cost);
3656 if (tem)
3657 return tem;
3658
3659 tem = convert_to_mode (wider_mode, op0, unsignedp);
3660 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3661 tem = extract_high_half (mode, tem);
3662
3663 /* Adjust result for signedness. */
3664 if (sign_adjust)
3665 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3666
3667 return tem;
3668 }
3669 return expmed_mult_highpart_optab (mode, op0, op1, target,
3670 unsignedp, max_cost);
3671 }
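
/* Editor's note on the sign_adjust path above (standalone sketch for a
   32-bit mode; assumes 32-bit int, 64-bit long long, two's complement and
   an arithmetic right shift for signed values): for a negative constant C,
   the signed high-part product of OP0 and C is the high part of
   OP0 * (C mod 2^32) minus OP0, which is exactly the MINUS emitted above.  */

static unsigned int
example_smulhi_by_negative_const (int a, int c)
{
  /* Assumes c < 0.  */
  unsigned int cu = (unsigned int) c;                        /* C mod 2^32 */
  unsigned int hi = (unsigned int) (((long long) a * cu) >> 32);
  return hi - (unsigned int) a;
  /* Equals (unsigned int) (((long long) a * c) >> 32).  */
}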
3672
3673
3674 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3675
3676 static rtx
3677 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3678 {
3679 unsigned HOST_WIDE_INT masklow, maskhigh;
3680 rtx result, temp, shift, label;
3681 int logd;
3682
3683 logd = floor_log2 (d);
3684 result = gen_reg_rtx (mode);
3685
3686 /* Avoid conditional branches when they're expensive. */
3687 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3688 && optimize_insn_for_speed_p ())
3689 {
3690 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3691 mode, 0, -1);
3692 if (signmask)
3693 {
3694 signmask = force_reg (mode, signmask);
3695 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3696 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3697
3698 /* Use the rtx_cost of an LSHIFTRT instruction to determine
3699 which instruction sequence to use. If logical right shifts
3700 are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
3701 use an LSHIFTRT, 1 ADD, 1 SUB and an AND. */
3702
3703 temp = gen_rtx_LSHIFTRT (mode, result, shift);
3704 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3705 || (set_src_cost (temp, optimize_insn_for_speed_p ())
3706 > COSTS_N_INSNS (2)))
3707 {
3708 temp = expand_binop (mode, xor_optab, op0, signmask,
3709 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3710 temp = expand_binop (mode, sub_optab, temp, signmask,
3711 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3712 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3713 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3714 temp = expand_binop (mode, xor_optab, temp, signmask,
3715 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3716 temp = expand_binop (mode, sub_optab, temp, signmask,
3717 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3718 }
3719 else
3720 {
3721 signmask = expand_binop (mode, lshr_optab, signmask, shift,
3722 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3723 signmask = force_reg (mode, signmask);
3724
3725 temp = expand_binop (mode, add_optab, op0, signmask,
3726 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3727 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3728 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3729 temp = expand_binop (mode, sub_optab, temp, signmask,
3730 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3731 }
3732 return temp;
3733 }
3734 }
3735
3736 /* Mask contains the mode's signbit and the significant bits of the
3737 modulus. By including the signbit in the operation, many targets
3738 can avoid an explicit compare operation in the following comparison
3739 against zero. */
3740
3741 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3742 if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3743 {
3744 masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
3745 maskhigh = -1;
3746 }
3747 else
3748 maskhigh = (HOST_WIDE_INT) -1
3749 << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3750
3751 temp = expand_binop (mode, and_optab, op0,
3752 immed_double_const (masklow, maskhigh, mode),
3753 result, 1, OPTAB_LIB_WIDEN);
3754 if (temp != result)
3755 emit_move_insn (result, temp);
3756
3757 label = gen_label_rtx ();
3758 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3759
3760 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3761 0, OPTAB_LIB_WIDEN);
3762 masklow = (HOST_WIDE_INT) -1 << logd;
3763 maskhigh = -1;
3764 temp = expand_binop (mode, ior_optab, temp,
3765 immed_double_const (masklow, maskhigh, mode),
3766 result, 1, OPTAB_LIB_WIDEN);
3767 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3768 0, OPTAB_LIB_WIDEN);
3769 if (temp != result)
3770 emit_move_insn (result, temp);
3771 emit_label (label);
3772 return result;
3773 }
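
/* Editor's illustrative sketch of the branch-free path above, for a 32-bit
   mode and D = 2^LOGD (standalone; assumes 32-bit int and two's
   complement).  Negative dividends are biased up by D - 1, masked, and
   then un-biased, so the result keeps the sign of X just like
   TRUNC_MOD_EXPR.  The code above materializes the bias either as a
   logical right shift of the all-ones sign mask (cheap-shift variant) or
   folds the sign handling into the XOR/SUB sequence (expensive-shift
   variant); both compute the same value.  */

static int
example_smod_pow2 (int x, int logd)
{
  unsigned int mask = (1u << logd) - 1;         /* D - 1 */
  unsigned int bias = x < 0 ? mask : 0;         /* D - 1 for negative X */

  return (int) ((((unsigned int) x + bias) & mask) - bias);
}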
3774
3775 /* Expand signed division of OP0 by a power of two D in mode MODE.
3776 This routine is only called for positive values of D. */
3777
3778 static rtx
3779 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3780 {
3781 rtx temp, label;
3782 int logd;
3783
3784 logd = floor_log2 (d);
3785
3786 if (d == 2
3787 && BRANCH_COST (optimize_insn_for_speed_p (),
3788 false) >= 1)
3789 {
3790 temp = gen_reg_rtx (mode);
3791 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3792 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3793 0, OPTAB_LIB_WIDEN);
3794 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3795 }
3796
3797 #ifdef HAVE_conditional_move
3798 if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3799 >= 2)
3800 {
3801 rtx temp2;
3802
3803 /* ??? emit_conditional_move forces a stack adjustment via
3804 compare_from_rtx, so if the sequence is discarded, it will
3805 be lost. Do it now instead. */
3806 do_pending_stack_adjust ();
3807
3808 start_sequence ();
3809 temp2 = copy_to_mode_reg (mode, op0);
3810 temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3811 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3812 temp = force_reg (mode, temp);
3813
3814 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3815 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3816 mode, temp, temp2, mode, 0);
3817 if (temp2)
3818 {
3819 rtx seq = get_insns ();
3820 end_sequence ();
3821 emit_insn (seq);
3822 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3823 }
3824 end_sequence ();
3825 }
3826 #endif
3827
3828 if (BRANCH_COST (optimize_insn_for_speed_p (),
3829 false) >= 2)
3830 {
3831 int ushift = GET_MODE_BITSIZE (mode) - logd;
3832
3833 temp = gen_reg_rtx (mode);
3834 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3835 if (shift_cost[optimize_insn_for_speed_p ()][mode][ushift] > COSTS_N_INSNS (1))
3836 temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3837 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3838 else
3839 temp = expand_shift (RSHIFT_EXPR, mode, temp,
3840 ushift, NULL_RTX, 1);
3841 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3842 0, OPTAB_LIB_WIDEN);
3843 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3844 }
3845
3846 label = gen_label_rtx ();
3847 temp = copy_to_mode_reg (mode, op0);
3848 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3849 expand_inc (temp, GEN_INT (d - 1));
3850 emit_label (label);
3851 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3852 }
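
/* Editor's illustrative sketch of the branch-free sequence above, for a
   32-bit mode and D = 2^LOGD (standalone; assumes 32-bit int, two's
   complement and an arithmetic right shift for signed values): biasing a
   negative dividend by D - 1 makes the arithmetic shift round toward zero,
   as TRUNC_DIV_EXPR requires, instead of toward minus infinity.  The
   D == 2 and conditional-move variants above are just cheaper ways of
   forming the same bias.  */

static int
example_sdiv_pow2 (int x, int logd)
{
  int bias = x < 0 ? (1 << logd) - 1 : 0;   /* derived from the sign bit */
  return (x + bias) >> logd;                /* == x / (1 << logd) */
}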
3853 \f
3854 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3855 if that is convenient, and returning where the result is.
3856 You may request either the quotient or the remainder as the result;
3857 specify REM_FLAG nonzero to get the remainder.
3858
3859 CODE is the expression code for which kind of division this is;
3860 it controls how rounding is done. MODE is the machine mode to use.
3861 UNSIGNEDP nonzero means do unsigned division. */
3862
3863 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3864 and then correct it by or'ing in missing high bits
3865 if result of ANDI is nonzero.
3866 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3867 This could optimize to a bfexts instruction.
3868 But C doesn't use these operations, so their optimizations are
3869 left for later. */
3870 /* ??? For modulo, we don't actually need the highpart of the first product,
3871 the low part will do nicely. And for small divisors, the second multiply
3872 can also be a low-part only multiply or even be completely left out.
3873 E.g. to calculate the remainder of a division by 3 with a 32 bit
3874 multiply, multiply with 0x55555556 and extract the upper two bits;
3875 the result is exact for inputs up to 0x1fffffff.
3876 The input range can be reduced by using cross-sum rules.
3877 For odd divisors >= 3, the following table gives right shift counts
3878 so that if a number is shifted by an integer multiple of the given
3879 amount, the remainder stays the same:
3880 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3881 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3882 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3883 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3884 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3885
3886 Cross-sum rules for even numbers can be derived by leaving as many bits
3887 to the right alone as the divisor has zeros to the right.
3888 E.g. if x is an unsigned 32 bit number:
3889 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3890 */
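
/* Editor's sketch of the divide-by-3 remainder trick mentioned above
   (standalone; assumes a 32-bit unsigned int): one wrapping 32-bit
   multiply, then the top two bits of the low half are X mod 3.  As the
   comment says, the result is exact for X up to 0x1fffffff.  */

static unsigned int
example_umod_3 (unsigned int x)
{
  return (x * 0x55555556u) >> 30;
}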
3891
3892 rtx
3893 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3894 rtx op0, rtx op1, rtx target, int unsignedp)
3895 {
3896 enum machine_mode compute_mode;
3897 rtx tquotient;
3898 rtx quotient = 0, remainder = 0;
3899 rtx last;
3900 int size;
3901 rtx insn;
3902 optab optab1, optab2;
3903 int op1_is_constant, op1_is_pow2 = 0;
3904 int max_cost, extra_cost;
3905 static HOST_WIDE_INT last_div_const = 0;
3906 static HOST_WIDE_INT ext_op1;
3907 bool speed = optimize_insn_for_speed_p ();
3908
3909 op1_is_constant = CONST_INT_P (op1);
3910 if (op1_is_constant)
3911 {
3912 ext_op1 = INTVAL (op1);
3913 if (unsignedp)
3914 ext_op1 &= GET_MODE_MASK (mode);
3915 op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3916 || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3917 }
3918
3919 /*
3920 This is the structure of expand_divmod:
3921
3922 First comes code to fix up the operands so we can perform the operations
3923 correctly and efficiently.
3924
3925 Second comes a switch statement with code specific for each rounding mode.
3926 For some special operands this code emits all RTL for the desired
3927 operation, for other cases, it generates only a quotient and stores it in
3928 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
3929 to indicate that it has not done anything.
3930
3931 Last comes code that finishes the operation. If QUOTIENT is set and
3932 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
3933 QUOTIENT is not set, it is computed using trunc rounding.
3934
3935 We try to generate special code for division and remainder when OP1 is a
3936 constant. If |OP1| = 2**n we can use shifts and some other fast
3937 operations. For other values of OP1, we compute a carefully selected
3938 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3939 by m.
3940
3941 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3942 half of the product. Different strategies for generating the product are
3943 implemented in expmed_mult_highpart.
3944
3945 If what we actually want is the remainder, we generate that by another
3946 by-constant multiplication and a subtraction. */
3947
3948 /* We shouldn't be called with OP1 == const1_rtx, but some of the
3949 code below will malfunction if we are, so check here and handle
3950 the special case if so. */
3951 if (op1 == const1_rtx)
3952 return rem_flag ? const0_rtx : op0;
3953
3954 /* When dividing by -1, we could get an overflow.
3955 negv_optab can handle overflows. */
3956 if (! unsignedp && op1 == constm1_rtx)
3957 {
3958 if (rem_flag)
3959 return const0_rtx;
3960 return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3961 ? negv_optab : neg_optab, op0, target, 0);
3962 }
3963
3964 if (target
3965 /* Don't use the function value register as a target
3966 since we have to read it as well as write it,
3967 and function-inlining gets confused by this. */
3968 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3969 /* Don't clobber an operand while doing a multi-step calculation. */
3970 || ((rem_flag || op1_is_constant)
3971 && (reg_mentioned_p (target, op0)
3972 || (MEM_P (op0) && MEM_P (target))))
3973 || reg_mentioned_p (target, op1)
3974 || (MEM_P (op1) && MEM_P (target))))
3975 target = 0;
3976
3977 /* Get the mode in which to perform this computation. Normally it will
3978 be MODE, but sometimes we can't do the desired operation in MODE.
3979 If so, pick a wider mode in which we can do the operation. Convert
3980 to that mode at the start to avoid repeated conversions.
3981
3982 First see what operations we need. These depend on the expression
3983 we are evaluating. (We assume that divxx3 insns exist under the
3984 same conditions that modxx3 insns do, and that these insns don't normally
3985 fail. If these assumptions are not correct, we may generate less
3986 efficient code in some cases.)
3987
3988 Then see if we find a mode in which we can open-code that operation
3989 (either a division, modulus, or shift). Finally, check for the smallest
3990 mode for which we can do the operation with a library call. */
3991
3992 /* We might want to refine this now that we have division-by-constant
3993 optimization. Since expmed_mult_highpart tries so many variants, it is
3994 not straightforward to generalize this. Maybe we should make an array
3995 of possible modes in init_expmed? Save this for GCC 2.7. */
3996
3997 optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3998 ? (unsignedp ? lshr_optab : ashr_optab)
3999 : (unsignedp ? udiv_optab : sdiv_optab));
4000 optab2 = ((op1_is_pow2 && op1 != const0_rtx)
4001 ? optab1
4002 : (unsignedp ? udivmod_optab : sdivmod_optab));
4003
4004 for (compute_mode = mode; compute_mode != VOIDmode;
4005 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4006 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4007 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4008 break;
4009
4010 if (compute_mode == VOIDmode)
4011 for (compute_mode = mode; compute_mode != VOIDmode;
4012 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4013 if (optab_libfunc (optab1, compute_mode)
4014 || optab_libfunc (optab2, compute_mode))
4015 break;
4016
4017 /* If we still couldn't find a mode, use MODE, but expand_binop will
4018 probably die. */
4019 if (compute_mode == VOIDmode)
4020 compute_mode = mode;
4021
4022 if (target && GET_MODE (target) == compute_mode)
4023 tquotient = target;
4024 else
4025 tquotient = gen_reg_rtx (compute_mode);
4026
4027 size = GET_MODE_BITSIZE (compute_mode);
4028 #if 0
4029 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4030 (mode), and thereby get better code when OP1 is a constant. Do that
4031 later. It will require going over all usages of SIZE below. */
4032 size = GET_MODE_BITSIZE (mode);
4033 #endif
4034
4035 /* Only deduct something for a REM if the last divide done was
4036 for a different constant. Then set the constant of the last
4037 divide. */
4038 max_cost = unsignedp ? udiv_cost[speed][compute_mode] : sdiv_cost[speed][compute_mode];
4039 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4040 && INTVAL (op1) == last_div_const))
4041 max_cost -= mul_cost[speed][compute_mode] + add_cost[speed][compute_mode];
4042
4043 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4044
4045 /* Now convert to the best mode to use. */
4046 if (compute_mode != mode)
4047 {
4048 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4049 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4050
4051 /* convert_modes may have placed op1 into a register, so we
4052 must recompute the following. */
4053 op1_is_constant = CONST_INT_P (op1);
4054 op1_is_pow2 = (op1_is_constant
4055 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4056 || (! unsignedp
4057 && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
4058 }
4059
4060 /* If one of the operands is a volatile MEM, copy it into a register. */
4061
4062 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4063 op0 = force_reg (compute_mode, op0);
4064 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4065 op1 = force_reg (compute_mode, op1);
4066
4067 /* If we need the remainder or if OP1 is constant, we need to
4068 put OP0 in a register in case it has any queued subexpressions. */
4069 if (rem_flag || op1_is_constant)
4070 op0 = force_reg (compute_mode, op0);
4071
4072 last = get_last_insn ();
4073
4074 /* Promote floor rounding to trunc rounding for unsigned operations. */
4075 if (unsignedp)
4076 {
4077 if (code == FLOOR_DIV_EXPR)
4078 code = TRUNC_DIV_EXPR;
4079 if (code == FLOOR_MOD_EXPR)
4080 code = TRUNC_MOD_EXPR;
4081 if (code == EXACT_DIV_EXPR && op1_is_pow2)
4082 code = TRUNC_DIV_EXPR;
4083 }
4084
4085 if (op1 != const0_rtx)
4086 switch (code)
4087 {
4088 case TRUNC_MOD_EXPR:
4089 case TRUNC_DIV_EXPR:
4090 if (op1_is_constant)
4091 {
4092 if (unsignedp)
4093 {
4094 unsigned HOST_WIDE_INT mh, ml;
4095 int pre_shift, post_shift;
4096 int dummy;
4097 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4098 & GET_MODE_MASK (compute_mode));
4099
4100 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4101 {
4102 pre_shift = floor_log2 (d);
4103 if (rem_flag)
4104 {
4105 remainder
4106 = expand_binop (compute_mode, and_optab, op0,
4107 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4108 remainder, 1,
4109 OPTAB_LIB_WIDEN);
4110 if (remainder)
4111 return gen_lowpart (mode, remainder);
4112 }
4113 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4114 pre_shift, tquotient, 1);
4115 }
4116 else if (size <= HOST_BITS_PER_WIDE_INT)
4117 {
4118 if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4119 {
4120 /* Most significant bit of divisor is set; emit an scc
4121 insn. */
4122 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4123 compute_mode, 1, 1);
4124 }
4125 else
4126 {
4127 /* Find a suitable multiplier and right shift count
4128 instead of multiplying with D. */
4129
4130 mh = choose_multiplier (d, size, size,
4131 &ml, &post_shift, &dummy);
4132
4133 /* If the suggested multiplier is more than SIZE bits,
4134 we can do better for even divisors, using an
4135 initial right shift. */
4136 if (mh != 0 && (d & 1) == 0)
4137 {
4138 pre_shift = floor_log2 (d & -d);
4139 mh = choose_multiplier (d >> pre_shift, size,
4140 size - pre_shift,
4141 &ml, &post_shift, &dummy);
4142 gcc_assert (!mh);
4143 }
4144 else
4145 pre_shift = 0;
4146
4147 if (mh != 0)
4148 {
4149 rtx t1, t2, t3, t4;
4150
4151 if (post_shift - 1 >= BITS_PER_WORD)
4152 goto fail1;
4153
4154 extra_cost
4155 = (shift_cost[speed][compute_mode][post_shift - 1]
4156 + shift_cost[speed][compute_mode][1]
4157 + 2 * add_cost[speed][compute_mode]);
4158 t1 = expmed_mult_highpart (compute_mode, op0,
4159 GEN_INT (ml),
4160 NULL_RTX, 1,
4161 max_cost - extra_cost);
4162 if (t1 == 0)
4163 goto fail1;
4164 t2 = force_operand (gen_rtx_MINUS (compute_mode,
4165 op0, t1),
4166 NULL_RTX);
4167 t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4168 t2, 1, NULL_RTX, 1);
4169 t4 = force_operand (gen_rtx_PLUS (compute_mode,
4170 t1, t3),
4171 NULL_RTX);
4172 quotient = expand_shift
4173 (RSHIFT_EXPR, compute_mode, t4,
4174 post_shift - 1, tquotient, 1);
4175 }
4176 else
4177 {
4178 rtx t1, t2;
4179
4180 if (pre_shift >= BITS_PER_WORD
4181 || post_shift >= BITS_PER_WORD)
4182 goto fail1;
4183
4184 t1 = expand_shift
4185 (RSHIFT_EXPR, compute_mode, op0,
4186 pre_shift, NULL_RTX, 1);
4187 extra_cost
4188 = (shift_cost[speed][compute_mode][pre_shift]
4189 + shift_cost[speed][compute_mode][post_shift]);
4190 t2 = expmed_mult_highpart (compute_mode, t1,
4191 GEN_INT (ml),
4192 NULL_RTX, 1,
4193 max_cost - extra_cost);
4194 if (t2 == 0)
4195 goto fail1;
4196 quotient = expand_shift
4197 (RSHIFT_EXPR, compute_mode, t2,
4198 post_shift, tquotient, 1);
4199 }
4200 }
4201 }
4202 else /* Too wide mode to use tricky code */
4203 break;
4204
4205 insn = get_last_insn ();
4206 if (insn != last)
4207 set_dst_reg_note (insn, REG_EQUAL,
4208 gen_rtx_UDIV (compute_mode, op0, op1),
4209 quotient);
4210 }
4211 else /* TRUNC_DIV, signed */
4212 {
4213 unsigned HOST_WIDE_INT ml;
4214 int lgup, post_shift;
4215 rtx mlr;
4216 HOST_WIDE_INT d = INTVAL (op1);
4217 unsigned HOST_WIDE_INT abs_d;
4218
4219 /* Since d might be INT_MIN, we have to cast to
4220 unsigned HOST_WIDE_INT before negating to avoid
4221 undefined signed overflow. */
4222 abs_d = (d >= 0
4223 ? (unsigned HOST_WIDE_INT) d
4224 : - (unsigned HOST_WIDE_INT) d);
4225
4226 /* n rem d = n rem -d */
4227 if (rem_flag && d < 0)
4228 {
4229 d = abs_d;
4230 op1 = gen_int_mode (abs_d, compute_mode);
4231 }
4232
4233 if (d == 1)
4234 quotient = op0;
4235 else if (d == -1)
4236 quotient = expand_unop (compute_mode, neg_optab, op0,
4237 tquotient, 0);
4238 else if (HOST_BITS_PER_WIDE_INT >= size
4239 && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4240 {
4241 /* This case is not handled correctly below. */
4242 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4243 compute_mode, 1, 1);
4244 if (quotient == 0)
4245 goto fail1;
4246 }
4247 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4248 && (rem_flag ? smod_pow2_cheap[speed][compute_mode]
4249 : sdiv_pow2_cheap[speed][compute_mode])
4250 /* We assume that the cheap metric is true if the
4251 optab has an expander for this mode. */
4252 && ((optab_handler ((rem_flag ? smod_optab
4253 : sdiv_optab),
4254 compute_mode)
4255 != CODE_FOR_nothing)
4256 || (optab_handler (sdivmod_optab,
4257 compute_mode)
4258 != CODE_FOR_nothing)))
4259 ;
4260 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4261 {
4262 if (rem_flag)
4263 {
4264 remainder = expand_smod_pow2 (compute_mode, op0, d);
4265 if (remainder)
4266 return gen_lowpart (mode, remainder);
4267 }
4268
4269 if (sdiv_pow2_cheap[speed][compute_mode]
4270 && ((optab_handler (sdiv_optab, compute_mode)
4271 != CODE_FOR_nothing)
4272 || (optab_handler (sdivmod_optab, compute_mode)
4273 != CODE_FOR_nothing)))
4274 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4275 compute_mode, op0,
4276 gen_int_mode (abs_d,
4277 compute_mode),
4278 NULL_RTX, 0);
4279 else
4280 quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4281
4282 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4283 negate the quotient. */
4284 if (d < 0)
4285 {
4286 insn = get_last_insn ();
4287 if (insn != last
4288 && abs_d < ((unsigned HOST_WIDE_INT) 1
4289 << (HOST_BITS_PER_WIDE_INT - 1)))
4290 set_dst_reg_note (insn, REG_EQUAL,
4291 gen_rtx_DIV (compute_mode, op0,
4292 gen_int_mode
4293 (abs_d,
4294 compute_mode)),
4295 quotient);
4296
4297 quotient = expand_unop (compute_mode, neg_optab,
4298 quotient, quotient, 0);
4299 }
4300 }
4301 else if (size <= HOST_BITS_PER_WIDE_INT)
4302 {
4303 choose_multiplier (abs_d, size, size - 1,
4304 &ml, &post_shift, &lgup);
4305 if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4306 {
4307 rtx t1, t2, t3;
4308
4309 if (post_shift >= BITS_PER_WORD
4310 || size - 1 >= BITS_PER_WORD)
4311 goto fail1;
4312
4313 extra_cost = (shift_cost[speed][compute_mode][post_shift]
4314 + shift_cost[speed][compute_mode][size - 1]
4315 + add_cost[speed][compute_mode]);
4316 t1 = expmed_mult_highpart (compute_mode, op0,
4317 GEN_INT (ml), NULL_RTX, 0,
4318 max_cost - extra_cost);
4319 if (t1 == 0)
4320 goto fail1;
4321 t2 = expand_shift
4322 (RSHIFT_EXPR, compute_mode, t1,
4323 post_shift, NULL_RTX, 0);
4324 t3 = expand_shift
4325 (RSHIFT_EXPR, compute_mode, op0,
4326 size - 1, NULL_RTX, 0);
4327 if (d < 0)
4328 quotient
4329 = force_operand (gen_rtx_MINUS (compute_mode,
4330 t3, t2),
4331 tquotient);
4332 else
4333 quotient
4334 = force_operand (gen_rtx_MINUS (compute_mode,
4335 t2, t3),
4336 tquotient);
4337 }
4338 else
4339 {
4340 rtx t1, t2, t3, t4;
4341
4342 if (post_shift >= BITS_PER_WORD
4343 || size - 1 >= BITS_PER_WORD)
4344 goto fail1;
4345
4346 ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4347 mlr = gen_int_mode (ml, compute_mode);
4348 extra_cost = (shift_cost[speed][compute_mode][post_shift]
4349 + shift_cost[speed][compute_mode][size - 1]
4350 + 2 * add_cost[speed][compute_mode]);
4351 t1 = expmed_mult_highpart (compute_mode, op0, mlr,
4352 NULL_RTX, 0,
4353 max_cost - extra_cost);
4354 if (t1 == 0)
4355 goto fail1;
4356 t2 = force_operand (gen_rtx_PLUS (compute_mode,
4357 t1, op0),
4358 NULL_RTX);
4359 t3 = expand_shift
4360 (RSHIFT_EXPR, compute_mode, t2,
4361 post_shift, NULL_RTX, 0);
4362 t4 = expand_shift
4363 (RSHIFT_EXPR, compute_mode, op0,
4364 size - 1, NULL_RTX, 0);
4365 if (d < 0)
4366 quotient
4367 = force_operand (gen_rtx_MINUS (compute_mode,
4368 t4, t3),
4369 tquotient);
4370 else
4371 quotient
4372 = force_operand (gen_rtx_MINUS (compute_mode,
4373 t3, t4),
4374 tquotient);
4375 }
4376 }
4377 else /* Too wide mode to use tricky code */
4378 break;
4379
4380 insn = get_last_insn ();
4381 if (insn != last)
4382 set_dst_reg_note (insn, REG_EQUAL,
4383 gen_rtx_DIV (compute_mode, op0, op1),
4384 quotient);
4385 }
4386 break;
4387 }
4388 fail1:
4389 delete_insns_since (last);
4390 break;
4391
4392 case FLOOR_DIV_EXPR:
4393 case FLOOR_MOD_EXPR:
4394 /* We will come here only for signed operations. */
4395 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4396 {
4397 unsigned HOST_WIDE_INT mh, ml;
4398 int pre_shift, lgup, post_shift;
4399 HOST_WIDE_INT d = INTVAL (op1);
4400
4401 if (d > 0)
4402 {
4403 /* We could just as easily deal with negative constants here,
4404 but it does not seem worth the trouble for GCC 2.6. */
4405 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4406 {
4407 pre_shift = floor_log2 (d);
4408 if (rem_flag)
4409 {
4410 remainder = expand_binop (compute_mode, and_optab, op0,
4411 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4412 remainder, 0, OPTAB_LIB_WIDEN);
4413 if (remainder)
4414 return gen_lowpart (mode, remainder);
4415 }
4416 quotient = expand_shift
4417 (RSHIFT_EXPR, compute_mode, op0,
4418 pre_shift, tquotient, 0);
4419 }
4420 else
4421 {
4422 rtx t1, t2, t3, t4;
4423
4424 mh = choose_multiplier (d, size, size - 1,
4425 &ml, &post_shift, &lgup);
4426 gcc_assert (!mh);
4427
4428 if (post_shift < BITS_PER_WORD
4429 && size - 1 < BITS_PER_WORD)
4430 {
4431 t1 = expand_shift
4432 (RSHIFT_EXPR, compute_mode, op0,
4433 size - 1, NULL_RTX, 0);
4434 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4435 NULL_RTX, 0, OPTAB_WIDEN);
4436 extra_cost = (shift_cost[speed][compute_mode][post_shift]
4437 + shift_cost[speed][compute_mode][size - 1]
4438 + 2 * add_cost[speed][compute_mode]);
4439 t3 = expmed_mult_highpart (compute_mode, t2,
4440 GEN_INT (ml), NULL_RTX, 1,
4441 max_cost - extra_cost);
4442 if (t3 != 0)
4443 {
4444 t4 = expand_shift
4445 (RSHIFT_EXPR, compute_mode, t3,
4446 post_shift, NULL_RTX, 1);
4447 quotient = expand_binop (compute_mode, xor_optab,
4448 t4, t1, tquotient, 0,
4449 OPTAB_WIDEN);
4450 }
4451 }
4452 }
4453 }
4454 else
4455 {
4456 rtx nsign, t1, t2, t3, t4;
4457 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4458 op0, constm1_rtx), NULL_RTX);
4459 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4460 0, OPTAB_WIDEN);
4461 nsign = expand_shift
4462 (RSHIFT_EXPR, compute_mode, t2,
4463 size - 1, NULL_RTX, 0);
4464 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4465 NULL_RTX);
4466 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4467 NULL_RTX, 0);
4468 if (t4)
4469 {
4470 rtx t5;
4471 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4472 NULL_RTX, 0);
4473 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4474 t4, t5),
4475 tquotient);
4476 }
4477 }
4478 }
4479
4480 if (quotient != 0)
4481 break;
4482 delete_insns_since (last);
4483
4484 /* Try using an instruction that produces both the quotient and
4485 remainder, using truncation. We can easily compensate the quotient
4486 or remainder to get floor rounding, once we have the remainder.
4487 Notice that we also compute the final remainder value here,
4488 and return the result right away. */
4489 if (target == 0 || GET_MODE (target) != compute_mode)
4490 target = gen_reg_rtx (compute_mode);
4491
4492 if (rem_flag)
4493 {
4494 remainder
4495 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4496 quotient = gen_reg_rtx (compute_mode);
4497 }
4498 else
4499 {
4500 quotient
4501 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4502 remainder = gen_reg_rtx (compute_mode);
4503 }
4504
4505 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4506 quotient, remainder, 0))
4507 {
4508 /* This could be computed with a branch-less sequence.
4509 Save that for later. */
4510 rtx tem;
4511 rtx label = gen_label_rtx ();
4512 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4513 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4514 NULL_RTX, 0, OPTAB_WIDEN);
4515 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4516 expand_dec (quotient, const1_rtx);
4517 expand_inc (remainder, op1);
4518 emit_label (label);
4519 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4520 }
4521
4522 /* No luck with division elimination or divmod. Have to do it
4523 by conditionally adjusting op0 *and* the result. */
4524 {
4525 rtx label1, label2, label3, label4, label5;
4526 rtx adjusted_op0;
4527 rtx tem;
4528
4529 quotient = gen_reg_rtx (compute_mode);
4530 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4531 label1 = gen_label_rtx ();
4532 label2 = gen_label_rtx ();
4533 label3 = gen_label_rtx ();
4534 label4 = gen_label_rtx ();
4535 label5 = gen_label_rtx ();
4536 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4537 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4538 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4539 quotient, 0, OPTAB_LIB_WIDEN);
4540 if (tem != quotient)
4541 emit_move_insn (quotient, tem);
4542 emit_jump_insn (gen_jump (label5));
4543 emit_barrier ();
4544 emit_label (label1);
4545 expand_inc (adjusted_op0, const1_rtx);
4546 emit_jump_insn (gen_jump (label4));
4547 emit_barrier ();
4548 emit_label (label2);
4549 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4550 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4551 quotient, 0, OPTAB_LIB_WIDEN);
4552 if (tem != quotient)
4553 emit_move_insn (quotient, tem);
4554 emit_jump_insn (gen_jump (label5));
4555 emit_barrier ();
4556 emit_label (label3);
4557 expand_dec (adjusted_op0, const1_rtx);
4558 emit_label (label4);
4559 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4560 quotient, 0, OPTAB_LIB_WIDEN);
4561 if (tem != quotient)
4562 emit_move_insn (quotient, tem);
4563 expand_dec (quotient, const1_rtx);
4564 emit_label (label5);
4565 }
4566 break;
4567
4568 case CEIL_DIV_EXPR:
4569 case CEIL_MOD_EXPR:
4570 if (unsignedp)
4571 {
4572 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4573 {
4574 rtx t1, t2, t3;
4575 unsigned HOST_WIDE_INT d = INTVAL (op1);
4576 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4577 floor_log2 (d), tquotient, 1);
4578 t2 = expand_binop (compute_mode, and_optab, op0,
4579 GEN_INT (d - 1),
4580 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4581 t3 = gen_reg_rtx (compute_mode);
4582 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4583 compute_mode, 1, 1);
4584 if (t3 == 0)
4585 {
4586 rtx lab;
4587 lab = gen_label_rtx ();
4588 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4589 expand_inc (t1, const1_rtx);
4590 emit_label (lab);
4591 quotient = t1;
4592 }
4593 else
4594 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4595 t1, t3),
4596 tquotient);
4597 break;
4598 }
4599
4600 /* Try using an instruction that produces both the quotient and
4601 remainder, using truncation. We can easily compensate the
4602 quotient or remainder to get ceiling rounding, once we have the
4603 remainder. Notice that we also compute the final remainder
4604 value here, and return the result right away. */
4605 if (target == 0 || GET_MODE (target) != compute_mode)
4606 target = gen_reg_rtx (compute_mode);
4607
4608 if (rem_flag)
4609 {
4610 remainder = (REG_P (target)
4611 ? target : gen_reg_rtx (compute_mode));
4612 quotient = gen_reg_rtx (compute_mode);
4613 }
4614 else
4615 {
4616 quotient = (REG_P (target)
4617 ? target : gen_reg_rtx (compute_mode));
4618 remainder = gen_reg_rtx (compute_mode);
4619 }
4620
4621 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4622 remainder, 1))
4623 {
4624 /* This could be computed with a branch-less sequence.
4625 Save that for later. */
4626 rtx label = gen_label_rtx ();
4627 do_cmp_and_jump (remainder, const0_rtx, EQ,
4628 compute_mode, label);
4629 expand_inc (quotient, const1_rtx);
4630 expand_dec (remainder, op1);
4631 emit_label (label);
4632 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4633 }
4634
4635 /* No luck with division elimination or divmod. Have to do it
4636 by conditionally adjusting op0 *and* the result. */
4637 {
4638 rtx label1, label2;
4639 rtx adjusted_op0, tem;
4640
4641 quotient = gen_reg_rtx (compute_mode);
4642 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4643 label1 = gen_label_rtx ();
4644 label2 = gen_label_rtx ();
4645 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4646 compute_mode, label1);
4647 emit_move_insn (quotient, const0_rtx);
4648 emit_jump_insn (gen_jump (label2));
4649 emit_barrier ();
4650 emit_label (label1);
4651 expand_dec (adjusted_op0, const1_rtx);
4652 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4653 quotient, 1, OPTAB_LIB_WIDEN);
4654 if (tem != quotient)
4655 emit_move_insn (quotient, tem);
4656 expand_inc (quotient, const1_rtx);
4657 emit_label (label2);
4658 }
4659 }
4660 else /* signed */
4661 {
4662 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4663 && INTVAL (op1) >= 0)
4664 {
4665 /* This is extremely similar to the code for the unsigned case
4666 above. For 2.7 we should merge these variants, but for
4667 2.6.1 I don't want to touch the code for unsigned since that
4668 gets used in C. The signed case will only be used by other
4669 languages (Ada). */
4670
4671 rtx t1, t2, t3;
4672 unsigned HOST_WIDE_INT d = INTVAL (op1);
4673 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4674 floor_log2 (d), tquotient, 0);
4675 t2 = expand_binop (compute_mode, and_optab, op0,
4676 GEN_INT (d - 1),
4677 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4678 t3 = gen_reg_rtx (compute_mode);
4679 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4680 compute_mode, 1, 1);
4681 if (t3 == 0)
4682 {
4683 rtx lab;
4684 lab = gen_label_rtx ();
4685 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4686 expand_inc (t1, const1_rtx);
4687 emit_label (lab);
4688 quotient = t1;
4689 }
4690 else
4691 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4692 t1, t3),
4693 tquotient);
4694 break;
4695 }
4696
4697 /* Try using an instruction that produces both the quotient and
4698 remainder, using truncation. We can easily compensate the
4699 quotient or remainder to get ceiling rounding, once we have the
4700 remainder. Notice that we also compute the final remainder
4701 value here, and return the result right away. */
4702 if (target == 0 || GET_MODE (target) != compute_mode)
4703 target = gen_reg_rtx (compute_mode);
4704 if (rem_flag)
4705 {
4706 remainder= (REG_P (target)
4707 ? target : gen_reg_rtx (compute_mode));
4708 quotient = gen_reg_rtx (compute_mode);
4709 }
4710 else
4711 {
4712 quotient = (REG_P (target)
4713 ? target : gen_reg_rtx (compute_mode));
4714 remainder = gen_reg_rtx (compute_mode);
4715 }
4716
4717 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4718 remainder, 0))
4719 {
4720 /* This could be computed with a branch-less sequence.
4721 Save that for later. */
4722 rtx tem;
4723 rtx label = gen_label_rtx ();
4724 do_cmp_and_jump (remainder, const0_rtx, EQ,
4725 compute_mode, label);
4726 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4727 NULL_RTX, 0, OPTAB_WIDEN);
4728 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4729 expand_inc (quotient, const1_rtx);
4730 expand_dec (remainder, op1);
4731 emit_label (label);
4732 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4733 }
4734
4735 /* No luck with division elimination or divmod. Have to do it
4736 by conditionally adjusting op0 *and* the result. */
4737 {
4738 rtx label1, label2, label3, label4, label5;
4739 rtx adjusted_op0;
4740 rtx tem;
4741
4742 quotient = gen_reg_rtx (compute_mode);
4743 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4744 label1 = gen_label_rtx ();
4745 label2 = gen_label_rtx ();
4746 label3 = gen_label_rtx ();
4747 label4 = gen_label_rtx ();
4748 label5 = gen_label_rtx ();
4749 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4750 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4751 compute_mode, label1);
4752 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4753 quotient, 0, OPTAB_LIB_WIDEN);
4754 if (tem != quotient)
4755 emit_move_insn (quotient, tem);
4756 emit_jump_insn (gen_jump (label5));
4757 emit_barrier ();
4758 emit_label (label1);
4759 expand_dec (adjusted_op0, const1_rtx);
4760 emit_jump_insn (gen_jump (label4));
4761 emit_barrier ();
4762 emit_label (label2);
4763 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4764 compute_mode, label3);
4765 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4766 quotient, 0, OPTAB_LIB_WIDEN);
4767 if (tem != quotient)
4768 emit_move_insn (quotient, tem);
4769 emit_jump_insn (gen_jump (label5));
4770 emit_barrier ();
4771 emit_label (label3);
4772 expand_inc (adjusted_op0, const1_rtx);
4773 emit_label (label4);
4774 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4775 quotient, 0, OPTAB_LIB_WIDEN);
4776 if (tem != quotient)
4777 emit_move_insn (quotient, tem);
4778 expand_inc (quotient, const1_rtx);
4779 emit_label (label5);
4780 }
4781 }
4782 break;
4783
4784 case EXACT_DIV_EXPR:
4785 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4786 {
4787 HOST_WIDE_INT d = INTVAL (op1);
4788 unsigned HOST_WIDE_INT ml;
4789 int pre_shift;
4790 rtx t1;
4791
4792 pre_shift = floor_log2 (d & -d);
4793 ml = invert_mod2n (d >> pre_shift, size);
4794 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4795 pre_shift, NULL_RTX, unsignedp);
4796 quotient = expand_mult (compute_mode, t1,
4797 gen_int_mode (ml, compute_mode),
4798 NULL_RTX, 1);
4799
4800 insn = get_last_insn ();
4801 set_dst_reg_note (insn, REG_EQUAL,
4802 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4803 compute_mode, op0, op1),
4804 quotient);
4805 }
4806 break;
4807
4808 case ROUND_DIV_EXPR:
4809 case ROUND_MOD_EXPR:
4810 if (unsignedp)
4811 {
4812 rtx tem;
4813 rtx label;
4814 label = gen_label_rtx ();
4815 quotient = gen_reg_rtx (compute_mode);
4816 remainder = gen_reg_rtx (compute_mode);
4817 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4818 {
4819 rtx tem;
4820 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4821 quotient, 1, OPTAB_LIB_WIDEN);
4822 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4823 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4824 remainder, 1, OPTAB_LIB_WIDEN);
4825 }
4826 tem = plus_constant (compute_mode, op1, -1);
4827 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4828 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4829 expand_inc (quotient, const1_rtx);
4830 expand_dec (remainder, op1);
4831 emit_label (label);
4832 }
4833 else
4834 {
4835 rtx abs_rem, abs_op1, tem, mask;
4836 rtx label;
4837 label = gen_label_rtx ();
4838 quotient = gen_reg_rtx (compute_mode);
4839 remainder = gen_reg_rtx (compute_mode);
4840 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4841 {
4842 rtx tem;
4843 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4844 quotient, 0, OPTAB_LIB_WIDEN);
4845 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4846 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4847 remainder, 0, OPTAB_LIB_WIDEN);
4848 }
4849 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4850 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4851 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4852 1, NULL_RTX, 1);
4853 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4854 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4855 NULL_RTX, 0, OPTAB_WIDEN);
4856 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4857 size - 1, NULL_RTX, 0);
4858 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4859 NULL_RTX, 0, OPTAB_WIDEN);
4860 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4861 NULL_RTX, 0, OPTAB_WIDEN);
4862 expand_inc (quotient, tem);
4863 tem = expand_binop (compute_mode, xor_optab, mask, op1,
4864 NULL_RTX, 0, OPTAB_WIDEN);
4865 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4866 NULL_RTX, 0, OPTAB_WIDEN);
4867 expand_dec (remainder, tem);
4868 emit_label (label);
4869 }
4870 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4871
4872 default:
4873 gcc_unreachable ();
4874 }
4875
4876 if (quotient == 0)
4877 {
4878 if (target && GET_MODE (target) != compute_mode)
4879 target = 0;
4880
4881 if (rem_flag)
4882 {
4883 /* Try to produce the remainder without producing the quotient.
4884 If we seem to have a divmod pattern that does not require widening,
4885 don't try widening here. We should really have a WIDEN argument
4886 to expand_twoval_binop, since what we'd really like to do here is
4887 1) try a mod insn in compute_mode
4888 2) try a divmod insn in compute_mode
4889 3) try a div insn in compute_mode and multiply-subtract to get
4890 remainder
4891 4) try the same things with widening allowed. */
4892 remainder
4893 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4894 op0, op1, target,
4895 unsignedp,
4896 ((optab_handler (optab2, compute_mode)
4897 != CODE_FOR_nothing)
4898 ? OPTAB_DIRECT : OPTAB_WIDEN));
4899 if (remainder == 0)
4900 {
4901 /* No luck there. Can we do remainder and divide at once
4902 without a library call? */
4903 remainder = gen_reg_rtx (compute_mode);
4904 if (! expand_twoval_binop ((unsignedp
4905 ? udivmod_optab
4906 : sdivmod_optab),
4907 op0, op1,
4908 NULL_RTX, remainder, unsignedp))
4909 remainder = 0;
4910 }
4911
4912 if (remainder)
4913 return gen_lowpart (mode, remainder);
4914 }
4915
4916 /* Produce the quotient. Try a quotient insn, but not a library call.
4917 If we have a divmod in this mode, use it in preference to widening
4918 the div (for this test we assume it will not fail). Note that optab2
4919 is set to the one of the two optabs that the call below will use. */
4920 quotient
4921 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4922 op0, op1, rem_flag ? NULL_RTX : target,
4923 unsignedp,
4924 ((optab_handler (optab2, compute_mode)
4925 != CODE_FOR_nothing)
4926 ? OPTAB_DIRECT : OPTAB_WIDEN));
4927
4928 if (quotient == 0)
4929 {
4930 /* No luck there. Try a quotient-and-remainder insn,
4931 keeping the quotient alone. */
4932 quotient = gen_reg_rtx (compute_mode);
4933 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4934 op0, op1,
4935 quotient, NULL_RTX, unsignedp))
4936 {
4937 quotient = 0;
4938 if (! rem_flag)
4939 /* Still no luck. If we are not computing the remainder,
4940 use a library call for the quotient. */
4941 quotient = sign_expand_binop (compute_mode,
4942 udiv_optab, sdiv_optab,
4943 op0, op1, target,
4944 unsignedp, OPTAB_LIB_WIDEN);
4945 }
4946 }
4947 }
4948
4949 if (rem_flag)
4950 {
4951 if (target && GET_MODE (target) != compute_mode)
4952 target = 0;
4953
4954 if (quotient == 0)
4955 {
4956 /* No divide instruction either. Use library for remainder. */
4957 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4958 op0, op1, target,
4959 unsignedp, OPTAB_LIB_WIDEN);
4960 /* No remainder function. Try a quotient-and-remainder
4961 function, keeping the remainder. */
4962 if (!remainder)
4963 {
4964 remainder = gen_reg_rtx (compute_mode);
4965 if (!expand_twoval_binop_libfunc
4966 (unsignedp ? udivmod_optab : sdivmod_optab,
4967 op0, op1,
4968 NULL_RTX, remainder,
4969 unsignedp ? UMOD : MOD))
4970 remainder = NULL_RTX;
4971 }
4972 }
4973 else
4974 {
4975 /* We divided. Now finish doing X - Y * (X / Y). */
4976 remainder = expand_mult (compute_mode, quotient, op1,
4977 NULL_RTX, unsignedp);
4978 remainder = expand_binop (compute_mode, sub_optab, op0,
4979 remainder, target, unsignedp,
4980 OPTAB_LIB_WIDEN);
4981 }
4982 }
4983
4984 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4985 }
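
/* Editor's summary sketch of the quotient adjustments implemented above
   (standalone, illustrative only; relies on C's / and % truncating toward
   zero, on D != 0, and on avoiding signed-overflow corner cases such as
   INT_MIN / -1 or 2 * |R| overflowing).  The ROUNDING argument is an ad hoc
   selector for this sketch: 0 for FLOOR_DIV_EXPR, 1 for CEIL_DIV_EXPR,
   2 for ROUND_DIV_EXPR.  */

static int
example_div_rounded (int x, int d, int rounding)
{
  int q = x / d, r = x % d;

  if (r != 0)
    {
      if (rounding == 0 && (r < 0) != (d < 0))
        q--;                                    /* floor: round toward -inf */
      else if (rounding == 1 && (r < 0) == (d < 0))
        q++;                                    /* ceil: round toward +inf */
      else if (rounding == 2
               && 2 * (r < 0 ? -r : r) >= (d < 0 ? -d : d))
        q += ((r < 0) == (d < 0)) ? 1 : -1;     /* nearest, ties away from 0 */
    }
  return q;
}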
4986 \f
4987 /* Return a tree node with data type TYPE, describing the value of X.
4988 Usually this is a VAR_DECL, if there is no obvious better choice.
4989 X may be an expression; however, we only support those expressions
4990 generated by loop.c. */
4991
4992 tree
4993 make_tree (tree type, rtx x)
4994 {
4995 tree t;
4996
4997 switch (GET_CODE (x))
4998 {
4999 case CONST_INT:
5000 {
5001 HOST_WIDE_INT hi = 0;
5002
5003 if (INTVAL (x) < 0
5004 && !(TYPE_UNSIGNED (type)
5005 && (GET_MODE_BITSIZE (TYPE_MODE (type))
5006 < HOST_BITS_PER_WIDE_INT)))
5007 hi = -1;
5008
5009 t = build_int_cst_wide (type, INTVAL (x), hi);
5010
5011 return t;
5012 }
5013
5014 case CONST_DOUBLE:
5015 if (GET_MODE (x) == VOIDmode)
5016 t = build_int_cst_wide (type,
5017 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
5018 else
5019 {
5020 REAL_VALUE_TYPE d;
5021
5022 REAL_VALUE_FROM_CONST_DOUBLE (d, x);
5023 t = build_real (type, d);
5024 }
5025
5026 return t;
5027
5028 case CONST_VECTOR:
5029 {
5030 int units = CONST_VECTOR_NUNITS (x);
5031 tree itype = TREE_TYPE (type);
5032 tree *elts;
5033 int i;
5034
5035 /* Build a tree with vector elements. */
5036 elts = XALLOCAVEC (tree, units);
5037 for (i = units - 1; i >= 0; --i)
5038 {
5039 rtx elt = CONST_VECTOR_ELT (x, i);
5040 elts[i] = make_tree (itype, elt);
5041 }
5042
5043 return build_vector (type, elts);
5044 }
5045
5046 case PLUS:
5047 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5048 make_tree (type, XEXP (x, 1)));
5049
5050 case MINUS:
5051 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5052 make_tree (type, XEXP (x, 1)));
5053
5054 case NEG:
5055 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5056
5057 case MULT:
5058 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5059 make_tree (type, XEXP (x, 1)));
5060
5061 case ASHIFT:
5062 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5063 make_tree (type, XEXP (x, 1)));
5064
5065 case LSHIFTRT:
5066 t = unsigned_type_for (type);
5067 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5068 make_tree (t, XEXP (x, 0)),
5069 make_tree (type, XEXP (x, 1))));
5070
5071 case ASHIFTRT:
5072 t = signed_type_for (type);
5073 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5074 make_tree (t, XEXP (x, 0)),
5075 make_tree (type, XEXP (x, 1))));
5076
5077 case DIV:
5078 if (TREE_CODE (type) != REAL_TYPE)
5079 t = signed_type_for (type);
5080 else
5081 t = type;
5082
5083 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5084 make_tree (t, XEXP (x, 0)),
5085 make_tree (t, XEXP (x, 1))));
5086 case UDIV:
5087 t = unsigned_type_for (type);
5088 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5089 make_tree (t, XEXP (x, 0)),
5090 make_tree (t, XEXP (x, 1))));
5091
5092 case SIGN_EXTEND:
5093 case ZERO_EXTEND:
5094 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5095 GET_CODE (x) == ZERO_EXTEND);
5096 return fold_convert (type, make_tree (t, XEXP (x, 0)));
5097
5098 case CONST:
5099 return make_tree (type, XEXP (x, 0));
5100
5101 case SYMBOL_REF:
5102 t = SYMBOL_REF_DECL (x);
5103 if (t)
5104 return fold_convert (type, build_fold_addr_expr (t));
5105 /* else fall through. */
5106
5107 default:
5108 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5109
5110 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5111 address mode to pointer mode. */
5112 if (POINTER_TYPE_P (type))
5113 x = convert_memory_address_addr_space
5114 (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5115
5116 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5117 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5118 t->decl_with_rtl.rtl = x;
5119
5120 return t;
5121 }
5122 }
5123 \f
5124 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5125 and returning TARGET.
5126
5127 If TARGET is 0, a pseudo-register or constant is returned. */
5128
5129 rtx
5130 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
5131 {
5132 rtx tem = 0;
5133
5134 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5135 tem = simplify_binary_operation (AND, mode, op0, op1);
5136 if (tem == 0)
5137 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5138
5139 if (target == 0)
5140 target = tem;
5141 else if (tem != target)
5142 emit_move_insn (target, tem);
5143 return target;
5144 }
5145
5146 /* Helper function for emit_store_flag: emit a cstore insn ICODE comparing X and Y, then normalize the result into TARGET per NORMALIZEP. */
5147 static rtx
5148 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5149 enum machine_mode mode, enum machine_mode compare_mode,
5150 int unsignedp, rtx x, rtx y, int normalizep,
5151 enum machine_mode target_mode)
5152 {
5153 struct expand_operand ops[4];
5154 rtx op0, last, comparison, subtarget;
5155 enum machine_mode result_mode = insn_data[(int) icode].operand[0].mode;
5156
5157 last = get_last_insn ();
5158 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5159 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5160 if (!x || !y)
5161 {
5162 delete_insns_since (last);
5163 return NULL_RTX;
5164 }
5165
5166 if (target_mode == VOIDmode)
5167 target_mode = result_mode;
5168 if (!target)
5169 target = gen_reg_rtx (target_mode);
5170
5171 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5172
5173 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5174 create_fixed_operand (&ops[1], comparison);
5175 create_fixed_operand (&ops[2], x);
5176 create_fixed_operand (&ops[3], y);
5177 if (!maybe_expand_insn (icode, 4, ops))
5178 {
5179 delete_insns_since (last);
5180 return NULL_RTX;
5181 }
5182 subtarget = ops[0].value;
5183
5184 /* If we are converting to a wider mode, first convert to
5185 TARGET_MODE, then normalize. This produces better combining
5186 opportunities on machines that have a SIGN_EXTRACT when we are
5187 testing a single bit. This mostly benefits the 68k.
5188
5189 If STORE_FLAG_VALUE does not have the sign bit set when
5190 interpreted in MODE, we can do this conversion as unsigned, which
5191 is usually more efficient. */
5192 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5193 {
5194 convert_move (target, subtarget,
5195 val_signbit_known_clear_p (result_mode,
5196 STORE_FLAG_VALUE));
5197 op0 = target;
5198 result_mode = target_mode;
5199 }
5200 else
5201 op0 = subtarget;
5202
5203 /* If we want to keep subexpressions around, don't reuse our last
5204 target. */
5205 if (optimize)
5206 subtarget = 0;
5207
5208 /* Now normalize to the proper value in MODE. Sometimes we don't
5209 have to do anything. */
5210 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5211 ;
5212 /* STORE_FLAG_VALUE might be the most negative number, so write
5213 the comparison this way to avoid a compile-time warning. */
5214 else if (- normalizep == STORE_FLAG_VALUE)
5215 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5216
5217 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5218 it hard to use a value of just the sign bit due to ANSI integer
5219 constant typing rules. */
5220 else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5221 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5222 GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5223 normalizep == 1);
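/* Editorial worked example (not part of the original source): if the
   pattern sets only the sign bit, e.g. 0x80000000 in SImode, a logical
   right shift by 31 turns it into 0 or 1 while an arithmetic shift
   turns it into 0 or -1, matching NORMALIZEP == 1 and NORMALIZEP == -1
   respectively. */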
5224 else
5225 {
5226 gcc_assert (STORE_FLAG_VALUE & 1);
5227
5228 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5229 if (normalizep == -1)
5230 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5231 }
5232
5233 /* If we were converting to a smaller mode, do the conversion now. */
5234 if (target_mode != result_mode)
5235 {
5236 convert_move (target, op0, 0);
5237 return target;
5238 }
5239 else
5240 return op0;
5241 }
5242
5243
5244 /* A subroutine of emit_store_flag only including "tricks" that do not
5245 need a recursive call. These are kept separate to avoid infinite
5246 loops. */
5247
5248 static rtx
5249 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5250 enum machine_mode mode, int unsignedp, int normalizep,
5251 enum machine_mode target_mode)
5252 {
5253 rtx subtarget;
5254 enum insn_code icode;
5255 enum machine_mode compare_mode;
5256 enum mode_class mclass;
5257 enum rtx_code scode;
5258 rtx tem;
5259
5260 if (unsignedp)
5261 code = unsigned_condition (code);
5262 scode = swap_condition (code);
5263
5264 /* If one operand is constant, make it the second one. Only do this
5265 if the other operand is not constant as well. */
5266
5267 if (swap_commutative_operands_p (op0, op1))
5268 {
5269 tem = op0;
5270 op0 = op1;
5271 op1 = tem;
5272 code = swap_condition (code);
5273 }
5274
5275 if (mode == VOIDmode)
5276 mode = GET_MODE (op0);
5277
5278 /* For some comparisons with 1 and -1, we can convert this to
5279 comparisons with zero. This will often produce more opportunities for
5280 store-flag insns. */
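/* Editorial examples (not part of the original source): for signed
   operands "X < 1" is the same test as "X <= 0" and "X > -1" the same
   as "X >= 0"; for unsigned operands "X >= 1" means "X != 0" and
   "X < 1" means "X == 0". */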
5281
5282 switch (code)
5283 {
5284 case LT:
5285 if (op1 == const1_rtx)
5286 op1 = const0_rtx, code = LE;
5287 break;
5288 case LE:
5289 if (op1 == constm1_rtx)
5290 op1 = const0_rtx, code = LT;
5291 break;
5292 case GE:
5293 if (op1 == const1_rtx)
5294 op1 = const0_rtx, code = GT;
5295 break;
5296 case GT:
5297 if (op1 == constm1_rtx)
5298 op1 = const0_rtx, code = GE;
5299 break;
5300 case GEU:
5301 if (op1 == const1_rtx)
5302 op1 = const0_rtx, code = NE;
5303 break;
5304 case LTU:
5305 if (op1 == const1_rtx)
5306 op1 = const0_rtx, code = EQ;
5307 break;
5308 default:
5309 break;
5310 }
5311
5312 /* If we are comparing a double-word integer with zero or -1, we can
5313 convert the comparison into one involving a single word. */
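/* Editorial example (not part of the original source): on a 32-bit
   target a DImode value is zero exactly when (low | high) == 0 and is
   -1 exactly when (low & high) == -1, which is what the IOR/AND of the
   two subwords below checks; a plain sign test (LT/GE against zero)
   only needs the high word. */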
5314 if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5315 && GET_MODE_CLASS (mode) == MODE_INT
5316 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5317 {
5318 if ((code == EQ || code == NE)
5319 && (op1 == const0_rtx || op1 == constm1_rtx))
5320 {
5321 rtx op00, op01;
5322
5323 /* Do a logical OR or AND of the two words and compare the
5324 result. */
5325 op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5326 op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5327 tem = expand_binop (word_mode,
5328 op1 == const0_rtx ? ior_optab : and_optab,
5329 op00, op01, NULL_RTX, unsignedp,
5330 OPTAB_DIRECT);
5331
5332 if (tem != 0)
5333 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5334 unsignedp, normalizep);
5335 }
5336 else if ((code == LT || code == GE) && op1 == const0_rtx)
5337 {
5338 rtx op0h;
5339
5340 /* If testing the sign bit, we can just test the high word. */
5341 op0h = simplify_gen_subreg (word_mode, op0, mode,
5342 subreg_highpart_offset (word_mode,
5343 mode));
5344 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5345 unsignedp, normalizep);
5346 }
5347 else
5348 tem = NULL_RTX;
5349
5350 if (tem)
5351 {
5352 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5353 return tem;
5354 if (!target)
5355 target = gen_reg_rtx (target_mode);
5356
5357 convert_move (target, tem,
5358 !val_signbit_known_set_p (word_mode,
5359 (normalizep ? normalizep
5360 : STORE_FLAG_VALUE)));
5361 return target;
5362 }
5363 }
5364
5365 /* If this is A < 0 or A >= 0, we can do this by taking the ones
5366 complement of A (for GE) and shifting the sign bit to the low bit. */
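/* Editorial worked example (not part of the original source): A < 0 is
   just the sign bit of A, so shifting that bit down to bit 0 gives the
   0/1 (logical shift) or 0/-1 (arithmetic shift) result; for A >= 0
   the complement ~A is formed first, since its sign bit is set exactly
   when A is non-negative. */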
5367 if (op1 == const0_rtx && (code == LT || code == GE)
5368 && GET_MODE_CLASS (mode) == MODE_INT
5369 && (normalizep || STORE_FLAG_VALUE == 1
5370 || val_signbit_p (mode, STORE_FLAG_VALUE)))
5371 {
5372 subtarget = target;
5373
5374 if (!target)
5375 target_mode = mode;
5376
5377 /* If the result is to be wider than OP0, it is best to convert it
5378 first. If it is to be narrower, it is *incorrect* to convert it
5379 first. */
5380 else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5381 {
5382 op0 = convert_modes (target_mode, mode, op0, 0);
5383 mode = target_mode;
5384 }
5385
5386 if (target_mode != mode)
5387 subtarget = 0;
5388
5389 if (code == GE)
5390 op0 = expand_unop (mode, one_cmpl_optab, op0,
5391 ((STORE_FLAG_VALUE == 1 || normalizep)
5392 ? 0 : subtarget), 0);
5393
5394 if (STORE_FLAG_VALUE == 1 || normalizep)
5395 /* If we are supposed to produce a 0/1 value, we want to do
5396 a logical shift from the sign bit to the low-order bit; for
5397 a -1/0 value, we do an arithmetic shift. */
5398 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5399 GET_MODE_BITSIZE (mode) - 1,
5400 subtarget, normalizep != -1);
5401
5402 if (mode != target_mode)
5403 op0 = convert_modes (target_mode, mode, op0, 0);
5404
5405 return op0;
5406 }
5407
5408 mclass = GET_MODE_CLASS (mode);
5409 for (compare_mode = mode; compare_mode != VOIDmode;
5410 compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5411 {
5412 enum machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5413 icode = optab_handler (cstore_optab, optab_mode);
5414 if (icode != CODE_FOR_nothing)
5415 {
5416 do_pending_stack_adjust ();
5417 tem = emit_cstore (target, icode, code, mode, compare_mode,
5418 unsignedp, op0, op1, normalizep, target_mode);
5419 if (tem)
5420 return tem;
5421
5422 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5423 {
5424 tem = emit_cstore (target, icode, scode, mode, compare_mode,
5425 unsignedp, op1, op0, normalizep, target_mode);
5426 if (tem)
5427 return tem;
5428 }
5429 break;
5430 }
5431 }
5432
5433 return 0;
5434 }
5435
5436 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1,
5437 storing the result in TARGET. Normally return TARGET.
5438 Return 0 if that cannot be done.
5439
5440 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5441 it is VOIDmode, they cannot both be CONST_INT.
5442
5443 UNSIGNEDP is for the case where we have to widen the operands
5444 to perform the operation. It says to use zero-extension.
5445
5446 NORMALIZEP is 1 if we should convert the result to be either zero
5447 or one. NORMALIZEP is -1 if we should convert the result to be
5448 either zero or -1. If NORMALIZEP is zero, the result will be left
5449 "raw" out of the scc insn. */
5450
5451 rtx
5452 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5453 enum machine_mode mode, int unsignedp, int normalizep)
5454 {
5455 enum machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5456 enum rtx_code rcode;
5457 rtx subtarget;
5458 rtx tem, last, trueval;
5459
5460 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5461 target_mode);
5462 if (tem)
5463 return tem;
5464
5465 /* If we reached here, we can't do this with a scc insn; however, there
5466 are some comparisons that can be done in other ways. Don't do any
5467 of these cases if branches are very cheap. */
5468 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5469 return 0;
5470
5471 /* See what we need to return. We can only return a 1, -1, or the
5472 sign bit. */
5473
5474 if (normalizep == 0)
5475 {
5476 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5477 normalizep = STORE_FLAG_VALUE;
5478
5479 else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5480 ;
5481 else
5482 return 0;
5483 }
5484
5485 last = get_last_insn ();
5486
5487 /* If optimizing, use a different pseudo register for each insn, instead
5488 of reusing the same pseudo. This leads to better CSE, but slows
5489 down the compiler, since there are more pseudos. */
5490 subtarget = (!optimize
5491 && (target_mode == mode)) ? target : NULL_RTX;
5492 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5493
5494 /* For floating-point comparisons, try the reverse comparison or try
5495 changing the "orderedness" of the comparison. */
5496 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5497 {
5498 enum rtx_code first_code;
5499 bool and_them;
5500
5501 rcode = reverse_condition_maybe_unordered (code);
5502 if (can_compare_p (rcode, mode, ccp_store_flag)
5503 && (code == ORDERED || code == UNORDERED
5504 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5505 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5506 {
5507 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5508 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5509
5510 /* For the reverse comparison, use either an addition or a XOR. */
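/* Editorial worked example (not part of the original source): with
   STORE_FLAG_VALUE == 1 and NORMALIZEP == -1, the reverse comparison
   yields 1 when the original comparison is false and 0 when it is
   true; adding NORMALIZEP (-1) then gives 0 and -1 respectively, which
   is the wanted result. Likewise, XORing a normalized reverse result
   with TRUEVAL swaps 0 and TRUEVAL. */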
5511 if (want_add
5512 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5513 optimize_insn_for_speed_p ()) == 0)
5514 {
5515 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5516 STORE_FLAG_VALUE, target_mode);
5517 if (tem)
5518 return expand_binop (target_mode, add_optab, tem,
5519 GEN_INT (normalizep),
5520 target, 0, OPTAB_WIDEN);
5521 }
5522 else if (!want_add
5523 && rtx_cost (trueval, XOR, 1,
5524 optimize_insn_for_speed_p ()) == 0)
5525 {
5526 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5527 normalizep, target_mode);
5528 if (tem)
5529 return expand_binop (target_mode, xor_optab, tem, trueval,
5530 target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5531 }
5532 }
5533
5534 delete_insns_since (last);
5535
5536 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
5537 if (code == ORDERED || code == UNORDERED)
5538 return 0;
5539
5540 and_them = split_comparison (code, mode, &first_code, &code);
5541
5542 /* If there are no NaNs, the first comparison should always fall through.
5543 Effectively change the comparison to the other one. */
5544 if (!HONOR_NANS (mode))
5545 {
5546 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5547 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5548 target_mode);
5549 }
5550
5551 #ifdef HAVE_conditional_move
5552 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5553 conditional move. */
5554 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5555 normalizep, target_mode);
5556 if (tem == 0)
5557 return 0;
5558
5559 if (and_them)
5560 tem = emit_conditional_move (target, code, op0, op1, mode,
5561 tem, const0_rtx, GET_MODE (tem), 0);
5562 else
5563 tem = emit_conditional_move (target, code, op0, op1, mode,
5564 trueval, tem, GET_MODE (tem), 0);
5565
5566 if (tem == 0)
5567 delete_insns_since (last);
5568 return tem;
5569 #else
5570 return 0;
5571 #endif
5572 }
5573
5574 /* The remaining tricks only apply to integer comparisons. */
5575
5576 if (GET_MODE_CLASS (mode) != MODE_INT)
5577 return 0;
5578
5579 /* If this is an equality comparison of integers, we can try to exclusive-or
5580 (or subtract) the two operands and use a recursive call to try the
5581 comparison with zero. Don't do any of these cases if branches are
5582 very cheap. */
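/* Editorial note (not part of the original source): this relies on the
   identities A == B <=> (A ^ B) == 0 and A == B <=> (A - B) == 0,
   which hold equally for NE with != in place of ==. */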
5583
5584 if ((code == EQ || code == NE) && op1 != const0_rtx)
5585 {
5586 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5587 OPTAB_WIDEN);
5588
5589 if (tem == 0)
5590 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5591 OPTAB_WIDEN);
5592 if (tem != 0)
5593 tem = emit_store_flag (target, code, tem, const0_rtx,
5594 mode, unsignedp, normalizep);
5595 if (tem != 0)
5596 return tem;
5597
5598 delete_insns_since (last);
5599 }
5600
5601 /* For integer comparisons, try the reverse comparison. However, for
5602 small X and if we'd have to extend anyway, implementing "X != 0"
5603 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5604 rcode = reverse_condition (code);
5605 if (can_compare_p (rcode, mode, ccp_store_flag)
5606 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5607 && code == NE
5608 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5609 && op1 == const0_rtx))
5610 {
5611 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5612 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5613
5614 /* Again, for the reverse comparison, use either an addition or a XOR. */
5615 if (want_add
5616 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5617 optimize_insn_for_speed_p ()) == 0)
5618 {
5619 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5620 STORE_FLAG_VALUE, target_mode);
5621 if (tem != 0)
5622 tem = expand_binop (target_mode, add_optab, tem,
5623 GEN_INT (normalizep), target, 0, OPTAB_WIDEN);
5624 }
5625 else if (!want_add
5626 && rtx_cost (trueval, XOR, 1,
5627 optimize_insn_for_speed_p ()) == 0)
5628 {
5629 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5630 normalizep, target_mode);
5631 if (tem != 0)
5632 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5633 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5634 }
5635
5636 if (tem != 0)
5637 return tem;
5638 delete_insns_since (last);
5639 }
5640
5641 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5642 the constant zero. Reject all other comparisons at this point. Only
5643 do LE and GT if branches are expensive since they are expensive on
5644 2-operand machines. */
5645
5646 if (op1 != const0_rtx
5647 || (code != EQ && code != NE
5648 && (BRANCH_COST (optimize_insn_for_speed_p (),
5649 false) <= 1 || (code != LE && code != GT))))
5650 return 0;
5651
5652 /* Try to put the result of the comparison in the sign bit. Assume we can't
5653 do the necessary operation below. */
5654
5655 tem = 0;
5656
5657 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5658 the sign bit set. */
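/* Editorial worked example (not part of the original source): for
   A = 0, 0 | -1 = -1 (sign bit set); for A = 5, 5 | 4 = 5 (clear); for
   A = -3, -3 | -4 = -3 (set). Even for A = INT_MIN, where A - 1 wraps
   to INT_MAX, the IOR is still negative, so the test stays correct. */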
5659
5660 if (code == LE)
5661 {
5662 /* This is destructive, so SUBTARGET can't be OP0. */
5663 if (rtx_equal_p (subtarget, op0))
5664 subtarget = 0;
5665
5666 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5667 OPTAB_WIDEN);
5668 if (tem)
5669 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5670 OPTAB_WIDEN);
5671 }
5672
5673 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5674 number of bits in the mode of OP0, minus one. */
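/* Editorial worked example (not part of the original source), in SImode
   where the shift count is 31: A = 5 gives 0 - 5 = -5 (sign bit set);
   A = 0 gives 0 - 0 = 0 (clear); A = -3 gives -1 - (-3) = 2 (clear).
   The sign bit of the result is therefore set exactly when A > 0. */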
5675
5676 if (code == GT)
5677 {
5678 if (rtx_equal_p (subtarget, op0))
5679 subtarget = 0;
5680
5681 tem = expand_shift (RSHIFT_EXPR, mode, op0,
5682 GET_MODE_BITSIZE (mode) - 1,
5683 subtarget, 0);
5684 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5685 OPTAB_WIDEN);
5686 }
5687
5688 if (code == EQ || code == NE)
5689 {
5690 /* For EQ or NE, one way to do the comparison is to apply an operation
5691 that converts the operand into a positive number if it is nonzero
5692 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5693 for NE we negate. This puts the result in the sign bit. Then we
5694 normalize with a shift, if needed.
5695
5696 Two operations that can do the above actions are ABS and FFS, so try
5697 them. If that doesn't work, and MODE is smaller than a full word,
5698 we can use zero-extension to the wider mode (an unsigned conversion)
5699 as the operation. */
5700
5701 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5702 that is compensated by the subsequent overflow when subtracting
5703 one / negating. */
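/* Editorial worked example (not part of the original source), using ABS
   for EQ: A = 0 gives abs 0, minus 1 is -1 (sign bit set, i.e. A == 0
   holds); A = 7 or A = -7 gives abs 7, minus 1 is 6 (sign bit clear).
   For NE, negating the abs value is negative exactly when A is
   nonzero. */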
5704
5705 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5706 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5707 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5708 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5709 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5710 {
5711 tem = convert_modes (word_mode, mode, op0, 1);
5712 mode = word_mode;
5713 }
5714
5715 if (tem != 0)
5716 {
5717 if (code == EQ)
5718 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5719 0, OPTAB_WIDEN);
5720 else
5721 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5722 }
5723
5724 /* If we couldn't do it that way, for NE we can "or" the two's complement
5725 of the value with itself. For EQ, we take the one's complement of
5726 that "or", which is an extra insn, so we only handle EQ if branches
5727 are expensive. */
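/* Editorial note (not part of the original source): (-A) | A has the
   sign bit set exactly when A is nonzero, since for any nonzero A
   either A or its negation is negative (INT_MIN negates to itself);
   for A = 0 the result is 0. */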
5728
5729 if (tem == 0
5730 && (code == NE
5731 || BRANCH_COST (optimize_insn_for_speed_p (),
5732 false) > 1))
5733 {
5734 if (rtx_equal_p (subtarget, op0))
5735 subtarget = 0;
5736
5737 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5738 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5739 OPTAB_WIDEN);
5740
5741 if (tem && code == EQ)
5742 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5743 }
5744 }
5745
5746 if (tem && normalizep)
5747 tem = expand_shift (RSHIFT_EXPR, mode, tem,
5748 GET_MODE_BITSIZE (mode) - 1,
5749 subtarget, normalizep == 1);
5750
5751 if (tem)
5752 {
5753 if (!target)
5754 ;
5755 else if (GET_MODE (tem) != target_mode)
5756 {
5757 convert_move (target, tem, 0);
5758 tem = target;
5759 }
5760 else if (!subtarget)
5761 {
5762 emit_move_insn (target, tem);
5763 tem = target;
5764 }
5765 }
5766 else
5767 delete_insns_since (last);
5768
5769 return tem;
5770 }
5771
5772 /* Like emit_store_flag, but always succeeds. */
5773
5774 rtx
5775 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5776 enum machine_mode mode, int unsignedp, int normalizep)
5777 {
5778 rtx tem, label;
5779 rtx trueval, falseval;
5780
5781 /* First see if emit_store_flag can do the job. */
5782 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5783 if (tem != 0)
5784 return tem;
5785
5786 if (!target)
5787 target = gen_reg_rtx (word_mode);
5788
5789 /* If this failed, we have to do this with set/compare/jump/set code.
5790 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
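/* Editorial note (not part of the original source): when TARGET already
   holds the value being tested against zero, a zero value means TARGET
   already contains the desired result, so the code below only needs to
   overwrite it with TRUEVAL when the value is nonzero, saving one
   move. */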
5791 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5792 if (code == NE
5793 && GET_MODE_CLASS (mode) == MODE_INT
5794 && REG_P (target)
5795 && op0 == target
5796 && op1 == const0_rtx)
5797 {
5798 label = gen_label_rtx ();
5799 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
5800 mode, NULL_RTX, NULL_RTX, label, -1);
5801 emit_move_insn (target, trueval);
5802 emit_label (label);
5803 return target;
5804 }
5805
5806 if (!REG_P (target)
5807 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5808 target = gen_reg_rtx (GET_MODE (target));
5809
5810 /* Jump in the right direction if the target cannot implement CODE
5811 but can jump on its reverse condition. */
5812 falseval = const0_rtx;
5813 if (! can_compare_p (code, mode, ccp_jump)
5814 && (! FLOAT_MODE_P (mode)
5815 || code == ORDERED || code == UNORDERED
5816 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5817 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5818 {
5819 enum rtx_code rcode;
5820 if (FLOAT_MODE_P (mode))
5821 rcode = reverse_condition_maybe_unordered (code);
5822 else
5823 rcode = reverse_condition (code);
5824
5825 /* Canonicalize to UNORDERED for the libcall. */
5826 if (can_compare_p (rcode, mode, ccp_jump)
5827 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5828 {
5829 falseval = trueval;
5830 trueval = const0_rtx;
5831 code = rcode;
5832 }
5833 }
5834
5835 emit_move_insn (target, trueval);
5836 label = gen_label_rtx ();
5837 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5838 NULL_RTX, label, -1);
5839
5840 emit_move_insn (target, falseval);
5841 emit_label (label);
5842
5843 return target;
5844 }
5845 \f
5846 /* Perform possibly multi-word comparison and conditional jump to LABEL
5847 if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE. This is
5848 now a thin wrapper around do_compare_rtx_and_jump. */
5849
5850 static void
5851 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5852 rtx label)
5853 {
5854 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5855 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5856 NULL_RTX, NULL_RTX, label, -1);
5857 }