/* GIMPLE store merging and byte swapping passes.
   Copyright (C) 2009-2023 Free Software Foundation, Inc.
   Contributed by ARM Ltd.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

/* The purpose of the store merging pass is to combine multiple memory stores
   of constant values, values loaded from memory, bitwise operations on those,
   or bit-field values, to consecutive locations, into fewer wider stores.

   For example, if we have a sequence performing four byte stores to
   consecutive memory locations:
   [p     ] := imm1;
   [p + 1B] := imm2;
   [p + 2B] := imm3;
   [p + 3B] := imm4;
   we can transform this into a single 4-byte store if the target supports it:
   [p] := imm1:imm2:imm3:imm4 concatenated according to endianness.

   Or:
   [p     ] := [q     ];
   [p + 1B] := [q + 1B];
   [p + 2B] := [q + 2B];
   [p + 3B] := [q + 3B];
   if there is no overlap, this can be transformed into a single 4-byte
   load followed by a single 4-byte store.

   Or:
   [p     ] := [q     ] ^ imm1;
   [p + 1B] := [q + 1B] ^ imm2;
   [p + 2B] := [q + 2B] ^ imm3;
   [p + 3B] := [q + 3B] ^ imm4;
   if there is no overlap, this can be transformed into a single 4-byte
   load, xored with imm1:imm2:imm3:imm4 and stored using a single 4-byte store.

   Or:
   [p:1 ] := imm;
   [p:31] := val & 0x7FFFFFFF;
   we can transform this into a single 4-byte store if the target supports it:
   [p] := imm:(val & 0x7FFFFFFF) concatenated according to endianness.

   The algorithm is applied to each basic block in three phases:

   1) Scan through the basic block and record assignments to destinations
   that can be expressed as a store to memory of a certain size at a certain
   bit offset from base expressions we can handle.  For bit-fields we also
   record the surrounding bit region, i.e. bits that could be stored in
   a read-modify-write operation when storing the bit-field.  Record store
   chains to different bases in a hash_map (m_stores) and make sure to
   terminate such chains when appropriate (for example when the stored
   values get used subsequently).
   These stores can be a result of structure element initializers, array stores
   etc.  A store_immediate_info object is recorded for every such store.
   Record as many such assignments to a single base as possible until a
   statement that interferes with the store sequence is encountered.
   Each store has up to 2 operands, which can be either a constant, a memory
   load or an SSA name, from which the value to be stored can be computed.
   At most one of the operands can be a constant.  The operands are recorded
   in the store_operand_info struct.

   2) Analyze the chains of stores recorded in phase 1) (i.e. the vector of
   store_immediate_info objects) and coalesce contiguous stores into
   merged_store_group objects.  For bit-field stores we don't require the
   stores themselves to be contiguous, just their surrounding bit regions
   have to be contiguous.  If the expression being stored is different
   between adjacent stores, such as one store storing a constant and the
   following one storing a value loaded from memory, or if the loaded memory
   objects are not adjacent, a new merged_store_group is created as well.

   For example, given the stores:
   [p     ] := 0;
   [p + 1B] := 1;
   [p + 3B] := 0;
   [p + 4B] := 1;
   [p + 5B] := 0;
   [p + 6B] := 0;
   This phase would produce two merged_store_group objects, one recording the
   two bytes stored in the memory region [p : p + 1] and another
   recording the four bytes stored in the memory region [p + 3 : p + 6].

   3) The merged_store_group objects produced in phase 2) are processed
   to generate the sequence of wider stores that set the contiguous memory
   regions to the sequence of bytes that correspond to it.  This may emit
   multiple stores per store group to handle contiguous stores that are not
   of a size that is a power of 2.  For example it can try to emit a 40-bit
   store as a 32-bit store followed by an 8-bit store.
   We try to emit as wide stores as we can while respecting STRICT_ALIGNMENT
   or TARGET_SLOW_UNALIGNED_ACCESS settings.

   Note on endianness and example:
   Consider 2 contiguous 16-bit stores followed by 2 contiguous 8-bit stores:
   [p     ] := 0x1234;
   [p + 2B] := 0x5678;
   [p + 4B] := 0xab;
   [p + 5B] := 0xcd;

   The memory layout for little-endian (LE) and big-endian (BE) must be:
   p |LE|BE|
   ---------
   0 |34|12|
   1 |12|34|
   2 |78|56|
   3 |56|78|
   4 |ab|ab|
   5 |cd|cd|

   To merge these into a single 48-bit merged value 'val' in phase 2)
   on little-endian we insert stores to higher (consecutive) bitpositions
   into the most significant bits of the merged value.
   The final merged value would be: 0xcdab56781234

   For big-endian we insert stores to higher bitpositions into the least
   significant bits of the merged value.
   The final merged value would be: 0x12345678abcd

   Then, in phase 3), we want to emit this 48-bit value as a 32-bit store
   followed by a 16-bit store.  Again, we must consider endianness when
   breaking down the 48-bit value 'val' computed above.
   For little endian we emit:
   [p]      (32-bit) := 0x56781234; // val & 0x0000ffffffff;
   [p + 4B] (16-bit) := 0xcdab;     // (val & 0xffff00000000) >> 32;

   Whereas for big-endian we emit:
   [p]      (32-bit) := 0x12345678; // (val & 0xffffffff0000) >> 16;
   [p + 4B] (16-bit) := 0xabcd;     // val & 0x00000000ffff;  */
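
/* As an illustrative source-level sketch (not taken from this file),
   on a little-endian target the first pattern above corresponds to
   source like

     void
     f (unsigned char *p)
     {
       p[0] = 1;
       p[1] = 2;
       p[2] = 3;
       p[3] = 4;
     }

   which can be rewritten into a single 32-bit store of the constant
   0x04030201, subject to the endianness and alignment considerations
   just described.  */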

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "builtins.h"
#include "fold-const.h"
#include "tree-pass.h"
#include "ssa.h"
#include "gimple-pretty-print.h"
#include "alias.h"
#include "fold-const.h"
#include "print-tree.h"
#include "tree-hash-traits.h"
#include "gimple-iterator.h"
#include "gimplify.h"
#include "gimple-fold.h"
#include "stor-layout.h"
#include "timevar.h"
#include "cfganal.h"
#include "cfgcleanup.h"
#include "tree-cfg.h"
#include "except.h"
#include "tree-eh.h"
#include "target.h"
#include "gimplify-me.h"
#include "rtl.h"
#include "expr.h"	/* For get_bit_range.  */
#include "optabs-tree.h"
#include "dbgcnt.h"
#include "selftest.h"

/* The maximum size (in bits) of the stores this pass should generate.  */
#define MAX_STORE_BITSIZE (BITS_PER_WORD)
#define MAX_STORE_BYTES (MAX_STORE_BITSIZE / BITS_PER_UNIT)

/* Limit to bound the number of aliasing checks for loads with the same
   vuse as the corresponding store.  */
#define MAX_STORE_ALIAS_CHECKS 64

namespace {

struct bswap_stat
{
  /* Number of hand-written 16-bit nop / bswaps found.  */
  int found_16bit;

  /* Number of hand-written 32-bit nop / bswaps found.  */
  int found_32bit;

  /* Number of hand-written 64-bit nop / bswaps found.  */
  int found_64bit;
} nop_stats, bswap_stats;

/* A symbolic number structure is used to detect byte permutation and selection
   patterns of a source.  To achieve that, its field N contains an artificial
   number consisting of BITS_PER_MARKER sized markers tracking where each
   byte comes from in the source:

   0	   - target byte has the value 0
   FF	   - target byte has an unknown value (e.g. due to sign extension)
   1..size - marker value is the byte index in the source (0 for lsb).

   To detect permutations on memory sources (arrays and structures), a symbolic
   number is also associated:
   - a base address BASE_ADDR and an OFFSET giving the address of the source;
   - a range which gives the difference between the highest and lowest accessed
     memory location to make such a symbolic number;
   - the address SRC of the source element of lowest address as a convenience
     to easily get BASE_ADDR + offset + lowest bytepos;
   - number of expressions N_OPS bitwise ored together to represent
     approximate cost of the computation.

   Note 1: the range is different from size as size reflects the size of the
   type of the current expression.  For instance, for an array char a[],
   (short) a[0] | (short) a[3] would have a size of 2 but a range of 4 while
   (short) a[0] | ((short) a[0] << 1) would still have a size of 2 but this
   time a range of 1.

   Note 2: for non-memory sources, range holds the same value as size.

   Note 3: SRC points to the SSA_NAME in case of non-memory source.  */

struct symbolic_number {
  uint64_t n;
  tree type;
  tree base_addr;
  tree offset;
  poly_int64_pod bytepos;
  tree src;
  tree alias_set;
  tree vuse;
  unsigned HOST_WIDE_INT range;
  int n_ops;
};

#define BITS_PER_MARKER 8
#define MARKER_MASK ((1 << BITS_PER_MARKER) - 1)
#define MARKER_BYTE_UNKNOWN MARKER_MASK
#define HEAD_MARKER(n, size) \
  ((n) & ((uint64_t) MARKER_MASK << (((size) - 1) * BITS_PER_MARKER)))

/* The number which the find_bswap_or_nop_1 result should match in
   order to have a nop.  The number is masked according to the size of
   the symbolic number before using it.  */
#define CMPNOP (sizeof (int64_t) < 8 ? 0 : \
  (uint64_t)0x08070605 << 32 | 0x04030201)

/* The number which the find_bswap_or_nop_1 result should match in
   order to have a byte swap.  The number is masked according to the
   size of the symbolic number before using it.  */
#define CMPXCHG (sizeof (int64_t) < 8 ? 0 : \
  (uint64_t)0x01020304 << 32 | 0x05060708)
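
/* As a worked example: for a full 64-bit value the nop pattern is thus
   0x0807060504030201 and the byte-swap pattern is 0x0102030405060708;
   for a 4-byte symbolic number the comparison values reduce to
   0x04030201 (nop) and 0x01020304 (bswap).  */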

/* Perform a SHIFT or ROTATE operation by COUNT bits on symbolic
   number N.  Return false if the requested operation is not permitted
   on a symbolic number.  */

inline bool
do_shift_rotate (enum tree_code code,
		 struct symbolic_number *n,
		 int count)
{
  int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
  uint64_t head_marker;

  if (count < 0
      || count >= TYPE_PRECISION (n->type)
      || count % BITS_PER_UNIT != 0)
    return false;
  count = (count / BITS_PER_UNIT) * BITS_PER_MARKER;

  /* Zero out the extra bits of N in order to avoid them being shifted
     into the significant bits.  */
  if (size < 64 / BITS_PER_MARKER)
    n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;

  switch (code)
    {
    case LSHIFT_EXPR:
      n->n <<= count;
      break;
    case RSHIFT_EXPR:
      head_marker = HEAD_MARKER (n->n, size);
      n->n >>= count;
      /* Arithmetic shift of signed type: result is dependent on the value.  */
      if (!TYPE_UNSIGNED (n->type) && head_marker)
	for (i = 0; i < count / BITS_PER_MARKER; i++)
	  n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
		  << ((size - 1 - i) * BITS_PER_MARKER);
      break;
    case LROTATE_EXPR:
      n->n = (n->n << count) | (n->n >> ((size * BITS_PER_MARKER) - count));
      break;
    case RROTATE_EXPR:
      n->n = (n->n >> count) | (n->n << ((size * BITS_PER_MARKER) - count));
      break;
    default:
      return false;
    }
  /* Zero unused bits for size.  */
  if (size < 64 / BITS_PER_MARKER)
    n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;
  return true;
}
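
/* As an illustrative example of the above: an 8-bit RSHIFT_EXPR on an
   unsigned 32-bit symbolic number with markers 0x04030201 yields
   0x00040302; every marker moves down one byte position and a zero
   marker fills the vacated top byte.  For a signed type with a nonzero
   head marker the vacated bytes are filled with MARKER_BYTE_UNKNOWN
   instead.  */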

/* Perform sanity checking for the symbolic number N and the gimple
   statement STMT.  */

inline bool
verify_symbolic_number_p (struct symbolic_number *n, gimple *stmt)
{
  tree lhs_type;

  lhs_type = TREE_TYPE (gimple_get_lhs (stmt));

  if (TREE_CODE (lhs_type) != INTEGER_TYPE
      && TREE_CODE (lhs_type) != ENUMERAL_TYPE)
    return false;

  if (TYPE_PRECISION (lhs_type) != TYPE_PRECISION (n->type))
    return false;

  return true;
}

/* Initialize the symbolic number N for the bswap pass from the base element
   SRC manipulated by the bitwise OR expression.  */

bool
init_symbolic_number (struct symbolic_number *n, tree src)
{
  int size;

  if (!INTEGRAL_TYPE_P (TREE_TYPE (src)) && !POINTER_TYPE_P (TREE_TYPE (src)))
    return false;

  n->base_addr = n->offset = n->alias_set = n->vuse = NULL_TREE;
  n->src = src;

  /* Set up the symbolic number N by setting each byte to a value between 1 and
     the byte size of rhs1.  The highest order byte is set to n->size and the
     lowest order byte to 1.  */
  n->type = TREE_TYPE (src);
  size = TYPE_PRECISION (n->type);
  if (size % BITS_PER_UNIT != 0)
    return false;
  size /= BITS_PER_UNIT;
  if (size > 64 / BITS_PER_MARKER)
    return false;
  n->range = size;
  n->n = CMPNOP;
  n->n_ops = 1;

  if (size < 64 / BITS_PER_MARKER)
    n->n &= ((uint64_t) 1 << (size * BITS_PER_MARKER)) - 1;

  return true;
}

/* Check if STMT might be a byte swap or a nop from a memory source and
   return the answer.  If so, REF is that memory source and the base of
   the memory area accessed and the offset of the access from that base
   are recorded in N.  */

bool
find_bswap_or_nop_load (gimple *stmt, tree ref, struct symbolic_number *n)
{
  /* Leaf node is an array or component ref.  Memorize its base and
     offset from base to compare to other such leaf node.  */
  poly_int64 bitsize, bitpos, bytepos;
  machine_mode mode;
  int unsignedp, reversep, volatilep;
  tree offset, base_addr;

  /* Not prepared to handle PDP endian.  */
  if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
    return false;

  if (!gimple_assign_load_p (stmt) || gimple_has_volatile_ops (stmt))
    return false;

  base_addr = get_inner_reference (ref, &bitsize, &bitpos, &offset, &mode,
				   &unsignedp, &reversep, &volatilep);

  if (TREE_CODE (base_addr) == TARGET_MEM_REF)
    /* Do not rewrite TARGET_MEM_REF.  */
    return false;
  else if (TREE_CODE (base_addr) == MEM_REF)
    {
      poly_offset_int bit_offset = 0;
      tree off = TREE_OPERAND (base_addr, 1);

      if (!integer_zerop (off))
	{
	  poly_offset_int boff = mem_ref_offset (base_addr);
	  boff <<= LOG2_BITS_PER_UNIT;
	  bit_offset += boff;
	}

      base_addr = TREE_OPERAND (base_addr, 0);

      /* Avoid returning a negative bitpos as this may wreak havoc later.  */
      if (maybe_lt (bit_offset, 0))
	{
	  tree byte_offset = wide_int_to_tree
	    (sizetype, bits_to_bytes_round_down (bit_offset));
	  bit_offset = num_trailing_bits (bit_offset);
	  if (offset)
	    offset = size_binop (PLUS_EXPR, offset, byte_offset);
	  else
	    offset = byte_offset;
	}

      bitpos += bit_offset.force_shwi ();
    }
  else
    base_addr = build_fold_addr_expr (base_addr);

  if (!multiple_p (bitpos, BITS_PER_UNIT, &bytepos))
    return false;
  if (!multiple_p (bitsize, BITS_PER_UNIT))
    return false;
  if (reversep)
    return false;

  if (!init_symbolic_number (n, ref))
    return false;
  n->base_addr = base_addr;
  n->offset = offset;
  n->bytepos = bytepos;
  n->alias_set = reference_alias_ptr_type (ref);
  n->vuse = gimple_vuse (stmt);
  return true;
}

/* Compute the symbolic number N representing the result of a bitwise OR,
   bitwise XOR or plus on two symbolic numbers N1 and N2 whose source
   statements are respectively SOURCE_STMT1 and SOURCE_STMT2.  CODE is the
   operation.  */

gimple *
perform_symbolic_merge (gimple *source_stmt1, struct symbolic_number *n1,
			gimple *source_stmt2, struct symbolic_number *n2,
			struct symbolic_number *n, enum tree_code code)
{
  int i, size;
  uint64_t mask;
  gimple *source_stmt;
  struct symbolic_number *n_start;

  tree rhs1 = gimple_assign_rhs1 (source_stmt1);
  if (TREE_CODE (rhs1) == BIT_FIELD_REF
      && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME)
    rhs1 = TREE_OPERAND (rhs1, 0);
  tree rhs2 = gimple_assign_rhs1 (source_stmt2);
  if (TREE_CODE (rhs2) == BIT_FIELD_REF
      && TREE_CODE (TREE_OPERAND (rhs2, 0)) == SSA_NAME)
    rhs2 = TREE_OPERAND (rhs2, 0);

  /* Sources are different, cancel bswap if they are not memory locations
     with the same base (array, structure, ...).  */
  if (rhs1 != rhs2)
    {
      uint64_t inc;
      HOST_WIDE_INT start1, start2, start_sub, end_sub, end1, end2, end;
      struct symbolic_number *toinc_n_ptr, *n_end;
      basic_block bb1, bb2;

      if (!n1->base_addr || !n2->base_addr
	  || !operand_equal_p (n1->base_addr, n2->base_addr, 0))
	return NULL;

      if (!n1->offset != !n2->offset
	  || (n1->offset && !operand_equal_p (n1->offset, n2->offset, 0)))
	return NULL;

      start1 = 0;
      if (!(n2->bytepos - n1->bytepos).is_constant (&start2))
	return NULL;

      if (start1 < start2)
	{
	  n_start = n1;
	  start_sub = start2 - start1;
	}
      else
	{
	  n_start = n2;
	  start_sub = start1 - start2;
	}

      bb1 = gimple_bb (source_stmt1);
      bb2 = gimple_bb (source_stmt2);
      if (dominated_by_p (CDI_DOMINATORS, bb1, bb2))
	source_stmt = source_stmt1;
      else
	source_stmt = source_stmt2;

      /* Find the highest address at which a load is performed and
	 compute related info.  */
      end1 = start1 + (n1->range - 1);
      end2 = start2 + (n2->range - 1);
      if (end1 < end2)
	{
	  end = end2;
	  end_sub = end2 - end1;
	}
      else
	{
	  end = end1;
	  end_sub = end1 - end2;
	}
      n_end = (end2 > end1) ? n2 : n1;

      /* Find symbolic number whose lsb is the most significant.  */
      if (BYTES_BIG_ENDIAN)
	toinc_n_ptr = (n_end == n1) ? n2 : n1;
      else
	toinc_n_ptr = (n_start == n1) ? n2 : n1;

      n->range = end - MIN (start1, start2) + 1;

      /* Check that the range of memory covered can be represented by
	 a symbolic number.  */
      if (n->range > 64 / BITS_PER_MARKER)
	return NULL;

      /* Reinterpret byte marks in symbolic number holding the value of
	 bigger weight according to target endianness.  */
      inc = BYTES_BIG_ENDIAN ? end_sub : start_sub;
      size = TYPE_PRECISION (n1->type) / BITS_PER_UNIT;
      for (i = 0; i < size; i++, inc <<= BITS_PER_MARKER)
	{
	  unsigned marker
	    = (toinc_n_ptr->n >> (i * BITS_PER_MARKER)) & MARKER_MASK;
	  if (marker && marker != MARKER_BYTE_UNKNOWN)
	    toinc_n_ptr->n += inc;
	}
    }
  else
    {
      n->range = n1->range;
      n_start = n1;
      source_stmt = source_stmt1;
    }

  if (!n1->alias_set
      || alias_ptr_types_compatible_p (n1->alias_set, n2->alias_set))
    n->alias_set = n1->alias_set;
  else
    n->alias_set = ptr_type_node;
  n->vuse = n_start->vuse;
  n->base_addr = n_start->base_addr;
  n->offset = n_start->offset;
  n->src = n_start->src;
  n->bytepos = n_start->bytepos;
  n->type = n_start->type;
  size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
  uint64_t res_n = n1->n | n2->n;

  for (i = 0, mask = MARKER_MASK; i < size; i++, mask <<= BITS_PER_MARKER)
    {
      uint64_t masked1, masked2;

      masked1 = n1->n & mask;
      masked2 = n2->n & mask;
      /* If at least one byte is 0, all of 0 | x == 0 ^ x == 0 + x == x.  */
      if (masked1 && masked2)
	{
	  /* + can carry into upper bits, just punt.  */
	  if (code == PLUS_EXPR)
	    return NULL;
	  /* x | x is still x.  */
	  if (code == BIT_IOR_EXPR && masked1 == masked2)
	    continue;
	  if (code == BIT_XOR_EXPR)
	    {
	      /* x ^ x is 0, but MARKER_BYTE_UNKNOWN stands for
		 unknown values and unknown ^ unknown is unknown.  */
	      if (masked1 == masked2
		  && masked1 != ((uint64_t) MARKER_BYTE_UNKNOWN
				 << i * BITS_PER_MARKER))
		{
		  res_n &= ~mask;
		  continue;
		}
	    }
	  /* Otherwise set the byte to unknown, it might still be
	     later masked off.  */
	  res_n |= mask;
	}
    }
  n->n = res_n;
  n->n_ops = n1->n_ops + n2->n_ops;

  return source_stmt;
}

/* find_bswap_or_nop_1 invokes itself recursively with N and tries to perform
   the operation given by the rhs of STMT on the result.  If the operation
   can successfully be executed, the function returns a gimple stmt whose
   rhs's first tree is the expression of the source operand; otherwise it
   returns NULL.  */

gimple *
find_bswap_or_nop_1 (gimple *stmt, struct symbolic_number *n, int limit)
{
  enum tree_code code;
  tree rhs1, rhs2 = NULL;
  gimple *rhs1_stmt, *rhs2_stmt, *source_stmt1;
  enum gimple_rhs_class rhs_class;

  if (!limit || !is_gimple_assign (stmt))
    return NULL;

  rhs1 = gimple_assign_rhs1 (stmt);

  if (find_bswap_or_nop_load (stmt, rhs1, n))
    return stmt;

  /* Handle BIT_FIELD_REF.  */
  if (TREE_CODE (rhs1) == BIT_FIELD_REF
      && TREE_CODE (TREE_OPERAND (rhs1, 0)) == SSA_NAME)
    {
      if (!tree_fits_uhwi_p (TREE_OPERAND (rhs1, 1))
	  || !tree_fits_uhwi_p (TREE_OPERAND (rhs1, 2)))
	return NULL;

      unsigned HOST_WIDE_INT bitsize = tree_to_uhwi (TREE_OPERAND (rhs1, 1));
      unsigned HOST_WIDE_INT bitpos = tree_to_uhwi (TREE_OPERAND (rhs1, 2));
      if (bitpos % BITS_PER_UNIT == 0
	  && bitsize % BITS_PER_UNIT == 0
	  && init_symbolic_number (n, TREE_OPERAND (rhs1, 0)))
	{
	  /* Handle big-endian bit numbering in BIT_FIELD_REF.  */
	  if (BYTES_BIG_ENDIAN)
	    bitpos = TYPE_PRECISION (n->type) - bitpos - bitsize;

	  /* Shift.  */
	  if (!do_shift_rotate (RSHIFT_EXPR, n, bitpos))
	    return NULL;

	  /* Mask.  */
	  uint64_t mask = 0;
	  uint64_t tmp = (1 << BITS_PER_UNIT) - 1;
	  for (unsigned i = 0; i < bitsize / BITS_PER_UNIT;
	       i++, tmp <<= BITS_PER_UNIT)
	    mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);
	  n->n &= mask;

	  /* Convert.  */
	  n->type = TREE_TYPE (rhs1);
	  if (!n->base_addr)
	    n->range = TYPE_PRECISION (n->type) / BITS_PER_UNIT;

	  return verify_symbolic_number_p (n, stmt) ? stmt : NULL;
	}

      return NULL;
    }

  if (TREE_CODE (rhs1) != SSA_NAME)
    return NULL;

  code = gimple_assign_rhs_code (stmt);
  rhs_class = gimple_assign_rhs_class (stmt);
  rhs1_stmt = SSA_NAME_DEF_STMT (rhs1);

  if (rhs_class == GIMPLE_BINARY_RHS)
    rhs2 = gimple_assign_rhs2 (stmt);

  /* Handle unary rhs and binary rhs with integer constants as second
     operand.  */

  if (rhs_class == GIMPLE_UNARY_RHS
      || (rhs_class == GIMPLE_BINARY_RHS
	  && TREE_CODE (rhs2) == INTEGER_CST))
    {
      if (code != BIT_AND_EXPR
	  && code != LSHIFT_EXPR
	  && code != RSHIFT_EXPR
	  && code != LROTATE_EXPR
	  && code != RROTATE_EXPR
	  && !CONVERT_EXPR_CODE_P (code))
	return NULL;

      source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, n, limit - 1);

      /* If find_bswap_or_nop_1 returned NULL, STMT is a leaf node and
	 we have to initialize the symbolic number.  */
      if (!source_stmt1)
	{
	  if (gimple_assign_load_p (stmt)
	      || !init_symbolic_number (n, rhs1))
	    return NULL;
	  source_stmt1 = stmt;
	}

      switch (code)
	{
	case BIT_AND_EXPR:
	  {
	    int i, size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
	    uint64_t val = int_cst_value (rhs2), mask = 0;
	    uint64_t tmp = (1 << BITS_PER_UNIT) - 1;

	    /* Only constants masking full bytes are allowed.  */
	    for (i = 0; i < size; i++, tmp <<= BITS_PER_UNIT)
	      if ((val & tmp) != 0 && (val & tmp) != tmp)
		return NULL;
	      else if (val & tmp)
		mask |= (uint64_t) MARKER_MASK << (i * BITS_PER_MARKER);

	    n->n &= mask;
	  }
	  break;
	case LSHIFT_EXPR:
	case RSHIFT_EXPR:
	case LROTATE_EXPR:
	case RROTATE_EXPR:
	  if (!do_shift_rotate (code, n, (int) TREE_INT_CST_LOW (rhs2)))
	    return NULL;
	  break;
	CASE_CONVERT:
	  {
	    int i, type_size, old_type_size;
	    tree type;

	    type = TREE_TYPE (gimple_assign_lhs (stmt));
	    type_size = TYPE_PRECISION (type);
	    if (type_size % BITS_PER_UNIT != 0)
	      return NULL;
	    type_size /= BITS_PER_UNIT;
	    if (type_size > 64 / BITS_PER_MARKER)
	      return NULL;

	    /* Sign extension: result is dependent on the value.  */
	    old_type_size = TYPE_PRECISION (n->type) / BITS_PER_UNIT;
	    if (!TYPE_UNSIGNED (n->type) && type_size > old_type_size
		&& HEAD_MARKER (n->n, old_type_size))
	      for (i = 0; i < type_size - old_type_size; i++)
		n->n |= (uint64_t) MARKER_BYTE_UNKNOWN
			<< ((type_size - 1 - i) * BITS_PER_MARKER);

	    if (type_size < 64 / BITS_PER_MARKER)
	      {
		/* If STMT casts to a smaller type mask out the bits not
		   belonging to the target type.  */
		n->n &= ((uint64_t) 1 << (type_size * BITS_PER_MARKER)) - 1;
	      }
	    n->type = type;
	    if (!n->base_addr)
	      n->range = type_size;
	  }
	  break;
	default:
	  return NULL;
	};
      return verify_symbolic_number_p (n, stmt) ? source_stmt1 : NULL;
    }

  /* Handle binary rhs.  */

  if (rhs_class == GIMPLE_BINARY_RHS)
    {
      struct symbolic_number n1, n2;
      gimple *source_stmt, *source_stmt2;

      if (!rhs2 || TREE_CODE (rhs2) != SSA_NAME)
	return NULL;

      rhs2_stmt = SSA_NAME_DEF_STMT (rhs2);

      switch (code)
	{
	case BIT_IOR_EXPR:
	case BIT_XOR_EXPR:
	case PLUS_EXPR:
	  source_stmt1 = find_bswap_or_nop_1 (rhs1_stmt, &n1, limit - 1);

	  if (!source_stmt1)
	    return NULL;

	  source_stmt2 = find_bswap_or_nop_1 (rhs2_stmt, &n2, limit - 1);

	  if (!source_stmt2)
	    return NULL;

	  if (TYPE_PRECISION (n1.type) != TYPE_PRECISION (n2.type))
	    return NULL;

	  if (n1.vuse != n2.vuse)
	    return NULL;

	  source_stmt
	    = perform_symbolic_merge (source_stmt1, &n1, source_stmt2, &n2, n,
				      code);

	  if (!source_stmt)
	    return NULL;

	  if (!verify_symbolic_number_p (n, stmt))
	    return NULL;

	  break;
	default:
	  return NULL;
	}
      return source_stmt;
    }
  return NULL;
}

/* Helper for find_bswap_or_nop and try_coalesce_bswap to compute
   *CMPXCHG, *CMPNOP and adjust *N.  */

void
find_bswap_or_nop_finalize (struct symbolic_number *n, uint64_t *cmpxchg,
			    uint64_t *cmpnop, bool *cast64_to_32)
{
  unsigned rsize;
  uint64_t tmpn, mask;

  /* The number which the find_bswap_or_nop_1 result should match in order
     to have a full byte swap.  The number is shifted to the right
     according to the size of the symbolic number before using it.  */
  *cmpxchg = CMPXCHG;
  *cmpnop = CMPNOP;
  *cast64_to_32 = false;

  /* Find real size of result (highest non-zero byte).  */
  if (n->base_addr)
    for (tmpn = n->n, rsize = 0; tmpn; tmpn >>= BITS_PER_MARKER, rsize++);
  else
    rsize = n->range;

  /* Zero out the bits corresponding to untouched bytes in original gimple
     expression.  */
  if (n->range < (int) sizeof (int64_t))
    {
      mask = ((uint64_t) 1 << (n->range * BITS_PER_MARKER)) - 1;
      if (n->base_addr == NULL
	  && n->range == 4
	  && int_size_in_bytes (TREE_TYPE (n->src)) == 8)
	{
	  /* If all bytes in n->n are either 0 or in [5..8] range, this
	     might be a candidate for (unsigned) __builtin_bswap64 (src).
	     It is not worth it for (unsigned short) __builtin_bswap64 (src)
	     or (unsigned short) __builtin_bswap32 (src).  */
	  *cast64_to_32 = true;
	  for (tmpn = n->n; tmpn; tmpn >>= BITS_PER_MARKER)
	    if ((tmpn & MARKER_MASK)
		&& ((tmpn & MARKER_MASK) <= 4 || (tmpn & MARKER_MASK) > 8))
	      {
		*cast64_to_32 = false;
		break;
	      }
	}
      if (*cast64_to_32)
	*cmpxchg &= mask;
      else
	*cmpxchg >>= (64 / BITS_PER_MARKER - n->range) * BITS_PER_MARKER;
      *cmpnop &= mask;
    }

  /* Zero out the bits corresponding to unused bytes in the result of the
     gimple expression.  */
  if (rsize < n->range)
    {
      if (BYTES_BIG_ENDIAN)
	{
	  mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
	  *cmpxchg &= mask;
	  if (n->range - rsize == sizeof (int64_t))
	    *cmpnop = 0;
	  else
	    *cmpnop >>= (n->range - rsize) * BITS_PER_MARKER;
	}
      else
	{
	  mask = ((uint64_t) 1 << (rsize * BITS_PER_MARKER)) - 1;
	  if (n->range - rsize == sizeof (int64_t))
	    *cmpxchg = 0;
	  else
	    *cmpxchg >>= (n->range - rsize) * BITS_PER_MARKER;
	  *cmpnop &= mask;
	}
      n->range = rsize;
    }

  if (*cast64_to_32)
    n->range = 8;
  n->range *= BITS_PER_UNIT;
}

/* Check if STMT completes a bswap implementation or a read in a given
   endianness consisting of ORs, SHIFTs and ANDs and sets *BSWAP
   accordingly.  It also sets N to represent the kind of operations
   performed: size of the resulting expression and whether it works on
   a memory source, and if so alias-set and vuse.  Finally, the function
   returns a stmt whose rhs's first tree is the source expression.  */

gimple *
find_bswap_or_nop (gimple *stmt, struct symbolic_number *n, bool *bswap,
		   bool *cast64_to_32, uint64_t *mask)
{
  tree type_size = TYPE_SIZE_UNIT (TREE_TYPE (gimple_get_lhs (stmt)));
  if (!tree_fits_uhwi_p (type_size))
    return NULL;

  /* The last parameter determines the depth search limit.  It usually
     correlates directly to the number n of bytes to be touched.  We
     increase that number by 2 * (log2(n) + 1) here in order to also
     cover signed -> unsigned conversions of the src operand as can be seen
     in libgcc, and for initial shift/and operation of the src operand.  */
  int limit = tree_to_uhwi (type_size);
  limit += 2 * (1 + (int) ceil_log2 ((unsigned HOST_WIDE_INT) limit));
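  /* For instance, a 4-byte lhs gives limit = 4 + 2 * (1 + ceil_log2 (4))
     = 4 + 2 * (1 + 2) = 10 recursion levels.  */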
  gimple *ins_stmt = find_bswap_or_nop_1 (stmt, n, limit);

  if (!ins_stmt)
    {
      if (gimple_assign_rhs_code (stmt) != CONSTRUCTOR
	  || BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
	return NULL;
      unsigned HOST_WIDE_INT sz = tree_to_uhwi (type_size) * BITS_PER_UNIT;
      if (sz != 16 && sz != 32 && sz != 64)
	return NULL;
      tree rhs = gimple_assign_rhs1 (stmt);
      if (CONSTRUCTOR_NELTS (rhs) == 0)
	return NULL;
      tree eltype = TREE_TYPE (TREE_TYPE (rhs));
      unsigned HOST_WIDE_INT eltsz
	= int_size_in_bytes (eltype) * BITS_PER_UNIT;
      if (TYPE_PRECISION (eltype) != eltsz)
	return NULL;
      constructor_elt *elt;
      unsigned int i;
      tree type = build_nonstandard_integer_type (sz, 1);
      FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (rhs), i, elt)
	{
	  if (TREE_CODE (elt->value) != SSA_NAME
	      || !INTEGRAL_TYPE_P (TREE_TYPE (elt->value)))
	    return NULL;
	  struct symbolic_number n1;
	  gimple *source_stmt
	    = find_bswap_or_nop_1 (SSA_NAME_DEF_STMT (elt->value), &n1,
				   limit - 1);

	  if (!source_stmt)
	    return NULL;

	  n1.type = type;
	  if (!n1.base_addr)
	    n1.range = sz / BITS_PER_UNIT;

	  if (i == 0)
	    {
	      ins_stmt = source_stmt;
	      *n = n1;
	    }
	  else
	    {
	      if (n->vuse != n1.vuse)
		return NULL;

	      struct symbolic_number n0 = *n;

	      if (!BYTES_BIG_ENDIAN)
		{
		  if (!do_shift_rotate (LSHIFT_EXPR, &n1, i * eltsz))
		    return NULL;
		}
	      else if (!do_shift_rotate (LSHIFT_EXPR, &n0, eltsz))
		return NULL;
	      ins_stmt
		= perform_symbolic_merge (ins_stmt, &n0, source_stmt, &n1, n,
					  BIT_IOR_EXPR);

	      if (!ins_stmt)
		return NULL;
	    }
	}
    }

  uint64_t cmpxchg, cmpnop;
  find_bswap_or_nop_finalize (n, &cmpxchg, &cmpnop, cast64_to_32);

  /* A complete byte swap should make the symbolic number start with
     the largest digit in the highest order byte.  An unchanged symbolic
     number indicates a read with the same endianness as the target
     architecture.  */
  *mask = ~(uint64_t) 0;
  if (n->n == cmpnop)
    *bswap = false;
  else if (n->n == cmpxchg)
    *bswap = true;
  else
    {
      int set = 0;
      for (uint64_t msk = MARKER_MASK; msk; msk <<= BITS_PER_MARKER)
	if ((n->n & msk) == 0)
	  *mask &= ~msk;
	else if ((n->n & msk) == (cmpxchg & msk))
	  set++;
	else
	  return NULL;
      if (set < 2)
	return NULL;
      *bswap = true;
    }
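
  /* At this point a partial match is still accepted: e.g. a 32-bit value
     in which at least two bytes come from the byte-swapped source while
     the remaining bytes are zero.  The zero bytes have been cleared in
     *MASK above, and the caller materializes such a match as a full bswap
     followed by a BIT_AND_EXPR with that mask.  */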

  /* Useless bit manipulation performed by code.  */
  if (!n->base_addr && n->n == cmpnop && n->n_ops == 1)
    return NULL;

  return ins_stmt;
}

const pass_data pass_data_optimize_bswap =
{
  GIMPLE_PASS, /* type */
  "bswap", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_ssa, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_optimize_bswap : public gimple_opt_pass
{
public:
  pass_optimize_bswap (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_optimize_bswap, ctxt)
  {}

  /* opt_pass methods: */
  bool gate (function *) final override
    {
      return flag_expensive_optimizations && optimize && BITS_PER_UNIT == 8;
    }

  unsigned int execute (function *) final override;

}; // class pass_optimize_bswap
1048
d02a8b63
JJ
1049/* Helper function for bswap_replace. Build VIEW_CONVERT_EXPR from
1050 VAL to TYPE. If VAL has different type size, emit a NOP_EXPR cast
1051 first. */
1052
1053static tree
45ff1251
JJ
1054bswap_view_convert (gimple_stmt_iterator *gsi, tree type, tree val,
1055 bool before)
d02a8b63 1056{
a4001578
JJ
1057 gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (val))
1058 || POINTER_TYPE_P (TREE_TYPE (val)));
d02a8b63
JJ
1059 if (TYPE_SIZE (type) != TYPE_SIZE (TREE_TYPE (val)))
1060 {
1061 HOST_WIDE_INT prec = TREE_INT_CST_LOW (TYPE_SIZE (type));
a4001578
JJ
1062 if (POINTER_TYPE_P (TREE_TYPE (val)))
1063 {
1064 gimple *g
1065 = gimple_build_assign (make_ssa_name (pointer_sized_int_node),
1066 NOP_EXPR, val);
45ff1251
JJ
1067 if (before)
1068 gsi_insert_before (gsi, g, GSI_SAME_STMT);
1069 else
1070 gsi_insert_after (gsi, g, GSI_NEW_STMT);
a4001578
JJ
1071 val = gimple_assign_lhs (g);
1072 }
d02a8b63
JJ
1073 tree itype = build_nonstandard_integer_type (prec, 1);
1074 gimple *g = gimple_build_assign (make_ssa_name (itype), NOP_EXPR, val);
45ff1251
JJ
1075 if (before)
1076 gsi_insert_before (gsi, g, GSI_SAME_STMT);
1077 else
1078 gsi_insert_after (gsi, g, GSI_NEW_STMT);
d02a8b63
JJ
1079 val = gimple_assign_lhs (g);
1080 }
1081 return build1 (VIEW_CONVERT_EXPR, type, val);
1082}

/* Perform the bswap optimization: replace the expression computed in the rhs
   of gsi_stmt (GSI) (or if NULL add instead of replace) by an equivalent
   bswap, load or load + bswap expression.
   Which of these alternatives replaces the rhs is given by N->base_addr (non
   null if a load is needed) and BSWAP.  The type, VUSE and alias-set of the
   load to perform are also given in N while the builtin bswap to invoke is
   given in FNDECL.  Finally, if a load is involved, INS_STMT refers to one
   of the load statements involved to construct the rhs in gsi_stmt (GSI) and
   N->range gives the size of the rhs expression for maintaining some
   statistics.

   Note that if the replacement involves a load and if gsi_stmt (GSI) is
   non-NULL, that stmt is moved just after INS_STMT to do the load with the
   same VUSE which can lead to gsi_stmt (GSI) changing basic block.  */

tree
bswap_replace (gimple_stmt_iterator gsi, gimple *ins_stmt, tree fndecl,
	       tree bswap_type, tree load_type, struct symbolic_number *n,
	       bool bswap, uint64_t mask)
{
  tree src, tmp, tgt = NULL_TREE;
  gimple *bswap_stmt, *mask_stmt = NULL;
  tree_code conv_code = NOP_EXPR;

  gimple *cur_stmt = gsi_stmt (gsi);
  src = n->src;
  if (cur_stmt)
    {
      tgt = gimple_assign_lhs (cur_stmt);
      if (gimple_assign_rhs_code (cur_stmt) == CONSTRUCTOR
	  && tgt
	  && VECTOR_TYPE_P (TREE_TYPE (tgt)))
	conv_code = VIEW_CONVERT_EXPR;
    }

  /* Need to load the value from memory first.  */
  if (n->base_addr)
    {
      gimple_stmt_iterator gsi_ins = gsi;
      if (ins_stmt)
	gsi_ins = gsi_for_stmt (ins_stmt);
      tree addr_expr, addr_tmp, val_expr, val_tmp;
      tree load_offset_ptr, aligned_load_type;
      gimple *load_stmt;
      unsigned align = get_object_alignment (src);
      poly_int64 load_offset = 0;

      if (cur_stmt)
	{
	  basic_block ins_bb = gimple_bb (ins_stmt);
	  basic_block cur_bb = gimple_bb (cur_stmt);
	  if (!dominated_by_p (CDI_DOMINATORS, cur_bb, ins_bb))
	    return NULL_TREE;

	  /* Move cur_stmt just before one of the loads of the original
	     to ensure it has the same VUSE.  See PR61517 for what could
	     go wrong.  */
	  if (gimple_bb (cur_stmt) != gimple_bb (ins_stmt))
	    reset_flow_sensitive_info (gimple_assign_lhs (cur_stmt));
	  gsi_move_before (&gsi, &gsi_ins);
	  gsi = gsi_for_stmt (cur_stmt);
	}
      else
	gsi = gsi_ins;

      /* Compute address to load from and cast according to the size
	 of the load.  */
      addr_expr = build_fold_addr_expr (src);
      if (is_gimple_mem_ref_addr (addr_expr))
	addr_tmp = unshare_expr (addr_expr);
      else
	{
	  addr_tmp = unshare_expr (n->base_addr);
	  if (!is_gimple_mem_ref_addr (addr_tmp))
	    addr_tmp = force_gimple_operand_gsi_1 (&gsi, addr_tmp,
						   is_gimple_mem_ref_addr,
						   NULL_TREE, true,
						   GSI_SAME_STMT);
	  load_offset = n->bytepos;
	  if (n->offset)
	    {
	      tree off
		= force_gimple_operand_gsi (&gsi, unshare_expr (n->offset),
					    true, NULL_TREE, true,
					    GSI_SAME_STMT);
	      gimple *stmt
		= gimple_build_assign (make_ssa_name (TREE_TYPE (addr_tmp)),
				       POINTER_PLUS_EXPR, addr_tmp, off);
	      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
	      addr_tmp = gimple_assign_lhs (stmt);
	    }
	}

      /* Perform the load.  */
      aligned_load_type = load_type;
      if (align < TYPE_ALIGN (load_type))
	aligned_load_type = build_aligned_type (load_type, align);
      load_offset_ptr = build_int_cst (n->alias_set, load_offset);
      val_expr = fold_build2 (MEM_REF, aligned_load_type, addr_tmp,
			      load_offset_ptr);

      if (!bswap)
	{
	  if (n->range == 16)
	    nop_stats.found_16bit++;
	  else if (n->range == 32)
	    nop_stats.found_32bit++;
	  else
	    {
	      gcc_assert (n->range == 64);
	      nop_stats.found_64bit++;
	    }

	  /* Convert the result of load if necessary.  */
	  if (tgt && !useless_type_conversion_p (TREE_TYPE (tgt), load_type))
	    {
	      val_tmp = make_temp_ssa_name (aligned_load_type, NULL,
					    "load_dst");
	      load_stmt = gimple_build_assign (val_tmp, val_expr);
	      gimple_set_vuse (load_stmt, n->vuse);
	      gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
	      if (conv_code == VIEW_CONVERT_EXPR)
		val_tmp = bswap_view_convert (&gsi, TREE_TYPE (tgt), val_tmp,
					      true);
	      gimple_assign_set_rhs_with_ops (&gsi, conv_code, val_tmp);
	      update_stmt (cur_stmt);
	    }
	  else if (cur_stmt)
	    {
	      gimple_assign_set_rhs_with_ops (&gsi, MEM_REF, val_expr);
	      gimple_set_vuse (cur_stmt, n->vuse);
	      update_stmt (cur_stmt);
	    }
	  else
	    {
	      tgt = make_ssa_name (load_type);
	      cur_stmt = gimple_build_assign (tgt, MEM_REF, val_expr);
	      gimple_set_vuse (cur_stmt, n->vuse);
	      gsi_insert_before (&gsi, cur_stmt, GSI_SAME_STMT);
	    }

	  if (dump_file)
	    {
	      fprintf (dump_file,
		       "%d bit load in target endianness found at: ",
		       (int) n->range);
	      print_gimple_stmt (dump_file, cur_stmt, 0);
	    }
	  return tgt;
	}
      else
	{
	  val_tmp = make_temp_ssa_name (aligned_load_type, NULL, "load_dst");
	  load_stmt = gimple_build_assign (val_tmp, val_expr);
	  gimple_set_vuse (load_stmt, n->vuse);
	  gsi_insert_before (&gsi, load_stmt, GSI_SAME_STMT);
	}
      src = val_tmp;
    }
  else if (!bswap)
    {
      gimple *g = NULL;
      if (tgt && !useless_type_conversion_p (TREE_TYPE (tgt), TREE_TYPE (src)))
	{
	  if (!is_gimple_val (src))
	    return NULL_TREE;
	  if (conv_code == VIEW_CONVERT_EXPR)
	    src = bswap_view_convert (&gsi, TREE_TYPE (tgt), src, true);
	  g = gimple_build_assign (tgt, conv_code, src);
	}
      else if (cur_stmt)
	g = gimple_build_assign (tgt, src);
      else
	tgt = src;
      if (n->range == 16)
	nop_stats.found_16bit++;
      else if (n->range == 32)
	nop_stats.found_32bit++;
      else
	{
	  gcc_assert (n->range == 64);
	  nop_stats.found_64bit++;
	}
      if (dump_file)
	{
	  fprintf (dump_file,
		   "%d bit reshuffle in target endianness found at: ",
		   (int) n->range);
	  if (cur_stmt)
	    print_gimple_stmt (dump_file, cur_stmt, 0);
	  else
	    {
	      print_generic_expr (dump_file, tgt, TDF_NONE);
	      fprintf (dump_file, "\n");
	    }
	}
      if (cur_stmt)
	gsi_replace (&gsi, g, true);
      return tgt;
    }
  else if (TREE_CODE (src) == BIT_FIELD_REF)
    src = TREE_OPERAND (src, 0);

  if (n->range == 16)
    bswap_stats.found_16bit++;
  else if (n->range == 32)
    bswap_stats.found_32bit++;
  else
    {
      gcc_assert (n->range == 64);
      bswap_stats.found_64bit++;
    }

  tmp = src;

  /* Convert the src expression if necessary.  */
  if (!useless_type_conversion_p (TREE_TYPE (tmp), bswap_type))
    {
      gimple *convert_stmt;

      tmp = make_temp_ssa_name (bswap_type, NULL, "bswapsrc");
      convert_stmt = gimple_build_assign (tmp, NOP_EXPR, src);
      gsi_insert_before (&gsi, convert_stmt, GSI_SAME_STMT);
    }

  /* Canonical form for 16 bit bswap is a rotate expression.  Only 16bit values
     are considered as rotation of 2N bit values by N bits is generally not
     equivalent to a bswap.  Consider for instance 0x01020304 r>> 16 which
     gives 0x03040102 while a bswap for that value is 0x04030201.  */
  if (bswap && n->range == 16)
    {
      tree count = build_int_cst (NULL, BITS_PER_UNIT);
      src = fold_build2 (LROTATE_EXPR, bswap_type, tmp, count);
      bswap_stmt = gimple_build_assign (NULL, src);
    }
  else
    bswap_stmt = gimple_build_call (fndecl, 1, tmp);

  if (tgt == NULL_TREE)
    tgt = make_ssa_name (bswap_type);
  tmp = tgt;

  if (mask != ~(uint64_t) 0)
    {
      tree m = build_int_cst (bswap_type, mask);
      tmp = make_temp_ssa_name (bswap_type, NULL, "bswapdst");
      gimple_set_lhs (bswap_stmt, tmp);
      mask_stmt = gimple_build_assign (tgt, BIT_AND_EXPR, tmp, m);
      tmp = tgt;
    }

  /* Convert the result if necessary.  */
  if (!useless_type_conversion_p (TREE_TYPE (tgt), bswap_type))
    {
      tmp = make_temp_ssa_name (bswap_type, NULL, "bswapdst");
      tree atmp = tmp;
      gimple_stmt_iterator gsi2 = gsi;
      if (conv_code == VIEW_CONVERT_EXPR)
	atmp = bswap_view_convert (&gsi2, TREE_TYPE (tgt), tmp, false);
      gimple *convert_stmt = gimple_build_assign (tgt, conv_code, atmp);
      gsi_insert_after (&gsi2, convert_stmt, GSI_SAME_STMT);
    }

  gimple_set_lhs (mask_stmt ? mask_stmt : bswap_stmt, tmp);

  if (dump_file)
    {
      fprintf (dump_file, "%d bit bswap implementation found at: ",
	       (int) n->range);
      if (cur_stmt)
	print_gimple_stmt (dump_file, cur_stmt, 0);
      else
	{
	  print_generic_expr (dump_file, tgt, TDF_NONE);
	  fprintf (dump_file, "\n");
	}
    }

  if (cur_stmt)
    {
      if (mask_stmt)
	gsi_insert_after (&gsi, mask_stmt, GSI_SAME_STMT);
      gsi_insert_after (&gsi, bswap_stmt, GSI_SAME_STMT);
      gsi_remove (&gsi, true);
    }
  else
    {
      gsi_insert_before (&gsi, bswap_stmt, GSI_SAME_STMT);
      if (mask_stmt)
	gsi_insert_before (&gsi, mask_stmt, GSI_SAME_STMT);
    }
  return tgt;
}
1377
a7553ad6
JJ
1378/* Try to optimize an assignment CUR_STMT with CONSTRUCTOR on the rhs
1379 using bswap optimizations. CDI_DOMINATORS need to be
1380 computed on entry. Return true if it has been optimized and
1381 TODO_update_ssa is needed. */
1382
1383static bool
1384maybe_optimize_vector_constructor (gimple *cur_stmt)
1385{
1386 tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
1387 struct symbolic_number n;
1388 bool bswap;
1389
1390 gcc_assert (is_gimple_assign (cur_stmt)
1391 && gimple_assign_rhs_code (cur_stmt) == CONSTRUCTOR);
1392
1393 tree rhs = gimple_assign_rhs1 (cur_stmt);
1394 if (!VECTOR_TYPE_P (TREE_TYPE (rhs))
1395 || !INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (rhs)))
1396 || gimple_assign_lhs (cur_stmt) == NULL_TREE)
1397 return false;
1398
1399 HOST_WIDE_INT sz = int_size_in_bytes (TREE_TYPE (rhs)) * BITS_PER_UNIT;
1400 switch (sz)
1401 {
1402 case 16:
1403 load_type = bswap_type = uint16_type_node;
1404 break;
1405 case 32:
1406 if (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
1407 && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing)
1408 {
1409 load_type = uint32_type_node;
1410 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
1411 bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
1412 }
1413 else
1414 return false;
1415 break;
1416 case 64:
1417 if (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
1418 && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing
1419 || (word_mode == SImode
1420 && builtin_decl_explicit_p (BUILT_IN_BSWAP32)
1421 && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing)))
1422 {
1423 load_type = uint64_type_node;
1424 fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
1425 bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
1426 }
1427 else
1428 return false;
1429 break;
1430 default:
1431 return false;
1432 }
1433
b320edc0
JJ
1434 bool cast64_to_32;
1435 uint64_t mask;
1436 gimple *ins_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap,
1437 &cast64_to_32, &mask);
1438 if (!ins_stmt
1439 || n.range != (unsigned HOST_WIDE_INT) sz
1440 || cast64_to_32
1441 || mask != ~(uint64_t) 0)
a7553ad6
JJ
1442 return false;
1443
1444 if (bswap && !fndecl && n.range != 16)
1445 return false;
1446
1447 memset (&nop_stats, 0, sizeof (nop_stats));
1448 memset (&bswap_stats, 0, sizeof (bswap_stats));
1449 return bswap_replace (gsi_for_stmt (cur_stmt), ins_stmt, fndecl,
b320edc0 1450 bswap_type, load_type, &n, bswap, mask) != NULL_TREE;
a7553ad6
JJ
1451}

/* Find manual byte swap implementations as well as loads in a given
   endianness.  Byte swaps are turned into a bswap builtin invocation
   while endian loads are converted to bswap builtin invocation or
   simple load according to the target endianness.  */
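
/* As an illustrative example (assuming a 32-bit unsigned y), a manual
   byte swap such as

     x = (y >> 24)
	 | ((y >> 8) & 0x0000ff00)
	 | ((y << 8) & 0x00ff0000)
	 | (y << 24);

   is matched by this pass and rewritten into __builtin_bswap32 (y).  */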

unsigned int
pass_optimize_bswap::execute (function *fun)
{
  basic_block bb;
  bool bswap32_p, bswap64_p;
  bool changed = false;
  tree bswap32_type = NULL_TREE, bswap64_type = NULL_TREE;

  bswap32_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
	       && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing);
  bswap64_p = (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
	       && (optab_handler (bswap_optab, DImode) != CODE_FOR_nothing
		   || (bswap32_p && word_mode == SImode)));

  /* Determine the argument type of the builtins.  The code later on
     assumes that the return and argument type are the same.  */
  if (bswap32_p)
    {
      tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
      bswap32_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
    }

  if (bswap64_p)
    {
      tree fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
      bswap64_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
    }

  memset (&nop_stats, 0, sizeof (nop_stats));
  memset (&bswap_stats, 0, sizeof (bswap_stats));
  calculate_dominance_info (CDI_DOMINATORS);

  FOR_EACH_BB_FN (bb, fun)
    {
      gimple_stmt_iterator gsi;

      /* We do a reverse scan for bswap patterns to make sure we get the
	 widest match.  As bswap pattern matching doesn't handle previously
	 inserted smaller bswap replacements as sub-patterns, the wider
	 variant wouldn't be detected.  */
      for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi);)
	{
	  gimple *ins_stmt, *cur_stmt = gsi_stmt (gsi);
	  tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
	  enum tree_code code;
	  struct symbolic_number n;
	  bool bswap, cast64_to_32;
	  uint64_t mask;

	  /* This gsi_prev (&gsi) is not part of the for loop because cur_stmt
	     might be moved to a different basic block by bswap_replace and gsi
	     must not point to it if that's the case.  Moving the gsi_prev
	     there makes sure that gsi points to the statement previous to
	     cur_stmt while still making sure that all statements are
	     considered in this basic block.  */
	  gsi_prev (&gsi);

	  if (!is_gimple_assign (cur_stmt))
	    continue;

	  code = gimple_assign_rhs_code (cur_stmt);
	  switch (code)
	    {
	    case LROTATE_EXPR:
	    case RROTATE_EXPR:
	      if (!tree_fits_uhwi_p (gimple_assign_rhs2 (cur_stmt))
		  || tree_to_uhwi (gimple_assign_rhs2 (cur_stmt))
		     % BITS_PER_UNIT)
		continue;
	      /* Fall through.  */
	    case BIT_IOR_EXPR:
	    case BIT_XOR_EXPR:
	    case PLUS_EXPR:
	      break;
	    case CONSTRUCTOR:
	      {
		tree rhs = gimple_assign_rhs1 (cur_stmt);
		if (VECTOR_TYPE_P (TREE_TYPE (rhs))
		    && INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (rhs))))
		  break;
	      }
	      continue;
	    default:
	      continue;
	    }

	  ins_stmt = find_bswap_or_nop (cur_stmt, &n, &bswap,
					&cast64_to_32, &mask);

	  if (!ins_stmt)
	    continue;

	  switch (n.range)
	    {
	    case 16:
	      /* Already in canonical form, nothing to do.  */
	      if (code == LROTATE_EXPR || code == RROTATE_EXPR)
		continue;
	      load_type = bswap_type = uint16_type_node;
	      break;
	    case 32:
	      load_type = uint32_type_node;
	      if (bswap32_p)
		{
		  fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
		  bswap_type = bswap32_type;
		}
	      break;
	    case 64:
	      load_type = uint64_type_node;
	      if (bswap64_p)
		{
		  fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
		  bswap_type = bswap64_type;
		}
	      break;
	    default:
	      continue;
	    }

	  if (bswap && !fndecl && n.range != 16)
	    continue;

	  if (bswap_replace (gsi_for_stmt (cur_stmt), ins_stmt, fndecl,
			     bswap_type, load_type, &n, bswap, mask))
	    changed = true;
	}
    }

  statistics_counter_event (fun, "16-bit nop implementations found",
			    nop_stats.found_16bit);
  statistics_counter_event (fun, "32-bit nop implementations found",
			    nop_stats.found_32bit);
  statistics_counter_event (fun, "64-bit nop implementations found",
			    nop_stats.found_64bit);
  statistics_counter_event (fun, "16-bit bswap implementations found",
			    bswap_stats.found_16bit);
  statistics_counter_event (fun, "32-bit bswap implementations found",
			    bswap_stats.found_32bit);
  statistics_counter_event (fun, "64-bit bswap implementations found",
			    bswap_stats.found_64bit);

  return (changed ? TODO_update_ssa : 0);
}

} // anon namespace

gimple_opt_pass *
make_pass_optimize_bswap (gcc::context *ctxt)
{
  return new pass_optimize_bswap (ctxt);
}

namespace {

/* Struct recording one operand for the store, which is either a constant,
   then VAL represents the constant and all the other fields are zero, or
   a memory load, then VAL represents the reference, BASE_ADDR is non-NULL
   and the other fields also reflect the memory load, or an SSA name, then
   VAL represents the SSA name and all the other fields are zero.  */

class store_operand_info
{
public:
  tree val;
  tree base_addr;
  poly_uint64 bitsize;
  poly_uint64 bitpos;
  poly_uint64 bitregion_start;
  poly_uint64 bitregion_end;
  gimple *stmt;
  bool bit_not_p;
  store_operand_info ();
};

store_operand_info::store_operand_info ()
  : val (NULL_TREE), base_addr (NULL_TREE), bitsize (0), bitpos (0),
    bitregion_start (0), bitregion_end (0), stmt (NULL), bit_not_p (false)
{
}

f663d9ad
KT
1639/* Struct recording the information about a single store of an immediate
1640 to memory. These are created in the first phase and coalesced into
1641 merged_store_group objects in the second phase. */
1642
6c1dae73 1643class store_immediate_info
f663d9ad 1644{
6c1dae73 1645public:
f663d9ad
KT
1646 unsigned HOST_WIDE_INT bitsize;
1647 unsigned HOST_WIDE_INT bitpos;
a62b3dc5
JJ
1648 unsigned HOST_WIDE_INT bitregion_start;
1649 /* This is one past the last bit of the bit region. */
1650 unsigned HOST_WIDE_INT bitregion_end;
f663d9ad
KT
1651 gimple *stmt;
1652 unsigned int order;
e362a897
EB
1653 /* INTEGER_CST for constant store, STRING_CST for string store,
1654 MEM_REF for memory copy, BIT_*_EXPR for logical bitwise operation,
1655 BIT_INSERT_EXPR for bit insertion.
4b84d9b8
JJ
1656 LROTATE_EXPR if it can be only bswap optimized and
1657 ops are not really meaningful.
1658 NOP_EXPR if bswap optimization detected identity, ops
1659 are not meaningful. */
245f6de1 1660 enum tree_code rhs_code;
4b84d9b8
JJ
1661 /* Two fields for bswap optimization purposes. */
1662 struct symbolic_number n;
1663 gimple *ins_stmt;
127ef369 1664 /* True if BIT_{AND,IOR,XOR}_EXPR result is inverted before storing. */
d60edaba 1665 bool bit_not_p;
127ef369
JJ
1666 /* True if ops have been swapped and thus ops[1] represents
1667 rhs1 of BIT_{AND,IOR,XOR}_EXPR and ops[0] represents rhs2. */
1668 bool ops_swapped_p;
629387a6
EB
1669 /* The index number of the landing pad, or 0 if there is none. */
1670 int lp_nr;
245f6de1
JJ
1671 /* Operands. For BIT_*_EXPR rhs_code both operands are used, otherwise
1672 just the first one. */
1673 store_operand_info ops[2];
b5926e23 1674 store_immediate_info (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
a62b3dc5 1675 unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
4b84d9b8 1676 gimple *, unsigned int, enum tree_code,
629387a6 1677 struct symbolic_number &, gimple *, bool, int,
1678 const store_operand_info &,
1679 const store_operand_info &);
1680};
1681
1682store_immediate_info::store_immediate_info (unsigned HOST_WIDE_INT bs,
b5926e23 1683 unsigned HOST_WIDE_INT bp,
1684 unsigned HOST_WIDE_INT brs,
1685 unsigned HOST_WIDE_INT bre,
b5926e23 1686 gimple *st,
1687 unsigned int ord,
1688 enum tree_code rhscode,
1689 struct symbolic_number &nr,
1690 gimple *ins_stmtp,
d60edaba 1691 bool bitnotp,
629387a6 1692 int nr2,
1693 const store_operand_info &op0r,
1694 const store_operand_info &op1r)
a62b3dc5 1695 : bitsize (bs), bitpos (bp), bitregion_start (brs), bitregion_end (bre),
4b84d9b8 1696 stmt (st), order (ord), rhs_code (rhscode), n (nr),
629387a6 1697 ins_stmt (ins_stmtp), bit_not_p (bitnotp), ops_swapped_p (false),
4bc6fb21 1698 lp_nr (nr2), ops { op0r, op1r }
1699{
1700}
1701
1702/* Struct representing a group of stores to contiguous memory locations.
1703 These are produced by the second phase (coalescing) and consumed in the
1704 third phase that outputs the widened stores. */
1705
6c1dae73 1706class merged_store_group
f663d9ad 1707{
6c1dae73 1708public:
1709 unsigned HOST_WIDE_INT start;
1710 unsigned HOST_WIDE_INT width;
1711 unsigned HOST_WIDE_INT bitregion_start;
1712 unsigned HOST_WIDE_INT bitregion_end;
1713 /* The size of the allocated memory for val and mask. */
f663d9ad 1714 unsigned HOST_WIDE_INT buf_size;
a62b3dc5 1715 unsigned HOST_WIDE_INT align_base;
8a91d545 1716 poly_uint64 load_align_base[2];
1717
1718 unsigned int align;
245f6de1 1719 unsigned int load_align[2];
1720 unsigned int first_order;
1721 unsigned int last_order;
7f5a3982 1722 bool bit_insertion;
e362a897 1723 bool string_concatenation;
18e0c3d1 1724 bool only_constants;
1b3c9813 1725 bool consecutive;
18e0c3d1 1726 unsigned int first_nonmergeable_order;
629387a6 1727 int lp_nr;
f663d9ad 1728
a62b3dc5 1729 auto_vec<store_immediate_info *> stores;
1730 /* We record the first and last original statements in the sequence because
1731 we'll need their vuse/vdef and replacement position. It's easier to keep
1732 track of them separately as 'stores' is reordered by apply_stores. */
1733 gimple *last_stmt;
1734 gimple *first_stmt;
1735 unsigned char *val;
a62b3dc5 1736 unsigned char *mask;
1737
1738 merged_store_group (store_immediate_info *);
1739 ~merged_store_group ();
7f5a3982 1740 bool can_be_merged_into (store_immediate_info *);
1741 void merge_into (store_immediate_info *);
1742 void merge_overlapping (store_immediate_info *);
1743 bool apply_stores ();
1744private:
1745 void do_merge (store_immediate_info *);
1746};
1747
1748/* Debug helper. Dump LEN elements of byte array PTR to FD in hex. */
1749
1750static void
1751dump_char_array (FILE *fd, unsigned char *ptr, unsigned int len)
1752{
1753 if (!fd)
1754 return;
1755
1756 for (unsigned int i = 0; i < len; i++)
c94c3532 1757 fprintf (fd, "%02x ", ptr[i]);
1758 fprintf (fd, "\n");
1759}
1760
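/* For instance (illustration only), a 2-byte buffer { 0xde, 0xad } is
   printed by the helper above as "de ad " followed by a newline.  */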
1761/* Clear out LEN bits starting from bit START in the byte array
1762 PTR. This clears the bits to the *right* from START.
1763 START must be within [0, BITS_PER_UNIT) and counts starting from
1764 the least significant bit. */
1765
1766static void
1767clear_bit_region_be (unsigned char *ptr, unsigned int start,
1768 unsigned int len)
1769{
1770 if (len == 0)
1771 return;
1772 /* Clear len bits to the right of start. */
1773 else if (len <= start + 1)
1774 {
1775 unsigned char mask = (~(~0U << len));
1776 mask = mask << (start + 1U - len);
1777 ptr[0] &= ~mask;
1778 }
1779 else if (start != BITS_PER_UNIT - 1)
1780 {
1781 clear_bit_region_be (ptr, start, (start % BITS_PER_UNIT) + 1);
1782 clear_bit_region_be (ptr + 1, BITS_PER_UNIT - 1,
1783 len - (start % BITS_PER_UNIT) - 1);
1784 }
1785 else if (start == BITS_PER_UNIT - 1
1786 && len > BITS_PER_UNIT)
1787 {
1788 unsigned int nbytes = len / BITS_PER_UNIT;
a62b3dc5 1789 memset (ptr, 0, nbytes);
1790 if (len % BITS_PER_UNIT != 0)
1791 clear_bit_region_be (ptr + nbytes, BITS_PER_UNIT - 1,
1792 len % BITS_PER_UNIT);
1793 }
1794 else
1795 gcc_unreachable ();
1796}
1797
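/* Worked example for clear_bit_region_be (illustration only): with
   ptr[0] == 0xff, START == 6 and LEN == 3, the first branch applies
   (len <= start + 1):
     mask = ~(~0U << 3)    = 0b00000111
     mask <<= 6 + 1 - 3   -> 0b01110000
     ptr[0] &= ~mask      -> 0x8f
   i.e. bits 6, 5 and 4 (counting from the least significant bit), the
   three bits at and to the right of START, are cleared.  */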
1798/* In the byte array PTR clear the bit region that starts at bit
1799 START and is LEN bits wide.
1800 For regions spanning multiple bytes do this recursively until we reach
1801 zero LEN or a region contained within a single byte. */
1802
1803static void
1804clear_bit_region (unsigned char *ptr, unsigned int start,
1805 unsigned int len)
1806{
1807 /* Degenerate base case. */
1808 if (len == 0)
1809 return;
1810 else if (start >= BITS_PER_UNIT)
1811 clear_bit_region (ptr + 1, start - BITS_PER_UNIT, len);
1812 /* Second base case. */
1813 else if ((start + len) <= BITS_PER_UNIT)
1814 {
46a61395 1815 unsigned char mask = (~0U) << (unsigned char) (BITS_PER_UNIT - len);
1816 mask >>= BITS_PER_UNIT - (start + len);
1817
1818 ptr[0] &= ~mask;
1819
1820 return;
1821 }
1822 /* Clear most significant bits in a byte and proceed with the next byte. */
1823 else if (start != 0)
1824 {
1825 clear_bit_region (ptr, start, BITS_PER_UNIT - start);
1f069ef5 1826 clear_bit_region (ptr + 1, 0, len - (BITS_PER_UNIT - start));
1827 }
1828 /* Whole bytes need to be cleared. */
1829 else if (start == 0 && len > BITS_PER_UNIT)
1830 {
1831 unsigned int nbytes = len / BITS_PER_UNIT;
1832 /* We could recurse on each byte but we clear whole bytes, so a simple
1833 memset will do. */
46a61395 1834 memset (ptr, '\0', nbytes);
1835 /* Clear the remaining sub-byte region if there is one. */
1836 if (len % BITS_PER_UNIT != 0)
1837 clear_bit_region (ptr + nbytes, 0, len % BITS_PER_UNIT);
1838 }
1839 else
1840 gcc_unreachable ();
1841}
1842
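/* Worked example for clear_bit_region (illustration only): clearing
   8 bits starting at bit 4 of { 0xff, 0xff } takes the start != 0
   branch, which clears the high 4 bits of byte 0, then recurses with
   START 0 and LEN 4 on byte 1 and clears its low 4 bits, yielding
   { 0x0f, 0xf0 }.  */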
1843/* Write BITLEN bits of EXPR to the byte array PTR at
1844 bit position BITPOS. PTR should contain TOTAL_BYTES elements.
1845 Return true if the operation succeeded. */
1846
1847static bool
1848encode_tree_to_bitpos (tree expr, unsigned char *ptr, int bitlen, int bitpos,
46a61395 1849 unsigned int total_bytes)
1850{
1851 unsigned int first_byte = bitpos / BITS_PER_UNIT;
1852 bool sub_byte_op_p = ((bitlen % BITS_PER_UNIT)
1853 || (bitpos % BITS_PER_UNIT)
f4b31647 1854 || !int_mode_for_size (bitlen, 0).exists ());
1855 bool empty_ctor_p
1856 = (TREE_CODE (expr) == CONSTRUCTOR
1857 && CONSTRUCTOR_NELTS (expr) == 0
1858 && TYPE_SIZE_UNIT (TREE_TYPE (expr))
1859 && tree_fits_uhwi_p (TYPE_SIZE_UNIT (TREE_TYPE (expr))));
1860
1861 if (!sub_byte_op_p)
1862 {
1863 if (first_byte >= total_bytes)
1864 return false;
1865 total_bytes -= first_byte;
1866 if (empty_ctor_p)
1867 {
1868 unsigned HOST_WIDE_INT rhs_bytes
1869 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr)));
1870 if (rhs_bytes > total_bytes)
1871 return false;
1872 memset (ptr + first_byte, '\0', rhs_bytes);
1873 return true;
1874 }
1875 return native_encode_expr (expr, ptr + first_byte, total_bytes) != 0;
1876 }
1877
1878 /* LITTLE-ENDIAN
1879 We are writing a non byte-sized quantity or at a position that is not
1880 at a byte boundary.
1881 |--------|--------|--------| ptr + first_byte
1882 ^ ^
1883 xxx xxxxxxxx xxx< bp>
1884 |______EXPR____|
1885
46a61395 1886 First native_encode_expr EXPR into a temporary buffer and shift each
1887 byte in the buffer by 'bp' (carrying the bits over as necessary).
1888 |00000000|00xxxxxx|xxxxxxxx| << bp = |000xxxxx|xxxxxxxx|xxx00000|
1889 <------bitlen---->< bp>
1890 Then we clear the destination bits:
1891 |---00000|00000000|000-----| ptr + first_byte
1892 <-------bitlen--->< bp>
1893
1894 Finally we ORR the bytes of the shifted EXPR into the cleared region:
1895 |---xxxxx||xxxxxxxx||xxx-----| ptr + first_byte.
1896
1897 BIG-ENDIAN
1898 We are writing a non byte-sized quantity or at a position that is not
1899 at a byte boundary.
1900 ptr + first_byte |--------|--------|--------|
1901 ^ ^
1902 <bp >xxx xxxxxxxx xxx
1903 |_____EXPR_____|
1904
46a61395 1905 First native_encode_expr EXPR into a temporary buffer and shift each
 1906 byte in the buffer to the right (carrying the bits over as necessary).
1907 We shift by as much as needed to align the most significant bit of EXPR
1908 with bitpos:
1909 |00xxxxxx|xxxxxxxx| >> 3 = |00000xxx|xxxxxxxx|xxxxx000|
1910 <---bitlen----> <bp ><-----bitlen----->
1911 Then we clear the destination bits:
1912 ptr + first_byte |-----000||00000000||00000---|
1913 <bp ><-------bitlen----->
1914
1915 Finally we ORR the bytes of the shifted EXPR into the cleared region:
1916 ptr + first_byte |---xxxxx||xxxxxxxx||xxx-----|.
1917 The awkwardness comes from the fact that bitpos is counted from the
1918 most significant bit of a byte. */
1919
1920 /* We must be dealing with fixed-size data at this point, since the
1921 total size is also fixed. */
1922 unsigned int byte_size;
1923 if (empty_ctor_p)
1924 {
1925 unsigned HOST_WIDE_INT rhs_bytes
1926 = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr)));
1927 if (rhs_bytes > total_bytes)
1928 return false;
1929 byte_size = rhs_bytes;
1930 }
1931 else
1932 {
1933 fixed_size_mode mode
1934 = as_a <fixed_size_mode> (TYPE_MODE (TREE_TYPE (expr)));
1935 byte_size
1936 = mode == BLKmode
1937 ? tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (expr)))
1938 : GET_MODE_SIZE (mode);
3afd514b 1939 }
f663d9ad 1940 /* Allocate an extra byte so that we have space to shift into. */
3afd514b 1941 byte_size++;
f663d9ad 1942 unsigned char *tmpbuf = XALLOCAVEC (unsigned char, byte_size);
46a61395 1943 memset (tmpbuf, '\0', byte_size);
f663d9ad 1944 /* The store detection code should only have allowed constants that are
1945 accepted by native_encode_expr or empty ctors. */
1946 if (!empty_ctor_p
1947 && native_encode_expr (expr, tmpbuf, byte_size - 1) == 0)
1948 gcc_unreachable ();
1949
1950 /* The native_encode_expr machinery uses TYPE_MODE to determine how many
1951 bytes to write. This means it can write more than
1952 ROUND_UP (bitlen, BITS_PER_UNIT) / BITS_PER_UNIT bytes (for example
1953 write 8 bytes for a bitlen of 40). Skip the bytes that are not within
1954 bitlen and zero out the bits that are not relevant as well (that may
1955 contain a sign bit due to sign-extension). */
1956 unsigned int padding
1957 = byte_size - ROUND_UP (bitlen, BITS_PER_UNIT) / BITS_PER_UNIT - 1;
1958 /* On big-endian the padding is at the 'front' so just skip the initial
1959 bytes. */
1960 if (BYTES_BIG_ENDIAN)
1961 tmpbuf += padding;
1962
1963 byte_size -= padding;
1964
1965 if (bitlen % BITS_PER_UNIT != 0)
f663d9ad 1966 {
4b2c06f4 1967 if (BYTES_BIG_ENDIAN)
1968 clear_bit_region_be (tmpbuf, BITS_PER_UNIT - 1,
1969 BITS_PER_UNIT - (bitlen % BITS_PER_UNIT));
1970 else
1971 clear_bit_region (tmpbuf, bitlen,
1972 byte_size * BITS_PER_UNIT - bitlen);
f663d9ad 1973 }
1974 /* Left shifting relies on the last byte being clear if bitlen is
1975 a multiple of BITS_PER_UNIT, which might not be clear if
1976 there are padding bytes. */
1977 else if (!BYTES_BIG_ENDIAN)
1978 tmpbuf[byte_size - 1] = '\0';
1979
1980 /* Clear the bit region in PTR where the bits from TMPBUF will be
46a61395 1981 inserted. */
1982 if (BYTES_BIG_ENDIAN)
1983 clear_bit_region_be (ptr + first_byte,
1984 BITS_PER_UNIT - 1 - (bitpos % BITS_PER_UNIT), bitlen);
1985 else
1986 clear_bit_region (ptr + first_byte, bitpos % BITS_PER_UNIT, bitlen);
1987
1988 int shift_amnt;
1989 int bitlen_mod = bitlen % BITS_PER_UNIT;
1990 int bitpos_mod = bitpos % BITS_PER_UNIT;
1991
1992 bool skip_byte = false;
1993 if (BYTES_BIG_ENDIAN)
1994 {
1995 /* BITPOS and BITLEN are exactly aligned and no shifting
1996 is necessary. */
1997 if (bitpos_mod + bitlen_mod == BITS_PER_UNIT
1998 || (bitpos_mod == 0 && bitlen_mod == 0))
1999 shift_amnt = 0;
2000 /* |. . . . . . . .|
2001 <bp > <blen >.
2002 We always shift right for BYTES_BIG_ENDIAN so shift the beginning
2003 of the value until it aligns with 'bp' in the next byte over. */
2004 else if (bitpos_mod + bitlen_mod < BITS_PER_UNIT)
2005 {
2006 shift_amnt = bitlen_mod + bitpos_mod;
2007 skip_byte = bitlen_mod != 0;
2008 }
2009 /* |. . . . . . . .|
2010 <----bp--->
2011 <---blen---->.
2012 Shift the value right within the same byte so it aligns with 'bp'. */
2013 else
2014 shift_amnt = bitlen_mod + bitpos_mod - BITS_PER_UNIT;
2015 }
2016 else
2017 shift_amnt = bitpos % BITS_PER_UNIT;
2018
2019 /* Create the shifted version of EXPR. */
2020 if (!BYTES_BIG_ENDIAN)
46a61395 2021 {
8aba425f 2022 shift_bytes_in_array_left (tmpbuf, byte_size, shift_amnt);
2023 if (shift_amnt == 0)
2024 byte_size--;
2025 }
2026 else
2027 {
2028 gcc_assert (BYTES_BIG_ENDIAN);
2029 shift_bytes_in_array_right (tmpbuf, byte_size, shift_amnt);
2030 /* If shifting right forced us to move into the next byte skip the now
2031 empty byte. */
2032 if (skip_byte)
2033 {
2034 tmpbuf++;
2035 byte_size--;
2036 }
2037 }
2038
2039 /* Insert the bits from TMPBUF. */
2040 for (unsigned int i = 0; i < byte_size; i++)
2041 ptr[first_byte + i] |= tmpbuf[i];
2042
2043 return true;
2044}
2045
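/* Worked example for the padding logic above (illustration only): for
   BITLEN == 40 encoded in a DImode temporary, native_encode_expr fills
   8 bytes, BYTE_SIZE is 8 + 1 == 9 after the extra shift byte, and
     padding = 9 - ROUND_UP (40, 8) / 8 - 1 = 9 - 5 - 1 = 3
   so 3 of the encoded bytes carry no bits of the value: on big-endian
   targets they sit at the front of TMPBUF and are skipped, on
   little-endian targets they trail and are dropped when BYTE_SIZE is
   reduced by PADDING.  */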
2046/* Sorting function for store_immediate_info objects.
2047 Sorts them by bitposition. */
2048
2049static int
2050sort_by_bitpos (const void *x, const void *y)
2051{
2052 store_immediate_info *const *tmp = (store_immediate_info * const *) x;
2053 store_immediate_info *const *tmp2 = (store_immediate_info * const *) y;
2054
109cca3b 2055 if ((*tmp)->bitpos < (*tmp2)->bitpos)
2056 return -1;
2057 else if ((*tmp)->bitpos > (*tmp2)->bitpos)
2058 return 1;
109cca3b 2059 else
 2060 /* If the bit positions are the same, fall back on the order, which is
 2061 guaranteed to be different. */
2062 return (*tmp)->order - (*tmp2)->order;
2063}
2064
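/* Illustrative note: qsort is not guaranteed to be stable, so when two
   stores have equal bitpos (overlapping constant stores) the comparator
   above falls back on ORDER, which is unique within a chain.  E.g.
   stores with (bitpos, order) of (8, 2), (0, 0) and (8, 1) always sort
   as (0, 0), (8, 1), (8, 2), independently of the qsort implementation.  */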
2065/* Sorting function for store_immediate_info objects.
2066 Sorts them by the order field. */
2067
2068static int
2069sort_by_order (const void *x, const void *y)
2070{
2071 store_immediate_info *const *tmp = (store_immediate_info * const *) x;
2072 store_immediate_info *const *tmp2 = (store_immediate_info * const *) y;
2073
2074 if ((*tmp)->order < (*tmp2)->order)
2075 return -1;
2076 else if ((*tmp)->order > (*tmp2)->order)
2077 return 1;
2078
2079 gcc_unreachable ();
2080}
2081
2082/* Initialize a merged_store_group object from a store_immediate_info
2083 object. */
2084
2085merged_store_group::merged_store_group (store_immediate_info *info)
2086{
2087 start = info->bitpos;
2088 width = info->bitsize;
2089 bitregion_start = info->bitregion_start;
2090 bitregion_end = info->bitregion_end;
2091 /* VAL has memory allocated for it in apply_stores once the group
2092 width has been finalized. */
2093 val = NULL;
a62b3dc5 2094 mask = NULL;
2095 bit_insertion = info->rhs_code == BIT_INSERT_EXPR;
2096 string_concatenation = info->rhs_code == STRING_CST;
18e0c3d1 2097 only_constants = info->rhs_code == INTEGER_CST;
1b3c9813 2098 consecutive = true;
18e0c3d1 2099 first_nonmergeable_order = ~0U;
629387a6 2100 lp_nr = info->lp_nr;
2101 unsigned HOST_WIDE_INT align_bitpos = 0;
2102 get_object_alignment_1 (gimple_assign_lhs (info->stmt),
2103 &align, &align_bitpos);
2104 align_base = start - align_bitpos;
2105 for (int i = 0; i < 2; ++i)
2106 {
2107 store_operand_info &op = info->ops[i];
2108 if (op.base_addr == NULL_TREE)
2109 {
2110 load_align[i] = 0;
2111 load_align_base[i] = 0;
2112 }
2113 else
2114 {
2115 get_object_alignment_1 (op.val, &load_align[i], &align_bitpos);
2116 load_align_base[i] = op.bitpos - align_bitpos;
2117 }
2118 }
2119 stores.create (1);
2120 stores.safe_push (info);
2121 last_stmt = info->stmt;
2122 last_order = info->order;
2123 first_stmt = last_stmt;
2124 first_order = last_order;
2125 buf_size = 0;
2126}
2127
2128merged_store_group::~merged_store_group ()
2129{
2130 if (val)
2131 XDELETEVEC (val);
2132}
2133
2134/* Return true if the store described by INFO can be merged into the group. */
2135
2136bool
2137merged_store_group::can_be_merged_into (store_immediate_info *info)
2138{
2139 /* Do not merge bswap patterns. */
2140 if (info->rhs_code == LROTATE_EXPR)
2141 return false;
2142
2143 if (info->lp_nr != lp_nr)
2144 return false;
2145
2146 /* The canonical case. */
2147 if (info->rhs_code == stores[0]->rhs_code)
2148 return true;
2149
e362a897 2150 /* BIT_INSERT_EXPR is compatible with INTEGER_CST if no STRING_CST. */
7f5a3982 2151 if (info->rhs_code == BIT_INSERT_EXPR && stores[0]->rhs_code == INTEGER_CST)
e362a897 2152 return !string_concatenation;
2153
2154 if (stores[0]->rhs_code == BIT_INSERT_EXPR && info->rhs_code == INTEGER_CST)
e362a897 2155 return !string_concatenation;
7f5a3982 2156
2157 /* We can turn MEM_REF into BIT_INSERT_EXPR for bit-field stores, but do it
2158 only for small regions since this can generate a lot of instructions. */
2159 if (info->rhs_code == MEM_REF
2160 && (stores[0]->rhs_code == INTEGER_CST
2161 || stores[0]->rhs_code == BIT_INSERT_EXPR)
2162 && info->bitregion_start == stores[0]->bitregion_start
ed01d707 2163 && info->bitregion_end == stores[0]->bitregion_end
2815558a 2164 && info->bitregion_end - info->bitregion_start <= MAX_FIXED_MODE_SIZE)
e362a897 2165 return !string_concatenation;
2166
2167 if (stores[0]->rhs_code == MEM_REF
2168 && (info->rhs_code == INTEGER_CST
2169 || info->rhs_code == BIT_INSERT_EXPR)
2170 && info->bitregion_start == stores[0]->bitregion_start
ed01d707 2171 && info->bitregion_end == stores[0]->bitregion_end
2815558a 2172 && info->bitregion_end - info->bitregion_start <= MAX_FIXED_MODE_SIZE)
2173 return !string_concatenation;
2174
2175 /* STRING_CST is compatible with INTEGER_CST if no BIT_INSERT_EXPR. */
2176 if (info->rhs_code == STRING_CST
2177 && stores[0]->rhs_code == INTEGER_CST
2178 && stores[0]->bitsize == CHAR_BIT)
2179 return !bit_insertion;
2180
2181 if (stores[0]->rhs_code == STRING_CST
2182 && info->rhs_code == INTEGER_CST
2183 && info->bitsize == CHAR_BIT)
2184 return !bit_insertion;
2185
2186 return false;
2187}
2188
2189/* Helper method for merge_into and merge_overlapping to do
2190 the common part. */
7f5a3982 2191
f663d9ad 2192void
a62b3dc5 2193merged_store_group::do_merge (store_immediate_info *info)
f663d9ad 2194{
2195 bitregion_start = MIN (bitregion_start, info->bitregion_start);
2196 bitregion_end = MAX (bitregion_end, info->bitregion_end);
2197
2198 unsigned int this_align;
2199 unsigned HOST_WIDE_INT align_bitpos = 0;
2200 get_object_alignment_1 (gimple_assign_lhs (info->stmt),
2201 &this_align, &align_bitpos);
2202 if (this_align > align)
2203 {
2204 align = this_align;
2205 align_base = info->bitpos - align_bitpos;
2206 }
2207 for (int i = 0; i < 2; ++i)
2208 {
2209 store_operand_info &op = info->ops[i];
2210 if (!op.base_addr)
2211 continue;
2212
2213 get_object_alignment_1 (op.val, &this_align, &align_bitpos);
2214 if (this_align > load_align[i])
2215 {
2216 load_align[i] = this_align;
2217 load_align_base[i] = op.bitpos - align_bitpos;
2218 }
2219 }
f663d9ad 2220
2221 gimple *stmt = info->stmt;
2222 stores.safe_push (info);
2223 if (info->order > last_order)
2224 {
2225 last_order = info->order;
2226 last_stmt = stmt;
2227 }
2228 else if (info->order < first_order)
2229 {
2230 first_order = info->order;
2231 first_stmt = stmt;
2232 }
e362a897 2233
2234 if (info->bitpos != start + width)
2235 consecutive = false;
2236
2237 /* We need to use extraction if there is any bit-field. */
2238 if (info->rhs_code == BIT_INSERT_EXPR)
2239 {
2240 bit_insertion = true;
2241 gcc_assert (!string_concatenation);
2242 }
2243
1b3c9813 2244 /* We want to use concatenation if there is any string. */
2245 if (info->rhs_code == STRING_CST)
2246 {
2247 string_concatenation = true;
2248 gcc_assert (!bit_insertion);
2249 }
2250
2251 /* But we cannot use it if we don't have consecutive stores. */
2252 if (!consecutive)
2253 string_concatenation = false;
2254
2255 if (info->rhs_code != INTEGER_CST)
2256 only_constants = false;
2257}
2258
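/* Illustrative sketch: for a group with START == 0 and WIDTH == 16,
   merging a store at bitpos 16 keeps CONSECUTIVE true because it begins
   exactly at START + WIDTH, whereas merging a store at bitpos 24 would
   clear CONSECUTIVE and thereby forbid string concatenation for the
   whole group.  */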
2259/* Merge a store recorded by INFO into this merged store.
2260 The store is not overlapping with the existing recorded
2261 stores. */
2262
2263void
2264merged_store_group::merge_into (store_immediate_info *info)
2265{
2266 do_merge (info);
2267
2268 /* Make sure we're inserting in the position we think we're inserting. */
2269 gcc_assert (info->bitpos >= start + width
2270 && info->bitregion_start <= bitregion_end);
2271
c5679c37 2272 width = info->bitpos + info->bitsize - start;
2273}
2274
2275/* Merge a store described by INFO into this merged store.
 2276 INFO overlaps in some way with the current store (i.e. it's not the
 2277 contiguous case, which is handled by merged_store_group::merge_into). */
2278
2279void
2280merged_store_group::merge_overlapping (store_immediate_info *info)
2281{
2282 do_merge (info);
2283
f663d9ad 2284 /* If the store extends the size of the group, extend the width. */
a62b3dc5 2285 if (info->bitpos + info->bitsize > start + width)
c5679c37 2286 width = info->bitpos + info->bitsize - start;
2287}
2288
2289/* Go through all the recorded stores in this group in program order and
2290 apply their values to the VAL byte array to create the final merged
2291 value. Return true if the operation succeeded. */
2292
2293bool
2294merged_store_group::apply_stores ()
2295{
2296 store_immediate_info *info;
2297 unsigned int i;
2298
 2299 /* Make sure the bit region boundaries are byte aligned and that we have
 2300 more than one store in the group, otherwise we cannot merge anything. */
2301 if (bitregion_start % BITS_PER_UNIT != 0
2302 || bitregion_end % BITS_PER_UNIT != 0
2303 || stores.length () == 1)
2304 return false;
2305
2306 buf_size = (bitregion_end - bitregion_start) / BITS_PER_UNIT;
2307
2308 /* Really do string concatenation for large strings only. */
2309 if (buf_size <= MOVE_MAX)
2310 string_concatenation = false;
2311
2312 /* String concatenation only works for byte aligned start and end. */
2313 if (start % BITS_PER_UNIT != 0 || width % BITS_PER_UNIT != 0)
2314 string_concatenation = false;
2315
c94c3532 2316 /* Create a power-of-2-sized buffer for native_encode_expr. */
2317 if (!string_concatenation)
2318 buf_size = 1 << ceil_log2 (buf_size);
2319
2320 val = XNEWVEC (unsigned char, 2 * buf_size);
2321 mask = val + buf_size;
2322 memset (val, 0, buf_size);
2323 memset (mask, ~0U, buf_size);
f663d9ad 2324
2325 stores.qsort (sort_by_order);
2326
2327 FOR_EACH_VEC_ELT (stores, i, info)
2328 {
a62b3dc5 2329 unsigned int pos_in_buffer = info->bitpos - bitregion_start;
c94c3532 2330 tree cst;
2331 if (info->ops[0].val && info->ops[0].base_addr == NULL_TREE)
2332 cst = info->ops[0].val;
2333 else if (info->ops[1].val && info->ops[1].base_addr == NULL_TREE)
2334 cst = info->ops[1].val;
2335 else
2336 cst = NULL_TREE;
245f6de1 2337 bool ret = true;
2338 if (cst && info->rhs_code != BIT_INSERT_EXPR)
2339 ret = encode_tree_to_bitpos (cst, val, info->bitsize, pos_in_buffer,
2340 buf_size);
2341 unsigned char *m = mask + (pos_in_buffer / BITS_PER_UNIT);
2342 if (BYTES_BIG_ENDIAN)
2343 clear_bit_region_be (m, (BITS_PER_UNIT - 1
2344 - (pos_in_buffer % BITS_PER_UNIT)),
2345 info->bitsize);
2346 else
2347 clear_bit_region (m, pos_in_buffer % BITS_PER_UNIT, info->bitsize);
245f6de1 2348 if (cst && dump_file && (dump_flags & TDF_DETAILS))
2349 {
2350 if (ret)
2351 {
c94c3532 2352 fputs ("After writing ", dump_file);
4af78ef8 2353 print_generic_expr (dump_file, cst, TDF_NONE);
f663d9ad 2354 fprintf (dump_file, " of size " HOST_WIDE_INT_PRINT_DEC
2355 " at position %d\n", info->bitsize, pos_in_buffer);
2356 fputs (" the merged value contains ", dump_file);
f663d9ad 2357 dump_char_array (dump_file, val, buf_size);
2358 fputs (" the merged mask contains ", dump_file);
2359 dump_char_array (dump_file, mask, buf_size);
2360 if (bit_insertion)
2361 fputs (" bit insertion is required\n", dump_file);
2362 if (string_concatenation)
2363 fputs (" string concatenation is required\n", dump_file);
2364 }
2365 else
2366 fprintf (dump_file, "Failed to merge stores\n");
4b84d9b8 2367 }
2368 if (!ret)
2369 return false;
2370 }
4b84d9b8 2371 stores.qsort (sort_by_bitpos);
2372 return true;
2373}
2374
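/* Worked example for apply_stores (illustration only, little-endian):
   two adjacent 1-byte constant stores of 0x12 at bit 0 and 0x34 at
   bit 8 in the bit region [0, 16) give BUF_SIZE == 2 and, after the
   loop above,
     val  = { 0x12, 0x34 }
     mask = { 0x00, 0x00 }
   A zero mask byte means every bit of that byte was written; mask bits
   left set mark positions that must be preserved from the original
   memory contents when the widened store is emitted.  */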
2375/* Structure describing the store chain. */
2376
6c1dae73 2377class imm_store_chain_info
f663d9ad 2378{
6c1dae73 2379public:
2380 /* Doubly-linked list that imposes an order on chain processing.
2381 PNXP (prev's next pointer) points to the head of a list, or to
2382 the next field in the previous chain in the list.
2383 See pass_store_merging::m_stores_head for more rationale. */
2384 imm_store_chain_info *next, **pnxp;
b5926e23 2385 tree base_addr;
a62b3dc5 2386 auto_vec<store_immediate_info *> m_store_info;
2387 auto_vec<merged_store_group *> m_merged_store_groups;
2388
2389 imm_store_chain_info (imm_store_chain_info *&inspt, tree b_a)
2390 : next (inspt), pnxp (&inspt), base_addr (b_a)
2391 {
2392 inspt = this;
2393 if (next)
2394 {
2395 gcc_checking_assert (pnxp == next->pnxp);
2396 next->pnxp = &next;
2397 }
2398 }
2399 ~imm_store_chain_info ()
2400 {
2401 *pnxp = next;
2402 if (next)
2403 {
2404 gcc_checking_assert (&next == next->pnxp);
2405 next->pnxp = pnxp;
2406 }
2407 }
b5926e23 2408 bool terminate_and_process_chain ();
2409 bool try_coalesce_bswap (merged_store_group *, unsigned int, unsigned int,
2410 unsigned int);
f663d9ad 2411 bool coalesce_immediate_stores ();
2412 bool output_merged_store (merged_store_group *);
2413 bool output_merged_stores ();
2414};
2415
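/* Illustrative sketch of the PNXP links above: starting from an empty
   list, constructing chain A and then chain B with M_STORES_HEAD as
   INSPT yields
     m_stores_head == B, B->next == A, A->next == NULL,
     B->pnxp == &m_stores_head, A->pnxp == &B->next,
   so destroying either chain unlinks it with *pnxp = next without
   walking the list.  */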
2416const pass_data pass_data_tree_store_merging = {
2417 GIMPLE_PASS, /* type */
2418 "store-merging", /* name */
2419 OPTGROUP_NONE, /* optinfo_flags */
2420 TV_GIMPLE_STORE_MERGING, /* tv_id */
2421 PROP_ssa, /* properties_required */
2422 0, /* properties_provided */
2423 0, /* properties_destroyed */
2424 0, /* todo_flags_start */
2425 TODO_update_ssa, /* todo_flags_finish */
2426};
2427
2428class pass_store_merging : public gimple_opt_pass
2429{
2430public:
2431 pass_store_merging (gcc::context *ctxt)
2432 : gimple_opt_pass (pass_data_tree_store_merging, ctxt), m_stores_head (),
2433 m_n_chains (0), m_n_stores (0)
2434 {
2435 }
2436
2437 /* Pass not supported for PDP-endian, nor for insane hosts or
2438 target character sizes where native_{encode,interpret}_expr
a62b3dc5 2439 doesn't work properly. */
2440 bool
2441 gate (function *) final override
f663d9ad 2442 {
a62b3dc5 2443 return flag_store_merging
c94c3532 2444 && BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN
2445 && CHAR_BIT == 8
2446 && BITS_PER_UNIT == 8;
2447 }
2448
725793af 2449 unsigned int execute (function *) final override;
2450
2451private:
99b1c316 2452 hash_map<tree_operand_hash, class imm_store_chain_info *> m_stores;
f663d9ad 2453
2454 /* Form a doubly-linked stack of the elements of m_stores, so that
2455 we can iterate over them in a predictable way. Using this order
2456 avoids extraneous differences in the compiler output just because
2457 of tree pointer variations (e.g. different chains end up in
2458 different positions of m_stores, so they are handled in different
2459 orders, so they allocate or release SSA names in different
2460 orders, and when they get reused, subsequent passes end up
2461 getting different SSA names, which may ultimately change
2462 decisions when going out of SSA). */
2463 imm_store_chain_info *m_stores_head;
2464
2465 /* The number of store chains currently tracked. */
2466 unsigned m_n_chains;
2467 /* The number of stores currently tracked. */
2468 unsigned m_n_stores;
2469
2470 bool process_store (gimple *);
2471 bool terminate_and_process_chain (imm_store_chain_info *);
383ac8dc 2472 bool terminate_all_aliasing_chains (imm_store_chain_info **, gimple *);
629387a6 2473 bool terminate_and_process_all_chains ();
2474}; // class pass_store_merging
2475
2476/* Terminate and process all recorded chains. Return true if any changes
2477 were made. */
2478
2479bool
2480pass_store_merging::terminate_and_process_all_chains ()
2481{
f663d9ad 2482 bool ret = false;
50b6d676 2483 while (m_stores_head)
629387a6 2484 ret |= terminate_and_process_chain (m_stores_head);
b119c055 2485 gcc_assert (m_stores.is_empty ());
2486 return ret;
2487}
2488
2489/* Terminate all chains that are affected by the statement STMT.
 2490 CHAIN_INFO, if non-NULL, is the chain that the checks should
629387a6 2491 skip. Return true if any changes were made. */
2492
2493bool
20770eb8 2494pass_store_merging::terminate_all_aliasing_chains (imm_store_chain_info
b5926e23 2495 **chain_info,
2496 gimple *stmt)
2497{
2498 bool ret = false;
2499
2500 /* If the statement doesn't touch memory it can't alias. */
2501 if (!gimple_vuse (stmt))
2502 return false;
2503
9e875fd8 2504 tree store_lhs = gimple_store_p (stmt) ? gimple_get_lhs (stmt) : NULL_TREE;
2505 ao_ref store_lhs_ref;
2506 ao_ref_init (&store_lhs_ref, store_lhs);
383ac8dc 2507 for (imm_store_chain_info *next = m_stores_head, *cur = next; cur; cur = next)
f663d9ad 2508 {
2509 next = cur->next;
2510
2511 /* We already checked all the stores in chain_info and terminated the
2512 chain if necessary. Skip it here. */
2513 if (chain_info && *chain_info == cur)
2514 continue;
2515
2516 store_immediate_info *info;
2517 unsigned int i;
383ac8dc 2518 FOR_EACH_VEC_ELT (cur->m_store_info, i, info)
f663d9ad 2519 {
9e875fd8 2520 tree lhs = gimple_assign_lhs (info->stmt);
2521 ao_ref lhs_ref;
2522 ao_ref_init (&lhs_ref, lhs);
2523 if (ref_maybe_used_by_stmt_p (stmt, &lhs_ref)
2524 || stmt_may_clobber_ref_p_1 (stmt, &lhs_ref)
2525 || (store_lhs && refs_may_alias_p_1 (&store_lhs_ref,
2526 &lhs_ref, false)))
f663d9ad 2527 {
245f6de1 2528 if (dump_file && (dump_flags & TDF_DETAILS))
f663d9ad 2529 {
2530 fprintf (dump_file, "stmt causes chain termination:\n");
2531 print_gimple_stmt (dump_file, stmt, 0);
f663d9ad 2532 }
629387a6 2533 ret |= terminate_and_process_chain (cur);
245f6de1 2534 break;
2535 }
2536 }
2537 }
2538
2539 return ret;
2540}
2541
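/* Illustrative sketch: if a chain has recorded p->a = 1; p->b = 2; and
   the next statement processed is x_1 = p->a;, ref_maybe_used_by_stmt_p
   above detects that the statement reads one of the recorded
   destinations, so the whole chain is terminated (and emitted) before
   the read.  */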
2542/* Helper function. Terminate the recorded chain rooted at CHAIN_INFO.
 2543 Return true if the merging and output was successful. The m_stores
2544 entry is removed after the processing in any case. */
2545
2546bool
629387a6 2547pass_store_merging::terminate_and_process_chain (imm_store_chain_info *chain_info)
f663d9ad 2548{
2549 m_n_stores -= chain_info->m_store_info.length ();
2550 m_n_chains--;
2551 bool ret = chain_info->terminate_and_process_chain ();
2552 m_stores.remove (chain_info->base_addr);
2553 delete chain_info;
2554 return ret;
2555}
2556
245f6de1 2557/* Return true if stmts in between FIRST (inclusive) and LAST (exclusive)
2558 may clobber REF. FIRST and LAST must have non-NULL vdef. We want to
2559 be able to sink load of REF across stores between FIRST and LAST, up
2560 to right before LAST. */
2561
2562bool
2563stmts_may_clobber_ref_p (gimple *first, gimple *last, tree ref)
2564{
2565 ao_ref r;
2566 ao_ref_init (&r, ref);
2567 unsigned int count = 0;
2568 tree vop = gimple_vdef (last);
2569 gimple *stmt;
2570
2571 /* Return true conservatively if the basic blocks are different. */
2572 if (gimple_bb (first) != gimple_bb (last))
2573 return true;
2574
2575 do
2576 {
2577 stmt = SSA_NAME_DEF_STMT (vop);
2578 if (stmt_may_clobber_ref_p_1 (stmt, &r))
2579 return true;
2580 if (gimple_store_p (stmt)
2581 && refs_anti_dependent_p (ref, gimple_get_lhs (stmt)))
2582 return true;
2583 /* Avoid quadratic compile time by bounding the number of checks
2584 we perform. */
2585 if (++count > MAX_STORE_ALIAS_CHECKS)
2586 return true;
2587 vop = gimple_vuse (stmt);
2588 }
2589 while (stmt != first);
629387a6 2590
2591 return false;
2592}
2593
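/* Illustrative note: the loop above walks the virtual use-def chain
   backwards from LAST to FIRST, testing each visited store against REF
   with stmt_may_clobber_ref_p_1, and answers true conservatively when
   FIRST and LAST are in different basic blocks or when more than
   MAX_STORE_ALIAS_CHECKS statements would have to be inspected.  */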
2594/* Return true if INFO->ops[IDX] is mergeable with the
2595 corresponding loads already in MERGED_STORE group.
2596 BASE_ADDR is the base address of the whole store group. */
2597
2598bool
2599compatible_load_p (merged_store_group *merged_store,
2600 store_immediate_info *info,
2601 tree base_addr, int idx)
2602{
2603 store_immediate_info *infof = merged_store->stores[0];
2604 if (!info->ops[idx].base_addr
2605 || maybe_ne (info->ops[idx].bitpos - infof->ops[idx].bitpos,
2606 info->bitpos - infof->bitpos)
2607 || !operand_equal_p (info->ops[idx].base_addr,
2608 infof->ops[idx].base_addr, 0))
2609 return false;
2610
2611 store_immediate_info *infol = merged_store->stores.last ();
2612 tree load_vuse = gimple_vuse (info->ops[idx].stmt);
2613 /* In this case all vuses should be the same, e.g.
2614 _1 = s.a; _2 = s.b; _3 = _1 | 1; t.a = _3; _4 = _2 | 2; t.b = _4;
2615 or
2616 _1 = s.a; _2 = s.b; t.a = _1; t.b = _2;
2617 and we can emit the coalesced load next to any of those loads. */
2618 if (gimple_vuse (infof->ops[idx].stmt) == load_vuse
2619 && gimple_vuse (infol->ops[idx].stmt) == load_vuse)
2620 return true;
2621
2622 /* Otherwise, at least for now require that the load has the same
2623 vuse as the store. See following examples. */
2624 if (gimple_vuse (info->stmt) != load_vuse)
2625 return false;
2626
2627 if (gimple_vuse (infof->stmt) != gimple_vuse (infof->ops[idx].stmt)
2628 || (infof != infol
2629 && gimple_vuse (infol->stmt) != gimple_vuse (infol->ops[idx].stmt)))
2630 return false;
2631
2632 /* If the load is from the same location as the store, already
2633 the construction of the immediate chain info guarantees no intervening
2634 stores, so no further checks are needed. Example:
2635 _1 = s.a; _2 = _1 & -7; s.a = _2; _3 = s.b; _4 = _3 & -7; s.b = _4; */
8a91d545 2636 if (known_eq (info->ops[idx].bitpos, info->bitpos)
2637 && operand_equal_p (info->ops[idx].base_addr, base_addr, 0))
2638 return true;
2639
2640 /* Otherwise, we need to punt if any of the loads can be clobbered by any
2641 of the stores in the group, or any other stores in between those.
2642 Previous calls to compatible_load_p ensured that for all the
2643 merged_store->stores IDX loads, no stmts starting with
2644 merged_store->first_stmt and ending right before merged_store->last_stmt
2645 clobbers those loads. */
2646 gimple *first = merged_store->first_stmt;
2647 gimple *last = merged_store->last_stmt;
2648 /* The stores are sorted by increasing store bitpos, so if info->stmt store
2649 comes before the so far first load, we'll be changing
2650 merged_store->first_stmt. In that case we need to give up if
2651 any of the earlier processed loads clobber with the stmts in the new
2652 range. */
2653 if (info->order < merged_store->first_order)
2654 {
3f207ab3 2655 for (store_immediate_info *infoc : merged_store->stores)
2656 if (stmts_may_clobber_ref_p (info->stmt, first, infoc->ops[idx].val))
2657 return false;
2658 first = info->stmt;
2659 }
2660 /* Similarly, we could change merged_store->last_stmt, so ensure
2661 in that case no stmts in the new range clobber any of the earlier
2662 processed loads. */
2663 else if (info->order > merged_store->last_order)
2664 {
3f207ab3 2665 for (store_immediate_info *infoc : merged_store->stores)
2666 if (stmts_may_clobber_ref_p (last, info->stmt, infoc->ops[idx].val))
2667 return false;
2668 last = info->stmt;
2669 }
2670 /* And finally, we'd be adding a new load to the set, ensure it isn't
2671 clobbered in the new range. */
2672 if (stmts_may_clobber_ref_p (first, last, info->ops[idx].val))
2673 return false;
2674
2675 /* Otherwise, we are looking for:
2676 _1 = s.a; _2 = _1 ^ 15; t.a = _2; _3 = s.b; _4 = _3 ^ 15; t.b = _4;
2677 or
2678 _1 = s.a; t.a = _1; _2 = s.b; t.b = _2; */
2679 return true;
2680}
2681
2682/* Add all refs loaded to compute VAL to REFS vector. */
2683
2684void
2685gather_bswap_load_refs (vec<tree> *refs, tree val)
2686{
2687 if (TREE_CODE (val) != SSA_NAME)
2688 return;
2689
2690 gimple *stmt = SSA_NAME_DEF_STMT (val);
2691 if (!is_gimple_assign (stmt))
2692 return;
2693
2694 if (gimple_assign_load_p (stmt))
2695 {
2696 refs->safe_push (gimple_assign_rhs1 (stmt));
2697 return;
2698 }
2699
2700 switch (gimple_assign_rhs_class (stmt))
2701 {
2702 case GIMPLE_BINARY_RHS:
2703 gather_bswap_load_refs (refs, gimple_assign_rhs2 (stmt));
2704 /* FALLTHRU */
2705 case GIMPLE_UNARY_RHS:
2706 gather_bswap_load_refs (refs, gimple_assign_rhs1 (stmt));
2707 break;
2708 default:
2709 gcc_unreachable ();
2710 }
2711}
2712
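/* Illustrative sketch: for VAL defined by
     _1 = MEM[a]; _2 = MEM[b]; _3 = _1 | _2;
   the recursion above visits rhs2 and falls through to rhs1 of the
   binary statement, and each gimple_assign_load_p leaf pushes its rhs1,
   so both MEM[a] and MEM[b] end up in REFS.  */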
2713/* Check if there are any stores in M_STORE_INFO after index I
2714 (where M_STORE_INFO must be sorted by sort_by_bitpos) that overlap
2715 a potential group ending with END that have their order
2716 smaller than LAST_ORDER. ALL_INTEGER_CST_P is true if
2717 all the stores already merged and the one under consideration
2718 have rhs_code of INTEGER_CST. Return true if there are no such stores.
2719 Consider:
2720 MEM[(long long int *)p_28] = 0;
2721 MEM[(long long int *)p_28 + 8B] = 0;
2722 MEM[(long long int *)p_28 + 16B] = 0;
2723 MEM[(long long int *)p_28 + 24B] = 0;
2724 _129 = (int) _130;
2725 MEM[(int *)p_28 + 8B] = _129;
2726 MEM[(int *)p_28].a = -1;
2727 We already have
2728 MEM[(long long int *)p_28] = 0;
2729 MEM[(int *)p_28].a = -1;
2730 stmts in the current group and need to consider if it is safe to
2731 add MEM[(long long int *)p_28 + 8B] = 0; store into the same group.
2732 There is an overlap between that store and the MEM[(int *)p_28 + 8B] = _129;
2733 store though, so if we add the MEM[(long long int *)p_28 + 8B] = 0;
2734 into the group and merging of those 3 stores is successful, merged
2735 stmts will be emitted at the latest store from that group, i.e.
2736 LAST_ORDER, which is the MEM[(int *)p_28].a = -1; store.
2737 The MEM[(int *)p_28 + 8B] = _129; store that originally follows
2738 the MEM[(long long int *)p_28 + 8B] = 0; would now be before it,
2739 so we need to refuse merging MEM[(long long int *)p_28 + 8B] = 0;
2740 into the group. That way it will be its own store group and will
4d213bf6 2741 not be touched. If ALL_INTEGER_CST_P and there are overlapping
c5679c37 2742 INTEGER_CST stores, those are mergeable using merge_overlapping,
2743 so don't return false for those.
2744
2745 Similarly, check stores from FIRST_EARLIER (inclusive) to END_EARLIER
2746 (exclusive), whether they don't overlap the bitrange START to END
2747 and have order in between FIRST_ORDER and LAST_ORDER. This is to
2748 prevent merging in cases like:
2749 MEM <char[12]> [&b + 8B] = {};
2750 MEM[(short *) &b] = 5;
2751 _5 = *x_4(D);
2752 MEM <long long unsigned int> [&b + 2B] = _5;
2753 MEM[(char *)&b + 16B] = 88;
2754 MEM[(int *)&b + 20B] = 1;
2755 The = {} store comes in sort_by_bitpos before the = 88 store, and can't
2756 be merged with it, because the = _5 store overlaps these and is in between
2757 them in sort_by_order ordering. If it was merged, the merged store would
2758 go after the = _5 store and thus change behavior. */
2759
2760static bool
2761check_no_overlap (const vec<store_immediate_info *> &m_store_info,
2762 unsigned int i,
2763 bool all_integer_cst_p, unsigned int first_order,
2764 unsigned int last_order, unsigned HOST_WIDE_INT start,
2765 unsigned HOST_WIDE_INT end, unsigned int first_earlier,
2766 unsigned end_earlier)
2767{
2768 unsigned int len = m_store_info.length ();
2769 for (unsigned int j = first_earlier; j < end_earlier; j++)
2770 {
2771 store_immediate_info *info = m_store_info[j];
2772 if (info->order > first_order
2773 && info->order < last_order
2774 && info->bitpos + info->bitsize > start)
2775 return false;
2776 }
2777 for (++i; i < len; ++i)
2778 {
2779 store_immediate_info *info = m_store_info[i];
2780 if (info->bitpos >= end)
2781 break;
2782 if (info->order < last_order
4d213bf6 2783 && (!all_integer_cst_p || info->rhs_code != INTEGER_CST))
2784 return false;
2785 }
2786 return true;
2787}
2788
2789/* Return true if m_store_info[first] and at least one following store
 2790 form a group that stores a TRY_SIZE-bit value which is byte swapped
 2791 from a memory load or some value, or is the identity of some value.
2792 This uses the bswap pass APIs. */
2793
2794bool
2795imm_store_chain_info::try_coalesce_bswap (merged_store_group *merged_store,
2796 unsigned int first,
2797 unsigned int try_size,
2798 unsigned int first_earlier)
2799{
2800 unsigned int len = m_store_info.length (), last = first;
2801 unsigned HOST_WIDE_INT width = m_store_info[first]->bitsize;
2802 if (width >= try_size)
2803 return false;
2804 for (unsigned int i = first + 1; i < len; ++i)
2805 {
2806 if (m_store_info[i]->bitpos != m_store_info[first]->bitpos + width
cb76fcd7 2807 || m_store_info[i]->lp_nr != merged_store->lp_nr
2808 || m_store_info[i]->ins_stmt == NULL)
2809 return false;
2810 width += m_store_info[i]->bitsize;
2811 if (width >= try_size)
2812 {
2813 last = i;
2814 break;
2815 }
2816 }
2817 if (width != try_size)
2818 return false;
2819
2820 bool allow_unaligned
028d4092 2821 = !STRICT_ALIGNMENT && param_store_merging_allow_unaligned;
2822 /* Punt if the combined store would not be aligned and we need alignment. */
2823 if (!allow_unaligned)
2824 {
2825 unsigned int align = merged_store->align;
2826 unsigned HOST_WIDE_INT align_base = merged_store->align_base;
2827 for (unsigned int i = first + 1; i <= last; ++i)
2828 {
2829 unsigned int this_align;
2830 unsigned HOST_WIDE_INT align_bitpos = 0;
2831 get_object_alignment_1 (gimple_assign_lhs (m_store_info[i]->stmt),
2832 &this_align, &align_bitpos);
2833 if (this_align > align)
2834 {
2835 align = this_align;
2836 align_base = m_store_info[i]->bitpos - align_bitpos;
2837 }
2838 }
2839 unsigned HOST_WIDE_INT align_bitpos
2840 = (m_store_info[first]->bitpos - align_base) & (align - 1);
2841 if (align_bitpos)
2842 align = least_bit_hwi (align_bitpos);
2843 if (align < try_size)
2844 return false;
2845 }
2846
2847 tree type;
2848 switch (try_size)
2849 {
2850 case 16: type = uint16_type_node; break;
2851 case 32: type = uint32_type_node; break;
2852 case 64: type = uint64_type_node; break;
2853 default: gcc_unreachable ();
2854 }
2855 struct symbolic_number n;
2856 gimple *ins_stmt = NULL;
2857 int vuse_store = -1;
2858 unsigned int first_order = merged_store->first_order;
2859 unsigned int last_order = merged_store->last_order;
2860 gimple *first_stmt = merged_store->first_stmt;
2861 gimple *last_stmt = merged_store->last_stmt;
c5679c37 2862 unsigned HOST_WIDE_INT end = merged_store->start + merged_store->width;
2863 store_immediate_info *infof = m_store_info[first];
2864
2865 for (unsigned int i = first; i <= last; ++i)
2866 {
2867 store_immediate_info *info = m_store_info[i];
2868 struct symbolic_number this_n = info->n;
2869 this_n.type = type;
2870 if (!this_n.base_addr)
2871 this_n.range = try_size / BITS_PER_UNIT;
2872 else
2873 /* Update vuse in case it has changed by output_merged_stores. */
2874 this_n.vuse = gimple_vuse (info->ins_stmt);
2875 unsigned int bitpos = info->bitpos - infof->bitpos;
2876 if (!do_shift_rotate (LSHIFT_EXPR, &this_n,
2877 BYTES_BIG_ENDIAN
2878 ? try_size - info->bitsize - bitpos
2879 : bitpos))
2880 return false;
aa11164a 2881 if (this_n.base_addr && vuse_store)
2882 {
2883 unsigned int j;
2884 for (j = first; j <= last; ++j)
2885 if (this_n.vuse == gimple_vuse (m_store_info[j]->stmt))
2886 break;
2887 if (j > last)
2888 {
2889 if (vuse_store == 1)
2890 return false;
2891 vuse_store = 0;
2892 }
2893 }
2894 if (i == first)
2895 {
2896 n = this_n;
2897 ins_stmt = info->ins_stmt;
2898 }
2899 else
2900 {
c5679c37 2901 if (n.base_addr && n.vuse != this_n.vuse)
4b84d9b8 2902 {
2903 if (vuse_store == 0)
2904 return false;
2905 vuse_store = 1;
4b84d9b8 2906 }
2907 if (info->order > last_order)
2908 {
2909 last_order = info->order;
2910 last_stmt = info->stmt;
2911 }
2912 else if (info->order < first_order)
2913 {
2914 first_order = info->order;
2915 first_stmt = info->stmt;
2916 }
2917 end = MAX (end, info->bitpos + info->bitsize);
2918
2919 ins_stmt = perform_symbolic_merge (ins_stmt, &n, info->ins_stmt,
04eccbbe 2920 &this_n, &n, BIT_IOR_EXPR);
2921 if (ins_stmt == NULL)
2922 return false;
2923 }
2924 }
2925
2926 uint64_t cmpxchg, cmpnop;
2927 bool cast64_to_32;
2928 find_bswap_or_nop_finalize (&n, &cmpxchg, &cmpnop, &cast64_to_32);
2929
 2930 /* A complete byte swap should make the symbolic number start with
 2931 the largest digit in the highest order byte. An unchanged symbolic
 2932 number indicates a read with the same endianness as the target. */
2933 if (n.n != cmpnop && n.n != cmpxchg)
2934 return false;
2935
2936 /* For now. */
2937 if (cast64_to_32)
2938 return false;
2939
2940 if (n.base_addr == NULL_TREE && !is_gimple_val (n.src))
2941 return false;
2942
2943 if (!check_no_overlap (m_store_info, last, false, first_order, last_order,
2944 merged_store->start, end, first_earlier, first))
2945 return false;
2946
2947 /* Don't handle memory copy this way if normal non-bswap processing
2948 would handle it too. */
2949 if (n.n == cmpnop && (unsigned) n.n_ops == last - first + 1)
2950 {
2951 unsigned int i;
2952 for (i = first; i <= last; ++i)
2953 if (m_store_info[i]->rhs_code != MEM_REF)
2954 break;
2955 if (i == last + 1)
2956 return false;
2957 }
2958
2959 if (n.n == cmpxchg)
2960 switch (try_size)
2961 {
2962 case 16:
2963 /* Will emit LROTATE_EXPR. */
2964 break;
2965 case 32:
2966 if (builtin_decl_explicit_p (BUILT_IN_BSWAP32)
2967 && optab_handler (bswap_optab, SImode) != CODE_FOR_nothing)
2968 break;
2969 return false;
2970 case 64:
2971 if (builtin_decl_explicit_p (BUILT_IN_BSWAP64)
2972 && optab_handler (bswap_optab, DImode) != CODE_FOR_nothing)
2973 break;
2974 return false;
2975 default:
2976 gcc_unreachable ();
2977 }
2978
2979 if (!allow_unaligned && n.base_addr)
2980 {
2981 unsigned int align = get_object_alignment (n.src);
2982 if (align < try_size)
2983 return false;
2984 }
2985
2986 /* If each load has vuse of the corresponding store, need to verify
2987 the loads can be sunk right before the last store. */
2988 if (vuse_store == 1)
2989 {
2990 auto_vec<tree, 64> refs;
2991 for (unsigned int i = first; i <= last; ++i)
2992 gather_bswap_load_refs (&refs,
2993 gimple_assign_rhs1 (m_store_info[i]->stmt));
2994
3f207ab3 2995 for (tree ref : refs)
2996 if (stmts_may_clobber_ref_p (first_stmt, last_stmt, ref))
2997 return false;
2998 n.vuse = NULL_TREE;
2999 }
3000
3001 infof->n = n;
3002 infof->ins_stmt = ins_stmt;
3003 for (unsigned int i = first; i <= last; ++i)
3004 {
3005 m_store_info[i]->rhs_code = n.n == cmpxchg ? LROTATE_EXPR : NOP_EXPR;
3006 m_store_info[i]->ops[0].base_addr = NULL_TREE;
3007 m_store_info[i]->ops[1].base_addr = NULL_TREE;
3008 if (i != first)
3009 merged_store->merge_into (m_store_info[i]);
3010 }
3011
3012 return true;
3013}
3014
3015/* Go through the candidate stores recorded in m_store_info and merge them
3016 into merged_store_group objects recorded into m_merged_store_groups
3017 representing the widened stores. Return true if coalescing was successful
3018 and the number of widened stores is fewer than the original number
3019 of stores. */
3020
3021bool
3022imm_store_chain_info::coalesce_immediate_stores ()
3023{
3024 /* Anything less can't be processed. */
3025 if (m_store_info.length () < 2)
3026 return false;
3027
3028 if (dump_file && (dump_flags & TDF_DETAILS))
c94c3532 3029 fprintf (dump_file, "Attempting to coalesce %u stores in chain\n",
3030 m_store_info.length ());
3031
3032 store_immediate_info *info;
4b84d9b8 3033 unsigned int i, ignore = 0;
3034 unsigned int first_earlier = 0;
3035 unsigned int end_earlier = 0;
3036
3037 /* Order the stores by the bitposition they write to. */
3038 m_store_info.qsort (sort_by_bitpos);
3039
3040 info = m_store_info[0];
3041 merged_store_group *merged_store = new merged_store_group (info);
3042 if (dump_file && (dump_flags & TDF_DETAILS))
3043 fputs ("New store group\n", dump_file);
3044
3045 FOR_EACH_VEC_ELT (m_store_info, i, info)
3046 {
3047 unsigned HOST_WIDE_INT new_bitregion_start, new_bitregion_end;
3048
4b84d9b8 3049 if (i <= ignore)
c94c3532 3050 goto done;
f663d9ad 3051
3052 while (first_earlier < end_earlier
3053 && (m_store_info[first_earlier]->bitpos
3054 + m_store_info[first_earlier]->bitsize
3055 <= merged_store->start))
3056 first_earlier++;
3057
3058 /* First try to handle group of stores like:
3059 p[0] = data >> 24;
3060 p[1] = data >> 16;
3061 p[2] = data >> 8;
3062 p[3] = data;
3063 using the bswap framework. */
3064 if (info->bitpos == merged_store->start + merged_store->width
3065 && merged_store->stores.length () == 1
3066 && merged_store->stores[0]->ins_stmt != NULL
cb76fcd7 3067 && info->lp_nr == merged_store->lp_nr
3068 && info->ins_stmt != NULL)
3069 {
3070 unsigned int try_size;
3071 for (try_size = 64; try_size >= 16; try_size >>= 1)
3072 if (try_coalesce_bswap (merged_store, i - 1, try_size,
3073 first_earlier))
4b84d9b8
JJ
3074 break;
3075
3076 if (try_size >= 16)
3077 {
3078 ignore = i + merged_store->stores.length () - 1;
3079 m_merged_store_groups.safe_push (merged_store);
3080 if (ignore < m_store_info.length ())
3081 {
3082 merged_store = new merged_store_group (m_store_info[ignore]);
3083 end_earlier = ignore;
3084 }
3085 else
3086 merged_store = NULL;
c94c3532 3087 goto done;
3088 }
3089 }
3090
3091 new_bitregion_start
3092 = MIN (merged_store->bitregion_start, info->bitregion_start);
3093 new_bitregion_end
3094 = MAX (merged_store->bitregion_end, info->bitregion_end);
3095
3096 if (info->order >= merged_store->first_nonmergeable_order
3097 || (((new_bitregion_end - new_bitregion_start + 1) / BITS_PER_UNIT)
028d4092 3098 > (unsigned) param_store_merging_max_size))
3099 ;
3100
3101 /* |---store 1---|
3102 |---store 2---|
4b84d9b8 3103 Overlapping stores. */
18e0c3d1 3104 else if (IN_RANGE (info->bitpos, merged_store->start,
3105 merged_store->start + merged_store->width - 1)
3106 /* |---store 1---||---store 2---|
3107 Handle also the consecutive INTEGER_CST stores case here,
3108 as we have here the code to deal with overlaps. */
3109 || (info->bitregion_start <= merged_store->bitregion_end
3110 && info->rhs_code == INTEGER_CST
3111 && merged_store->only_constants
3112 && merged_store->can_be_merged_into (info)))
f663d9ad 3113 {
245f6de1 3114 /* Only allow overlapping stores of constants. */
3115 if (info->rhs_code == INTEGER_CST
3116 && merged_store->only_constants
3117 && info->lp_nr == merged_store->lp_nr)
245f6de1 3118 {
3119 unsigned int first_order
3120 = MIN (merged_store->first_order, info->order);
3121 unsigned int last_order
3122 = MAX (merged_store->last_order, info->order);
3123 unsigned HOST_WIDE_INT end
3124 = MAX (merged_store->start + merged_store->width,
3125 info->bitpos + info->bitsize);
3126 if (check_no_overlap (m_store_info, i, true, first_order,
3127 last_order, merged_store->start, end,
3128 first_earlier, end_earlier))
3129 {
3130 /* check_no_overlap call above made sure there are no
3131 overlapping stores with non-INTEGER_CST rhs_code
3132 in between the first and last of the stores we've
3133 just merged. If there are any INTEGER_CST rhs_code
3134 stores in between, we need to merge_overlapping them
3135 even if in the sort_by_bitpos order there are other
3136 overlapping stores in between. Keep those stores as is.
3137 Example:
3138 MEM[(int *)p_28] = 0;
3139 MEM[(char *)p_28 + 3B] = 1;
3140 MEM[(char *)p_28 + 1B] = 2;
3141 MEM[(char *)p_28 + 2B] = MEM[(char *)p_28 + 6B];
3142 We can't merge the zero store with the store of two and
3143 not merge anything else, because the store of one is
3144 in the original order in between those two, but in
3145 store_by_bitpos order it comes after the last store that
3146 we can't merge with them. We can merge the first 3 stores
3147 and keep the last store as is though. */
3148 unsigned int len = m_store_info.length ();
3149 unsigned int try_order = last_order;
3150 unsigned int first_nonmergeable_order;
3151 unsigned int k;
3152 bool last_iter = false;
3153 int attempts = 0;
3154 do
6cd4c66e 3155 {
18e0c3d1 3156 unsigned int max_order = 0;
bd909071 3157 unsigned int min_order = first_order;
3158 unsigned first_nonmergeable_int_order = ~0U;
3159 unsigned HOST_WIDE_INT this_end = end;
3160 k = i;
3161 first_nonmergeable_order = ~0U;
3162 for (unsigned int j = i + 1; j < len; ++j)
6cd4c66e 3163 {
3164 store_immediate_info *info2 = m_store_info[j];
3165 if (info2->bitpos >= this_end)
3166 break;
3167 if (info2->order < try_order)
6cd4c66e 3168 {
3169 if (info2->rhs_code != INTEGER_CST
3170 || info2->lp_nr != merged_store->lp_nr)
3171 {
3172 /* Normally check_no_overlap makes sure this
3173 doesn't happen, but if end grows below,
3174 then we need to process more stores than
3175 check_no_overlap verified. Example:
3176 MEM[(int *)p_5] = 0;
3177 MEM[(short *)p_5 + 3B] = 1;
3178 MEM[(char *)p_5 + 4B] = _9;
3179 MEM[(char *)p_5 + 2B] = 2; */
3180 k = 0;
3181 break;
3182 }
3183 k = j;
bd909071 3184 min_order = MIN (min_order, info2->order);
18e0c3d1
JJ
3185 this_end = MAX (this_end,
3186 info2->bitpos + info2->bitsize);
6cd4c66e 3187 }
18e0c3d1 3188 else if (info2->rhs_code == INTEGER_CST
4119cd69 3189 && info2->lp_nr == merged_store->lp_nr
3190 && !last_iter)
3191 {
3192 max_order = MAX (max_order, info2->order + 1);
3193 first_nonmergeable_int_order
3194 = MIN (first_nonmergeable_int_order,
3195 info2->order);
3196 }
3197 else
3198 first_nonmergeable_order
3199 = MIN (first_nonmergeable_order, info2->order);
6cd4c66e 3200 }
3201 if (k > i
3202 && !check_no_overlap (m_store_info, len - 1, true,
3203 min_order, try_order,
3204 merged_store->start, this_end,
3205 first_earlier, end_earlier))
3206 k = 0;
3207 if (k == 0)
3208 {
3209 if (last_order == try_order)
3210 break;
3211 /* If this failed, but only because we grew
3212 try_order, retry with the last working one,
3213 so that we merge at least something. */
3214 try_order = last_order;
3215 last_iter = true;
3216 continue;
3217 }
3218 last_order = try_order;
3219 /* Retry with a larger try_order to see if we could
3220 merge some further INTEGER_CST stores. */
3221 if (max_order
3222 && (first_nonmergeable_int_order
3223 < first_nonmergeable_order))
3224 {
3225 try_order = MIN (max_order,
3226 first_nonmergeable_order);
3227 try_order
3228 = MIN (try_order,
3229 merged_store->first_nonmergeable_order);
3230 if (try_order > last_order && ++attempts < 16)
3231 continue;
3232 }
3233 first_nonmergeable_order
3234 = MIN (first_nonmergeable_order,
3235 first_nonmergeable_int_order);
3236 end = this_end;
3237 break;
6cd4c66e 3238 }
18e0c3d1 3239 while (1);
3240
3241 if (k != 0)
3242 {
3243 merged_store->merge_overlapping (info);
3244
3245 merged_store->first_nonmergeable_order
3246 = MIN (merged_store->first_nonmergeable_order,
3247 first_nonmergeable_order);
3248
3249 for (unsigned int j = i + 1; j <= k; j++)
3250 {
3251 store_immediate_info *info2 = m_store_info[j];
3252 gcc_assert (info2->bitpos < end);
3253 if (info2->order < last_order)
3254 {
3255 gcc_assert (info2->rhs_code == INTEGER_CST);
3256 if (info != info2)
3257 merged_store->merge_overlapping (info2);
3258 }
3259 /* Other stores are kept and not merged in any
3260 way. */
3261 }
3262 ignore = k;
3263 goto done;
3264 }
3265 }
245f6de1 3266 }
f663d9ad 3267 }
      /* |---store 1---||---store 2---|
         This store is consecutive to the previous one.
         Merge it into the current store group.  There can be gaps in between
         the stores, but there can't be gaps in between bitregions.  */
      else if (info->bitregion_start <= merged_store->bitregion_end
               && merged_store->can_be_merged_into (info))
        {
          store_immediate_info *infof = merged_store->stores[0];

          /* All the rhs_code ops that take 2 operands are commutative,
             swap the operands if it could make the operands compatible.  */
          if (infof->ops[0].base_addr
              && infof->ops[1].base_addr
              && info->ops[0].base_addr
              && info->ops[1].base_addr
              && known_eq (info->ops[1].bitpos - infof->ops[0].bitpos,
                           info->bitpos - infof->bitpos)
              && operand_equal_p (info->ops[1].base_addr,
                                  infof->ops[0].base_addr, 0))
            {
              std::swap (info->ops[0], info->ops[1]);
              info->ops_swapped_p = true;
            }
          if (check_no_overlap (m_store_info, i, false,
                                MIN (merged_store->first_order, info->order),
                                MAX (merged_store->last_order, info->order),
                                merged_store->start,
                                MAX (merged_store->start + merged_store->width,
                                     info->bitpos + info->bitsize),
                                first_earlier, end_earlier))
            {
              /* Turn MEM_REF into BIT_INSERT_EXPR for bit-field stores.  */
              if (info->rhs_code == MEM_REF && infof->rhs_code != MEM_REF)
                {
                  info->rhs_code = BIT_INSERT_EXPR;
                  info->ops[0].val = gimple_assign_rhs1 (info->stmt);
                  info->ops[0].base_addr = NULL_TREE;
                }
              else if (infof->rhs_code == MEM_REF && info->rhs_code != MEM_REF)
                {
                  for (store_immediate_info *infoj : merged_store->stores)
                    {
                      infoj->rhs_code = BIT_INSERT_EXPR;
                      infoj->ops[0].val = gimple_assign_rhs1 (infoj->stmt);
                      infoj->ops[0].base_addr = NULL_TREE;
                    }
                  merged_store->bit_insertion = true;
                }
              if ((infof->ops[0].base_addr
                   ? compatible_load_p (merged_store, info, base_addr, 0)
                   : !info->ops[0].base_addr)
                  && (infof->ops[1].base_addr
                      ? compatible_load_p (merged_store, info, base_addr, 1)
                      : !info->ops[1].base_addr))
                {
                  merged_store->merge_into (info);
                  goto done;
                }
            }
        }

      /* |---store 1---| <gap> |---store 2---|.
         Gap between stores or the rhs not compatible.  Start a new group.  */

      /* Try to apply all the stores recorded for the group to determine
         the bitpattern they write and discard it if that fails.
         This will also reject single-store groups.  */
      if (merged_store->apply_stores ())
        m_merged_store_groups.safe_push (merged_store);
      else
        delete merged_store;

      merged_store = new merged_store_group (info);
      end_earlier = i;
      if (dump_file && (dump_flags & TDF_DETAILS))
        fputs ("New store group\n", dump_file);

    done:
      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Store %u:\nbitsize:" HOST_WIDE_INT_PRINT_DEC
                   " bitpos:" HOST_WIDE_INT_PRINT_DEC " val:",
                   i, info->bitsize, info->bitpos);
          print_generic_expr (dump_file, gimple_assign_rhs1 (info->stmt));
          fputc ('\n', dump_file);
        }
    }

  /* Record or discard the last store group.  */
  if (merged_store)
    {
      if (merged_store->apply_stores ())
        m_merged_store_groups.safe_push (merged_store);
      else
        delete merged_store;
    }

  gcc_assert (m_merged_store_groups.length () <= m_store_info.length ());

  bool success
    = !m_merged_store_groups.is_empty ()
      && m_merged_store_groups.length () < m_store_info.length ();

  if (success && dump_file)
    fprintf (dump_file, "Coalescing successful!\nMerged into %u stores\n",
             m_merged_store_groups.length ());

  return success;
}

/* Return the type to use for the merged stores or loads described by STMTS.
   This is needed to get the alias sets right.  If IS_LOAD, look for rhs,
   otherwise lhs.  Additionally set *CLIQUEP and *BASEP to MR_DEPENDENCE_*
   of the MEM_REFs if any.  */

static tree
get_alias_type_for_stmts (vec<gimple *> &stmts, bool is_load,
                          unsigned short *cliquep, unsigned short *basep)
{
  gimple *stmt;
  unsigned int i;
  tree type = NULL_TREE;
  tree ret = NULL_TREE;
  *cliquep = 0;
  *basep = 0;

  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      tree ref = is_load ? gimple_assign_rhs1 (stmt)
                         : gimple_assign_lhs (stmt);
      tree type1 = reference_alias_ptr_type (ref);
      tree base = get_base_address (ref);

      if (i == 0)
        {
          if (TREE_CODE (base) == MEM_REF)
            {
              *cliquep = MR_DEPENDENCE_CLIQUE (base);
              *basep = MR_DEPENDENCE_BASE (base);
            }
          ret = type = type1;
          continue;
        }
      if (!alias_ptr_types_compatible_p (type, type1))
        ret = ptr_type_node;
      if (TREE_CODE (base) != MEM_REF
          || *cliquep != MR_DEPENDENCE_CLIQUE (base)
          || *basep != MR_DEPENDENCE_BASE (base))
        {
          *cliquep = 0;
          *basep = 0;
        }
    }
  return ret;
}
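
/* For example, if one constituent access goes through "int *" and another
   through an alias-incompatible pointer type, the loop above falls back to
   ptr_type_node for the merged reference, i.e. alias set 0, so the widened
   access conservatively conflicts with both original ones.  */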

/* Return the location_t information we can find among the statements
   in STMTS.  */

static location_t
get_location_for_stmts (vec<gimple *> &stmts)
{
  for (gimple *stmt : stmts)
    if (gimple_has_location (stmt))
      return gimple_location (stmt);

  return UNKNOWN_LOCATION;
}

/* Used to describe a store resulting from splitting a wide store into
   smaller regularly-sized stores in split_group.  */

class split_store
{
public:
  unsigned HOST_WIDE_INT bytepos;
  unsigned HOST_WIDE_INT size;
  unsigned HOST_WIDE_INT align;
  auto_vec<store_immediate_info *> orig_stores;
  /* True if there is a single orig stmt covering the whole split store.  */
  bool orig;
  split_store (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
               unsigned HOST_WIDE_INT);
};

/* Simple constructor.  */

split_store::split_store (unsigned HOST_WIDE_INT bp,
                          unsigned HOST_WIDE_INT sz,
                          unsigned HOST_WIDE_INT al)
  : bytepos (bp), size (sz), align (al), orig (false)
{
  orig_stores.create (0);
}
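
/* An illustrative (hypothetical) instance: a merged group covering bytes
   [0, 6) of the base at a 32-bit aligned position would typically be
   described by two split_store objects, one with bytepos 0, size 32 and
   align 32, and one with bytepos 4, size 16 and align 32, each listing in
   orig_stores the original stores that fall into its range.  */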

/* Record all stores in GROUP that write to the region starting at BITPOS
   and of size BITSIZE.  Record infos for such statements in STORES if
   non-NULL.  The stores in GROUP must be sorted by bitposition.  Return
   INFO if there is exactly one original store in the range (in that case
   ignore clobber stmts, unless there are only clobber stmts).  */

static store_immediate_info *
find_constituent_stores (class merged_store_group *group,
                         vec<store_immediate_info *> *stores,
                         unsigned int *first,
                         unsigned HOST_WIDE_INT bitpos,
                         unsigned HOST_WIDE_INT bitsize)
{
  store_immediate_info *info, *ret = NULL;
  unsigned int i;
  bool second = false;
  bool update_first = true;
  unsigned HOST_WIDE_INT end = bitpos + bitsize;
  for (i = *first; group->stores.iterate (i, &info); ++i)
    {
      unsigned HOST_WIDE_INT stmt_start = info->bitpos;
      unsigned HOST_WIDE_INT stmt_end = stmt_start + info->bitsize;
      if (stmt_end <= bitpos)
        {
          /* BITPOS passed to this function never decreases from within the
             same split_group call, so optimize and don't scan info records
             which are known to end before or at BITPOS next time.
             Only do it if all stores before this one also pass this.  */
          if (update_first)
            *first = i + 1;
          continue;
        }
      else
        update_first = false;

      /* The stores in GROUP are ordered by bitposition so if we're past
         the region for this group return early.  */
      if (stmt_start >= end)
        return ret;

      if (gimple_clobber_p (info->stmt))
        {
          if (stores)
            stores->safe_push (info);
          if (ret == NULL)
            ret = info;
          continue;
        }
      if (stores)
        {
          stores->safe_push (info);
          if (ret && !gimple_clobber_p (ret->stmt))
            {
              ret = NULL;
              second = true;
            }
        }
      else if (ret && !gimple_clobber_p (ret->stmt))
        return NULL;
      if (!second)
        ret = info;
    }
  return ret;
}
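
/* A typical dry-run use, as in split_group below, passes STORES == NULL
   merely to ask whether a candidate range is covered by exactly one
   original store:

     unsigned int first = 0;
     store_immediate_info *info
       = find_constituent_stores (group, NULL, &first, try_bitpos, try_size);

   A non-NULL result with !gimple_clobber_p (info->stmt) means the split
   store can potentially reuse that original statement as-is.  */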

/* Return how many SSA_NAMEs used to compute value to store in the INFO
   store have multiple uses.  If any SSA_NAME has multiple uses, also
   count statements needed to compute it.  */

static unsigned
count_multiple_uses (store_immediate_info *info)
{
  gimple *stmt = info->stmt;
  unsigned ret = 0;
  switch (info->rhs_code)
    {
    case INTEGER_CST:
    case STRING_CST:
      return 0;
    case BIT_AND_EXPR:
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
      if (info->bit_not_p)
        {
          if (!has_single_use (gimple_assign_rhs1 (stmt)))
            ret = 1; /* Fall through below to return
                        the BIT_NOT_EXPR stmt and then
                        BIT_{AND,IOR,XOR}_EXPR and anything it
                        uses.  */
          else
            /* stmt is after this the BIT_NOT_EXPR.  */
            stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
        }
      if (!has_single_use (gimple_assign_rhs1 (stmt)))
        {
          ret += 1 + info->ops[0].bit_not_p;
          if (info->ops[1].base_addr)
            ret += 1 + info->ops[1].bit_not_p;
          return ret + 1;
        }
      stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
      /* stmt is now the BIT_*_EXPR.  */
      if (!has_single_use (gimple_assign_rhs1 (stmt)))
        ret += 1 + info->ops[info->ops_swapped_p].bit_not_p;
      else if (info->ops[info->ops_swapped_p].bit_not_p)
        {
          gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
          if (!has_single_use (gimple_assign_rhs1 (stmt2)))
            ++ret;
        }
      if (info->ops[1].base_addr == NULL_TREE)
        {
          gcc_checking_assert (!info->ops_swapped_p);
          return ret;
        }
      if (!has_single_use (gimple_assign_rhs2 (stmt)))
        ret += 1 + info->ops[1 - info->ops_swapped_p].bit_not_p;
      else if (info->ops[1 - info->ops_swapped_p].bit_not_p)
        {
          gimple *stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
          if (!has_single_use (gimple_assign_rhs1 (stmt2)))
            ++ret;
        }
      return ret;
    case MEM_REF:
      if (!has_single_use (gimple_assign_rhs1 (stmt)))
        return 1 + info->ops[0].bit_not_p;
      else if (info->ops[0].bit_not_p)
        {
          stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
          if (!has_single_use (gimple_assign_rhs1 (stmt)))
            return 1;
        }
      return 0;
    case BIT_INSERT_EXPR:
      return has_single_use (gimple_assign_rhs1 (stmt)) ? 0 : 1;
    default:
      gcc_unreachable ();
    }
}
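
/* For example (hypothetical GIMPLE):

     _1 = MEM[(int *)q_4];
     _2 = _1 ^ 123;
     MEM[(int *)p_5] = _2;

   if _1 has another use outside the store chain, the load computing it
   must survive the transformation, so it is counted here and ends up in
   the total_new estimate of split_group.  */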

/* Split a merged store described by GROUP by populating the SPLIT_STORES
   vector (if non-NULL) with split_store structs describing the byte offset
   (from the base), the bit size and alignment of each store as well as the
   original statements involved in each such split group.
   This is to separate the splitting strategy from the statement
   building/emission/linking done in output_merged_store.
   Return number of new stores.
   If ALLOW_UNALIGNED_STORE is false, then all stores must be aligned.
   If ALLOW_UNALIGNED_LOAD is false, then all loads must be aligned.
   BZERO_FIRST may be true only when the first store covers the whole group
   and clears it; if BZERO_FIRST is true, keep that first store in the set
   unmodified and emit further stores for the overrides only.
   If SPLIT_STORES is NULL, it is just a dry run to count number of
   new stores.  */

static unsigned int
split_group (merged_store_group *group, bool allow_unaligned_store,
             bool allow_unaligned_load, bool bzero_first,
             vec<split_store *> *split_stores,
             unsigned *total_orig,
             unsigned *total_new)
{
  unsigned HOST_WIDE_INT pos = group->bitregion_start;
  unsigned HOST_WIDE_INT size = group->bitregion_end - pos;
  unsigned HOST_WIDE_INT bytepos = pos / BITS_PER_UNIT;
  unsigned HOST_WIDE_INT group_align = group->align;
  unsigned HOST_WIDE_INT align_base = group->align_base;
  unsigned HOST_WIDE_INT group_load_align = group_align;
  bool any_orig = false;

  gcc_assert ((size % BITS_PER_UNIT == 0) && (pos % BITS_PER_UNIT == 0));

  /* For the bswap framework using sets of stores, all the checking has been
     done earlier in try_coalesce_bswap and the result always needs to be
     emitted as a single store.  Likewise for string concatenation.  */
  if (group->stores[0]->rhs_code == LROTATE_EXPR
      || group->stores[0]->rhs_code == NOP_EXPR
      || group->string_concatenation)
    {
      gcc_assert (!bzero_first);
      if (total_orig)
        {
          /* Avoid the old/new stmt count heuristics.  It should always
             be beneficial.  */
          total_new[0] = 1;
          total_orig[0] = 2;
        }

      if (split_stores)
        {
          unsigned HOST_WIDE_INT align_bitpos
            = (group->start - align_base) & (group_align - 1);
          unsigned HOST_WIDE_INT align = group_align;
          if (align_bitpos)
            align = least_bit_hwi (align_bitpos);
          bytepos = group->start / BITS_PER_UNIT;
          split_store *store
            = new split_store (bytepos, group->width, align);
          unsigned int first = 0;
          find_constituent_stores (group, &store->orig_stores,
                                   &first, group->start, group->width);
          split_stores->safe_push (store);
        }

      return 1;
    }

  unsigned int ret = 0, first = 0;
  unsigned HOST_WIDE_INT try_pos = bytepos;

  if (total_orig)
    {
      unsigned int i;
      store_immediate_info *info = group->stores[0];

      total_new[0] = 0;
      total_orig[0] = 1; /* The orig store.  */
      info = group->stores[0];
      if (info->ops[0].base_addr)
        total_orig[0]++;
      if (info->ops[1].base_addr)
        total_orig[0]++;
      switch (info->rhs_code)
        {
        case BIT_AND_EXPR:
        case BIT_IOR_EXPR:
        case BIT_XOR_EXPR:
          total_orig[0]++; /* The orig BIT_*_EXPR stmt.  */
          break;
        default:
          break;
        }
      total_orig[0] *= group->stores.length ();

      FOR_EACH_VEC_ELT (group->stores, i, info)
        {
          total_new[0] += count_multiple_uses (info);
          total_orig[0] += (info->bit_not_p
                            + info->ops[0].bit_not_p
                            + info->ops[1].bit_not_p);
        }
    }

  if (!allow_unaligned_load)
    for (int i = 0; i < 2; ++i)
      if (group->load_align[i])
        group_load_align = MIN (group_load_align, group->load_align[i]);

  if (bzero_first)
    {
      store_immediate_info *gstore;
      FOR_EACH_VEC_ELT (group->stores, first, gstore)
        if (!gimple_clobber_p (gstore->stmt))
          break;
      ++first;
      ret = 1;
      if (split_stores)
        {
          split_store *store
            = new split_store (bytepos, gstore->bitsize, align_base);
          store->orig_stores.safe_push (gstore);
          store->orig = true;
          any_orig = true;
          split_stores->safe_push (store);
        }
    }

  while (size > 0)
    {
      if ((allow_unaligned_store || group_align <= BITS_PER_UNIT)
          && (group->mask[try_pos - bytepos] == (unsigned char) ~0U
              || (bzero_first && group->val[try_pos - bytepos] == 0)))
        {
          /* Skip padding bytes.  */
          ++try_pos;
          size -= BITS_PER_UNIT;
          continue;
        }

      unsigned HOST_WIDE_INT try_bitpos = try_pos * BITS_PER_UNIT;
      unsigned int try_size = MAX_STORE_BITSIZE, nonmasked;
      unsigned HOST_WIDE_INT align_bitpos
        = (try_bitpos - align_base) & (group_align - 1);
      unsigned HOST_WIDE_INT align = group_align;
      bool found_orig = false;
      if (align_bitpos)
        align = least_bit_hwi (align_bitpos);
      if (!allow_unaligned_store)
        try_size = MIN (try_size, align);
      if (!allow_unaligned_load)
        {
          /* If we can't do or don't want to do unaligned stores
             as well as loads, we need to take the loads into account
             as well.  */
          unsigned HOST_WIDE_INT load_align = group_load_align;
          align_bitpos = (try_bitpos - align_base) & (load_align - 1);
          if (align_bitpos)
            load_align = least_bit_hwi (align_bitpos);
          for (int i = 0; i < 2; ++i)
            if (group->load_align[i])
              {
                align_bitpos
                  = known_alignment (try_bitpos
                                     - group->stores[0]->bitpos
                                     + group->stores[0]->ops[i].bitpos
                                     - group->load_align_base[i]);
                if (align_bitpos & (group_load_align - 1))
                  {
                    unsigned HOST_WIDE_INT a = least_bit_hwi (align_bitpos);
                    load_align = MIN (load_align, a);
                  }
              }
          try_size = MIN (try_size, load_align);
        }
      store_immediate_info *info
        = find_constituent_stores (group, NULL, &first, try_bitpos, try_size);
      if (info && !gimple_clobber_p (info->stmt))
        {
          /* If there is just one original statement for the range, see if
             we can just reuse the original store which could be even larger
             than try_size.  */
          unsigned HOST_WIDE_INT stmt_end
            = ROUND_UP (info->bitpos + info->bitsize, BITS_PER_UNIT);
          info = find_constituent_stores (group, NULL, &first, try_bitpos,
                                          stmt_end - try_bitpos);
          if (info && info->bitpos >= try_bitpos)
            {
              store_immediate_info *info2 = NULL;
              unsigned int first_copy = first;
              if (info->bitpos > try_bitpos
                  && stmt_end - try_bitpos <= try_size)
                {
                  info2 = find_constituent_stores (group, NULL, &first_copy,
                                                   try_bitpos,
                                                   info->bitpos - try_bitpos);
                  gcc_assert (info2 == NULL || gimple_clobber_p (info2->stmt));
                }
              if (info2 == NULL && stmt_end - try_bitpos < try_size)
                {
                  info2 = find_constituent_stores (group, NULL, &first_copy,
                                                   stmt_end,
                                                   (try_bitpos + try_size)
                                                   - stmt_end);
                  gcc_assert (info2 == NULL || gimple_clobber_p (info2->stmt));
                }
              if (info2 == NULL)
                {
                  try_size = stmt_end - try_bitpos;
                  found_orig = true;
                  goto found;
                }
            }
        }

      /* Approximate store bitsize for the case when there are no padding
         bits.  */
      while (try_size > size)
        try_size /= 2;
      /* Now look for whole padding bytes at the end of that bitsize.  */
      for (nonmasked = try_size / BITS_PER_UNIT; nonmasked > 0; --nonmasked)
        if (group->mask[try_pos - bytepos + nonmasked - 1]
            != (unsigned char) ~0U
            && (!bzero_first
                || group->val[try_pos - bytepos + nonmasked - 1] != 0))
          break;
      if (nonmasked == 0 || (info && gimple_clobber_p (info->stmt)))
        {
          /* If entire try_size range is padding, skip it.  */
          try_pos += try_size / BITS_PER_UNIT;
          size -= try_size;
          continue;
        }
      /* Otherwise try to decrease try_size if second half, last 3 quarters
         etc. are padding.  */
      nonmasked *= BITS_PER_UNIT;
      while (nonmasked <= try_size / 2)
        try_size /= 2;
      if (!allow_unaligned_store && group_align > BITS_PER_UNIT)
        {
          /* Now look for whole padding bytes at the start of that bitsize.  */
          unsigned int try_bytesize = try_size / BITS_PER_UNIT, masked;
          for (masked = 0; masked < try_bytesize; ++masked)
            if (group->mask[try_pos - bytepos + masked] != (unsigned char) ~0U
                && (!bzero_first
                    || group->val[try_pos - bytepos + masked] != 0))
              break;
          masked *= BITS_PER_UNIT;
          gcc_assert (masked < try_size);
          if (masked >= try_size / 2)
            {
              while (masked >= try_size / 2)
                {
                  try_size /= 2;
                  try_pos += try_size / BITS_PER_UNIT;
                  size -= try_size;
                  masked -= try_size;
                }
              /* Need to recompute the alignment, so just retry at the new
                 position.  */
              continue;
            }
        }

    found:
      ++ret;

      if (split_stores)
        {
          split_store *store
            = new split_store (try_pos, try_size, align);
          info = find_constituent_stores (group, &store->orig_stores,
                                          &first, try_bitpos, try_size);
          if (info
              && !gimple_clobber_p (info->stmt)
              && info->bitpos >= try_bitpos
              && info->bitpos + info->bitsize <= try_bitpos + try_size
              && (store->orig_stores.length () == 1
                  || found_orig
                  || (info->bitpos == try_bitpos
                      && (info->bitpos + info->bitsize
                          == try_bitpos + try_size))))
            {
              store->orig = true;
              any_orig = true;
            }
          split_stores->safe_push (store);
        }

      try_pos += try_size / BITS_PER_UNIT;
      size -= try_size;
    }

  if (total_orig)
    {
      unsigned int i;
      split_store *store;
      /* If we are reusing some original stores and any of the
         original SSA_NAMEs had multiple uses, we need to subtract
         those now before we add the new ones.  */
      if (total_new[0] && any_orig)
        {
          FOR_EACH_VEC_ELT (*split_stores, i, store)
            if (store->orig)
              total_new[0] -= count_multiple_uses (store->orig_stores[0]);
        }
      total_new[0] += ret; /* The new store.  */
      store_immediate_info *info = group->stores[0];
      if (info->ops[0].base_addr)
        total_new[0] += ret;
      if (info->ops[1].base_addr)
        total_new[0] += ret;
      switch (info->rhs_code)
        {
        case BIT_AND_EXPR:
        case BIT_IOR_EXPR:
        case BIT_XOR_EXPR:
          total_new[0] += ret; /* The new BIT_*_EXPR stmt.  */
          break;
        default:
          break;
        }
      FOR_EACH_VEC_ELT (*split_stores, i, store)
        {
          unsigned int j;
          bool bit_not_p[3] = { false, false, false };
          /* If all orig_stores have certain bit_not_p set, then
             we'd use a BIT_NOT_EXPR stmt and need to account for it.
             If some orig_stores have certain bit_not_p set, then
             we'd use a BIT_XOR_EXPR with a mask and need to account for
             it.  */
          FOR_EACH_VEC_ELT (store->orig_stores, j, info)
            {
              if (info->ops[0].bit_not_p)
                bit_not_p[0] = true;
              if (info->ops[1].bit_not_p)
                bit_not_p[1] = true;
              if (info->bit_not_p)
                bit_not_p[2] = true;
            }
          total_new[0] += bit_not_p[0] + bit_not_p[1] + bit_not_p[2];
        }
    }

  return ret;
}
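
/* A worked (hypothetical) example: for a group whose bitregion spans
   48 bits at a 32-bit aligned position, with no padding bytes and
   unaligned stores disallowed, the loop above first clamps try_size
   from MAX_STORE_BITSIZE down to the 32-bit alignment and records a
   4-byte split store at bytepos 0; on the next iteration only 16 bits
   remain, so a 2-byte split store follows and the function returns 2.  */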

/* Return the operation through which the operand IDX (if < 2) or
   result (IDX == 2) should be inverted.  If NOP_EXPR, no inversion
   is done, if BIT_NOT_EXPR, all bits are inverted, if BIT_XOR_EXPR,
   the bits should be xored with mask.  */

static enum tree_code
invert_op (split_store *split_store, int idx, tree int_type, tree &mask)
{
  unsigned int i;
  store_immediate_info *info;
  unsigned int cnt = 0;
  bool any_paddings = false;
  FOR_EACH_VEC_ELT (split_store->orig_stores, i, info)
    {
      bool bit_not_p = idx < 2 ? info->ops[idx].bit_not_p : info->bit_not_p;
      if (bit_not_p)
        {
          ++cnt;
          tree lhs = gimple_assign_lhs (info->stmt);
          if (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
              && TYPE_PRECISION (TREE_TYPE (lhs)) < info->bitsize)
            any_paddings = true;
        }
    }
  mask = NULL_TREE;
  if (cnt == 0)
    return NOP_EXPR;
  if (cnt == split_store->orig_stores.length () && !any_paddings)
    return BIT_NOT_EXPR;

  unsigned HOST_WIDE_INT try_bitpos = split_store->bytepos * BITS_PER_UNIT;
  unsigned buf_size = split_store->size / BITS_PER_UNIT;
  unsigned char *buf
    = XALLOCAVEC (unsigned char, buf_size);
  memset (buf, ~0U, buf_size);
  FOR_EACH_VEC_ELT (split_store->orig_stores, i, info)
    {
      bool bit_not_p = idx < 2 ? info->ops[idx].bit_not_p : info->bit_not_p;
      if (!bit_not_p)
        continue;
      /* Clear regions with bit_not_p and invert afterwards, rather than
         clear regions with !bit_not_p, so that gaps in between stores aren't
         set in the mask.  */
      unsigned HOST_WIDE_INT bitsize = info->bitsize;
      unsigned HOST_WIDE_INT prec = bitsize;
      unsigned int pos_in_buffer = 0;
      if (any_paddings)
        {
          tree lhs = gimple_assign_lhs (info->stmt);
          if (INTEGRAL_TYPE_P (TREE_TYPE (lhs))
              && TYPE_PRECISION (TREE_TYPE (lhs)) < bitsize)
            prec = TYPE_PRECISION (TREE_TYPE (lhs));
        }
      if (info->bitpos < try_bitpos)
        {
          gcc_assert (info->bitpos + bitsize > try_bitpos);
          if (!BYTES_BIG_ENDIAN)
            {
              if (prec <= try_bitpos - info->bitpos)
                continue;
              prec -= try_bitpos - info->bitpos;
            }
          bitsize -= try_bitpos - info->bitpos;
          if (BYTES_BIG_ENDIAN && prec > bitsize)
            prec = bitsize;
        }
      else
        pos_in_buffer = info->bitpos - try_bitpos;
      if (prec < bitsize)
        {
          /* If this is a bool inversion, invert just the least significant
             prec bits rather than all bits of it.  */
          if (BYTES_BIG_ENDIAN)
            {
              pos_in_buffer += bitsize - prec;
              if (pos_in_buffer >= split_store->size)
                continue;
            }
          bitsize = prec;
        }
      if (pos_in_buffer + bitsize > split_store->size)
        bitsize = split_store->size - pos_in_buffer;
      unsigned char *p = buf + (pos_in_buffer / BITS_PER_UNIT);
      if (BYTES_BIG_ENDIAN)
        clear_bit_region_be (p, (BITS_PER_UNIT - 1
                                 - (pos_in_buffer % BITS_PER_UNIT)), bitsize);
      else
        clear_bit_region (p, pos_in_buffer % BITS_PER_UNIT, bitsize);
    }
  for (unsigned int i = 0; i < buf_size; ++i)
    buf[i] = ~buf[i];
  mask = native_interpret_expr (int_type, buf, buf_size);
  return BIT_XOR_EXPR;
}
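
/* For instance (hypothetical): if a split store covers two adjacent byte
   stores and only the lower-addressed one has bit_not_p set, neither
   NOP_EXPR nor BIT_NOT_EXPR applies; the code above builds a mask whose
   bits are set over that byte only, e.g. 0xff interpreted as 0x00ff in a
   16-bit int_type on a little-endian target, and returns BIT_XOR_EXPR so
   that just those bits get inverted.  */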

/* Given a merged store group GROUP output the widened version of it.
   The store chain is against the base object BASE.
   Try store sizes of at most MAX_STORE_BITSIZE bits wide and don't output
   unaligned stores for STRICT_ALIGNMENT targets or if it's too expensive.
   Make sure that the number of statements output is less than the number of
   original statements.  If a better sequence is possible emit it and
   return true.  */

bool
imm_store_chain_info::output_merged_store (merged_store_group *group)
{
  const unsigned HOST_WIDE_INT start_byte_pos
    = group->bitregion_start / BITS_PER_UNIT;
  unsigned int orig_num_stmts = group->stores.length ();
  if (orig_num_stmts < 2)
    return false;

  bool allow_unaligned_store
    = !STRICT_ALIGNMENT && param_store_merging_allow_unaligned;
  bool allow_unaligned_load = allow_unaligned_store;
  bool bzero_first = false;
  store_immediate_info *store;
  unsigned int num_clobber_stmts = 0;
  if (group->stores[0]->rhs_code == INTEGER_CST)
    {
      unsigned int i;
      FOR_EACH_VEC_ELT (group->stores, i, store)
        if (gimple_clobber_p (store->stmt))
          num_clobber_stmts++;
        else if (TREE_CODE (gimple_assign_rhs1 (store->stmt)) == CONSTRUCTOR
                 && CONSTRUCTOR_NELTS (gimple_assign_rhs1 (store->stmt)) == 0
                 && group->start == store->bitpos
                 && group->width == store->bitsize
                 && (group->start % BITS_PER_UNIT) == 0
                 && (group->width % BITS_PER_UNIT) == 0)
          {
            bzero_first = true;
            break;
          }
        else
          break;
      FOR_EACH_VEC_ELT_FROM (group->stores, i, store, i)
        if (gimple_clobber_p (store->stmt))
          num_clobber_stmts++;
      if (num_clobber_stmts == orig_num_stmts)
        return false;
      orig_num_stmts -= num_clobber_stmts;
    }
  if (allow_unaligned_store || bzero_first)
    {
      /* If unaligned stores are allowed, see how many stores we'd emit
         for unaligned and how many stores we'd emit for aligned stores.
         Only use unaligned stores if it allows fewer stores than aligned.
         Similarly, if there is a whole region clear first, prefer expanding
         it together compared to expanding clear first followed by merged
         further stores.  */
      unsigned cnt[4] = { ~0U, ~0U, ~0U, ~0U };
      int pass_min = 0;
      for (int pass = 0; pass < 4; ++pass)
        {
          if (!allow_unaligned_store && (pass & 1) != 0)
            continue;
          if (!bzero_first && (pass & 2) != 0)
            continue;
          cnt[pass] = split_group (group, (pass & 1) != 0,
                                   allow_unaligned_load, (pass & 2) != 0,
                                   NULL, NULL, NULL);
          if (cnt[pass] < cnt[pass_min])
            pass_min = pass;
        }
      if ((pass_min & 1) == 0)
        allow_unaligned_store = false;
      if ((pass_min & 2) == 0)
        bzero_first = false;
    }

  auto_vec<class split_store *, 32> split_stores;
  split_store *split_store;
  unsigned total_orig, total_new, i;
  split_group (group, allow_unaligned_store, allow_unaligned_load, bzero_first,
               &split_stores, &total_orig, &total_new);

  /* Determine if there is a clobber covering the whole group at the start,
     followed by proposed split stores that cover the whole group.  In that
     case, prefer the transformation even if
     split_stores.length () == orig_num_stmts.  */
  bool clobber_first = false;
  if (num_clobber_stmts
      && gimple_clobber_p (group->stores[0]->stmt)
      && group->start == group->stores[0]->bitpos
      && group->width == group->stores[0]->bitsize
      && (group->start % BITS_PER_UNIT) == 0
      && (group->width % BITS_PER_UNIT) == 0)
    {
      clobber_first = true;
      unsigned HOST_WIDE_INT pos = group->start / BITS_PER_UNIT;
      FOR_EACH_VEC_ELT (split_stores, i, split_store)
        if (split_store->bytepos != pos)
          {
            clobber_first = false;
            break;
          }
        else
          pos += split_store->size / BITS_PER_UNIT;
      if (pos != (group->start + group->width) / BITS_PER_UNIT)
        clobber_first = false;
    }

  if (split_stores.length () >= orig_num_stmts + clobber_first)
    {
      /* We didn't manage to reduce the number of statements.  Bail out.  */
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Exceeded original number of stmts (%u)."
                 " Not profitable to emit new sequence.\n",
                 orig_num_stmts);
      FOR_EACH_VEC_ELT (split_stores, i, split_store)
        delete split_store;
      return false;
    }
  if (total_orig <= total_new)
    {
      /* If number of estimated new statements is above estimated original
         statements, bail out too.  */
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Estimated number of original stmts (%u)"
                 " not larger than estimated number of new"
                 " stmts (%u).\n",
                 total_orig, total_new);
      FOR_EACH_VEC_ELT (split_stores, i, split_store)
        delete split_store;
      return false;
    }
  if (group->stores[0]->rhs_code == INTEGER_CST)
    {
      bool all_orig = true;
      FOR_EACH_VEC_ELT (split_stores, i, split_store)
        if (!split_store->orig)
          {
            all_orig = false;
            break;
          }
      if (all_orig)
        {
          unsigned int cnt = split_stores.length ();
          store_immediate_info *store;
          FOR_EACH_VEC_ELT (group->stores, i, store)
            if (gimple_clobber_p (store->stmt))
              ++cnt;
          /* Punt if we wouldn't make any real changes, i.e. keep all
             orig stmts + all clobbers.  */
          if (cnt == group->stores.length ())
            {
              if (dump_file && (dump_flags & TDF_DETAILS))
                fprintf (dump_file, "Exceeded original number of stmts (%u)."
                         " Not profitable to emit new sequence.\n",
                         orig_num_stmts);
              FOR_EACH_VEC_ELT (split_stores, i, split_store)
                delete split_store;
              return false;
            }
        }
    }

  gimple_stmt_iterator last_gsi = gsi_for_stmt (group->last_stmt);
  gimple_seq seq = NULL;
  tree last_vdef, new_vuse;
  last_vdef = gimple_vdef (group->last_stmt);
  new_vuse = gimple_vuse (group->last_stmt);
  tree bswap_res = NULL_TREE;

  /* Clobbers are not removed.  */
  if (gimple_clobber_p (group->last_stmt))
    {
      new_vuse = make_ssa_name (gimple_vop (cfun), group->last_stmt);
      gimple_set_vdef (group->last_stmt, new_vuse);
    }

  if (group->stores[0]->rhs_code == LROTATE_EXPR
      || group->stores[0]->rhs_code == NOP_EXPR)
    {
      tree fndecl = NULL_TREE, bswap_type = NULL_TREE, load_type;
      gimple *ins_stmt = group->stores[0]->ins_stmt;
      struct symbolic_number *n = &group->stores[0]->n;
      bool bswap = group->stores[0]->rhs_code == LROTATE_EXPR;

      switch (n->range)
        {
        case 16:
          load_type = bswap_type = uint16_type_node;
          break;
        case 32:
          load_type = uint32_type_node;
          if (bswap)
            {
              fndecl = builtin_decl_explicit (BUILT_IN_BSWAP32);
              bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
            }
          break;
        case 64:
          load_type = uint64_type_node;
          if (bswap)
            {
              fndecl = builtin_decl_explicit (BUILT_IN_BSWAP64);
              bswap_type = TREE_VALUE (TYPE_ARG_TYPES (TREE_TYPE (fndecl)));
            }
          break;
        default:
          gcc_unreachable ();
        }

      /* If the loads have each vuse of the corresponding store,
         we've checked the aliasing already in try_coalesce_bswap and
         we want to sink the needed load into seq.  So we need to use
         new_vuse on the load.  */
      if (n->base_addr)
        {
          if (n->vuse == NULL)
            {
              n->vuse = new_vuse;
              ins_stmt = NULL;
            }
          else
            /* Update vuse in case it has changed by output_merged_stores.  */
            n->vuse = gimple_vuse (ins_stmt);
        }
      bswap_res = bswap_replace (gsi_start (seq), ins_stmt, fndecl,
                                 bswap_type, load_type, n, bswap,
                                 ~(uint64_t) 0);
      gcc_assert (bswap_res);
    }

  gimple *stmt = NULL;
  auto_vec<gimple *, 32> orig_stmts;
  gimple_seq this_seq;
  tree addr = force_gimple_operand_1 (unshare_expr (base_addr), &this_seq,
                                      is_gimple_mem_ref_addr, NULL_TREE);
  gimple_seq_add_seq_without_update (&seq, this_seq);

  tree load_addr[2] = { NULL_TREE, NULL_TREE };
  gimple_seq load_seq[2] = { NULL, NULL };
  gimple_stmt_iterator load_gsi[2] = { gsi_none (), gsi_none () };
  for (int j = 0; j < 2; ++j)
    {
      store_operand_info &op = group->stores[0]->ops[j];
      if (op.base_addr == NULL_TREE)
        continue;

      store_immediate_info *infol = group->stores.last ();
      if (gimple_vuse (op.stmt) == gimple_vuse (infol->ops[j].stmt))
        {
          /* We can't pick the location randomly; while we've verified
             all the loads have the same vuse, they can be still in different
             basic blocks and we need to pick the one from the last bb:
             int x = q[0];
             if (x == N) return;
             int y = q[1];
             p[0] = x;
             p[1] = y;
             otherwise if we put the wider load at the q[0] load, we might
             segfault if q[1] is not mapped.  */
          basic_block bb = gimple_bb (op.stmt);
          gimple *ostmt = op.stmt;
          store_immediate_info *info;
          FOR_EACH_VEC_ELT (group->stores, i, info)
            {
              gimple *tstmt = info->ops[j].stmt;
              basic_block tbb = gimple_bb (tstmt);
              if (dominated_by_p (CDI_DOMINATORS, tbb, bb))
                {
                  ostmt = tstmt;
                  bb = tbb;
                }
            }
          load_gsi[j] = gsi_for_stmt (ostmt);
          load_addr[j]
            = force_gimple_operand_1 (unshare_expr (op.base_addr),
                                      &load_seq[j], is_gimple_mem_ref_addr,
                                      NULL_TREE);
        }
      else if (operand_equal_p (base_addr, op.base_addr, 0))
        load_addr[j] = addr;
      else
        {
          load_addr[j]
            = force_gimple_operand_1 (unshare_expr (op.base_addr),
                                      &this_seq, is_gimple_mem_ref_addr,
                                      NULL_TREE);
          gimple_seq_add_seq_without_update (&seq, this_seq);
        }
    }

  FOR_EACH_VEC_ELT (split_stores, i, split_store)
    {
      const unsigned HOST_WIDE_INT try_size = split_store->size;
      const unsigned HOST_WIDE_INT try_pos = split_store->bytepos;
      const unsigned HOST_WIDE_INT try_bitpos = try_pos * BITS_PER_UNIT;
      const unsigned HOST_WIDE_INT try_align = split_store->align;
      const unsigned HOST_WIDE_INT try_offset = try_pos - start_byte_pos;
      tree dest, src;
      location_t loc;

      if (split_store->orig)
        {
          /* If there is just a single non-clobber constituent store
             which covers the whole area, just reuse the lhs and rhs.  */
          gimple *orig_stmt = NULL;
          store_immediate_info *store;
          unsigned int j;
          FOR_EACH_VEC_ELT (split_store->orig_stores, j, store)
            if (!gimple_clobber_p (store->stmt))
              {
                orig_stmt = store->stmt;
                break;
              }
          dest = gimple_assign_lhs (orig_stmt);
          src = gimple_assign_rhs1 (orig_stmt);
          loc = gimple_location (orig_stmt);
        }
      else
        {
          store_immediate_info *info;
          unsigned short clique, base;
          unsigned int k;
          FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
            orig_stmts.safe_push (info->stmt);
          tree offset_type
            = get_alias_type_for_stmts (orig_stmts, false, &clique, &base);
          tree dest_type;
          loc = get_location_for_stmts (orig_stmts);
          orig_stmts.truncate (0);

          if (group->string_concatenation)
            dest_type
              = build_array_type_nelts (char_type_node,
                                        try_size / BITS_PER_UNIT);
          else
            {
              dest_type = build_nonstandard_integer_type (try_size, UNSIGNED);
              dest_type = build_aligned_type (dest_type, try_align);
            }
          dest = fold_build2 (MEM_REF, dest_type, addr,
                              build_int_cst (offset_type, try_pos));
          if (TREE_CODE (dest) == MEM_REF)
            {
              MR_DEPENDENCE_CLIQUE (dest) = clique;
              MR_DEPENDENCE_BASE (dest) = base;
            }

          tree mask;
          if (bswap_res || group->string_concatenation)
            mask = integer_zero_node;
          else
            mask = native_interpret_expr (dest_type,
                                          group->mask + try_offset,
                                          group->buf_size);

          tree ops[2];
          for (int j = 0;
               j < 1 + (split_store->orig_stores[0]->ops[1].val != NULL_TREE);
               ++j)
            {
              store_operand_info &op = split_store->orig_stores[0]->ops[j];
              if (bswap_res)
                ops[j] = bswap_res;
              else if (group->string_concatenation)
                {
                  ops[j] = build_string (try_size / BITS_PER_UNIT,
                                         (const char *) group->val + try_offset);
                  TREE_TYPE (ops[j]) = dest_type;
                }
              else if (op.base_addr)
                {
                  FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
                    orig_stmts.safe_push (info->ops[j].stmt);

                  offset_type = get_alias_type_for_stmts (orig_stmts, true,
                                                          &clique, &base);
                  location_t load_loc = get_location_for_stmts (orig_stmts);
                  orig_stmts.truncate (0);

                  unsigned HOST_WIDE_INT load_align = group->load_align[j];
                  unsigned HOST_WIDE_INT align_bitpos
                    = known_alignment (try_bitpos
                                       - split_store->orig_stores[0]->bitpos
                                       + op.bitpos);
                  if (align_bitpos & (load_align - 1))
                    load_align = least_bit_hwi (align_bitpos);

                  tree load_int_type
                    = build_nonstandard_integer_type (try_size, UNSIGNED);
                  load_int_type
                    = build_aligned_type (load_int_type, load_align);

                  poly_uint64 load_pos
                    = exact_div (try_bitpos
                                 - split_store->orig_stores[0]->bitpos
                                 + op.bitpos,
                                 BITS_PER_UNIT);
                  ops[j] = fold_build2 (MEM_REF, load_int_type, load_addr[j],
                                        build_int_cst (offset_type, load_pos));
                  if (TREE_CODE (ops[j]) == MEM_REF)
                    {
                      MR_DEPENDENCE_CLIQUE (ops[j]) = clique;
                      MR_DEPENDENCE_BASE (ops[j]) = base;
                    }
                  if (!integer_zerop (mask))
                    {
                      /* The load might load some bits (that will be masked
                         off later on) uninitialized, avoid -W*uninitialized
                         warnings in that case.  */
                      suppress_warning (ops[j], OPT_Wuninitialized);
                    }

                  stmt = gimple_build_assign (make_ssa_name (dest_type),
                                              ops[j]);
                  gimple_set_location (stmt, load_loc);
                  if (gsi_bb (load_gsi[j]))
                    {
                      gimple_set_vuse (stmt, gimple_vuse (op.stmt));
                      gimple_seq_add_stmt_without_update (&load_seq[j], stmt);
                    }
                  else
                    {
                      gimple_set_vuse (stmt, new_vuse);
                      gimple_seq_add_stmt_without_update (&seq, stmt);
                    }
                  ops[j] = gimple_assign_lhs (stmt);
                  tree xor_mask;
                  enum tree_code inv_op
                    = invert_op (split_store, j, dest_type, xor_mask);
                  if (inv_op != NOP_EXPR)
                    {
                      stmt = gimple_build_assign (make_ssa_name (dest_type),
                                                  inv_op, ops[j], xor_mask);
                      gimple_set_location (stmt, load_loc);
                      ops[j] = gimple_assign_lhs (stmt);

                      if (gsi_bb (load_gsi[j]))
                        gimple_seq_add_stmt_without_update (&load_seq[j],
                                                            stmt);
                      else
                        gimple_seq_add_stmt_without_update (&seq, stmt);
                    }
                }
              else
                ops[j] = native_interpret_expr (dest_type,
                                                group->val + try_offset,
                                                group->buf_size);
            }

          switch (split_store->orig_stores[0]->rhs_code)
            {
            case BIT_AND_EXPR:
            case BIT_IOR_EXPR:
            case BIT_XOR_EXPR:
              FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
                {
                  tree rhs1 = gimple_assign_rhs1 (info->stmt);
                  orig_stmts.safe_push (SSA_NAME_DEF_STMT (rhs1));
                }
              location_t bit_loc;
              bit_loc = get_location_for_stmts (orig_stmts);
              orig_stmts.truncate (0);

              stmt
                = gimple_build_assign (make_ssa_name (dest_type),
                                       split_store->orig_stores[0]->rhs_code,
                                       ops[0], ops[1]);
              gimple_set_location (stmt, bit_loc);
              /* If there is just one load and there is a separate
                 load_seq[0], emit the bitwise op right after it.  */
              if (load_addr[1] == NULL_TREE && gsi_bb (load_gsi[0]))
                gimple_seq_add_stmt_without_update (&load_seq[0], stmt);
              /* Otherwise, if at least one load is in seq, we need to
                 emit the bitwise op right before the store.  If there
                 are two loads and are emitted somewhere else, it would
                 be better to emit the bitwise op as early as possible;
                 we don't track where that would be possible right now
                 though.  */
              else
                gimple_seq_add_stmt_without_update (&seq, stmt);
              src = gimple_assign_lhs (stmt);
              tree xor_mask;
              enum tree_code inv_op;
              inv_op = invert_op (split_store, 2, dest_type, xor_mask);
              if (inv_op != NOP_EXPR)
                {
                  stmt = gimple_build_assign (make_ssa_name (dest_type),
                                              inv_op, src, xor_mask);
                  gimple_set_location (stmt, bit_loc);
                  if (load_addr[1] == NULL_TREE && gsi_bb (load_gsi[0]))
                    gimple_seq_add_stmt_without_update (&load_seq[0], stmt);
                  else
                    gimple_seq_add_stmt_without_update (&seq, stmt);
                  src = gimple_assign_lhs (stmt);
                }
              break;
            case LROTATE_EXPR:
            case NOP_EXPR:
              src = ops[0];
              if (!is_gimple_val (src))
                {
                  stmt = gimple_build_assign (make_ssa_name (TREE_TYPE (src)),
                                              src);
                  gimple_seq_add_stmt_without_update (&seq, stmt);
                  src = gimple_assign_lhs (stmt);
                }
              if (!useless_type_conversion_p (dest_type, TREE_TYPE (src)))
                {
                  stmt = gimple_build_assign (make_ssa_name (dest_type),
                                              NOP_EXPR, src);
                  gimple_seq_add_stmt_without_update (&seq, stmt);
                  src = gimple_assign_lhs (stmt);
                }
              inv_op = invert_op (split_store, 2, dest_type, xor_mask);
              if (inv_op != NOP_EXPR)
                {
                  stmt = gimple_build_assign (make_ssa_name (dest_type),
                                              inv_op, src, xor_mask);
                  gimple_set_location (stmt, loc);
                  gimple_seq_add_stmt_without_update (&seq, stmt);
                  src = gimple_assign_lhs (stmt);
                }
              break;
            default:
              src = ops[0];
              break;
            }

          /* If bit insertion is required, we use the source as an accumulator
             into which the successive bit-field values are manually inserted.
             FIXME: perhaps use BIT_INSERT_EXPR instead in some cases?  */
          if (group->bit_insertion)
            FOR_EACH_VEC_ELT (split_store->orig_stores, k, info)
              if (info->rhs_code == BIT_INSERT_EXPR
                  && info->bitpos < try_bitpos + try_size
                  && info->bitpos + info->bitsize > try_bitpos)
                {
                  /* Mask, truncate, convert to final type, shift and ior into
                     the accumulator.  Note that every step can be a no-op.  */
                  const HOST_WIDE_INT start_gap = info->bitpos - try_bitpos;
                  const HOST_WIDE_INT end_gap
                    = (try_bitpos + try_size) - (info->bitpos + info->bitsize);
                  tree tem = info->ops[0].val;
                  if (!INTEGRAL_TYPE_P (TREE_TYPE (tem)))
                    {
                      const unsigned HOST_WIDE_INT size
                        = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (tem)));
                      tree integer_type
                        = build_nonstandard_integer_type (size, UNSIGNED);
                      tem = gimple_build (&seq, loc, VIEW_CONVERT_EXPR,
                                          integer_type, tem);
                    }
                  if (TYPE_PRECISION (TREE_TYPE (tem)) <= info->bitsize)
                    {
                      tree bitfield_type
                        = build_nonstandard_integer_type (info->bitsize,
                                                          UNSIGNED);
                      tem = gimple_convert (&seq, loc, bitfield_type, tem);
                    }
                  else if ((BYTES_BIG_ENDIAN ? start_gap : end_gap) > 0)
                    {
                      const unsigned HOST_WIDE_INT imask
                        = (HOST_WIDE_INT_1U << info->bitsize) - 1;
                      tem = gimple_build (&seq, loc,
                                          BIT_AND_EXPR, TREE_TYPE (tem), tem,
                                          build_int_cst (TREE_TYPE (tem),
                                                         imask));
                    }
                  const HOST_WIDE_INT shift
                    = (BYTES_BIG_ENDIAN ? end_gap : start_gap);
                  if (shift < 0)
                    tem = gimple_build (&seq, loc,
                                        RSHIFT_EXPR, TREE_TYPE (tem), tem,
                                        build_int_cst (NULL_TREE, -shift));
                  tem = gimple_convert (&seq, loc, dest_type, tem);
                  if (shift > 0)
                    tem = gimple_build (&seq, loc,
                                        LSHIFT_EXPR, dest_type, tem,
                                        build_int_cst (NULL_TREE, shift));
                  src = gimple_build (&seq, loc,
                                      BIT_IOR_EXPR, dest_type, tem, src);
                }

          if (!integer_zerop (mask))
            {
              tree tem = make_ssa_name (dest_type);
              tree load_src = unshare_expr (dest);
              /* The load might load some or all bits uninitialized,
                 avoid -W*uninitialized warnings in that case.
                 As an optimization, if all the bits are provably
                 uninitialized (no stores at all yet or previous store
                 a CLOBBER), it would be nice to optimize away the load
                 and replace it e.g. with 0.  */
              suppress_warning (load_src, OPT_Wuninitialized);
              stmt = gimple_build_assign (tem, load_src);
              gimple_set_location (stmt, loc);
              gimple_set_vuse (stmt, new_vuse);
              gimple_seq_add_stmt_without_update (&seq, stmt);

              /* FIXME: If there is a single chunk of zero bits in mask,
                 perhaps use BIT_INSERT_EXPR instead?  */
              stmt = gimple_build_assign (make_ssa_name (dest_type),
                                          BIT_AND_EXPR, tem, mask);
              gimple_set_location (stmt, loc);
              gimple_seq_add_stmt_without_update (&seq, stmt);
              tem = gimple_assign_lhs (stmt);

              if (TREE_CODE (src) == INTEGER_CST)
                src = wide_int_to_tree (dest_type,
                                        wi::bit_and_not (wi::to_wide (src),
                                                         wi::to_wide (mask)));
              else
                {
                  tree nmask
                    = wide_int_to_tree (dest_type,
                                        wi::bit_not (wi::to_wide (mask)));
                  stmt = gimple_build_assign (make_ssa_name (dest_type),
                                              BIT_AND_EXPR, src, nmask);
                  gimple_set_location (stmt, loc);
                  gimple_seq_add_stmt_without_update (&seq, stmt);
                  src = gimple_assign_lhs (stmt);
                }
              stmt = gimple_build_assign (make_ssa_name (dest_type),
                                          BIT_IOR_EXPR, tem, src);
              gimple_set_location (stmt, loc);
              gimple_seq_add_stmt_without_update (&seq, stmt);
              src = gimple_assign_lhs (stmt);
            }
        }

      stmt = gimple_build_assign (dest, src);
      gimple_set_location (stmt, loc);
      gimple_set_vuse (stmt, new_vuse);
      gimple_seq_add_stmt_without_update (&seq, stmt);

      if (group->lp_nr && stmt_could_throw_p (cfun, stmt))
        add_stmt_to_eh_lp (stmt, group->lp_nr);

      tree new_vdef;
      if (i < split_stores.length () - 1)
        new_vdef = make_ssa_name (gimple_vop (cfun), stmt);
      else
        new_vdef = last_vdef;

      gimple_set_vdef (stmt, new_vdef);
      SSA_NAME_DEF_STMT (new_vdef) = stmt;
      new_vuse = new_vdef;
    }

  FOR_EACH_VEC_ELT (split_stores, i, split_store)
    delete split_store;

  gcc_assert (seq);
  if (dump_file)
    {
      fprintf (dump_file,
               "New sequence of %u stores to replace old one of %u stores\n",
               split_stores.length (), orig_num_stmts);
      if (dump_flags & TDF_DETAILS)
        print_gimple_seq (dump_file, seq, 0, TDF_VOPS | TDF_MEMSYMS);
    }

  if (gimple_clobber_p (group->last_stmt))
    update_stmt (group->last_stmt);

  if (group->lp_nr > 0)
    {
      /* We're going to insert a sequence of (potentially) throwing stores
         into an active EH region.  This means that we're going to create
         new basic blocks with EH edges pointing to the post landing pad
         and, therefore, to have to update its PHI nodes, if any.  For the
         virtual PHI node, we're going to use the VDEFs created above, but
         for the other nodes, we need to record the original reaching defs.  */
      eh_landing_pad lp = get_eh_landing_pad_from_number (group->lp_nr);
      basic_block lp_bb = label_to_block (cfun, lp->post_landing_pad);
      basic_block last_bb = gimple_bb (group->last_stmt);
      edge last_edge = find_edge (last_bb, lp_bb);
      auto_vec<tree, 16> last_defs;
      gphi_iterator gpi;
      for (gpi = gsi_start_phis (lp_bb); !gsi_end_p (gpi); gsi_next (&gpi))
        {
          gphi *phi = gpi.phi ();
          tree last_def;
          if (virtual_operand_p (gimple_phi_result (phi)))
            last_def = NULL_TREE;
          else
            last_def = gimple_phi_arg_def (phi, last_edge->dest_idx);
          last_defs.safe_push (last_def);
        }

      /* Do the insertion.  Then, if new basic blocks have been created in the
         process, rewind the chain of VDEFs created above to walk the new basic
         blocks and update the corresponding arguments of the PHI nodes.  */
      update_modified_stmts (seq);
      if (gimple_find_sub_bbs (seq, &last_gsi))
        while (last_vdef != gimple_vuse (group->last_stmt))
          {
            gimple *stmt = SSA_NAME_DEF_STMT (last_vdef);
            if (stmt_could_throw_p (cfun, stmt))
              {
                edge new_edge = find_edge (gimple_bb (stmt), lp_bb);
                unsigned int i;
                for (gpi = gsi_start_phis (lp_bb), i = 0;
                     !gsi_end_p (gpi);
                     gsi_next (&gpi), i++)
                  {
                    gphi *phi = gpi.phi ();
                    tree new_def;
                    if (virtual_operand_p (gimple_phi_result (phi)))
                      new_def = last_vdef;
                    else
                      new_def = last_defs[i];
                    add_phi_arg (phi, new_def, new_edge, UNKNOWN_LOCATION);
                  }
              }
            last_vdef = gimple_vuse (stmt);
          }
    }
  else
    gsi_insert_seq_after (&last_gsi, seq, GSI_SAME_STMT);

  for (int j = 0; j < 2; ++j)
    if (load_seq[j])
      gsi_insert_seq_after (&load_gsi[j], load_seq[j], GSI_SAME_STMT);

  return true;
}
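
/* Sketch of the emitted pattern for a masked split store (hypothetical
   GIMPLE, little-endian), matching the BIT_AND_EXPR/BIT_IOR_EXPR
   statements built above when MASK is not integer_zerop:

     _1 = MEM[(unsigned int *)p_5];        read the current destination
     _2 = _1 & 0xffff00ff;                 keep the bits not stored to
     _3 = src_6 & 0x0000ff00;              mask the newly stored bits
     _4 = _2 | _3;
     MEM[(unsigned int *)p_5] = _4;        single widened store  */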

/* Process the merged_store_group objects created in the coalescing phase.
   The stores are all against the base object BASE.
   Try to output the widened stores and delete the original statements if
   successful.  Return true iff any changes were made.  */

bool
imm_store_chain_info::output_merged_stores ()
{
  unsigned int i;
  merged_store_group *merged_store;
  bool ret = false;
  FOR_EACH_VEC_ELT (m_merged_store_groups, i, merged_store)
    {
      if (dbg_cnt (store_merging)
          && output_merged_store (merged_store))
        {
          unsigned int j;
          store_immediate_info *store;
          FOR_EACH_VEC_ELT (merged_store->stores, j, store)
            {
              gimple *stmt = store->stmt;
              gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
              /* Don't remove clobbers, they are still useful even if
                 everything is overwritten afterwards.  */
              if (gimple_clobber_p (stmt))
                continue;
              gsi_remove (&gsi, true);
              if (store->lp_nr)
                remove_stmt_from_eh_lp (stmt);
              if (stmt != merged_store->last_stmt)
                {
                  unlink_stmt_vdef (stmt);
                  release_defs (stmt);
                }
            }
          ret = true;
        }
    }
  if (ret && dump_file)
    fprintf (dump_file, "Merging successful!\n");

  return ret;
}
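
/* The dbg_cnt (store_merging) guard above hooks into GCC's debug counter
   machinery: a suspected miscompile can be bisected with something like
   -fdbg-cnt=store_merging:1-10, which applies the transformation only to
   the first ten groups.  */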

/* Coalesce the store_immediate_info objects recorded against the base object
   BASE in the first phase and output them.
   Delete the allocated structures.
   Return true if any changes were made.  */

bool
imm_store_chain_info::terminate_and_process_chain ()
{
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Terminating chain with %u stores\n",
             m_store_info.length ());
  /* Process store chain.  */
  bool ret = false;
  if (m_store_info.length () > 1)
    {
      ret = coalesce_immediate_stores ();
      if (ret)
        ret = output_merged_stores ();
    }

  /* Delete all the entries we allocated ourselves.  */
  store_immediate_info *info;
  unsigned int i;
  FOR_EACH_VEC_ELT (m_store_info, i, info)
    delete info;

  merged_store_group *merged_info;
  FOR_EACH_VEC_ELT (m_merged_store_groups, i, merged_info)
    delete merged_info;

  return ret;
}

/* Return true iff LHS is a destination potentially interesting for
   store merging.  In practice these are the codes that get_inner_reference
   can process.  */

static bool
lhs_valid_for_store_merging_p (tree lhs)
{
  if (DECL_P (lhs))
    return true;

  switch (TREE_CODE (lhs))
    {
    case ARRAY_REF:
    case ARRAY_RANGE_REF:
    case BIT_FIELD_REF:
    case COMPONENT_REF:
    case MEM_REF:
    case VIEW_CONVERT_EXPR:
      return true;
    default:
      return false;
    }
}

/* Return true if the tree RHS is a constant we want to consider
   during store merging.  In practice accept all codes that
   native_encode_expr accepts.  */
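/* For example, integer and floating-point literals are accepted because
   native_encode_expr can serialize them into bytes, while the empty
   CONSTRUCTOR from e.g. "struct S s = {};" is accepted via the special
   case below even though native_encode_expr rejects it.  */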

static bool
rhs_valid_for_store_merging_p (tree rhs)
{
  unsigned HOST_WIDE_INT size;
  if (TREE_CODE (rhs) == CONSTRUCTOR
      && CONSTRUCTOR_NELTS (rhs) == 0
      && TYPE_SIZE_UNIT (TREE_TYPE (rhs))
      && tree_fits_uhwi_p (TYPE_SIZE_UNIT (TREE_TYPE (rhs))))
    return true;
  return (GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (rhs))).is_constant (&size)
          && native_encode_expr (rhs, NULL, size) != 0);
}

/* Adjust *PBITPOS, *PBITREGION_START and *PBITREGION_END by BYTE_OFF bytes
   and return true on success or false on failure.  */
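/* As a worked example, assuming 8-bit units: with BYTE_OFF 2 and *PBITPOS 5,
   the adjusted bit position is 2 * 8 + 5 = 21.  Failure means the adjusted
   value was negative or did not fit the output type.  */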

static bool
adjust_bit_pos (poly_offset_int byte_off,
                poly_int64 *pbitpos,
                poly_uint64 *pbitregion_start,
                poly_uint64 *pbitregion_end)
{
  poly_offset_int bit_off = byte_off << LOG2_BITS_PER_UNIT;
  bit_off += *pbitpos;

  if (known_ge (bit_off, 0) && bit_off.to_shwi (pbitpos))
    {
      if (maybe_ne (*pbitregion_end, 0U))
        {
          bit_off = byte_off << LOG2_BITS_PER_UNIT;
          bit_off += *pbitregion_start;
          if (bit_off.to_uhwi (pbitregion_start))
            {
              bit_off = byte_off << LOG2_BITS_PER_UNIT;
              bit_off += *pbitregion_end;
              if (!bit_off.to_uhwi (pbitregion_end))
                *pbitregion_end = 0;
            }
          else
            *pbitregion_end = 0;
        }
      return true;
    }
  else
    return false;
}

/* If MEM is a memory reference usable for store merging (either as
   store destination or for loads), return the non-NULL base_addr
   and set *PBITSIZE, *PBITPOS, *PBITREGION_START and *PBITREGION_END.
   Otherwise return NULL; *PBITSIZE is still valid even in that case.  */

static tree
mem_valid_for_store_merging (tree mem, poly_uint64 *pbitsize,
                             poly_uint64 *pbitpos,
                             poly_uint64 *pbitregion_start,
                             poly_uint64 *pbitregion_end)
{
  poly_int64 bitsize, bitpos;
  poly_uint64 bitregion_start = 0, bitregion_end = 0;
  machine_mode mode;
  int unsignedp = 0, reversep = 0, volatilep = 0;
  tree offset;
  tree base_addr = get_inner_reference (mem, &bitsize, &bitpos, &offset, &mode,
                                        &unsignedp, &reversep, &volatilep);
  *pbitsize = bitsize;
  if (known_le (bitsize, 0))
    return NULL_TREE;

  if (TREE_CODE (mem) == COMPONENT_REF
      && DECL_BIT_FIELD_TYPE (TREE_OPERAND (mem, 1)))
    {
      get_bit_range (&bitregion_start, &bitregion_end, mem, &bitpos, &offset);
      if (maybe_ne (bitregion_end, 0U))
        bitregion_end += 1;
    }

  if (reversep)
    return NULL_TREE;

  /* We do not want to rewrite TARGET_MEM_REFs.  */
  if (TREE_CODE (base_addr) == TARGET_MEM_REF)
    return NULL_TREE;
  /* In some cases get_inner_reference may return a
     MEM_REF [ptr + byteoffset].  For the purposes of this pass
     canonicalize the base_addr to MEM_REF [ptr] and take
     byteoffset into account in the bitpos.  This occurs in
     PR 23684 and this way we can catch more chains.  */
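  /* For instance (an illustrative case, not the one from the PR), a store
     to MEM_REF [ptr + 4B] at bit position 8 becomes a store to MEM_REF [ptr]
     at bit position 4 * 8 + 8 = 40, letting it join a chain of other stores
     based on ptr.  */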
  else if (TREE_CODE (base_addr) == MEM_REF)
    {
      if (!adjust_bit_pos (mem_ref_offset (base_addr), &bitpos,
                           &bitregion_start, &bitregion_end))
        return NULL_TREE;
      base_addr = TREE_OPERAND (base_addr, 0);
    }
  /* get_inner_reference returns the base object, get at its
     address now.  */
  else
    {
      if (maybe_lt (bitpos, 0))
        return NULL_TREE;
      base_addr = build_fold_addr_expr (base_addr);
    }

  if (offset)
    {
      /* If the access is variable offset then a base decl has to be
         address-taken to be able to emit pointer-based stores to it.
         ??? We might be able to get away with re-using the original
         base up to the first variable part and then wrapping that inside
         a BIT_FIELD_REF.  */
      tree base = get_base_address (base_addr);
      if (!base || (DECL_P (base) && !TREE_ADDRESSABLE (base)))
        return NULL_TREE;

      /* Similarly to above for the base, remove constant from the offset.  */
      if (TREE_CODE (offset) == PLUS_EXPR
          && TREE_CODE (TREE_OPERAND (offset, 1)) == INTEGER_CST
          && adjust_bit_pos (wi::to_poly_offset (TREE_OPERAND (offset, 1)),
                             &bitpos, &bitregion_start, &bitregion_end))
        offset = TREE_OPERAND (offset, 0);

      base_addr = build2 (POINTER_PLUS_EXPR, TREE_TYPE (base_addr),
                          base_addr, offset);
    }

  if (known_eq (bitregion_end, 0U))
    {
      bitregion_start = round_down_to_byte_boundary (bitpos);
      bitregion_end = round_up_to_byte_boundary (bitpos + bitsize);
    }

  *pbitsize = bitsize;
  *pbitpos = bitpos;
  *pbitregion_start = bitregion_start;
  *pbitregion_end = bitregion_end;
  return base_addr;
}

/* Return true if STMT is a load that can be used for store merging.
   In that case fill in *OP.  BITSIZE, BITPOS, BITREGION_START and
   BITREGION_END are properties of the corresponding store.  */
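/* A typical match is the load feeding a plain copy, e.g. the GIMPLE
   (illustrative, with made-up SSA names)
     _1 = *q_3;
     *p_2 = _1;
   where this predicate is called on the definition statement of _1.  */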

static bool
handled_load (gimple *stmt, store_operand_info *op,
              poly_uint64 bitsize, poly_uint64 bitpos,
              poly_uint64 bitregion_start, poly_uint64 bitregion_end)
{
  if (!is_gimple_assign (stmt))
    return false;
  if (gimple_assign_rhs_code (stmt) == BIT_NOT_EXPR)
    {
      tree rhs1 = gimple_assign_rhs1 (stmt);
      if (TREE_CODE (rhs1) == SSA_NAME
          && handled_load (SSA_NAME_DEF_STMT (rhs1), op, bitsize, bitpos,
                           bitregion_start, bitregion_end))
        {
          /* Don't allow _1 = load; _2 = ~_1; _3 = ~_2; which should have
             been optimized earlier, but if allowed here, would confuse the
             multiple uses counting.  */
          if (op->bit_not_p)
            return false;
          op->bit_not_p = !op->bit_not_p;
          return true;
        }
      return false;
    }
  if (gimple_vuse (stmt)
      && gimple_assign_load_p (stmt)
      && !stmt_can_throw_internal (cfun, stmt)
      && !gimple_has_volatile_ops (stmt))
    {
      tree mem = gimple_assign_rhs1 (stmt);
      op->base_addr
        = mem_valid_for_store_merging (mem, &op->bitsize, &op->bitpos,
                                       &op->bitregion_start,
                                       &op->bitregion_end);
      if (op->base_addr != NULL_TREE
          && known_eq (op->bitsize, bitsize)
          && multiple_p (op->bitpos - bitpos, BITS_PER_UNIT)
          && known_ge (op->bitpos - op->bitregion_start,
                       bitpos - bitregion_start)
          && known_ge (op->bitregion_end - op->bitpos,
                       bitregion_end - bitpos))
        {
          op->stmt = stmt;
          op->val = mem;
          op->bit_not_p = false;
          return true;
        }
    }
  return false;
}

/* Return the index number of the landing pad for STMT, if any.  */

static int
lp_nr_for_store (gimple *stmt)
{
  if (!cfun->can_throw_non_call_exceptions || !cfun->eh)
    return 0;

  if (!stmt_could_throw_p (cfun, stmt))
    return 0;

  return lookup_stmt_eh_lp (stmt);
}

/* Record the store STMT for store merging optimization if it can be
   optimized.  Return true if any changes were made.  */
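/* For example, given the GIMPLE (illustrative, with made-up SSA names)
     _1 = *q_5;
     _2 = _1 ^ 123;
     *p_4 = _2;
   the store to *p_4 is recorded with rhs_code BIT_XOR_EXPR, ops[0]
   describing the load from *q_5 and ops[1] holding the constant 123.  */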

bool
pass_store_merging::process_store (gimple *stmt)
{
  tree lhs = gimple_assign_lhs (stmt);
  tree rhs = gimple_assign_rhs1 (stmt);
  poly_uint64 bitsize, bitpos = 0;
  poly_uint64 bitregion_start = 0, bitregion_end = 0;
  tree base_addr
    = mem_valid_for_store_merging (lhs, &bitsize, &bitpos,
                                   &bitregion_start, &bitregion_end);
  if (known_eq (bitsize, 0U))
    return false;

  bool invalid = (base_addr == NULL_TREE
                  || (maybe_gt (bitsize,
                                (unsigned int) MAX_BITSIZE_MODE_ANY_INT)
                      && TREE_CODE (rhs) != INTEGER_CST
                      && (TREE_CODE (rhs) != CONSTRUCTOR
                          || CONSTRUCTOR_NELTS (rhs) != 0)));
  enum tree_code rhs_code = ERROR_MARK;
  bool bit_not_p = false;
  struct symbolic_number n;
  gimple *ins_stmt = NULL;
  store_operand_info ops[2];
  if (invalid)
    ;
  else if (TREE_CODE (rhs) == STRING_CST)
    {
      rhs_code = STRING_CST;
      ops[0].val = rhs;
    }
  else if (rhs_valid_for_store_merging_p (rhs))
    {
      rhs_code = INTEGER_CST;
      ops[0].val = rhs;
    }
  else if (TREE_CODE (rhs) == SSA_NAME)
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (rhs), *def_stmt1, *def_stmt2;
      if (!is_gimple_assign (def_stmt))
        invalid = true;
      else if (handled_load (def_stmt, &ops[0], bitsize, bitpos,
                             bitregion_start, bitregion_end))
        rhs_code = MEM_REF;
      else if (gimple_assign_rhs_code (def_stmt) == BIT_NOT_EXPR)
        {
          tree rhs1 = gimple_assign_rhs1 (def_stmt);
          if (TREE_CODE (rhs1) == SSA_NAME
              && is_gimple_assign (SSA_NAME_DEF_STMT (rhs1)))
            {
              bit_not_p = true;
              def_stmt = SSA_NAME_DEF_STMT (rhs1);
            }
        }

      if (rhs_code == ERROR_MARK && !invalid)
        switch ((rhs_code = gimple_assign_rhs_code (def_stmt)))
          {
          case BIT_AND_EXPR:
          case BIT_IOR_EXPR:
          case BIT_XOR_EXPR:
            tree rhs1, rhs2;
            rhs1 = gimple_assign_rhs1 (def_stmt);
            rhs2 = gimple_assign_rhs2 (def_stmt);
            invalid = true;
            if (TREE_CODE (rhs1) != SSA_NAME)
              break;
            def_stmt1 = SSA_NAME_DEF_STMT (rhs1);
            if (!is_gimple_assign (def_stmt1)
                || !handled_load (def_stmt1, &ops[0], bitsize, bitpos,
                                  bitregion_start, bitregion_end))
              break;
            if (rhs_valid_for_store_merging_p (rhs2))
              ops[1].val = rhs2;
            else if (TREE_CODE (rhs2) != SSA_NAME)
              break;
            else
              {
                def_stmt2 = SSA_NAME_DEF_STMT (rhs2);
                if (!is_gimple_assign (def_stmt2))
                  break;
                else if (!handled_load (def_stmt2, &ops[1], bitsize, bitpos,
                                        bitregion_start, bitregion_end))
                  break;
              }
            invalid = false;
            break;
          default:
            invalid = true;
            break;
          }

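      /* See if the stored value is a recognizable byte-manipulation
         (e.g. bswap) pattern.  find_bswap_or_nop_1 records one marker byte
         per stored byte in N; a zero or unknown marker for any byte
         disqualifies the match below.  */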
      unsigned HOST_WIDE_INT const_bitsize;
      if (bitsize.is_constant (&const_bitsize)
          && (const_bitsize % BITS_PER_UNIT) == 0
          && const_bitsize <= 64
          && multiple_p (bitpos, BITS_PER_UNIT))
        {
          ins_stmt = find_bswap_or_nop_1 (def_stmt, &n, 12);
          if (ins_stmt)
            {
              uint64_t nn = n.n;
              for (unsigned HOST_WIDE_INT i = 0;
                   i < const_bitsize;
                   i += BITS_PER_UNIT, nn >>= BITS_PER_MARKER)
                if ((nn & MARKER_MASK) == 0
                    || (nn & MARKER_MASK) == MARKER_BYTE_UNKNOWN)
                  {
                    ins_stmt = NULL;
                    break;
                  }
              if (ins_stmt)
                {
                  if (invalid)
                    {
                      rhs_code = LROTATE_EXPR;
                      ops[0].base_addr = NULL_TREE;
                      ops[1].base_addr = NULL_TREE;
                    }
                  invalid = false;
                }
            }
        }

      if (invalid
          && bitsize.is_constant (&const_bitsize)
          && ((const_bitsize % BITS_PER_UNIT) != 0
              || !multiple_p (bitpos, BITS_PER_UNIT))
          && const_bitsize <= MAX_FIXED_MODE_SIZE)
        {
          /* Bypass a conversion to the bit-field type.  */
          if (!bit_not_p
              && is_gimple_assign (def_stmt)
              && CONVERT_EXPR_CODE_P (rhs_code))
            {
              tree rhs1 = gimple_assign_rhs1 (def_stmt);
              if (TREE_CODE (rhs1) == SSA_NAME
                  && INTEGRAL_TYPE_P (TREE_TYPE (rhs1)))
                rhs = rhs1;
            }
          rhs_code = BIT_INSERT_EXPR;
          bit_not_p = false;
          ops[0].val = rhs;
          ops[0].base_addr = NULL_TREE;
          ops[1].base_addr = NULL_TREE;
          invalid = false;
        }
    }
  else
    invalid = true;

  unsigned HOST_WIDE_INT const_bitsize, const_bitpos;
  unsigned HOST_WIDE_INT const_bitregion_start, const_bitregion_end;
  if (invalid
      || !bitsize.is_constant (&const_bitsize)
      || !bitpos.is_constant (&const_bitpos)
      || !bitregion_start.is_constant (&const_bitregion_start)
      || !bitregion_end.is_constant (&const_bitregion_end))
    return terminate_all_aliasing_chains (NULL, stmt);

  if (!ins_stmt)
    memset (&n, 0, sizeof (n));

  class imm_store_chain_info **chain_info = NULL;
  bool ret = false;
  if (base_addr)
    chain_info = m_stores.get (base_addr);

  store_immediate_info *info;
  if (chain_info)
    {
      unsigned int ord = (*chain_info)->m_store_info.length ();
      info = new store_immediate_info (const_bitsize, const_bitpos,
                                       const_bitregion_start,
                                       const_bitregion_end,
                                       stmt, ord, rhs_code, n, ins_stmt,
                                       bit_not_p, lp_nr_for_store (stmt),
                                       ops[0], ops[1]);
      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Recording immediate store from stmt:\n");
          print_gimple_stmt (dump_file, stmt, 0);
        }
      (*chain_info)->m_store_info.safe_push (info);
      m_n_stores++;
      ret |= terminate_all_aliasing_chains (chain_info, stmt);
      /* If we reach the limit of stores to merge in a chain terminate and
         process the chain now.  */
      if ((*chain_info)->m_store_info.length ()
          == (unsigned int) param_max_stores_to_merge)
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            fprintf (dump_file,
                     "Reached maximum number of statements to merge:\n");
          ret |= terminate_and_process_chain (*chain_info);
        }
    }
  else
    {
      /* Store aliases any existing chain?  */
      ret |= terminate_all_aliasing_chains (NULL, stmt);

      /* Start a new chain.  */
      class imm_store_chain_info *new_chain
        = new imm_store_chain_info (m_stores_head, base_addr);
      info = new store_immediate_info (const_bitsize, const_bitpos,
                                       const_bitregion_start,
                                       const_bitregion_end,
                                       stmt, 0, rhs_code, n, ins_stmt,
                                       bit_not_p, lp_nr_for_store (stmt),
                                       ops[0], ops[1]);
      new_chain->m_store_info.safe_push (info);
      m_n_stores++;
      m_stores.put (base_addr, new_chain);
      m_n_chains++;
      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file,
                   "Starting active chain number %u with statement:\n",
                   m_n_chains);
          print_gimple_stmt (dump_file, stmt, 0);
          fprintf (dump_file, "The base object is:\n");
          print_generic_expr (dump_file, base_addr);
          fprintf (dump_file, "\n");
        }
    }

  /* Prune oldest chains so that after adding the chain or store above
     we're again within the limits set by the params.  */
  if (m_n_chains > (unsigned) param_max_store_chains_to_track
      || m_n_stores > (unsigned) param_max_stores_to_track)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Too many chains (%u > %d) or stores (%u > %d), "
                            "terminating oldest chain(s).\n", m_n_chains,
                 param_max_store_chains_to_track, m_n_stores,
                 param_max_stores_to_track);
      imm_store_chain_info **e = &m_stores_head;
      unsigned idx = 0;
      unsigned n_stores = 0;
      while (*e)
        {
          if (idx >= (unsigned) param_max_store_chains_to_track
              || (n_stores + (*e)->m_store_info.length ()
                  > (unsigned) param_max_stores_to_track))
            ret |= terminate_and_process_chain (*e);
          else
            {
              n_stores += (*e)->m_store_info.length ();
              e = &(*e)->next;
              ++idx;
            }
        }
    }

  return ret;
}

/* Return true if STMT is a store valid for store merging.  */

static bool
store_valid_for_store_merging_p (gimple *stmt)
{
  return gimple_assign_single_p (stmt)
         && gimple_vdef (stmt)
         && lhs_valid_for_store_merging_p (gimple_assign_lhs (stmt))
         && (!gimple_has_volatile_ops (stmt) || gimple_clobber_p (stmt));
}

enum basic_block_status { BB_INVALID, BB_VALID, BB_EXTENDED_VALID };

/* Return the status of basic block BB wrt store merging.  */
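/* Roughly: a block containing at least two mergeable stores (or one suitable
   vector CONSTRUCTOR) is BB_VALID.  When non-call exceptions are enabled, a
   block whose last statement is a mergeable store and which falls through to
   the next block is BB_EXTENDED_VALID, so an open chain may continue across
   the edge.  */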

static enum basic_block_status
get_status_for_store_merging (basic_block bb)
{
  unsigned int num_statements = 0;
  unsigned int num_constructors = 0;
  gimple_stmt_iterator gsi;
  edge e;
  gimple *last_stmt = NULL;

  for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);

      if (is_gimple_debug (stmt))
        continue;

      last_stmt = stmt;

      if (store_valid_for_store_merging_p (stmt) && ++num_statements >= 2)
        break;

      if (is_gimple_assign (stmt)
          && gimple_assign_rhs_code (stmt) == CONSTRUCTOR)
        {
          tree rhs = gimple_assign_rhs1 (stmt);
          if (VECTOR_TYPE_P (TREE_TYPE (rhs))
              && INTEGRAL_TYPE_P (TREE_TYPE (TREE_TYPE (rhs)))
              && gimple_assign_lhs (stmt) != NULL_TREE)
            {
              HOST_WIDE_INT sz
                = int_size_in_bytes (TREE_TYPE (rhs)) * BITS_PER_UNIT;
              if (sz == 16 || sz == 32 || sz == 64)
                {
                  num_constructors = 1;
                  break;
                }
            }
        }
    }

  if (num_statements == 0 && num_constructors == 0)
    return BB_INVALID;

  if (cfun->can_throw_non_call_exceptions && cfun->eh
      && store_valid_for_store_merging_p (last_stmt)
      && (e = find_fallthru_edge (bb->succs))
      && e->dest == bb->next_bb)
    return BB_EXTENDED_VALID;

  return (num_statements >= 2 || num_constructors) ? BB_VALID : BB_INVALID;
}

/* Entry point for the pass.  Go over each basic block recording chains of
   immediate stores.  Upon encountering a terminating statement (as defined
   by stmt_terminates_chain_p) process the recorded stores and emit the
   widened variants.  */

unsigned int
pass_store_merging::execute (function *fun)
{
  basic_block bb;
  hash_set<gimple *> orig_stmts;
  bool changed = false, open_chains = false;

  /* If the function can throw and catch non-call exceptions, we'll be trying
     to merge stores across different basic blocks so we need to first unsplit
     the EH edges in order to streamline the CFG of the function.  */
  if (cfun->can_throw_non_call_exceptions && cfun->eh)
    unsplit_eh_edges ();

  calculate_dominance_info (CDI_DOMINATORS);

  FOR_EACH_BB_FN (bb, fun)
    {
      const basic_block_status bb_status = get_status_for_store_merging (bb);
      gimple_stmt_iterator gsi;

      if (open_chains && (bb_status == BB_INVALID || !single_pred_p (bb)))
        {
          changed |= terminate_and_process_all_chains ();
          open_chains = false;
        }

      if (bb_status == BB_INVALID)
        continue;

      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Processing basic block <%d>:\n", bb->index);

      for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); )
        {
          gimple *stmt = gsi_stmt (gsi);
          gsi_next (&gsi);

          if (is_gimple_debug (stmt))
            continue;

          if (gimple_has_volatile_ops (stmt) && !gimple_clobber_p (stmt))
            {
              /* Terminate all chains.  */
              if (dump_file && (dump_flags & TDF_DETAILS))
                fprintf (dump_file, "Volatile access terminates "
                                    "all chains\n");
              changed |= terminate_and_process_all_chains ();
              open_chains = false;
              continue;
            }

          if (is_gimple_assign (stmt)
              && gimple_assign_rhs_code (stmt) == CONSTRUCTOR
              && maybe_optimize_vector_constructor (stmt))
            continue;

          if (store_valid_for_store_merging_p (stmt))
            changed |= process_store (stmt);
          else
            changed |= terminate_all_aliasing_chains (NULL, stmt);
        }

      if (bb_status == BB_EXTENDED_VALID)
        open_chains = true;
      else
        {
          changed |= terminate_and_process_all_chains ();
          open_chains = false;
        }
    }

  if (open_chains)
    changed |= terminate_and_process_all_chains ();

  /* If the function can throw and catch non-call exceptions and something
     changed during the pass, then the CFG has (very likely) changed too.  */
  if (cfun->can_throw_non_call_exceptions && cfun->eh && changed)
    {
      free_dominance_info (CDI_DOMINATORS);
      return TODO_cleanup_cfg;
    }

  return 0;
}

} // anon namespace

/* Construct and return a store merging pass object.  */

gimple_opt_pass *
make_pass_store_merging (gcc::context *ctxt)
{
  return new pass_store_merging (ctxt);
}

#if CHECKING_P

namespace selftest {

/* Selftests for store merging helpers.  */

/* Assert that all elements of the byte arrays X and Y, both of length N,
   are equal.  */

static void
verify_array_eq (unsigned char *x, unsigned char *y, unsigned int n)
{
  for (unsigned int i = 0; i < n; i++)
    {
      if (x[i] != y[i])
        {
          fprintf (stderr, "Arrays do not match.  X:\n");
          dump_char_array (stderr, x, n);
          fprintf (stderr, "Y:\n");
          dump_char_array (stderr, y, n);
        }
      ASSERT_EQ (x[i], y[i]);
    }
}

/* Test that shift_bytes_in_array_left carries bits across byte boundaries
   correctly.  */

static void
verify_shift_bytes_in_array_left (void)
{
  /* byte 1   | byte 0
     00011111 | 11100000.  */
  unsigned char orig[2] = { 0xe0, 0x1f };
  unsigned char in[2];
  memcpy (in, orig, sizeof orig);

  unsigned char expected[2] = { 0x80, 0x7f };
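  /* Reading the array little-endian, { 0xe0, 0x1f } is the 16-bit value
     0x1fe0; shifted left by 2 it becomes 0x7f80, i.e. { 0x80, 0x7f }, the
     top two bits of byte 0 carrying over into byte 1.  */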
  shift_bytes_in_array_left (in, sizeof (in), 2);
  verify_array_eq (in, expected, sizeof (in));

  memcpy (in, orig, sizeof orig);
  memcpy (expected, orig, sizeof orig);
  /* Check that shifting by zero doesn't change anything.  */
  shift_bytes_in_array_left (in, sizeof (in), 0);
  verify_array_eq (in, expected, sizeof (in));
}

/* Test that shift_bytes_in_array_right carries bits across byte boundaries
   correctly.  */

static void
verify_shift_bytes_in_array_right (void)
{
  /* byte 1   | byte 0
     00011111 | 11100000.  */
  unsigned char orig[2] = { 0x1f, 0xe0 };
  unsigned char in[2];
  memcpy (in, orig, sizeof orig);
  unsigned char expected[2] = { 0x07, 0xf8 };
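  /* Here the array is read in the opposite byte order: { 0x1f, 0xe0 } is
     0x1fe0, and 0x1fe0 >> 2 == 0x07f8, i.e. { 0x07, 0xf8 }.  */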
  shift_bytes_in_array_right (in, sizeof (in), 2);
  verify_array_eq (in, expected, sizeof (in));

  memcpy (in, orig, sizeof orig);
  memcpy (expected, orig, sizeof orig);
  /* Check that shifting by zero doesn't change anything.  */
  shift_bytes_in_array_right (in, sizeof (in), 0);
  verify_array_eq (in, expected, sizeof (in));
}

/* Test that clear_bit_region clears exactly the bits asked and
   nothing more.  */

static void
verify_clear_bit_region (void)
{
  /* Start with all bits set and test clearing various patterns in them.  */
  unsigned char orig[3] = { 0xff, 0xff, 0xff };
  unsigned char in[3];
  unsigned char expected[3];
  memcpy (in, orig, sizeof in);

  /* Check zeroing out all the bits.  */
  clear_bit_region (in, 0, 3 * BITS_PER_UNIT);
  expected[0] = expected[1] = expected[2] = 0;
  verify_array_eq (in, expected, sizeof in);

  memcpy (in, orig, sizeof in);
  /* Leave the first and last bits intact.  */
  clear_bit_region (in, 1, 3 * BITS_PER_UNIT - 2);
  expected[0] = 0x1;
  expected[1] = 0;
  expected[2] = 0x80;
  verify_array_eq (in, expected, sizeof in);
}

/* Test that clear_bit_region_be clears exactly the bits asked and
   nothing more.  */

static void
verify_clear_bit_region_be (void)
{
  /* Start with all bits set and test clearing various patterns in them.  */
  unsigned char orig[3] = { 0xff, 0xff, 0xff };
  unsigned char in[3];
  unsigned char expected[3];
  memcpy (in, orig, sizeof in);

  /* Check zeroing out all the bits.  */
  clear_bit_region_be (in, BITS_PER_UNIT - 1, 3 * BITS_PER_UNIT);
  expected[0] = expected[1] = expected[2] = 0;
  verify_array_eq (in, expected, sizeof in);

  memcpy (in, orig, sizeof in);
  /* Leave the first and last bits intact.  */
  clear_bit_region_be (in, BITS_PER_UNIT - 2, 3 * BITS_PER_UNIT - 2);
  expected[0] = 0x80;
  expected[1] = 0;
  expected[2] = 0x1;
  verify_array_eq (in, expected, sizeof in);
}


/* Run all of the selftests within this file.  */

void
store_merging_cc_tests (void)
{
  verify_shift_bytes_in_array_left ();
  verify_shift_bytes_in_array_right ();
  verify_clear_bit_region ();
  verify_clear_bit_region_be ();
}

} // namespace selftest
#endif /* CHECKING_P.  */