/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002 Free Software Foundation, Inc.

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */

#include "config.h"
#include "system.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "toplev.h"
#include "basic-block.h"
#include "ggc.h"
#include "target.h"
#include "target-def.h"
#include "langhooks.h"

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Processor costs (relative to an add) */
static const
struct processor_costs size_cost = {	/* costs for tuning for size */
  2,					/* cost of an add instruction */
  3,					/* cost of a lea instruction */
  2,					/* variable shift costs */
  3,					/* constant shift costs */
  3,					/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  3,					/* cost of a divide/mod */
  3,					/* cost of movsx */
  3,					/* cost of movzx */
  0,					/* "large" insn */
  2,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {2, 2, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 2},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {2, 2, 2},				/* cost of storing fp registers */
  3,					/* cost of moving MMX register */
  {3, 3},				/* cost of loading MMX registers
					   in SImode and DImode */
  {3, 3},				/* cost of storing MMX registers
					   in SImode and DImode */
  3,					/* cost of moving SSE register */
  {3, 3, 3},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {3, 3, 3},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  2,					/* cost of FADD and FSUB insns.  */
  2,					/* cost of FMUL instruction.  */
  2,					/* cost of FDIV instruction.  */
  2,					/* cost of FABS instruction.  */
  2,					/* cost of FCHS instruction.  */
  2,					/* cost of FSQRT instruction.  */
};

/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {	/* 386 specific costs */
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  3,					/* variable shift costs */
  2,					/* constant shift costs */
  6,					/* cost of starting a multiply */
  1,					/* cost of multiply per each bit set */
  23,					/* cost of a divide/mod */
  3,					/* cost of movsx */
  2,					/* cost of movzx */
  15,					/* "large" insn */
  3,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  23,					/* cost of FADD and FSUB insns.  */
  27,					/* cost of FMUL instruction.  */
  88,					/* cost of FDIV instruction.  */
  22,					/* cost of FABS instruction.  */
  24,					/* cost of FCHS instruction.  */
  122,					/* cost of FSQRT instruction.  */
};

static const
struct processor_costs i486_cost = {	/* 486 specific costs */
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  3,					/* variable shift costs */
  2,					/* constant shift costs */
  12,					/* cost of starting a multiply */
  1,					/* cost of multiply per each bit set */
  40,					/* cost of a divide/mod */
  3,					/* cost of movsx */
  2,					/* cost of movzx */
  15,					/* "large" insn */
  3,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {8, 8, 8},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {8, 8, 8},				/* cost of storing fp registers */
  2,					/* cost of moving MMX register */
  {4, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  8,					/* cost of FADD and FSUB insns.  */
  16,					/* cost of FMUL instruction.  */
  73,					/* cost of FDIV instruction.  */
  3,					/* cost of FABS instruction.  */
  3,					/* cost of FCHS instruction.  */
  83,					/* cost of FSQRT instruction.  */
};

static const
struct processor_costs pentium_cost = {
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  4,					/* variable shift costs */
  1,					/* constant shift costs */
  11,					/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  25,					/* cost of a divide/mod */
  3,					/* cost of movsx */
  2,					/* cost of movzx */
  8,					/* "large" insn */
  6,					/* MOVE_RATIO */
  6,					/* cost for loading QImode using movzbl */
  {2, 4, 2},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 4, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers */
  8,					/* cost of moving MMX register */
  {8, 8},				/* cost of loading MMX registers
					   in SImode and DImode */
  {8, 8},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 8, 16},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 8, 16},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  0,					/* size of prefetch block */
  0,					/* number of parallel prefetches */
  3,					/* cost of FADD and FSUB insns.  */
  3,					/* cost of FMUL instruction.  */
  39,					/* cost of FDIV instruction.  */
  1,					/* cost of FABS instruction.  */
  1,					/* cost of FCHS instruction.  */
  70,					/* cost of FSQRT instruction.  */
};

static const
struct processor_costs pentiumpro_cost = {
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  1,					/* variable shift costs */
  1,					/* constant shift costs */
  4,					/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  17,					/* cost of a divide/mod */
  1,					/* cost of movsx */
  1,					/* cost of movzx */
  8,					/* "large" insn */
  6,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {4, 4, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 2, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  3,					/* MMX or SSE register to integer */
  32,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  3,					/* cost of FADD and FSUB insns.  */
  5,					/* cost of FMUL instruction.  */
  56,					/* cost of FDIV instruction.  */
  2,					/* cost of FABS instruction.  */
  2,					/* cost of FCHS instruction.  */
  56,					/* cost of FSQRT instruction.  */
};

static const
struct processor_costs k6_cost = {
  1,					/* cost of an add instruction */
  2,					/* cost of a lea instruction */
  1,					/* variable shift costs */
  1,					/* constant shift costs */
  3,					/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  18,					/* cost of a divide/mod */
  2,					/* cost of movsx */
  2,					/* cost of movzx */
  8,					/* "large" insn */
  4,					/* MOVE_RATIO */
  3,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {6, 6, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 4},				/* cost of storing fp registers */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {2, 2, 8},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  6,					/* MMX or SSE register to integer */
  32,					/* size of prefetch block */
  1,					/* number of parallel prefetches */
  2,					/* cost of FADD and FSUB insns.  */
  2,					/* cost of FMUL instruction.  */
  56,					/* cost of FDIV instruction.  */
  2,					/* cost of FABS instruction.  */
  2,					/* cost of FCHS instruction.  */
  56,					/* cost of FSQRT instruction.  */
};

static const
struct processor_costs athlon_cost = {
  1,					/* cost of an add instruction */
  2,					/* cost of a lea instruction */
  1,					/* variable shift costs */
  1,					/* constant shift costs */
  5,					/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  42,					/* cost of a divide/mod */
  1,					/* cost of movsx */
  1,					/* cost of movzx */
  8,					/* "large" insn */
  9,					/* MOVE_RATIO */
  4,					/* cost for loading QImode using movzbl */
  {3, 4, 3},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {3, 4, 3},				/* cost of storing integer registers */
  4,					/* cost of reg,reg fld/fst */
  {4, 4, 12},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {6, 6, 8},				/* cost of storing fp registers */
  2,					/* cost of moving MMX register */
  {4, 4},				/* cost of loading MMX registers
					   in SImode and DImode */
  {4, 4},				/* cost of storing MMX registers
					   in SImode and DImode */
  2,					/* cost of moving SSE register */
  {4, 4, 6},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {4, 4, 5},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  5,					/* MMX or SSE register to integer */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  4,					/* cost of FADD and FSUB insns.  */
  4,					/* cost of FMUL instruction.  */
  24,					/* cost of FDIV instruction.  */
  2,					/* cost of FABS instruction.  */
  2,					/* cost of FCHS instruction.  */
  35,					/* cost of FSQRT instruction.  */
};

static const
struct processor_costs pentium4_cost = {
  1,					/* cost of an add instruction */
  1,					/* cost of a lea instruction */
  8,					/* variable shift costs */
  8,					/* constant shift costs */
  30,					/* cost of starting a multiply */
  0,					/* cost of multiply per each bit set */
  112,					/* cost of a divide/mod */
  1,					/* cost of movsx */
  1,					/* cost of movzx */
  16,					/* "large" insn */
  6,					/* MOVE_RATIO */
  2,					/* cost for loading QImode using movzbl */
  {4, 5, 4},				/* cost of loading integer registers
					   in QImode, HImode and SImode.
					   Relative to reg-reg move (2).  */
  {2, 3, 2},				/* cost of storing integer registers */
  2,					/* cost of reg,reg fld/fst */
  {2, 2, 6},				/* cost of loading fp registers
					   in SFmode, DFmode and XFmode */
  {4, 4, 6},				/* cost of storing fp registers */
  2,					/* cost of moving MMX register */
  {2, 2},				/* cost of loading MMX registers
					   in SImode and DImode */
  {2, 2},				/* cost of storing MMX registers
					   in SImode and DImode */
  12,					/* cost of moving SSE register */
  {12, 12, 12},				/* cost of loading SSE registers
					   in SImode, DImode and TImode */
  {2, 2, 8},				/* cost of storing SSE registers
					   in SImode, DImode and TImode */
  10,					/* MMX or SSE register to integer */
  64,					/* size of prefetch block */
  6,					/* number of parallel prefetches */
  5,					/* cost of FADD and FSUB insns.  */
  7,					/* cost of FMUL instruction.  */
  43,					/* cost of FDIV instruction.  */
  2,					/* cost of FABS instruction.  */
  2,					/* cost of FCHS instruction.  */
  43,					/* cost of FSQRT instruction.  */
};

const struct processor_costs *ix86_cost = &pentium_cost;

/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_K6 (1<<PROCESSOR_K6)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)

const int x86_use_leave = m_386 | m_K6 | m_ATHLON;
const int x86_push_memory = m_386 | m_K6 | m_ATHLON | m_PENT4;
const int x86_zero_extend_with_and = m_486 | m_PENT;
const int x86_movx = m_ATHLON | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
const int x86_double_with_add = ~m_386;
const int x86_use_bit_test = m_386;
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON | m_K6;
const int x86_cmove = m_PPRO | m_ATHLON | m_PENT4;
const int x86_3dnow_a = m_ATHLON;
const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON | m_PENT4;
const int x86_branch_hints = m_PENT4;
const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
const int x86_partial_reg_stall = m_PPRO;
const int x86_use_loop = m_K6;
const int x86_use_fiop = ~(m_PPRO | m_ATHLON | m_PENT);
const int x86_use_mov0 = m_K6;
const int x86_use_cltd = ~(m_PENT | m_K6);
const int x86_read_modify_write = ~m_PENT;
const int x86_read_modify = ~(m_PENT | m_PPRO);
const int x86_split_long_moves = m_PPRO;
const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON;
const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
const int x86_single_stringop = m_386 | m_PENT4;
const int x86_qimode_math = ~(0);
const int x86_promote_qi_regs = 0;
const int x86_himode_math = ~(m_PPRO);
const int x86_promote_hi_regs = m_PPRO;
const int x86_sub_esp_4 = m_ATHLON | m_PPRO | m_PENT4;
const int x86_sub_esp_8 = m_ATHLON | m_PPRO | m_386 | m_486 | m_PENT4;
const int x86_add_esp_4 = m_ATHLON | m_K6 | m_PENT4;
const int x86_add_esp_8 = m_ATHLON | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
const int x86_integer_DFmode_moves = ~(m_ATHLON | m_PENT4 | m_PPRO);
const int x86_partial_reg_dependency = m_ATHLON | m_PENT4;
const int x86_memory_mismatch_stall = m_ATHLON | m_PENT4;
const int x86_accumulate_outgoing_args = m_ATHLON | m_PENT4 | m_PPRO;
const int x86_prologue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
const int x86_epilogue_using_move = m_ATHLON | m_PENT4 | m_PPRO;
const int x86_decompose_lea = m_PENT4;
const int x86_shift1 = ~m_486;
const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON | m_PENT4;
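
/* A minimal sketch of how these masks are consumed (the helper name
   here is illustrative only, not part of this file): a tuning mask is
   tested against the bit of the active CPU or architecture, exactly as
   override_options does below with x86_arch_always_fancy_math_387:

     static int
     ix86_tune_bit_set (mask)
          int mask;
     {
       return (mask & (1 << ix86_cpu)) != 0;
     }

   so (x86_use_leave & (1 << ix86_cpu)) is nonzero precisely when code
   tuned for the selected -mcpu should use the leave instruction.  */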

/* In case the average insn count for single function invocation is
   lower than this constant, emit fast (but longer) prologue and
   epilogue code.  */
#define FAST_PROLOGUE_INSN_COUNT 30

/* Set by prologue expander and used by epilogue expander to determine
   the style used.  */
static int use_fast_prologue_epilogue;

/* Names for 8 (low), 8 (high), and 16-bit registers, respectively.  */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;

/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS in i386.h.  */

enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
  /* ax, dx, cx, bx */
  AREG, DREG, CREG, BREG,
  /* si, di, bp, sp */
  SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
  /* FP registers */
  FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
  FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
  /* arg pointer */
  NON_Q_REGS,
  /* flags, fpsr, dirflag, frame */
  NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
  MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
  MMX_REGS, MMX_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
  SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
  SSE_REGS, SSE_REGS,
};
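
/* Worked example (assuming the usual REGNO_REG_CLASS definition in
   i386.h, which simply indexes this table):

     REGNO_REG_CLASS (0) == AREG        (%eax)
     REGNO_REG_CLASS (7) == NON_Q_REGS  (%esp, no QImode part)

   which is how reload and the register allocator query the tightest
   class for a hard register.  */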

/* The "default" register map used in 32bit mode.  */

int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 4, 5,		/* general regs */
  12, 13, 14, 15, 16, 17, 18, 19,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};

static int const x86_64_int_parameter_registers[6] =
{
  5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
  FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
};

static int const x86_64_int_return_registers[4] =
{
  0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
};

/* The "default" register map used in 64bit mode.  */
int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 1, 2, 3, 4, 5, 6, 7,		/* general regs */
  33, 34, 35, 36, 37, 38, 39, 40,	/* fp regs */
  -1, -1, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  17, 18, 19, 20, 21, 22, 23, 24,	/* SSE */
  41, 42, 43, 44, 45, 46, 47, 48,	/* MMX */
  8, 9, 10, 11, 12, 13, 14, 15,		/* extended integer registers */
  25, 26, 27, 28, 29, 30, 31, 32,	/* extended SSE registers */
};

/* Define the register numbers to be used in Dwarf debugging information.
   The SVR4 reference port C compiler uses the following register numbers
   in its Dwarf output code:
	0 for %eax (gcc regno = 0)
	1 for %ecx (gcc regno = 2)
	2 for %edx (gcc regno = 1)
	3 for %ebx (gcc regno = 3)
	4 for %esp (gcc regno = 7)
	5 for %ebp (gcc regno = 6)
	6 for %esi (gcc regno = 4)
	7 for %edi (gcc regno = 5)
   The following three DWARF register numbers are never generated by
   the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
   believes these numbers have these meanings.
	8  for %eip    (no gcc equivalent)
	9  for %eflags (gcc regno = 17)
	10 for %trapno (no gcc equivalent)
   It is not at all clear how we should number the FP stack registers
   for the x86 architecture.  If the version of SDB on x86/svr4 were
   a bit less brain dead with respect to floating-point then we would
   have a precedent to follow with respect to DWARF register numbers
   for x86 FP registers, but the SDB on x86/svr4 is so completely
   broken with respect to FP registers that it is hardly worth thinking
   of it as something to strive for compatibility with.
   The version of x86/svr4 SDB I have at the moment does (partially)
   seem to believe that DWARF register number 11 is associated with
   the x86 register %st(0), but that's about all.  Higher DWARF
   register numbers don't seem to be associated with anything in
   particular, and even for DWARF regno 11, SDB only seems to under-
   stand that it should say that a variable lives in %st(0) (when
   asked via an `=' command) if we said it was in DWARF regno 11,
   but SDB still prints garbage when asked for the value of the
   variable in question (via a `/' command).
   (Also note that the labels SDB prints for various FP stack regs
   when doing an `x' command are all wrong.)
   Note that these problems generally don't affect the native SVR4
   C compiler because it doesn't allow the use of -O with -g and
   because when it is *not* optimizing, it allocates a memory
   location for each floating-point variable, and the memory
   location is what gets described in the DWARF AT_location
   attribute for the variable in question.
   Regardless of the severe mental illness of the x86/svr4 SDB, we
   do something sensible here and we use the following DWARF
   register numbers.  Note that these are all stack-top-relative
   numbers.
	11 for %st(0) (gcc regno = 8)
	12 for %st(1) (gcc regno = 9)
	13 for %st(2) (gcc regno = 10)
	14 for %st(3) (gcc regno = 11)
	15 for %st(4) (gcc regno = 12)
	16 for %st(5) (gcc regno = 13)
	17 for %st(6) (gcc regno = 14)
	18 for %st(7) (gcc regno = 15)
*/
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
  0, 2, 1, 3, 6, 7, 5, 4,		/* general regs */
  11, 12, 13, 14, 15, 16, 17, 18,	/* fp regs */
  -1, 9, -1, -1, -1,			/* arg, flags, fpsr, dir, frame */
  21, 22, 23, 24, 25, 26, 27, 28,	/* SSE registers */
  29, 30, 31, 32, 33, 34, 35, 36,	/* MMX registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended integer registers */
  -1, -1, -1, -1, -1, -1, -1, -1,	/* extended SSE registers */
};
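
/* Sketch of how these maps are consumed (assuming the conventional
   DBX_REGISTER_NUMBER definition in the target headers, which indexes
   one of the arrays above by the gcc hard register number):

     #define DBX_REGISTER_NUMBER(n)  (svr4_dbx_register_map[(n)])

   so gcc regno 1 (%edx) is emitted as DWARF register 2, matching the
   SVR4 numbering documented above.  Which map applies depends on the
   configured target and on TARGET_64BIT.  */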

/* Test and compare insns in i386.md store the information needed to
   generate branch and scc insns here.  */

rtx ix86_compare_op0 = NULL_RTX;
rtx ix86_compare_op1 = NULL_RTX;

/* The encoding characters for the four TLS models present in ELF.  */

static char const tls_model_chars[] = " GLil";

#define MAX_386_STACK_LOCALS 3
/* Size of the register save area.  */
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)

/* Define the structure for the machine field in struct function.  */
struct machine_function GTY(())
{
  rtx stack_locals[(int) MAX_MACHINE_MODE][MAX_386_STACK_LOCALS];
  const char *some_ld_name;
  int save_varrargs_registers;
  int accesses_prev_frame;
};

#define ix86_stack_locals (cfun->machine->stack_locals)
#define ix86_save_varrargs_registers (cfun->machine->save_varrargs_registers)

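/* Illustrative use of the accessors above: per-function state hangs
   off cfun->machine, so consumers write, e.g.,

     if (ix86_save_varrargs_registers)
       ...reserve the X86_64_VARARGS_SIZE save area...

   rather than dereferencing struct machine_function directly.  */
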
/* Structure describing stack frame layout.
   Stack grows downward:

   [arguments]
					      <- ARG_POINTER
   saved pc

   saved frame pointer if frame_pointer_needed
					      <- HARD_FRAME_POINTER
   [saved regs]

   [padding1]          \
		        )
   [va_arg registers]  (
		        > to_allocate	      <- FRAME_POINTER
   [frame]	       (
		        )
   [padding2]	       /
  */
struct ix86_frame
{
  int nregs;
  int padding1;
  int va_arg_size;
  HOST_WIDE_INT frame;
  int padding2;
  int outgoing_arguments_size;
  int red_zone_size;

  HOST_WIDE_INT to_allocate;
  /* The offsets relative to ARG_POINTER.  */
  HOST_WIDE_INT frame_pointer_offset;
  HOST_WIDE_INT hard_frame_pointer_offset;
  HOST_WIDE_INT stack_pointer_offset;
};

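/* Sketch of the intended use: ix86_compute_frame_layout (declared
   below) fills this structure in, and the prologue/epilogue expanders
   read the precomputed offsets, e.g.

     struct ix86_frame frame;
     ix86_compute_frame_layout (&frame);
     allocate = frame.to_allocate;

   where `allocate' is the byte count the prologue subtracts from the
   stack pointer after the saved registers are pushed.  */
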
/* Used to enable/disable debugging features.  */
const char *ix86_debug_arg_string, *ix86_debug_addr_string;
/* Code model option as passed by user.  */
const char *ix86_cmodel_string;
/* Parsed value.  */
enum cmodel ix86_cmodel;
/* Asm dialect.  */
const char *ix86_asm_string;
enum asm_dialect ix86_asm_dialect = ASM_ATT;
/* TLS dialect.  */
const char *ix86_tls_dialect_string;
enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;

/* Which unit we are generating floating point math for.  */
enum fpmath_unit ix86_fpmath;

/* Which cpu are we scheduling for.  */
enum processor_type ix86_cpu;
/* Which instruction set architecture to use.  */
enum processor_type ix86_arch;

/* Strings to hold which cpu and instruction set architecture to use.  */
const char *ix86_cpu_string;		/* for -mcpu=<xxx> */
const char *ix86_arch_string;		/* for -march=<xxx> */
const char *ix86_fpmath_string;		/* for -mfpmath=<xxx> */

/* # of registers to use to pass arguments.  */
const char *ix86_regparm_string;

/* True if the sse prefetch instruction is not a NOOP.  */
int x86_prefetch_sse;

/* ix86_regparm_string as a number */
int ix86_regparm;

/* Alignment to use for loops and jumps:  */

/* Power of two alignment for loops.  */
const char *ix86_align_loops_string;

/* Power of two alignment for non-loop jumps.  */
const char *ix86_align_jumps_string;

/* Power of two alignment for stack boundary in bytes.  */
const char *ix86_preferred_stack_boundary_string;

/* Preferred alignment for stack boundary in bits.  */
int ix86_preferred_stack_boundary;

/* Values 1-5: see jump.c */
int ix86_branch_cost;
const char *ix86_branch_cost_string;

/* Power of two alignment for functions.  */
const char *ix86_align_funcs_string;

/* Prefix built by ASM_GENERATE_INTERNAL_LABEL.  */
static char internal_label_prefix[16];
static int internal_label_prefix_len;
\f
static int local_symbolic_operand PARAMS ((rtx, enum machine_mode));
static int tls_symbolic_operand_1 PARAMS ((rtx, enum tls_model));
static void output_pic_addr_const PARAMS ((FILE *, rtx, int));
static void put_condition_code PARAMS ((enum rtx_code, enum machine_mode,
					int, int, FILE *));
static const char *get_some_local_dynamic_name PARAMS ((void));
static int get_some_local_dynamic_name_1 PARAMS ((rtx *, void *));
static rtx maybe_get_pool_constant PARAMS ((rtx));
static rtx ix86_expand_int_compare PARAMS ((enum rtx_code, rtx, rtx));
static enum rtx_code ix86_prepare_fp_compare_args PARAMS ((enum rtx_code,
							   rtx *, rtx *));
static rtx get_thread_pointer PARAMS ((void));
static void get_pc_thunk_name PARAMS ((char [32], unsigned int));
static rtx gen_push PARAMS ((rtx));
static int memory_address_length PARAMS ((rtx addr));
static int ix86_flags_dependant PARAMS ((rtx, rtx, enum attr_type));
static int ix86_agi_dependant PARAMS ((rtx, rtx, enum attr_type));
static enum attr_ppro_uops ix86_safe_ppro_uops PARAMS ((rtx));
static void ix86_dump_ppro_packet PARAMS ((FILE *));
static void ix86_reorder_insn PARAMS ((rtx *, rtx *));
static struct machine_function * ix86_init_machine_status PARAMS ((void));
static int ix86_split_to_parts PARAMS ((rtx, rtx *, enum machine_mode));
static int ix86_nsaved_regs PARAMS ((void));
static void ix86_emit_save_regs PARAMS ((void));
static void ix86_emit_save_regs_using_mov PARAMS ((rtx, HOST_WIDE_INT));
static void ix86_emit_restore_regs_using_mov PARAMS ((rtx, int, int));
static void ix86_output_function_epilogue PARAMS ((FILE *, HOST_WIDE_INT));
static void ix86_set_move_mem_attrs_1 PARAMS ((rtx, rtx, rtx, rtx, rtx));
static void ix86_sched_reorder_ppro PARAMS ((rtx *, rtx *));
static HOST_WIDE_INT ix86_GOT_alias_set PARAMS ((void));
static void ix86_adjust_counter PARAMS ((rtx, HOST_WIDE_INT));
static rtx ix86_expand_aligntest PARAMS ((rtx, int));
static void ix86_expand_strlensi_unroll_1 PARAMS ((rtx, rtx));
static int ix86_issue_rate PARAMS ((void));
static int ix86_adjust_cost PARAMS ((rtx, rtx, rtx, int));
static void ix86_sched_init PARAMS ((FILE *, int, int));
static int ix86_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
static int ix86_variable_issue PARAMS ((FILE *, int, rtx, int));
static int ia32_use_dfa_pipeline_interface PARAMS ((void));
static int ia32_multipass_dfa_lookahead PARAMS ((void));
static void ix86_init_mmx_sse_builtins PARAMS ((void));

struct ix86_address
{
  rtx base, index, disp;
  HOST_WIDE_INT scale;
};

static int ix86_decompose_address PARAMS ((rtx, struct ix86_address *));

static void ix86_encode_section_info PARAMS ((tree, int)) ATTRIBUTE_UNUSED;
static const char *ix86_strip_name_encoding PARAMS ((const char *))
     ATTRIBUTE_UNUSED;

struct builtin_description;
static rtx ix86_expand_sse_comi PARAMS ((const struct builtin_description *,
					 tree, rtx));
static rtx ix86_expand_sse_compare PARAMS ((const struct builtin_description *,
					    tree, rtx));
static rtx ix86_expand_unop1_builtin PARAMS ((enum insn_code, tree, rtx));
static rtx ix86_expand_unop_builtin PARAMS ((enum insn_code, tree, rtx, int));
static rtx ix86_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));
static rtx ix86_expand_timode_binop_builtin PARAMS ((enum insn_code,
						     tree, rtx));
static rtx ix86_expand_store_builtin PARAMS ((enum insn_code, tree));
static rtx safe_vector_operand PARAMS ((rtx, enum machine_mode));
static enum rtx_code ix86_fp_compare_code_to_integer PARAMS ((enum rtx_code));
static void ix86_fp_comparison_codes PARAMS ((enum rtx_code code,
					      enum rtx_code *,
					      enum rtx_code *,
					      enum rtx_code *));
static rtx ix86_expand_fp_compare PARAMS ((enum rtx_code, rtx, rtx, rtx,
					   rtx *, rtx *));
static int ix86_fp_comparison_arithmetics_cost PARAMS ((enum rtx_code code));
static int ix86_fp_comparison_fcomi_cost PARAMS ((enum rtx_code code));
static int ix86_fp_comparison_sahf_cost PARAMS ((enum rtx_code code));
static int ix86_fp_comparison_cost PARAMS ((enum rtx_code code));
static unsigned int ix86_select_alt_pic_regnum PARAMS ((void));
static int ix86_save_reg PARAMS ((unsigned int, int));
static void ix86_compute_frame_layout PARAMS ((struct ix86_frame *));
static int ix86_comp_type_attributes PARAMS ((tree, tree));
const struct attribute_spec ix86_attribute_table[];
static tree ix86_handle_cdecl_attribute PARAMS ((tree *, tree, tree, int, bool *));
static tree ix86_handle_regparm_attribute PARAMS ((tree *, tree, tree, int, bool *));
static int ix86_value_regno PARAMS ((enum machine_mode));

#if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
static void ix86_svr3_asm_out_constructor PARAMS ((rtx, int));
#endif

/* Register class used for passing given 64bit part of the argument.
   These represent classes as documented by the PS ABI, with the exception
   of SSESF, SSEDF classes, that are basically SSE class, just gcc will
   use SF or DFmode move instead of DImode to avoid reformatting penalties.

   Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
   whenever possible (upper half does contain padding).  */
enum x86_64_reg_class
  {
    X86_64_NO_CLASS,
    X86_64_INTEGER_CLASS,
    X86_64_INTEGERSI_CLASS,
    X86_64_SSE_CLASS,
    X86_64_SSESF_CLASS,
    X86_64_SSEDF_CLASS,
    X86_64_SSEUP_CLASS,
    X86_64_X87_CLASS,
    X86_64_X87UP_CLASS,
    X86_64_MEMORY_CLASS
  };
static const char * const x86_64_reg_class_name[] =
   {"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};

#define MAX_CLASSES 4
static int classify_argument PARAMS ((enum machine_mode, tree,
				      enum x86_64_reg_class [MAX_CLASSES],
				      int));
static int examine_argument PARAMS ((enum machine_mode, tree, int, int *,
				     int *));
static rtx construct_container PARAMS ((enum machine_mode, tree, int, int, int,
					const int *, int));
static enum x86_64_reg_class merge_classes PARAMS ((enum x86_64_reg_class,
						    enum x86_64_reg_class));
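
/* Worked example of the classification the comment above describes
   (illustrative, following the psABI rules):

     struct s { double d; int i; };

   classify_argument gives the first eightbyte (d) class
   X86_64_SSEDF_CLASS and the second eightbyte (i plus padding) class
   X86_64_INTEGERSI_CLASS, so the struct travels in one SSE and one
   integer register instead of memory.  */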
\f
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
#  undef TARGET_MERGE_DECL_ATTRIBUTES
#  define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin

#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue

#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""

#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT ix86_sched_init
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ix86_sched_reorder
#undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
#define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
  ia32_use_dfa_pipeline_interface
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  ia32_multipass_dfa_lookahead

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

struct gcc_target targetm = TARGET_INITIALIZER;
\f
/* Sometimes certain combinations of command options do not make
   sense on a particular target machine.  You can define a macro
   `OVERRIDE_OPTIONS' to take account of this.  This macro, if
   defined, is executed once just after all the command options have
   been parsed.

   Don't use this macro to turn on various extra optimizations for
   `-O'.  That is what `OPTIMIZATION_OPTIONS' is for.  */

void
override_options ()
{
  int i;
  /* Comes from final.c -- no real reason to change it.  */
#define MAX_CODE_ALIGN 16

  static struct ptt
    {
      const struct processor_costs *cost;	/* Processor costs */
      const int target_enable;			/* Target flags to enable.  */
      const int target_disable;			/* Target flags to disable.  */
      const int align_loop;			/* Default alignments.  */
      const int align_loop_max_skip;
      const int align_jump;
      const int align_jump_max_skip;
      const int align_func;
      const int branch_cost;
    }
  const processor_target_table[PROCESSOR_max] =
    {
      {&i386_cost, 0, 0, 4, 3, 4, 3, 4, 1},
      {&i486_cost, 0, 0, 16, 15, 16, 15, 16, 1},
      {&pentium_cost, 0, 0, 16, 7, 16, 7, 16, 1},
      {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16, 1},
      {&k6_cost, 0, 0, 32, 7, 32, 7, 32, 1},
      {&athlon_cost, 0, 0, 16, 7, 64, 7, 16, 1},
      {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0, 1}
    };

  static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
  static struct pta
    {
      const char *const name;		/* processor name or nickname.  */
      const enum processor_type processor;
      const enum pta_flags
	{
	  PTA_SSE = 1,
	  PTA_SSE2 = 2,
	  PTA_MMX = 4,
	  PTA_PREFETCH_SSE = 8,
	  PTA_3DNOW = 16,
	  PTA_3DNOW_A = 64
	} flags;
    }
  const processor_alias_table[] =
    {
      {"i386", PROCESSOR_I386, 0},
      {"i486", PROCESSOR_I486, 0},
      {"i586", PROCESSOR_PENTIUM, 0},
      {"pentium", PROCESSOR_PENTIUM, 0},
      {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
      {"winchip-c6", PROCESSOR_I486, PTA_MMX},
      {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
      {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
      {"i686", PROCESSOR_PENTIUMPRO, 0},
      {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
      {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
      {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
      {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 |
				       PTA_MMX | PTA_PREFETCH_SSE},
      {"k6", PROCESSOR_K6, PTA_MMX},
      {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
      {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
      {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				   | PTA_3DNOW_A},
      {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
					 | PTA_3DNOW | PTA_3DNOW_A},
      {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				     | PTA_3DNOW_A | PTA_SSE},
      {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				      | PTA_3DNOW_A | PTA_SSE},
      {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
				      | PTA_3DNOW_A | PTA_SSE},
    };

  int const pta_size = ARRAY_SIZE (processor_alias_table);

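  /* Reading the table above: -march=athlon-xp, for example, selects
     PROCESSOR_ATHLON and, through its PTA_* flags, turns on MASK_MMX,
     MASK_3DNOW, MASK_3DNOW_A and MASK_SSE in the loop below, unless
     the user already set those masks explicitly.  */
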
  /* By default our XFmode is the 80-bit extended format.  If we use
     TFmode instead, it's also the 80-bit format, but with padding.  */
  real_format_for_mode[XFmode - QFmode] = &ieee_extended_intel_96_format;
  real_format_for_mode[TFmode - QFmode] = &ieee_extended_intel_128_format;

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

  if (!ix86_cpu_string && ix86_arch_string)
    ix86_cpu_string = ix86_arch_string;
  if (!ix86_cpu_string)
    ix86_cpu_string = cpu_names [TARGET_CPU_DEFAULT];
  if (!ix86_arch_string)
    ix86_arch_string = TARGET_64BIT ? "athlon-4" : "i386";

  if (ix86_cmodel_string != 0)
    {
      if (!strcmp (ix86_cmodel_string, "small"))
	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
      else if (flag_pic)
	sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
      else if (!strcmp (ix86_cmodel_string, "32"))
	ix86_cmodel = CM_32;
      else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
	ix86_cmodel = CM_KERNEL;
      else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
	ix86_cmodel = CM_MEDIUM;
      else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
	ix86_cmodel = CM_LARGE;
      else
	error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
    }
  else
    {
      ix86_cmodel = CM_32;
      if (TARGET_64BIT)
	ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
    }
  if (ix86_asm_string != 0)
    {
      if (!strcmp (ix86_asm_string, "intel"))
	ix86_asm_dialect = ASM_INTEL;
      else if (!strcmp (ix86_asm_string, "att"))
	ix86_asm_dialect = ASM_ATT;
      else
	error ("bad value (%s) for -masm= switch", ix86_asm_string);
    }
  if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
    error ("code model `%s' not supported in the %s bit mode",
	   ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
  if (ix86_cmodel == CM_LARGE)
    sorry ("code model `large' not supported yet");
  if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
    sorry ("%i-bit mode not compiled in",
	   (target_flags & MASK_64BIT) ? 64 : 32);

  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
      {
	ix86_arch = processor_alias_table[i].processor;
	/* Default cpu tuning to the architecture.  */
	ix86_cpu = ix86_arch;
	if (processor_alias_table[i].flags & PTA_MMX
	    && !(target_flags_explicit & MASK_MMX))
	  target_flags |= MASK_MMX;
	if (processor_alias_table[i].flags & PTA_3DNOW
	    && !(target_flags_explicit & MASK_3DNOW))
	  target_flags |= MASK_3DNOW;
	if (processor_alias_table[i].flags & PTA_3DNOW_A
	    && !(target_flags_explicit & MASK_3DNOW_A))
	  target_flags |= MASK_3DNOW_A;
	if (processor_alias_table[i].flags & PTA_SSE
	    && !(target_flags_explicit & MASK_SSE))
	  target_flags |= MASK_SSE;
	if (processor_alias_table[i].flags & PTA_SSE2
	    && !(target_flags_explicit & MASK_SSE2))
	  target_flags |= MASK_SSE2;
	if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
	  x86_prefetch_sse = true;
	break;
      }

  if (i == pta_size)
    error ("bad value (%s) for -march= switch", ix86_arch_string);

  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_cpu_string, processor_alias_table[i].name))
      {
	ix86_cpu = processor_alias_table[i].processor;
	if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
	  x86_prefetch_sse = true;
	break;
      }
  if (i == pta_size)
    error ("bad value (%s) for -mcpu= switch", ix86_cpu_string);

  if (optimize_size)
    ix86_cost = &size_cost;
  else
    ix86_cost = processor_target_table[ix86_cpu].cost;
  target_flags |= processor_target_table[ix86_cpu].target_enable;
  target_flags &= ~processor_target_table[ix86_cpu].target_disable;

  /* Arrange to set up i386_stack_locals for all functions.  */
  init_machine_status = ix86_init_machine_status;

  /* Validate -mregparm= value.  */
  if (ix86_regparm_string)
    {
      i = atoi (ix86_regparm_string);
      if (i < 0 || i > REGPARM_MAX)
	error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
      else
	ix86_regparm = i;
    }
  else if (TARGET_64BIT)
    ix86_regparm = REGPARM_MAX;

  /* If the user has provided any of the -malign-* options,
     warn and use that value only if -falign-* is not set.
     Remove this code in GCC 3.2 or later.  */
  if (ix86_align_loops_string)
    {
      warning ("-malign-loops is obsolete, use -falign-loops");
      if (align_loops == 0)
	{
	  i = atoi (ix86_align_loops_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_loops = 1 << i;
	}
    }

  if (ix86_align_jumps_string)
    {
      warning ("-malign-jumps is obsolete, use -falign-jumps");
      if (align_jumps == 0)
	{
	  i = atoi (ix86_align_jumps_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_jumps = 1 << i;
	}
    }

  if (ix86_align_funcs_string)
    {
      warning ("-malign-functions is obsolete, use -falign-functions");
      if (align_functions == 0)
	{
	  i = atoi (ix86_align_funcs_string);
	  if (i < 0 || i > MAX_CODE_ALIGN)
	    error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
	  else
	    align_functions = 1 << i;
	}
    }

  /* Default align_* from the processor table.  */
  if (align_loops == 0)
    {
      align_loops = processor_target_table[ix86_cpu].align_loop;
      align_loops_max_skip = processor_target_table[ix86_cpu].align_loop_max_skip;
    }
  if (align_jumps == 0)
    {
      align_jumps = processor_target_table[ix86_cpu].align_jump;
      align_jumps_max_skip = processor_target_table[ix86_cpu].align_jump_max_skip;
    }
  if (align_functions == 0)
    {
      align_functions = processor_target_table[ix86_cpu].align_func;
    }

  /* Validate -mpreferred-stack-boundary= value, or provide default.
     The default of 128 bits is for Pentium III's SSE __m128, but we
     don't want additional code to keep the stack aligned when
     optimizing for code size.  */
  ix86_preferred_stack_boundary = (optimize_size
				   ? TARGET_64BIT ? 128 : 32
				   : 128);
  if (ix86_preferred_stack_boundary_string)
    {
      i = atoi (ix86_preferred_stack_boundary_string);
      if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
	error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
	       TARGET_64BIT ? 4 : 2);
      else
	ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
    }

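  /* For example, -mpreferred-stack-boundary=4 yields
     (1 << 4) * BITS_PER_UNIT = 16 * 8 = 128 bits, i.e. the 16-byte
     alignment that SSE __m128 spills require.  */
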
  /* Validate -mbranch-cost= value, or provide default.  */
  ix86_branch_cost = processor_target_table[ix86_cpu].branch_cost;
  if (ix86_branch_cost_string)
    {
      i = atoi (ix86_branch_cost_string);
      if (i < 0 || i > 5)
	error ("-mbranch-cost=%d is not between 0 and 5", i);
      else
	ix86_branch_cost = i;
    }

  if (ix86_tls_dialect_string)
    {
      if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
	ix86_tls_dialect = TLS_DIALECT_GNU;
      else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
	ix86_tls_dialect = TLS_DIALECT_SUN;
      else
	error ("bad value (%s) for -mtls-dialect= switch",
	       ix86_tls_dialect_string);
    }

  if (profile_flag)
    target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;

  /* Keep nonleaf frame pointers.  */
  if (TARGET_OMIT_LEAF_FRAME_POINTER)
    flag_omit_frame_pointer = 1;

  /* If we're doing fast math, we don't care about comparison order
     wrt NaNs.  This lets us use a shorter comparison sequence.  */
  if (flag_unsafe_math_optimizations)
    target_flags &= ~MASK_IEEE_FP;

  /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
     since the insns won't need emulation.  */
  if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
    target_flags &= ~MASK_NO_FANCY_MATH_387;

  if (TARGET_64BIT)
    {
      if (TARGET_ALIGN_DOUBLE)
	error ("-malign-double makes no sense in the 64bit mode");
      if (TARGET_RTD)
	error ("-mrtd calling convention not supported in the 64bit mode");
      /* Enable by default the SSE and MMX builtins.  */
      target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
      ix86_fpmath = FPMATH_SSE;
    }
  else
    ix86_fpmath = FPMATH_387;

  if (ix86_fpmath_string != 0)
    {
      if (! strcmp (ix86_fpmath_string, "387"))
	ix86_fpmath = FPMATH_387;
      else if (! strcmp (ix86_fpmath_string, "sse"))
	{
	  if (!TARGET_SSE)
	    {
	      warning ("SSE instruction set disabled, using 387 arithmetics");
	      ix86_fpmath = FPMATH_387;
	    }
	  else
	    ix86_fpmath = FPMATH_SSE;
	}
      else if (! strcmp (ix86_fpmath_string, "387,sse")
	       || ! strcmp (ix86_fpmath_string, "sse,387"))
	{
	  if (!TARGET_SSE)
	    {
	      warning ("SSE instruction set disabled, using 387 arithmetics");
	      ix86_fpmath = FPMATH_387;
	    }
	  else if (!TARGET_80387)
	    {
	      warning ("387 instruction set disabled, using SSE arithmetics");
	      ix86_fpmath = FPMATH_SSE;
	    }
	  else
	    ix86_fpmath = FPMATH_SSE | FPMATH_387;
	}
      else
	error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
    }

  /* It makes no sense to ask for just SSE builtins, so MMX is also turned
     on by -msse.  */
  if (TARGET_SSE)
    {
      target_flags |= MASK_MMX;
      x86_prefetch_sse = true;
    }

  /* If it has 3DNow! it also has MMX so MMX is also turned on by -m3dnow */
  if (TARGET_3DNOW)
    {
      target_flags |= MASK_MMX;
      /* If we are targeting the Athlon architecture, enable the 3DNow!/MMX
	 extensions it adds.  */
      if (x86_3dnow_a & (1 << ix86_arch))
	target_flags |= MASK_3DNOW_A;
    }
  if ((x86_accumulate_outgoing_args & CPUMASK)
      && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
      && !optimize_size)
    target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;

  /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix.  */
  {
    char *p;
    ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
    p = strchr (internal_label_prefix, 'X');
    internal_label_prefix_len = p - internal_label_prefix;
    *p = '\0';
  }
}
\f
void
optimization_options (level, size)
     int level;
     int size ATTRIBUTE_UNUSED;
{
  /* For -O2 and beyond, turn off -fschedule-insns by default.  It tends to
     make the problem with not enough registers even worse.  */
#ifdef INSN_SCHEDULING
  if (level > 1)
    flag_schedule_insns = 0;
#endif
  if (TARGET_64BIT && optimize >= 1)
    flag_omit_frame_pointer = 1;
  if (TARGET_64BIT)
    {
      flag_pcc_struct_return = 0;
      flag_asynchronous_unwind_tables = 1;
    }
  if (profile_flag)
    flag_omit_frame_pointer = 0;
}
\f
/* Table of valid machine attributes.  */
const struct attribute_spec ix86_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  /* Stdcall attribute says callee is responsible for popping arguments
     if they are not variable.  */
  { "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
  /* Cdecl attribute says the callee is a normal C declaration */
  { "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
  /* Regparm attribute specifies how many integer arguments are to be
     passed in registers.  */
  { "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
  { "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
  { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
#endif
  { NULL, 0, 0, false, false, false, NULL }
};

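/* For illustration, user code requests these attributes with the
   usual GNU C syntax, e.g.

     int __attribute__ ((stdcall)) f (int a, int b);
     int __attribute__ ((regparm (3))) g (int a, int b, int c);

   the handlers below validate such uses and reject misplaced ones.  */
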
/* Handle a "cdecl" or "stdcall" attribute;
   arguments as in struct attribute_spec.handler.  */
static tree
ix86_handle_cdecl_attribute (node, name, args, flags, no_add_attrs)
     tree *node;
     tree name;
     tree args ATTRIBUTE_UNUSED;
     int flags ATTRIBUTE_UNUSED;
     bool *no_add_attrs;
{
  if (TREE_CODE (*node) != FUNCTION_TYPE
      && TREE_CODE (*node) != METHOD_TYPE
      && TREE_CODE (*node) != FIELD_DECL
      && TREE_CODE (*node) != TYPE_DECL)
    {
      warning ("`%s' attribute only applies to functions",
	       IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }

  if (TARGET_64BIT)
    {
      warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }

  return NULL_TREE;
}

91d231cb
JM
1373/* Handle a "regparm" attribute;
1374 arguments as in struct attribute_spec.handler. */
1375static tree
1376ix86_handle_regparm_attribute (node, name, args, flags, no_add_attrs)
1377 tree *node;
1378 tree name;
1379 tree args;
1380 int flags ATTRIBUTE_UNUSED;
1381 bool *no_add_attrs;
1382{
1383 if (TREE_CODE (*node) != FUNCTION_TYPE
1384 && TREE_CODE (*node) != METHOD_TYPE
1385 && TREE_CODE (*node) != FIELD_DECL
1386 && TREE_CODE (*node) != TYPE_DECL)
1387 {
1388 warning ("`%s' attribute only applies to functions",
1389 IDENTIFIER_POINTER (name));
1390 *no_add_attrs = true;
1391 }
1392 else
1393 {
1394 tree cst;
b08de47e 1395
91d231cb
JM
1396 cst = TREE_VALUE (args);
1397 if (TREE_CODE (cst) != INTEGER_CST)
1398 {
1399 warning ("`%s' attribute requires an integer constant argument",
1400 IDENTIFIER_POINTER (name));
1401 *no_add_attrs = true;
1402 }
1403 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
1404 {
1405 warning ("argument to `%s' attribute larger than %d",
1406 IDENTIFIER_POINTER (name), REGPARM_MAX);
1407 *no_add_attrs = true;
1408 }
b08de47e
MM
1409 }
1410
91d231cb 1411 return NULL_TREE;
b08de47e
MM
1412}
1413
1414/* Return 0 if the attributes for two types are incompatible, 1 if they
1415 are compatible, and 2 if they are nearly compatible (which causes a
1416 warning to be generated). */
1417
8d8e52be 1418static int
e075ae69 1419ix86_comp_type_attributes (type1, type2)
afcfe58c
MM
1420 tree type1;
1421 tree type2;
b08de47e 1422{
0f290768 1423 /* Check for mismatch of non-default calling convention. */
27c38fbe 1424 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
afcfe58c
MM
1425
1426 if (TREE_CODE (type1) != FUNCTION_TYPE)
1427 return 1;
1428
1429 /* Check for mismatched return types (cdecl vs stdcall). */
6093f019
RH
1430 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
1431 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
afcfe58c 1432 return 0;
b08de47e
MM
1433 return 1;
1434}
b08de47e
MM
1435\f
1436/* Value is the number of bytes of arguments automatically
1437 popped when returning from a subroutine call.
1438 FUNDECL is the declaration node of the function (as a tree),
1439 FUNTYPE is the data type of the function (as a tree),
1440 or for a library call it is an identifier node for the subroutine name.
1441 SIZE is the number of bytes of arguments passed on the stack.
1442
1443 On the 80386, the RTD insn may be used to pop them if the number
1444 of args is fixed, but if the number is variable then the caller
1445 must pop them all. RTD can't be used for library calls now
1446 because the library is compiled with the Unix compiler.
1447 Use of RTD is a selectable option, since it is incompatible with
1448 standard Unix calling sequences. If the option is not selected,
1449 the caller must always pop the args.
1450
1451 The attribute stdcall is equivalent to RTD on a per module basis. */
1452
1453int
e075ae69 1454ix86_return_pops_args (fundecl, funtype, size)
b08de47e
MM
1455 tree fundecl;
1456 tree funtype;
1457 int size;
79325812 1458{
3345ee7d 1459 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
b08de47e 1460
0f290768 1461 /* Cdecl functions override -mrtd, and never pop the stack. */
e9a25f70 1462 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
79325812 1463
0f290768 1464 /* Stdcall functions will pop the stack if not variable args. */
698cdd84
SC
1465 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)))
1466 rtd = 1;
79325812 1467
698cdd84
SC
1468 if (rtd
1469 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
e9a25f70
JL
1470 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
1471 == void_type_node)))
698cdd84
SC
1472 return size;
1473 }
79325812 1474
232b8f52 1475 /* Lose any fake structure return argument if it is passed on the stack. */
0d7d98ee
JH
1476 if (aggregate_value_p (TREE_TYPE (funtype))
1477 && !TARGET_64BIT)
232b8f52
JJ
1478 {
1479 int nregs = ix86_regparm;
79325812 1480
232b8f52
JJ
1481 if (funtype)
1482 {
1483 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (funtype));
1484
1485 if (attr)
1486 nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1487 }
1488
1489 if (!nregs)
1490 return GET_MODE_SIZE (Pmode);
1491 }
1492
1493 return 0;
b08de47e 1494}
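/* A sketch of the rule above (illustrative, not from the original
   sources): for
     int __attribute__((stdcall)) f (int a, int b);
   SIZE is 8 and the argument list is fixed, so this function returns 8
   and the callee pops its arguments with `ret $8'.  A cdecl or variadic
   function yields 0 and the caller adjusts the stack after the call.  */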
b08de47e
MM
1495\f
1496/* Argument support functions. */
1497
53c17031
JH
1498/* Return true when register may be used to pass function parameters. */
1499bool
1500ix86_function_arg_regno_p (regno)
1501 int regno;
1502{
1503 int i;
1504 if (!TARGET_64BIT)
0333394e
JJ
1505 return (regno < REGPARM_MAX
1506 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
53c17031
JH
1507 if (SSE_REGNO_P (regno) && TARGET_SSE)
1508 return true;
 1509  /* RAX is used as a hidden argument to va_arg functions.  */
1510 if (!regno)
1511 return true;
1512 for (i = 0; i < REGPARM_MAX; i++)
1513 if (regno == x86_64_int_parameter_registers[i])
1514 return true;
1515 return false;
1516}
1517
b08de47e
MM
1518/* Initialize a variable CUM of type CUMULATIVE_ARGS
1519 for a call to a function whose data type is FNTYPE.
1520 For a library call, FNTYPE is 0. */
1521
1522void
1523init_cumulative_args (cum, fntype, libname)
e9a25f70 1524 CUMULATIVE_ARGS *cum; /* Argument info to initialize */
b08de47e
MM
1525 tree fntype; /* tree ptr for function decl */
1526 rtx libname; /* SYMBOL_REF of library name or 0 */
1527{
1528 static CUMULATIVE_ARGS zero_cum;
1529 tree param, next_param;
1530
1531 if (TARGET_DEBUG_ARG)
1532 {
1533 fprintf (stderr, "\ninit_cumulative_args (");
1534 if (fntype)
e9a25f70
JL
1535 fprintf (stderr, "fntype code = %s, ret code = %s",
1536 tree_code_name[(int) TREE_CODE (fntype)],
1537 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
b08de47e
MM
1538 else
1539 fprintf (stderr, "no fntype");
1540
1541 if (libname)
1542 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
1543 }
1544
1545 *cum = zero_cum;
1546
1547 /* Set up the number of registers to use for passing arguments. */
e075ae69 1548 cum->nregs = ix86_regparm;
53c17031
JH
1549 cum->sse_nregs = SSE_REGPARM_MAX;
1550 if (fntype && !TARGET_64BIT)
b08de47e
MM
1551 {
1552 tree attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (fntype));
e9a25f70 1553
b08de47e
MM
1554 if (attr)
1555 cum->nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
1556 }
53c17031 1557 cum->maybe_vaarg = false;
b08de47e
MM
1558
1559 /* Determine if this function has variable arguments. This is
 1560     indicated by the last argument being 'void_type_node' if there
 1561     are no variable arguments.  If there are variable arguments, then
 1562     we won't pass anything in registers.  */
1563
1564 if (cum->nregs)
1565 {
1566 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
e9a25f70 1567 param != 0; param = next_param)
b08de47e
MM
1568 {
1569 next_param = TREE_CHAIN (param);
e9a25f70 1570 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
53c17031
JH
1571 {
1572 if (!TARGET_64BIT)
1573 cum->nregs = 0;
1574 cum->maybe_vaarg = true;
1575 }
b08de47e
MM
1576 }
1577 }
53c17031
JH
1578 if ((!fntype && !libname)
1579 || (fntype && !TYPE_ARG_TYPES (fntype)))
1580 cum->maybe_vaarg = 1;
b08de47e
MM
1581
1582 if (TARGET_DEBUG_ARG)
1583 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
1584
1585 return;
1586}
1587
53c17031 1588/* x86-64 register passing implementation.  See the x86-64 ABI for details.  The goal
f710504c 1589   of this code is to classify each 8 bytes of the incoming argument by the register
53c17031
JH
1590 class and assign registers accordingly. */
1591
1592/* Return the union class of CLASS1 and CLASS2.
1593 See the x86-64 PS ABI for details. */
1594
1595static enum x86_64_reg_class
1596merge_classes (class1, class2)
1597 enum x86_64_reg_class class1, class2;
1598{
1599 /* Rule #1: If both classes are equal, this is the resulting class. */
1600 if (class1 == class2)
1601 return class1;
1602
1603 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
1604 the other class. */
1605 if (class1 == X86_64_NO_CLASS)
1606 return class2;
1607 if (class2 == X86_64_NO_CLASS)
1608 return class1;
1609
1610 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
1611 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
1612 return X86_64_MEMORY_CLASS;
1613
1614 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
1615 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
1616 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
1617 return X86_64_INTEGERSI_CLASS;
1618 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
1619 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
1620 return X86_64_INTEGER_CLASS;
1621
1622 /* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
1623 if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
1624 || class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
1625 return X86_64_MEMORY_CLASS;
1626
1627 /* Rule #6: Otherwise class SSE is used. */
1628 return X86_64_SSE_CLASS;
1629}
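/* Illustrative example (not in the original sources): for
     union { double d; int i; }
   the double classifies the single 8-byte word as SSEDF while the int
   classifies it as INTEGERSI; rule #4 above merges the two to
   X86_64_INTEGER_CLASS, so the union is passed in a GP register.  */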
1630
1631/* Classify the argument of type TYPE and mode MODE.
1632 CLASSES will be filled by the register class used to pass each word
1633 of the operand. The number of words is returned. In case the parameter
1634 should be passed in memory, 0 is returned. As a special case for zero
1635 sized containers, classes[0] will be NO_CLASS and 1 is returned.
1636
 1637   BIT_OFFSET is used internally for handling records and specifies the
 1638   offset in bits modulo 256 to avoid overflow cases.
1639
1640 See the x86-64 PS ABI for details.
1641*/
1642
1643static int
1644classify_argument (mode, type, classes, bit_offset)
1645 enum machine_mode mode;
1646 tree type;
1647 enum x86_64_reg_class classes[MAX_CLASSES];
1648 int bit_offset;
1649{
1650 int bytes =
1651 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
23327dae 1652 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
53c17031 1653
c60ee6f5
JH
1654 /* Variable sized entities are always passed/returned in memory. */
1655 if (bytes < 0)
1656 return 0;
1657
53c17031
JH
1658 if (type && AGGREGATE_TYPE_P (type))
1659 {
1660 int i;
1661 tree field;
1662 enum x86_64_reg_class subclasses[MAX_CLASSES];
1663
1664 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
1665 if (bytes > 16)
1666 return 0;
1667
1668 for (i = 0; i < words; i++)
1669 classes[i] = X86_64_NO_CLASS;
1670
 1671      /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
 1672	  signal the memory class, so handle it as a special case.  */
1673 if (!words)
1674 {
1675 classes[0] = X86_64_NO_CLASS;
1676 return 1;
1677 }
1678
1679 /* Classify each field of record and merge classes. */
1680 if (TREE_CODE (type) == RECORD_TYPE)
1681 {
91ea38f9
JH
 1682	  /* For classes, first merge in the fields of the base classes.  */
1683 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1684 {
1685 tree bases = TYPE_BINFO_BASETYPES (type);
1686 int n_bases = TREE_VEC_LENGTH (bases);
1687 int i;
1688
1689 for (i = 0; i < n_bases; ++i)
1690 {
1691 tree binfo = TREE_VEC_ELT (bases, i);
1692 int num;
1693 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1694 tree type = BINFO_TYPE (binfo);
1695
1696 num = classify_argument (TYPE_MODE (type),
1697 type, subclasses,
1698 (offset + bit_offset) % 256);
1699 if (!num)
1700 return 0;
1701 for (i = 0; i < num; i++)
1702 {
db01f480 1703 int pos = (offset + (bit_offset % 64)) / 8 / 8;
91ea38f9
JH
1704 classes[i + pos] =
1705 merge_classes (subclasses[i], classes[i + pos]);
1706 }
1707 }
1708 }
 1709	  /* And now merge the fields of the structure.  */
53c17031
JH
1710 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1711 {
1712 if (TREE_CODE (field) == FIELD_DECL)
1713 {
1714 int num;
1715
1716 /* Bitfields are always classified as integer. Handle them
1717 early, since later code would consider them to be
1718 misaligned integers. */
1719 if (DECL_BIT_FIELD (field))
1720 {
1721 for (i = int_bit_position (field) / 8 / 8;
1722 i < (int_bit_position (field)
1723 + tree_low_cst (DECL_SIZE (field), 0)
1724 + 63) / 8 / 8; i++)
1725 classes[i] =
1726 merge_classes (X86_64_INTEGER_CLASS,
1727 classes[i]);
1728 }
1729 else
1730 {
1731 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1732 TREE_TYPE (field), subclasses,
1733 (int_bit_position (field)
1734 + bit_offset) % 256);
1735 if (!num)
1736 return 0;
1737 for (i = 0; i < num; i++)
1738 {
1739 int pos =
db01f480 1740 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
53c17031
JH
1741 classes[i + pos] =
1742 merge_classes (subclasses[i], classes[i + pos]);
1743 }
1744 }
1745 }
1746 }
1747 }
1748 /* Arrays are handled as small records. */
1749 else if (TREE_CODE (type) == ARRAY_TYPE)
1750 {
1751 int num;
1752 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
1753 TREE_TYPE (type), subclasses, bit_offset);
1754 if (!num)
1755 return 0;
1756
1757 /* The partial classes are now full classes. */
1758 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
1759 subclasses[0] = X86_64_SSE_CLASS;
1760 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
1761 subclasses[0] = X86_64_INTEGER_CLASS;
1762
1763 for (i = 0; i < words; i++)
1764 classes[i] = subclasses[i % num];
1765 }
1766 /* Unions are similar to RECORD_TYPE but offset is always 0. */
e4dbaed5
AS
1767 else if (TREE_CODE (type) == UNION_TYPE
1768 || TREE_CODE (type) == QUAL_UNION_TYPE)
53c17031 1769 {
91ea38f9
JH
 1770      /* For classes, first merge in the fields of the base classes.  */
1771 if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
1772 {
1773 tree bases = TYPE_BINFO_BASETYPES (type);
1774 int n_bases = TREE_VEC_LENGTH (bases);
1775 int i;
1776
1777 for (i = 0; i < n_bases; ++i)
1778 {
1779 tree binfo = TREE_VEC_ELT (bases, i);
1780 int num;
1781 int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
1782 tree type = BINFO_TYPE (binfo);
1783
1784 num = classify_argument (TYPE_MODE (type),
1785 type, subclasses,
db01f480 1786 (offset + (bit_offset % 64)) % 256);
91ea38f9
JH
1787 if (!num)
1788 return 0;
1789 for (i = 0; i < num; i++)
1790 {
c16576e6 1791 int pos = (offset + (bit_offset % 64)) / 8 / 8;
91ea38f9
JH
1792 classes[i + pos] =
1793 merge_classes (subclasses[i], classes[i + pos]);
1794 }
1795 }
1796 }
53c17031
JH
1797 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
1798 {
1799 if (TREE_CODE (field) == FIELD_DECL)
1800 {
1801 int num;
1802 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
1803 TREE_TYPE (field), subclasses,
1804 bit_offset);
1805 if (!num)
1806 return 0;
1807 for (i = 0; i < num; i++)
1808 classes[i] = merge_classes (subclasses[i], classes[i]);
1809 }
1810 }
1811 }
1812 else
1813 abort ();
1814
1815 /* Final merger cleanup. */
1816 for (i = 0; i < words; i++)
1817 {
1818 /* If one class is MEMORY, everything should be passed in
1819 memory. */
1820 if (classes[i] == X86_64_MEMORY_CLASS)
1821 return 0;
1822
d6a7951f 1823 /* The X86_64_SSEUP_CLASS should be always preceded by
53c17031
JH
1824 X86_64_SSE_CLASS. */
1825 if (classes[i] == X86_64_SSEUP_CLASS
1826 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
1827 classes[i] = X86_64_SSE_CLASS;
1828
d6a7951f 1829 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
53c17031
JH
1830 if (classes[i] == X86_64_X87UP_CLASS
1831 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
1832 classes[i] = X86_64_SSE_CLASS;
1833 }
1834 return words;
1835 }
1836
 1837   /* Compute alignment needed.  We align all types to natural boundaries with
 1838      the exception of XFmode, which is aligned to 128 bits.  */
1839 if (mode != VOIDmode && mode != BLKmode)
1840 {
1841 int mode_alignment = GET_MODE_BITSIZE (mode);
1842
1843 if (mode == XFmode)
1844 mode_alignment = 128;
1845 else if (mode == XCmode)
1846 mode_alignment = 256;
f5143c46 1847 /* Misaligned fields are always returned in memory. */
53c17031
JH
1848 if (bit_offset % mode_alignment)
1849 return 0;
1850 }
1851
1852 /* Classification of atomic types. */
1853 switch (mode)
1854 {
1855 case DImode:
1856 case SImode:
1857 case HImode:
1858 case QImode:
1859 case CSImode:
1860 case CHImode:
1861 case CQImode:
1862 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
1863 classes[0] = X86_64_INTEGERSI_CLASS;
1864 else
1865 classes[0] = X86_64_INTEGER_CLASS;
1866 return 1;
1867 case CDImode:
1868 case TImode:
1869 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1870 return 2;
1871 case CTImode:
1872 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
1873 classes[2] = classes[3] = X86_64_INTEGER_CLASS;
1874 return 4;
1875 case SFmode:
1876 if (!(bit_offset % 64))
1877 classes[0] = X86_64_SSESF_CLASS;
1878 else
1879 classes[0] = X86_64_SSE_CLASS;
1880 return 1;
1881 case DFmode:
1882 classes[0] = X86_64_SSEDF_CLASS;
1883 return 1;
1884 case TFmode:
1885 classes[0] = X86_64_X87_CLASS;
1886 classes[1] = X86_64_X87UP_CLASS;
1887 return 2;
1888 case TCmode:
1889 classes[0] = X86_64_X87_CLASS;
1890 classes[1] = X86_64_X87UP_CLASS;
1891 classes[2] = X86_64_X87_CLASS;
1892 classes[3] = X86_64_X87UP_CLASS;
1893 return 4;
1894 case DCmode:
1895 classes[0] = X86_64_SSEDF_CLASS;
1896 classes[1] = X86_64_SSEDF_CLASS;
1897 return 2;
1898 case SCmode:
1899 classes[0] = X86_64_SSE_CLASS;
1900 return 1;
e95d6b23
JH
1901 case V4SFmode:
1902 case V4SImode:
495333a6
JH
1903 case V16QImode:
1904 case V8HImode:
1905 case V2DFmode:
1906 case V2DImode:
e95d6b23
JH
1907 classes[0] = X86_64_SSE_CLASS;
1908 classes[1] = X86_64_SSEUP_CLASS;
1909 return 2;
1910 case V2SFmode:
1911 case V2SImode:
1912 case V4HImode:
1913 case V8QImode:
1914 classes[0] = X86_64_SSE_CLASS;
1915 return 1;
53c17031 1916 case BLKmode:
e95d6b23 1917 case VOIDmode:
53c17031
JH
1918 return 0;
1919 default:
1920 abort ();
1921 }
1922}
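/* Some illustrative classifications produced by the code above (derived
   from the rules as implemented, not from the original sources):

     struct { int a; int b; }      - 1 word:  INTEGER
     struct { double d; int i; }   - 2 words: SSEDF, INTEGER
     long double                   - 2 words: X87, X87UP (examine_argument
                                     then forces arguments to memory)
     struct { char c[24]; }        - returns 0: larger than 16 bytes,
                                     passed in memory.  */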
1923
 1924/* Examine the argument and set the number of registers required in each
f5143c46 1925   class.  Return 0 iff the parameter should be passed in memory.  */
53c17031
JH
1926static int
1927examine_argument (mode, type, in_return, int_nregs, sse_nregs)
1928 enum machine_mode mode;
1929 tree type;
1930 int *int_nregs, *sse_nregs;
1931 int in_return;
1932{
1933 enum x86_64_reg_class class[MAX_CLASSES];
1934 int n = classify_argument (mode, type, class, 0);
1935
1936 *int_nregs = 0;
1937 *sse_nregs = 0;
1938 if (!n)
1939 return 0;
1940 for (n--; n >= 0; n--)
1941 switch (class[n])
1942 {
1943 case X86_64_INTEGER_CLASS:
1944 case X86_64_INTEGERSI_CLASS:
1945 (*int_nregs)++;
1946 break;
1947 case X86_64_SSE_CLASS:
1948 case X86_64_SSESF_CLASS:
1949 case X86_64_SSEDF_CLASS:
1950 (*sse_nregs)++;
1951 break;
1952 case X86_64_NO_CLASS:
1953 case X86_64_SSEUP_CLASS:
1954 break;
1955 case X86_64_X87_CLASS:
1956 case X86_64_X87UP_CLASS:
1957 if (!in_return)
1958 return 0;
1959 break;
1960 case X86_64_MEMORY_CLASS:
1961 abort ();
1962 }
1963 return 1;
1964}
 1965/* Construct a container for the argument as used by the GCC interface.  See
1966 FUNCTION_ARG for the detailed description. */
1967static rtx
1968construct_container (mode, type, in_return, nintregs, nsseregs, intreg, sse_regno)
1969 enum machine_mode mode;
1970 tree type;
1971 int in_return;
1972 int nintregs, nsseregs;
07933f72
GS
1973 const int * intreg;
1974 int sse_regno;
53c17031
JH
1975{
1976 enum machine_mode tmpmode;
1977 int bytes =
1978 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
1979 enum x86_64_reg_class class[MAX_CLASSES];
1980 int n;
1981 int i;
1982 int nexps = 0;
1983 int needed_sseregs, needed_intregs;
1984 rtx exp[MAX_CLASSES];
1985 rtx ret;
1986
1987 n = classify_argument (mode, type, class, 0);
1988 if (TARGET_DEBUG_ARG)
1989 {
1990 if (!n)
1991 fprintf (stderr, "Memory class\n");
1992 else
1993 {
1994 fprintf (stderr, "Classes:");
1995 for (i = 0; i < n; i++)
1996 {
1997 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
1998 }
1999 fprintf (stderr, "\n");
2000 }
2001 }
2002 if (!n)
2003 return NULL;
2004 if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
2005 return NULL;
2006 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
2007 return NULL;
2008
2009 /* First construct simple cases. Avoid SCmode, since we want to use
 2010     a single register to pass this type.  */
2011 if (n == 1 && mode != SCmode)
2012 switch (class[0])
2013 {
2014 case X86_64_INTEGER_CLASS:
2015 case X86_64_INTEGERSI_CLASS:
2016 return gen_rtx_REG (mode, intreg[0]);
2017 case X86_64_SSE_CLASS:
2018 case X86_64_SSESF_CLASS:
2019 case X86_64_SSEDF_CLASS:
2020 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
2021 case X86_64_X87_CLASS:
2022 return gen_rtx_REG (mode, FIRST_STACK_REG);
2023 case X86_64_NO_CLASS:
2024 /* Zero sized array, struct or class. */
2025 return NULL;
2026 default:
2027 abort ();
2028 }
2029 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS)
e95d6b23 2030 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
53c17031
JH
2031 if (n == 2
2032 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
2033 return gen_rtx_REG (TFmode, FIRST_STACK_REG);
2034 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
2035 && class[1] == X86_64_INTEGER_CLASS
2036 && (mode == CDImode || mode == TImode)
2037 && intreg[0] + 1 == intreg[1])
2038 return gen_rtx_REG (mode, intreg[0]);
2039 if (n == 4
2040 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
2041 && class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS)
2042 return gen_rtx_REG (TCmode, FIRST_STACK_REG);
2043
2044 /* Otherwise figure out the entries of the PARALLEL. */
2045 for (i = 0; i < n; i++)
2046 {
2047 switch (class[i])
2048 {
2049 case X86_64_NO_CLASS:
2050 break;
2051 case X86_64_INTEGER_CLASS:
2052 case X86_64_INTEGERSI_CLASS:
 2053	  /* Merge TImodes on aligned occasions here too.  */
2054 if (i * 8 + 8 > bytes)
2055 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
2056 else if (class[i] == X86_64_INTEGERSI_CLASS)
2057 tmpmode = SImode;
2058 else
2059 tmpmode = DImode;
 2060	  /* If no integer mode of the requested size exists, fall back to DImode.  */
2061 if (tmpmode == BLKmode)
2062 tmpmode = DImode;
2063 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2064 gen_rtx_REG (tmpmode, *intreg),
2065 GEN_INT (i*8));
2066 intreg++;
2067 break;
2068 case X86_64_SSESF_CLASS:
2069 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2070 gen_rtx_REG (SFmode,
2071 SSE_REGNO (sse_regno)),
2072 GEN_INT (i*8));
2073 sse_regno++;
2074 break;
2075 case X86_64_SSEDF_CLASS:
2076 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2077 gen_rtx_REG (DFmode,
2078 SSE_REGNO (sse_regno)),
2079 GEN_INT (i*8));
2080 sse_regno++;
2081 break;
2082 case X86_64_SSE_CLASS:
2083 if (i < n && class[i + 1] == X86_64_SSEUP_CLASS)
2084 tmpmode = TImode, i++;
2085 else
2086 tmpmode = DImode;
2087 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
2088 gen_rtx_REG (tmpmode,
2089 SSE_REGNO (sse_regno)),
2090 GEN_INT (i*8));
2091 sse_regno++;
2092 break;
2093 default:
2094 abort ();
2095 }
2096 }
2097 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
2098 for (i = 0; i < nexps; i++)
2099 XVECEXP (ret, 0, i) = exp [i];
2100 return ret;
2101}
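/* A sketch of one container built above (illustrative, assuming the
   first integer and SSE argument slots are still free): for
     struct { double d; long l; }
   the classes are SSEDF and INTEGER, and the result is a PARALLEL
     (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                (expr_list (reg:DI rdi) (const_int 8))])
   where each EXPR_LIST pairs a hard register with the byte offset of
   the 8-byte word it carries.  */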
2102
b08de47e
MM
2103/* Update the data in CUM to advance over an argument
2104 of mode MODE and data type TYPE.
2105 (TYPE is null for libcalls where that information may not be available.) */
2106
2107void
2108function_arg_advance (cum, mode, type, named)
2109 CUMULATIVE_ARGS *cum; /* current arg information */
2110 enum machine_mode mode; /* current arg mode */
2111 tree type; /* type of the argument or 0 if lib support */
2112 int named; /* whether or not the argument was named */
2113{
5ac9118e
KG
2114 int bytes =
2115 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
b08de47e
MM
2116 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2117
2118 if (TARGET_DEBUG_ARG)
2119 fprintf (stderr,
e9a25f70 2120 "function_adv (sz=%d, wds=%2d, nregs=%d, mode=%s, named=%d)\n\n",
b08de47e 2121 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
53c17031 2122 if (TARGET_64BIT)
b08de47e 2123 {
53c17031
JH
2124 int int_nregs, sse_nregs;
2125 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
2126 cum->words += words;
2127 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
82a127a9 2128 {
53c17031
JH
2129 cum->nregs -= int_nregs;
2130 cum->sse_nregs -= sse_nregs;
2131 cum->regno += int_nregs;
2132 cum->sse_regno += sse_nregs;
82a127a9 2133 }
53c17031
JH
2134 else
2135 cum->words += words;
b08de47e 2136 }
a4f31c00 2137 else
82a127a9 2138 {
53c17031
JH
2139 if (TARGET_SSE && mode == TImode)
2140 {
2141 cum->sse_words += words;
2142 cum->sse_nregs -= 1;
2143 cum->sse_regno += 1;
2144 if (cum->sse_nregs <= 0)
2145 {
2146 cum->sse_nregs = 0;
2147 cum->sse_regno = 0;
2148 }
2149 }
2150 else
82a127a9 2151 {
53c17031
JH
2152 cum->words += words;
2153 cum->nregs -= words;
2154 cum->regno += words;
2155
2156 if (cum->nregs <= 0)
2157 {
2158 cum->nregs = 0;
2159 cum->regno = 0;
2160 }
82a127a9
CM
2161 }
2162 }
b08de47e
MM
2163 return;
2164}
2165
2166/* Define where to put the arguments to a function.
2167 Value is zero to push the argument on the stack,
2168 or a hard register in which to store the argument.
2169
2170 MODE is the argument's machine mode.
2171 TYPE is the data type of the argument (as a tree).
2172 This is null for libcalls where that information may
2173 not be available.
2174 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2175 the preceding args and about the function being called.
2176 NAMED is nonzero if this argument is a named parameter
2177 (otherwise it is an extra parameter matching an ellipsis). */
2178
07933f72 2179rtx
b08de47e
MM
2180function_arg (cum, mode, type, named)
2181 CUMULATIVE_ARGS *cum; /* current arg information */
2182 enum machine_mode mode; /* current arg mode */
2183 tree type; /* type of the argument or 0 if lib support */
2184 int named; /* != 0 for normal args, == 0 for ... args */
2185{
2186 rtx ret = NULL_RTX;
5ac9118e
KG
2187 int bytes =
2188 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
b08de47e
MM
2189 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2190
53c17031
JH
 2191  /* Handle a hidden AL argument containing the number of registers for varargs
 2192     x86-64 functions.  For the i386 ABI just return constm1_rtx to avoid
2193 any AL settings. */
32ee7d1d 2194 if (mode == VOIDmode)
b08de47e 2195 {
53c17031
JH
2196 if (TARGET_64BIT)
2197 return GEN_INT (cum->maybe_vaarg
2198 ? (cum->sse_nregs < 0
2199 ? SSE_REGPARM_MAX
2200 : cum->sse_regno)
2201 : -1);
2202 else
2203 return constm1_rtx;
b08de47e 2204 }
53c17031
JH
2205 if (TARGET_64BIT)
2206 ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
2207 &x86_64_int_parameter_registers [cum->regno],
2208 cum->sse_regno);
2209 else
2210 switch (mode)
2211 {
2212 /* For now, pass fp/complex values on the stack. */
2213 default:
2214 break;
2215
2216 case BLKmode:
2217 case DImode:
2218 case SImode:
2219 case HImode:
2220 case QImode:
2221 if (words <= cum->nregs)
2222 ret = gen_rtx_REG (mode, cum->regno);
2223 break;
2224 case TImode:
2225 if (cum->sse_nregs)
2226 ret = gen_rtx_REG (mode, cum->sse_regno);
2227 break;
2228 }
b08de47e
MM
2229
2230 if (TARGET_DEBUG_ARG)
2231 {
2232 fprintf (stderr,
91ea38f9 2233 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
b08de47e
MM
2234 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
2235
2236 if (ret)
91ea38f9 2237 print_simple_rtl (stderr, ret);
b08de47e
MM
2238 else
2239 fprintf (stderr, ", stack");
2240
2241 fprintf (stderr, " )\n");
2242 }
2243
2244 return ret;
2245}
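/* Example of the 32-bit path above (illustrative): given
     void __attribute__((regparm(2))) f (int a, int b, int c);
   cum->nregs starts at 2, so `a' is returned as (reg:SI ax) and `b' as
   (reg:SI dx); by the time `c' is seen the registers are exhausted,
   ret stays NULL and the argument is pushed on the stack.  */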
53c17031
JH
2246
2247/* Gives the alignment boundary, in bits, of an argument with the specified mode
2248 and type. */
2249
2250int
2251ix86_function_arg_boundary (mode, type)
2252 enum machine_mode mode;
2253 tree type;
2254{
2255 int align;
2256 if (!TARGET_64BIT)
2257 return PARM_BOUNDARY;
2258 if (type)
2259 align = TYPE_ALIGN (type);
2260 else
2261 align = GET_MODE_ALIGNMENT (mode);
2262 if (align < PARM_BOUNDARY)
2263 align = PARM_BOUNDARY;
2264 if (align > 128)
2265 align = 128;
2266 return align;
2267}
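/* Illustrative values of the function above on x86-64: an int argument
   stays at PARM_BOUNDARY (64 bits); a 16-byte SSE vector such as __m128
   has a mode alignment of 128 and lands on a 16-byte boundary; any type
   aligned beyond 128 bits is still capped at 128.  */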
2268
 2269/* Return true if N is a possible register number for a function value.  */
2270bool
2271ix86_function_value_regno_p (regno)
2272 int regno;
2273{
2274 if (!TARGET_64BIT)
2275 {
2276 return ((regno) == 0
2277 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
2278 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
2279 }
2280 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
2281 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
2282 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
2283}
2284
2285/* Define how to find the value returned by a function.
2286 VALTYPE is the data type of the value (as a tree).
2287 If the precise function being called is known, FUNC is its FUNCTION_DECL;
2288 otherwise, FUNC is 0. */
2289rtx
2290ix86_function_value (valtype)
2291 tree valtype;
2292{
2293 if (TARGET_64BIT)
2294 {
2295 rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
2296 REGPARM_MAX, SSE_REGPARM_MAX,
2297 x86_64_int_return_registers, 0);
 2298      /* For zero sized structures, construct_container returns NULL, but we need
 2299	 to keep the rest of the compiler happy by returning a meaningful value.  */
2300 if (!ret)
2301 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
2302 return ret;
2303 }
2304 else
b069de3b
SS
2305 return gen_rtx_REG (TYPE_MODE (valtype),
2306 ix86_value_regno (TYPE_MODE (valtype)));
53c17031
JH
2307}
2308
f5143c46 2309/* Return nonzero iff TYPE is returned in memory.  */
53c17031
JH
2310int
2311ix86_return_in_memory (type)
2312 tree type;
2313{
2314 int needed_intregs, needed_sseregs;
2315 if (TARGET_64BIT)
2316 {
2317 return !examine_argument (TYPE_MODE (type), type, 1,
2318 &needed_intregs, &needed_sseregs);
2319 }
2320 else
2321 {
2322 if (TYPE_MODE (type) == BLKmode
2323 || (VECTOR_MODE_P (TYPE_MODE (type))
2324 && int_size_in_bytes (type) == 8)
2325 || (int_size_in_bytes (type) > 12 && TYPE_MODE (type) != TImode
2326 && TYPE_MODE (type) != TFmode
2327 && !VECTOR_MODE_P (TYPE_MODE (type))))
2328 return 1;
2329 return 0;
2330 }
2331}
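/* Examples of the 32-bit rules above (illustrative): any BLKmode
   aggregate and any 8-byte vector are returned in memory; TImode,
   TFmode and 16-byte SSE vectors are not, and neither are small
   scalars such as int or double.  */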
2332
2333/* Define how to find the value returned by a library function
2334 assuming the value has mode MODE. */
2335rtx
2336ix86_libcall_value (mode)
2337 enum machine_mode mode;
2338{
2339 if (TARGET_64BIT)
2340 {
2341 switch (mode)
2342 {
2343 case SFmode:
2344 case SCmode:
2345 case DFmode:
2346 case DCmode:
2347 return gen_rtx_REG (mode, FIRST_SSE_REG);
2348 case TFmode:
2349 case TCmode:
2350 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
2351 default:
2352 return gen_rtx_REG (mode, 0);
2353 }
2354 }
2355 else
b069de3b
SS
2356 return gen_rtx_REG (mode, ix86_value_regno (mode));
2357}
2358
2359/* Given a mode, return the register to use for a return value. */
2360
2361static int
2362ix86_value_regno (mode)
2363 enum machine_mode mode;
2364{
2365 if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
2366 return FIRST_FLOAT_REG;
2367 if (mode == TImode || VECTOR_MODE_P (mode))
2368 return FIRST_SSE_REG;
2369 return 0;
53c17031 2370}
ad919812
JH
2371\f
2372/* Create the va_list data type. */
53c17031 2373
ad919812
JH
2374tree
2375ix86_build_va_list ()
2376{
2377 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
53c17031 2378
ad919812
JH
2379 /* For i386 we use plain pointer to argument area. */
2380 if (!TARGET_64BIT)
2381 return build_pointer_type (char_type_node);
2382
f1e639b1 2383 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
ad919812
JH
2384 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
2385
fce5a9f2 2386 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
ad919812 2387 unsigned_type_node);
fce5a9f2 2388 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
ad919812
JH
2389 unsigned_type_node);
2390 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
2391 ptr_type_node);
2392 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
2393 ptr_type_node);
2394
2395 DECL_FIELD_CONTEXT (f_gpr) = record;
2396 DECL_FIELD_CONTEXT (f_fpr) = record;
2397 DECL_FIELD_CONTEXT (f_ovf) = record;
2398 DECL_FIELD_CONTEXT (f_sav) = record;
2399
2400 TREE_CHAIN (record) = type_decl;
2401 TYPE_NAME (record) = type_decl;
2402 TYPE_FIELDS (record) = f_gpr;
2403 TREE_CHAIN (f_gpr) = f_fpr;
2404 TREE_CHAIN (f_fpr) = f_ovf;
2405 TREE_CHAIN (f_ovf) = f_sav;
2406
2407 layout_type (record);
2408
2409 /* The correct type is an array type of one element. */
2410 return build_array_type (record, build_index_type (size_zero_node));
2411}
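/* For reference (illustrative; this typedef is not part of the file):
   the record built above matches the x86-64 ABI's declaration

     typedef struct __va_list_tag {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } va_list[1];

   The one-element array type is what makes va_list decay to a pointer
   when passed between functions, as the ABI requires.  */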
2412
 2413/* Perform any actions needed for a function that is receiving a
fce5a9f2 2414 variable number of arguments.
ad919812
JH
2415
2416 CUM is as above.
2417
2418 MODE and TYPE are the mode and type of the current parameter.
2419
2420 PRETEND_SIZE is a variable that should be set to the amount of stack
2421 that must be pushed by the prolog to pretend that our caller pushed
2422 it.
2423
2424 Normally, this macro will push all remaining incoming registers on the
2425 stack and set PRETEND_SIZE to the length of the registers pushed. */
2426
2427void
2428ix86_setup_incoming_varargs (cum, mode, type, pretend_size, no_rtl)
2429 CUMULATIVE_ARGS *cum;
2430 enum machine_mode mode;
2431 tree type;
2432 int *pretend_size ATTRIBUTE_UNUSED;
2433 int no_rtl;
2434
2435{
2436 CUMULATIVE_ARGS next_cum;
2437 rtx save_area = NULL_RTX, mem;
2438 rtx label;
2439 rtx label_ref;
2440 rtx tmp_reg;
2441 rtx nsse_reg;
2442 int set;
2443 tree fntype;
2444 int stdarg_p;
2445 int i;
2446
2447 if (!TARGET_64BIT)
2448 return;
2449
 2450  /* Indicate that space for the varargs save area must be allocated on the stack.  */
2451 ix86_save_varrargs_registers = 1;
2452
2453 fntype = TREE_TYPE (current_function_decl);
2454 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
2455 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
2456 != void_type_node));
2457
2458 /* For varargs, we do not want to skip the dummy va_dcl argument.
2459 For stdargs, we do want to skip the last named argument. */
2460 next_cum = *cum;
2461 if (stdarg_p)
2462 function_arg_advance (&next_cum, mode, type, 1);
2463
2464 if (!no_rtl)
2465 save_area = frame_pointer_rtx;
2466
2467 set = get_varargs_alias_set ();
2468
2469 for (i = next_cum.regno; i < ix86_regparm; i++)
2470 {
2471 mem = gen_rtx_MEM (Pmode,
2472 plus_constant (save_area, i * UNITS_PER_WORD));
0692acba 2473 set_mem_alias_set (mem, set);
ad919812
JH
2474 emit_move_insn (mem, gen_rtx_REG (Pmode,
2475 x86_64_int_parameter_registers[i]));
2476 }
2477
2478 if (next_cum.sse_nregs)
2479 {
 2480      /* Now emit code to save SSE registers.  The AX parameter contains the
 2481	 number of SSE parameter registers used to call this function.  We use
 2482	 the sse_prologue_save insn template that produces a computed jump across
 2483	 the SSE saves.  We need some preparation work to get this working.  */
2484
2485 label = gen_label_rtx ();
2486 label_ref = gen_rtx_LABEL_REF (Pmode, label);
2487
 2488      /* Compute the address to jump to:
 2489	 label - 4*eax + nnamed_sse_arguments*4  */
2490 tmp_reg = gen_reg_rtx (Pmode);
2491 nsse_reg = gen_reg_rtx (Pmode);
2492 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
2493 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
44cf5b6a 2494 gen_rtx_MULT (Pmode, nsse_reg,
ad919812
JH
2495 GEN_INT (4))));
2496 if (next_cum.sse_regno)
2497 emit_move_insn
2498 (nsse_reg,
2499 gen_rtx_CONST (DImode,
2500 gen_rtx_PLUS (DImode,
2501 label_ref,
2502 GEN_INT (next_cum.sse_regno * 4))));
2503 else
2504 emit_move_insn (nsse_reg, label_ref);
2505 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
2506
 2507      /* Compute the address of the memory block we save into.  We always use
 2508	 a pointer pointing 127 bytes after the first byte to store - this is
 2509	 needed to keep the instruction size limited to 4 bytes.  */
2510 tmp_reg = gen_reg_rtx (Pmode);
8ac61af7
RK
2511 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
2512 plus_constant (save_area,
2513 8 * REGPARM_MAX + 127)));
ad919812 2514 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
14f73b5a 2515 set_mem_alias_set (mem, set);
8ac61af7 2516 set_mem_align (mem, BITS_PER_WORD);
ad919812
JH
2517
2518 /* And finally do the dirty job! */
8ac61af7
RK
2519 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
2520 GEN_INT (next_cum.sse_regno), label));
ad919812
JH
2521 }
2522
2523}
2524
2525/* Implement va_start. */
2526
2527void
e5faf155 2528ix86_va_start (valist, nextarg)
ad919812
JH
2529 tree valist;
2530 rtx nextarg;
2531{
2532 HOST_WIDE_INT words, n_gpr, n_fpr;
2533 tree f_gpr, f_fpr, f_ovf, f_sav;
2534 tree gpr, fpr, ovf, sav, t;
2535
2536 /* Only 64bit target needs something special. */
2537 if (!TARGET_64BIT)
2538 {
e5faf155 2539 std_expand_builtin_va_start (valist, nextarg);
ad919812
JH
2540 return;
2541 }
2542
2543 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2544 f_fpr = TREE_CHAIN (f_gpr);
2545 f_ovf = TREE_CHAIN (f_fpr);
2546 f_sav = TREE_CHAIN (f_ovf);
2547
2548 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2549 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2550 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2551 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2552 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2553
2554 /* Count number of gp and fp argument registers used. */
2555 words = current_function_args_info.words;
2556 n_gpr = current_function_args_info.regno;
2557 n_fpr = current_function_args_info.sse_regno;
2558
2559 if (TARGET_DEBUG_ARG)
2560 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
b531087a 2561 (int) words, (int) n_gpr, (int) n_fpr);
ad919812
JH
2562
2563 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
2564 build_int_2 (n_gpr * 8, 0));
2565 TREE_SIDE_EFFECTS (t) = 1;
2566 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2567
2568 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
2569 build_int_2 (n_fpr * 16 + 8*REGPARM_MAX, 0));
2570 TREE_SIDE_EFFECTS (t) = 1;
2571 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2572
2573 /* Find the overflow area. */
2574 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
2575 if (words != 0)
2576 t = build (PLUS_EXPR, TREE_TYPE (ovf), t,
2577 build_int_2 (words * UNITS_PER_WORD, 0));
2578 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2579 TREE_SIDE_EFFECTS (t) = 1;
2580 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2581
2582 /* Find the register save area.
 2583     The prologue of the function saves it right above the stack frame.  */
2584 t = make_tree (TREE_TYPE (sav), frame_pointer_rtx);
2585 t = build (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
2586 TREE_SIDE_EFFECTS (t) = 1;
2587 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2588}
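/* Illustrative effect of the code above (assuming no named argument
   spilled to the stack): for
     void f (int a, ...)
   va_start leaves
     gp_offset         = 8                  (one GP register named)
     fp_offset         = 8 * REGPARM_MAX    (no SSE register named)
     overflow_arg_area = start of the caller-pushed stack words
     reg_save_area     = the prologue's register save block.  */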
2589
2590/* Implement va_arg. */
2591rtx
2592ix86_va_arg (valist, type)
2593 tree valist, type;
2594{
0139adca 2595 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
ad919812
JH
2596 tree f_gpr, f_fpr, f_ovf, f_sav;
2597 tree gpr, fpr, ovf, sav, t;
b932f770 2598 int size, rsize;
ad919812
JH
2599 rtx lab_false, lab_over = NULL_RTX;
2600 rtx addr_rtx, r;
2601 rtx container;
2602
2603 /* Only 64bit target needs something special. */
2604 if (!TARGET_64BIT)
2605 {
2606 return std_expand_builtin_va_arg (valist, type);
2607 }
2608
2609 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
2610 f_fpr = TREE_CHAIN (f_gpr);
2611 f_ovf = TREE_CHAIN (f_fpr);
2612 f_sav = TREE_CHAIN (f_ovf);
2613
2614 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
2615 gpr = build (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr);
2616 fpr = build (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr);
2617 ovf = build (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf);
2618 sav = build (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav);
2619
2620 size = int_size_in_bytes (type);
2621 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
2622
2623 container = construct_container (TYPE_MODE (type), type, 0,
2624 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
2625 /*
2626 * Pull the value out of the saved registers ...
2627 */
2628
2629 addr_rtx = gen_reg_rtx (Pmode);
2630
2631 if (container)
2632 {
2633 rtx int_addr_rtx, sse_addr_rtx;
2634 int needed_intregs, needed_sseregs;
2635 int need_temp;
2636
2637 lab_over = gen_label_rtx ();
2638 lab_false = gen_label_rtx ();
8bad7136 2639
ad919812
JH
2640 examine_argument (TYPE_MODE (type), type, 0,
2641 &needed_intregs, &needed_sseregs);
2642
2643
2644 need_temp = ((needed_intregs && TYPE_ALIGN (type) > 64)
2645 || TYPE_ALIGN (type) > 128);
2646
 2647      /* If we are passing a structure, verify that it occupies a consecutive
 2648	 block in the register save area.  If not, we need to do moves.  */
2649 if (!need_temp && !REG_P (container))
2650 {
 2651	  /* Verify that all registers are strictly consecutive.  */
2652 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
2653 {
2654 int i;
2655
2656 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2657 {
2658 rtx slot = XVECEXP (container, 0, i);
b531087a 2659 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
ad919812
JH
2660 || INTVAL (XEXP (slot, 1)) != i * 16)
2661 need_temp = 1;
2662 }
2663 }
2664 else
2665 {
2666 int i;
2667
2668 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
2669 {
2670 rtx slot = XVECEXP (container, 0, i);
b531087a 2671 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
ad919812
JH
2672 || INTVAL (XEXP (slot, 1)) != i * 8)
2673 need_temp = 1;
2674 }
2675 }
2676 }
2677 if (!need_temp)
2678 {
2679 int_addr_rtx = addr_rtx;
2680 sse_addr_rtx = addr_rtx;
2681 }
2682 else
2683 {
2684 int_addr_rtx = gen_reg_rtx (Pmode);
2685 sse_addr_rtx = gen_reg_rtx (Pmode);
2686 }
2687 /* First ensure that we fit completely in registers. */
2688 if (needed_intregs)
2689 {
2690 emit_cmp_and_jump_insns (expand_expr
2691 (gpr, NULL_RTX, SImode, EXPAND_NORMAL),
2692 GEN_INT ((REGPARM_MAX - needed_intregs +
2693 1) * 8), GE, const1_rtx, SImode,
d43e0b7d 2694 1, lab_false);
ad919812
JH
2695 }
2696 if (needed_sseregs)
2697 {
2698 emit_cmp_and_jump_insns (expand_expr
2699 (fpr, NULL_RTX, SImode, EXPAND_NORMAL),
2700 GEN_INT ((SSE_REGPARM_MAX -
2701 needed_sseregs + 1) * 16 +
2702 REGPARM_MAX * 8), GE, const1_rtx,
d43e0b7d 2703 SImode, 1, lab_false);
ad919812
JH
2704 }
2705
2706 /* Compute index to start of area used for integer regs. */
2707 if (needed_intregs)
2708 {
2709 t = build (PLUS_EXPR, ptr_type_node, sav, gpr);
2710 r = expand_expr (t, int_addr_rtx, Pmode, EXPAND_NORMAL);
2711 if (r != int_addr_rtx)
2712 emit_move_insn (int_addr_rtx, r);
2713 }
2714 if (needed_sseregs)
2715 {
2716 t = build (PLUS_EXPR, ptr_type_node, sav, fpr);
2717 r = expand_expr (t, sse_addr_rtx, Pmode, EXPAND_NORMAL);
2718 if (r != sse_addr_rtx)
2719 emit_move_insn (sse_addr_rtx, r);
2720 }
2721 if (need_temp)
2722 {
2723 int i;
2724 rtx mem;
2725
b932f770
JH
2726 /* Never use the memory itself, as it has the alias set. */
2727 addr_rtx = XEXP (assign_temp (type, 0, 1, 0), 0);
2728 mem = gen_rtx_MEM (BLKmode, addr_rtx);
0692acba 2729 set_mem_alias_set (mem, get_varargs_alias_set ());
8ac61af7 2730 set_mem_align (mem, BITS_PER_UNIT);
b932f770 2731
ad919812
JH
2732 for (i = 0; i < XVECLEN (container, 0); i++)
2733 {
2734 rtx slot = XVECEXP (container, 0, i);
2735 rtx reg = XEXP (slot, 0);
2736 enum machine_mode mode = GET_MODE (reg);
2737 rtx src_addr;
2738 rtx src_mem;
2739 int src_offset;
2740 rtx dest_mem;
2741
2742 if (SSE_REGNO_P (REGNO (reg)))
2743 {
2744 src_addr = sse_addr_rtx;
2745 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
2746 }
2747 else
2748 {
2749 src_addr = int_addr_rtx;
2750 src_offset = REGNO (reg) * 8;
2751 }
2752 src_mem = gen_rtx_MEM (mode, src_addr);
0692acba 2753 set_mem_alias_set (src_mem, get_varargs_alias_set ());
ad919812
JH
2754 src_mem = adjust_address (src_mem, mode, src_offset);
2755 dest_mem = adjust_address (mem, mode, INTVAL (XEXP (slot, 1)));
ad919812
JH
2756 emit_move_insn (dest_mem, src_mem);
2757 }
2758 }
2759
2760 if (needed_intregs)
2761 {
2762 t =
2763 build (PLUS_EXPR, TREE_TYPE (gpr), gpr,
2764 build_int_2 (needed_intregs * 8, 0));
2765 t = build (MODIFY_EXPR, TREE_TYPE (gpr), gpr, t);
2766 TREE_SIDE_EFFECTS (t) = 1;
2767 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2768 }
2769 if (needed_sseregs)
2770 {
2771 t =
2772 build (PLUS_EXPR, TREE_TYPE (fpr), fpr,
2773 build_int_2 (needed_sseregs * 16, 0));
2774 t = build (MODIFY_EXPR, TREE_TYPE (fpr), fpr, t);
2775 TREE_SIDE_EFFECTS (t) = 1;
2776 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2777 }
2778
2779 emit_jump_insn (gen_jump (lab_over));
2780 emit_barrier ();
2781 emit_label (lab_false);
2782 }
2783
2784 /* ... otherwise out of the overflow area. */
2785
2786 /* Care for on-stack alignment if needed. */
2787 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64)
2788 t = ovf;
2789 else
2790 {
2791 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
2792 t = build (PLUS_EXPR, TREE_TYPE (ovf), ovf, build_int_2 (align - 1, 0));
2793 t = build (BIT_AND_EXPR, TREE_TYPE (t), t, build_int_2 (-align, -1));
2794 }
2795 t = save_expr (t);
2796
2797 r = expand_expr (t, addr_rtx, Pmode, EXPAND_NORMAL);
2798 if (r != addr_rtx)
2799 emit_move_insn (addr_rtx, r);
2800
2801 t =
2802 build (PLUS_EXPR, TREE_TYPE (t), t,
2803 build_int_2 (rsize * UNITS_PER_WORD, 0));
2804 t = build (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
2805 TREE_SIDE_EFFECTS (t) = 1;
2806 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
2807
2808 if (container)
2809 emit_label (lab_over);
2810
ad919812
JH
2811 return addr_rtx;
2812}
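/* The fast path above amounts to the following pseudo-C (illustrative;
   shown for the GP case, with the analogous test on fp_offset for SSE):

     if (gp_offset < (REGPARM_MAX - needed_intregs + 1) * 8)
       {
         addr = reg_save_area + gp_offset;
         gp_offset += 8 * needed_intregs;
       }
     else
       {
         addr = align (overflow_arg_area, alignment of the type);
         overflow_arg_area = addr + rsize * 8;
       }
*/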
2813\f
c3c637e3
GS
 2814/* Return nonzero if OP is either an i387 or an SSE fp register.  */
2815int
2816any_fp_register_operand (op, mode)
2817 rtx op;
2818 enum machine_mode mode ATTRIBUTE_UNUSED;
2819{
2820 return ANY_FP_REG_P (op);
2821}
2822
2823/* Return nonzero if OP is an i387 fp register. */
2824int
2825fp_register_operand (op, mode)
2826 rtx op;
2827 enum machine_mode mode ATTRIBUTE_UNUSED;
2828{
2829 return FP_REG_P (op);
2830}
2831
2832/* Return nonzero if OP is a non-fp register_operand. */
2833int
2834register_and_not_any_fp_reg_operand (op, mode)
2835 rtx op;
2836 enum machine_mode mode;
2837{
2838 return register_operand (op, mode) && !ANY_FP_REG_P (op);
2839}
2840
 2841/* Return nonzero if OP is a register operand other than an
2842 i387 fp register. */
2843int
2844register_and_not_fp_reg_operand (op, mode)
2845 rtx op;
2846 enum machine_mode mode;
2847{
2848 return register_operand (op, mode) && !FP_REG_P (op);
2849}
2850
7dd4b4a3
JH
 2851/* Return nonzero if OP is a general operand representable on x86_64.  */
2852
2853int
2854x86_64_general_operand (op, mode)
2855 rtx op;
2856 enum machine_mode mode;
2857{
2858 if (!TARGET_64BIT)
2859 return general_operand (op, mode);
2860 if (nonimmediate_operand (op, mode))
2861 return 1;
2862 return x86_64_sign_extended_value (op);
2863}
2864
 2865/* Return nonzero if OP is a general operand representable on x86_64
d6a7951f 2866   as either a sign-extended or zero-extended constant.  */
7dd4b4a3
JH
2867
2868int
2869x86_64_szext_general_operand (op, mode)
2870 rtx op;
2871 enum machine_mode mode;
2872{
2873 if (!TARGET_64BIT)
2874 return general_operand (op, mode);
2875 if (nonimmediate_operand (op, mode))
2876 return 1;
2877 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2878}
2879
 2880/* Return nonzero if OP is a nonmemory operand representable on x86_64.  */
2881
2882int
2883x86_64_nonmemory_operand (op, mode)
2884 rtx op;
2885 enum machine_mode mode;
2886{
2887 if (!TARGET_64BIT)
2888 return nonmemory_operand (op, mode);
2889 if (register_operand (op, mode))
2890 return 1;
2891 return x86_64_sign_extended_value (op);
2892}
2893
 2894/* Return nonzero if OP is a nonmemory operand acceptable to the movabs patterns.  */
2895
2896int
2897x86_64_movabs_operand (op, mode)
2898 rtx op;
2899 enum machine_mode mode;
2900{
2901 if (!TARGET_64BIT || !flag_pic)
2902 return nonmemory_operand (op, mode);
2903 if (register_operand (op, mode) || x86_64_sign_extended_value (op))
2904 return 1;
2905 if (CONSTANT_P (op) && !symbolic_reference_mentioned_p (op))
2906 return 1;
2907 return 0;
2908}
2909
 2910/* Return nonzero if OP is a nonmemory operand representable on x86_64.  */
2911
2912int
2913x86_64_szext_nonmemory_operand (op, mode)
2914 rtx op;
2915 enum machine_mode mode;
2916{
2917 if (!TARGET_64BIT)
2918 return nonmemory_operand (op, mode);
2919 if (register_operand (op, mode))
2920 return 1;
2921 return x86_64_sign_extended_value (op) || x86_64_zero_extended_value (op);
2922}
2923
 2924/* Return nonzero if OP is an immediate operand representable on x86_64.  */
2925
2926int
2927x86_64_immediate_operand (op, mode)
2928 rtx op;
2929 enum machine_mode mode;
2930{
2931 if (!TARGET_64BIT)
2932 return immediate_operand (op, mode);
2933 return x86_64_sign_extended_value (op);
2934}
2935
 2936/* Return nonzero if OP is an immediate operand representable on x86_64
   as a zero-extended value.  */
2937
2938int
2939x86_64_zext_immediate_operand (op, mode)
2940 rtx op;
2941 enum machine_mode mode ATTRIBUTE_UNUSED;
2942{
2943 return x86_64_zero_extended_value (op);
2944}
2945
8bad7136
JL
2946/* Return nonzero if OP is (const_int 1), else return zero. */
2947
2948int
2949const_int_1_operand (op, mode)
2950 rtx op;
2951 enum machine_mode mode ATTRIBUTE_UNUSED;
2952{
2953 return (GET_CODE (op) == CONST_INT && INTVAL (op) == 1);
2954}
2955
794a292d
JJ
2956/* Return nonzero if OP is CONST_INT >= 1 and <= 31 (a valid operand
2957 for shift & compare patterns, as shifting by 0 does not change flags),
2958 else return zero. */
2959
2960int
2961const_int_1_31_operand (op, mode)
2962 rtx op;
2963 enum machine_mode mode ATTRIBUTE_UNUSED;
2964{
2965 return (GET_CODE (op) == CONST_INT && INTVAL (op) >= 1 && INTVAL (op) <= 31);
2966}
2967
e075ae69
RH
2968/* Returns 1 if OP is either a symbol reference or a sum of a symbol
2969 reference and a constant. */
b08de47e
MM
2970
2971int
e075ae69
RH
2972symbolic_operand (op, mode)
2973 register rtx op;
2974 enum machine_mode mode ATTRIBUTE_UNUSED;
2a2ab3f9 2975{
e075ae69 2976 switch (GET_CODE (op))
2a2ab3f9 2977 {
e075ae69
RH
2978 case SYMBOL_REF:
2979 case LABEL_REF:
2980 return 1;
2981
2982 case CONST:
2983 op = XEXP (op, 0);
2984 if (GET_CODE (op) == SYMBOL_REF
2985 || GET_CODE (op) == LABEL_REF
2986 || (GET_CODE (op) == UNSPEC
8ee41eaf
RH
2987 && (XINT (op, 1) == UNSPEC_GOT
2988 || XINT (op, 1) == UNSPEC_GOTOFF
2989 || XINT (op, 1) == UNSPEC_GOTPCREL)))
e075ae69
RH
2990 return 1;
2991 if (GET_CODE (op) != PLUS
2992 || GET_CODE (XEXP (op, 1)) != CONST_INT)
2993 return 0;
2994
2995 op = XEXP (op, 0);
2996 if (GET_CODE (op) == SYMBOL_REF
2997 || GET_CODE (op) == LABEL_REF)
2998 return 1;
2999 /* Only @GOTOFF gets offsets. */
3000 if (GET_CODE (op) != UNSPEC
8ee41eaf 3001 || XINT (op, 1) != UNSPEC_GOTOFF)
e075ae69
RH
3002 return 0;
3003
3004 op = XVECEXP (op, 0, 0);
3005 if (GET_CODE (op) == SYMBOL_REF
3006 || GET_CODE (op) == LABEL_REF)
3007 return 1;
3008 return 0;
3009
3010 default:
3011 return 0;
2a2ab3f9
JVA
3012 }
3013}
2a2ab3f9 3014
e075ae69 3015/* Return true if the operand contains a @GOT or @GOTOFF reference. */
3b3c6a3f 3016
e075ae69
RH
3017int
3018pic_symbolic_operand (op, mode)
3019 register rtx op;
3020 enum machine_mode mode ATTRIBUTE_UNUSED;
3021{
6eb791fc
JH
3022 if (GET_CODE (op) != CONST)
3023 return 0;
3024 op = XEXP (op, 0);
3025 if (TARGET_64BIT)
3026 {
3027 if (GET_CODE (XEXP (op, 0)) == UNSPEC)
3028 return 1;
3029 }
fce5a9f2 3030 else
2a2ab3f9 3031 {
e075ae69
RH
3032 if (GET_CODE (op) == UNSPEC)
3033 return 1;
3034 if (GET_CODE (op) != PLUS
3035 || GET_CODE (XEXP (op, 1)) != CONST_INT)
3036 return 0;
3037 op = XEXP (op, 0);
3038 if (GET_CODE (op) == UNSPEC)
3039 return 1;
2a2ab3f9 3040 }
e075ae69 3041 return 0;
2a2ab3f9 3042}
2a2ab3f9 3043
623fe810
RH
3044/* Return true if OP is a symbolic operand that resolves locally. */
3045
3046static int
3047local_symbolic_operand (op, mode)
3048 rtx op;
3049 enum machine_mode mode ATTRIBUTE_UNUSED;
3050{
3051 if (GET_CODE (op) == LABEL_REF)
3052 return 1;
3053
3054 if (GET_CODE (op) == CONST
3055 && GET_CODE (XEXP (op, 0)) == PLUS
3056 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3057 op = XEXP (XEXP (op, 0), 0);
3058
3059 if (GET_CODE (op) != SYMBOL_REF)
3060 return 0;
3061
3062 /* These we've been told are local by varasm and encode_section_info
3063 respectively. */
3064 if (CONSTANT_POOL_ADDRESS_P (op) || SYMBOL_REF_FLAG (op))
3065 return 1;
3066
3067 /* There is, however, a not insubstantial body of code in the rest of
fce5a9f2 3068 the compiler that assumes it can just stick the results of
623fe810
RH
3069 ASM_GENERATE_INTERNAL_LABEL in a symbol_ref and have done. */
3070 /* ??? This is a hack. Should update the body of the compiler to
fb49053f 3071   always create a DECL and invoke targetm.encode_section_info.  */
623fe810
RH
3072 if (strncmp (XSTR (op, 0), internal_label_prefix,
3073 internal_label_prefix_len) == 0)
3074 return 1;
3075
3076 return 0;
3077}
3078
f996902d
RH
3079/* Test for various thread-local symbols. See ix86_encode_section_info. */
3080
3081int
3082tls_symbolic_operand (op, mode)
3083 register rtx op;
3084 enum machine_mode mode ATTRIBUTE_UNUSED;
3085{
3086 const char *symbol_str;
3087
3088 if (GET_CODE (op) != SYMBOL_REF)
3089 return 0;
3090 symbol_str = XSTR (op, 0);
3091
3092 if (symbol_str[0] != '%')
3093 return 0;
755ac5d4 3094 return strchr (tls_model_chars, symbol_str[1]) - tls_model_chars;
f996902d
RH
3095}
3096
3097static int
3098tls_symbolic_operand_1 (op, kind)
3099 rtx op;
3100 enum tls_model kind;
3101{
3102 const char *symbol_str;
3103
3104 if (GET_CODE (op) != SYMBOL_REF)
3105 return 0;
3106 symbol_str = XSTR (op, 0);
3107
3108 return symbol_str[0] == '%' && symbol_str[1] == tls_model_chars[kind];
3109}
3110
3111int
3112global_dynamic_symbolic_operand (op, mode)
3113 register rtx op;
3114 enum machine_mode mode ATTRIBUTE_UNUSED;
3115{
3116 return tls_symbolic_operand_1 (op, TLS_MODEL_GLOBAL_DYNAMIC);
3117}
3118
3119int
3120local_dynamic_symbolic_operand (op, mode)
3121 register rtx op;
3122 enum machine_mode mode ATTRIBUTE_UNUSED;
3123{
3124 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_DYNAMIC);
3125}
3126
3127int
3128initial_exec_symbolic_operand (op, mode)
3129 register rtx op;
3130 enum machine_mode mode ATTRIBUTE_UNUSED;
3131{
3132 return tls_symbolic_operand_1 (op, TLS_MODEL_INITIAL_EXEC);
3133}
3134
3135int
3136local_exec_symbolic_operand (op, mode)
3137 register rtx op;
3138 enum machine_mode mode ATTRIBUTE_UNUSED;
3139{
3140 return tls_symbolic_operand_1 (op, TLS_MODEL_LOCAL_EXEC);
3141}
3142
28d52ffb
RH
3143/* Test for a valid operand for a call instruction. Don't allow the
3144 arg pointer register or virtual regs since they may decay into
3145 reg + const, which the patterns can't handle. */
2a2ab3f9 3146
e075ae69
RH
3147int
3148call_insn_operand (op, mode)
3149 rtx op;
3150 enum machine_mode mode ATTRIBUTE_UNUSED;
3151{
e075ae69
RH
3152 /* Disallow indirect through a virtual register. This leads to
3153 compiler aborts when trying to eliminate them. */
3154 if (GET_CODE (op) == REG
3155 && (op == arg_pointer_rtx
564d80f4 3156 || op == frame_pointer_rtx
e075ae69
RH
3157 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
3158 && REGNO (op) <= LAST_VIRTUAL_REGISTER)))
3159 return 0;
2a2ab3f9 3160
28d52ffb
RH
3161 /* Disallow `call 1234'. Due to varying assembler lameness this
3162 gets either rejected or translated to `call .+1234'. */
3163 if (GET_CODE (op) == CONST_INT)
3164 return 0;
3165
cbbf65e0
RH
3166 /* Explicitly allow SYMBOL_REF even if pic. */
3167 if (GET_CODE (op) == SYMBOL_REF)
e075ae69 3168 return 1;
2a2ab3f9 3169
cbbf65e0
RH
3170 /* Otherwise we can allow any general_operand in the address. */
3171 return general_operand (op, Pmode);
e075ae69 3172}
79325812 3173
e075ae69
RH
3174int
3175constant_call_address_operand (op, mode)
3176 rtx op;
3177 enum machine_mode mode ATTRIBUTE_UNUSED;
3178{
eaf19aba
JJ
3179 if (GET_CODE (op) == CONST
3180 && GET_CODE (XEXP (op, 0)) == PLUS
3181 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT)
3182 op = XEXP (XEXP (op, 0), 0);
e1ff012c 3183 return GET_CODE (op) == SYMBOL_REF;
e075ae69 3184}
2a2ab3f9 3185
e075ae69 3186/* Match exactly zero and one. */
e9a25f70 3187
0f290768 3188int
e075ae69
RH
3189const0_operand (op, mode)
3190 register rtx op;
3191 enum machine_mode mode;
3192{
3193 return op == CONST0_RTX (mode);
3194}
e9a25f70 3195
0f290768 3196int
e075ae69
RH
3197const1_operand (op, mode)
3198 register rtx op;
3199 enum machine_mode mode ATTRIBUTE_UNUSED;
3200{
3201 return op == const1_rtx;
3202}
2a2ab3f9 3203
e075ae69 3204/* Match 2, 4, or 8. Used for leal multiplicands. */
e9a25f70 3205
e075ae69
RH
3206int
3207const248_operand (op, mode)
3208 register rtx op;
3209 enum machine_mode mode ATTRIBUTE_UNUSED;
3210{
3211 return (GET_CODE (op) == CONST_INT
3212 && (INTVAL (op) == 2 || INTVAL (op) == 4 || INTVAL (op) == 8));
3213}
e9a25f70 3214
e075ae69 3215/* True if this is a constant appropriate for an increment or decrement. */
81fd0956 3216
e075ae69
RH
3217int
3218incdec_operand (op, mode)
3219 register rtx op;
0631e0bf 3220 enum machine_mode mode ATTRIBUTE_UNUSED;
e075ae69 3221{
f5143c46 3222 /* On Pentium4, the inc and dec operations cause an extra dependency on the flags
b4e89e2d
JH
3223 register, since the carry flag is not set. */
3224 if (TARGET_PENTIUM4 && !optimize_size)
3225 return 0;
2b1c08f5 3226 return op == const1_rtx || op == constm1_rtx;
e075ae69 3227}
2a2ab3f9 3228
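Why the predicate rejects +/-1 when tuning for the Pentium 4 (a sketch of the underlying issue; the exact stall behavior is a microarchitectural detail not spelled out here):

/* "incl %eax" updates only some flags and leaves CF untouched, so it
   carries a dependency on the previous flags value; "addl $1, %eax"
   rewrites all flags and does not.  Returning 0 here steers the
   expanders toward the add/sub forms on the Pentium 4 unless we are
   optimizing for size.  */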
371bc54b
JH
3229/* Return nonzero if OP is acceptable as an operand of the DImode shift
3230 expander. */
3231
3232int
3233shiftdi_operand (op, mode)
3234 rtx op;
3235 enum machine_mode mode ATTRIBUTE_UNUSED;
3236{
3237 if (TARGET_64BIT)
3238 return nonimmediate_operand (op, mode);
3239 else
3240 return register_operand (op, mode);
3241}
3242
0f290768 3243/* Return false if this is the stack pointer, or any other fake
e075ae69
RH
3244 register eliminable to the stack pointer. Otherwise, this is
3245 a register operand.
2a2ab3f9 3246
e075ae69
RH
3247 This is used to prevent esp from being used as an index reg,
3248 which would only happen in pathological cases. */
5f1ec3e6 3249
e075ae69
RH
3250int
3251reg_no_sp_operand (op, mode)
3252 register rtx op;
3253 enum machine_mode mode;
3254{
3255 rtx t = op;
3256 if (GET_CODE (t) == SUBREG)
3257 t = SUBREG_REG (t);
564d80f4 3258 if (t == stack_pointer_rtx || t == arg_pointer_rtx || t == frame_pointer_rtx)
e075ae69 3259 return 0;
2a2ab3f9 3260
e075ae69 3261 return register_operand (op, mode);
2a2ab3f9 3262}
b840bfb0 3263
915119a5
BS
3264int
3265mmx_reg_operand (op, mode)
3266 register rtx op;
bd793c65 3267 enum machine_mode mode ATTRIBUTE_UNUSED;
915119a5
BS
3268{
3269 return MMX_REG_P (op);
3270}
3271
2c5a510c
RH
3272/* Return false if this is any eliminable register. Otherwise
3273 general_operand. */
3274
3275int
3276general_no_elim_operand (op, mode)
3277 register rtx op;
3278 enum machine_mode mode;
3279{
3280 rtx t = op;
3281 if (GET_CODE (t) == SUBREG)
3282 t = SUBREG_REG (t);
3283 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3284 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3285 || t == virtual_stack_dynamic_rtx)
3286 return 0;
1020a5ab
RH
3287 if (REG_P (t)
3288 && REGNO (t) >= FIRST_VIRTUAL_REGISTER
3289 && REGNO (t) <= LAST_VIRTUAL_REGISTER)
3290 return 0;
2c5a510c
RH
3291
3292 return general_operand (op, mode);
3293}
3294
3295/* Return false if this is any eliminable register. Otherwise
3296 register_operand or const_int. */
3297
3298int
3299nonmemory_no_elim_operand (op, mode)
3300 register rtx op;
3301 enum machine_mode mode;
3302{
3303 rtx t = op;
3304 if (GET_CODE (t) == SUBREG)
3305 t = SUBREG_REG (t);
3306 if (t == arg_pointer_rtx || t == frame_pointer_rtx
3307 || t == virtual_incoming_args_rtx || t == virtual_stack_vars_rtx
3308 || t == virtual_stack_dynamic_rtx)
3309 return 0;
3310
3311 return GET_CODE (op) == CONST_INT || register_operand (op, mode);
3312}
3313
7ec70495
JH
3314/* Return false if this is any eliminable register or stack register,
3315 otherwise work like register_operand. */
3316
3317int
3318index_register_operand (op, mode)
3319 register rtx op;
3320 enum machine_mode mode;
3321{
3322 rtx t = op;
3323 if (GET_CODE (t) == SUBREG)
3324 t = SUBREG_REG (t);
3325 if (!REG_P (t))
3326 return 0;
3327 if (t == arg_pointer_rtx
3328 || t == frame_pointer_rtx
3329 || t == virtual_incoming_args_rtx
3330 || t == virtual_stack_vars_rtx
3331 || t == virtual_stack_dynamic_rtx
3332 || REGNO (t) == STACK_POINTER_REGNUM)
3333 return 0;
3334
3335 return general_operand (op, mode);
3336}
3337
e075ae69 3338/* Return true if op is a Q_REGS class register. */
b840bfb0 3339
e075ae69
RH
3340int
3341q_regs_operand (op, mode)
3342 register rtx op;
3343 enum machine_mode mode;
b840bfb0 3344{
e075ae69
RH
3345 if (mode != VOIDmode && GET_MODE (op) != mode)
3346 return 0;
3347 if (GET_CODE (op) == SUBREG)
3348 op = SUBREG_REG (op);
7799175f 3349 return ANY_QI_REG_P (op);
0f290768 3350}
b840bfb0 3351
e075ae69 3352/* Return true if op is a NON_Q_REGS class register. */
b840bfb0 3353
e075ae69
RH
3354int
3355non_q_regs_operand (op, mode)
3356 register rtx op;
3357 enum machine_mode mode;
3358{
3359 if (mode != VOIDmode && GET_MODE (op) != mode)
3360 return 0;
3361 if (GET_CODE (op) == SUBREG)
3362 op = SUBREG_REG (op);
3363 return NON_QI_REG_P (op);
0f290768 3364}
b840bfb0 3365
915119a5
BS
3366/* Return 1 if OP is a comparison that can be used in the CMPSS/CMPPS
3367 insns. */
3368int
3369sse_comparison_operator (op, mode)
3370 rtx op;
3371 enum machine_mode mode ATTRIBUTE_UNUSED;
3372{
3373 enum rtx_code code = GET_CODE (op);
a46d1d38
JH
3374 switch (code)
3375 {
3376 /* Operations supported directly. */
3377 case EQ:
3378 case LT:
3379 case LE:
3380 case UNORDERED:
3381 case NE:
3382 case UNGE:
3383 case UNGT:
3384 case ORDERED:
3385 return 1;
3386 /* These are equivalent to ones above in non-IEEE comparisons. */
3387 case UNEQ:
3388 case UNLT:
3389 case UNLE:
3390 case LTGT:
3391 case GE:
3392 case GT:
3393 return !TARGET_IEEE_FP;
3394 default:
3395 return 0;
3396 }
915119a5 3397}
9076b9c1 3398/* Return 1 if OP is a valid comparison operator in valid mode. */
e075ae69 3399int
9076b9c1
JH
3400ix86_comparison_operator (op, mode)
3401 register rtx op;
3402 enum machine_mode mode;
e075ae69 3403{
9076b9c1 3404 enum machine_mode inmode;
9a915772 3405 enum rtx_code code = GET_CODE (op);
3a3677ff
RH
3406 if (mode != VOIDmode && GET_MODE (op) != mode)
3407 return 0;
9a915772
JH
3408 if (GET_RTX_CLASS (code) != '<')
3409 return 0;
3410 inmode = GET_MODE (XEXP (op, 0));
3411
3412 if (inmode == CCFPmode || inmode == CCFPUmode)
3413 {
3414 enum rtx_code second_code, bypass_code;
3415 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3416 return (bypass_code == NIL && second_code == NIL);
3417 }
3418 switch (code)
3a3677ff
RH
3419 {
3420 case EQ: case NE:
3a3677ff 3421 return 1;
9076b9c1 3422 case LT: case GE:
7e08e190 3423 if (inmode == CCmode || inmode == CCGCmode
9076b9c1
JH
3424 || inmode == CCGOCmode || inmode == CCNOmode)
3425 return 1;
3426 return 0;
7e08e190 3427 case LTU: case GTU: case LEU: case ORDERED: case UNORDERED: case GEU:
7e08e190 3428 if (inmode == CCmode)
9076b9c1
JH
3429 return 1;
3430 return 0;
3431 case GT: case LE:
7e08e190 3432 if (inmode == CCmode || inmode == CCGCmode || inmode == CCNOmode)
9076b9c1
JH
3433 return 1;
3434 return 0;
3a3677ff
RH
3435 default:
3436 return 0;
3437 }
3438}
3439
9076b9c1 3440/* Return 1 if OP is a comparison operator that can be issued by fcmov. */
3a3677ff 3441
9076b9c1
JH
3442int
3443fcmov_comparison_operator (op, mode)
3a3677ff
RH
3444 register rtx op;
3445 enum machine_mode mode;
3446{
b62d22a2 3447 enum machine_mode inmode;
9a915772 3448 enum rtx_code code = GET_CODE (op);
3a3677ff
RH
3449 if (mode != VOIDmode && GET_MODE (op) != mode)
3450 return 0;
9a915772
JH
3451 if (GET_RTX_CLASS (code) != '<')
3452 return 0;
3453 inmode = GET_MODE (XEXP (op, 0));
3454 if (inmode == CCFPmode || inmode == CCFPUmode)
3a3677ff 3455 {
9a915772
JH
3456 enum rtx_code second_code, bypass_code;
3457 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
3458 if (bypass_code != NIL || second_code != NIL)
3459 return 0;
3460 code = ix86_fp_compare_code_to_integer (code);
3461 }
3462 /* The i387 supports only a limited set of condition codes. */
3463 switch (code)
3464 {
3465 case LTU: case GTU: case LEU: case GEU:
3466 if (inmode == CCmode || inmode == CCFPmode || inmode == CCFPUmode)
9076b9c1
JH
3467 return 1;
3468 return 0;
9a915772
JH
3469 case ORDERED: case UNORDERED:
3470 case EQ: case NE:
3471 return 1;
3a3677ff
RH
3472 default:
3473 return 0;
3474 }
e075ae69 3475}
b840bfb0 3476
e9e80858
JH
3477/* Return 1 if OP is a binary operator that can be promoted to wider mode. */
3478
3479int
3480promotable_binary_operator (op, mode)
3481 register rtx op;
3482 enum machine_mode mode ATTRIBUTE_UNUSED;
3483{
3484 switch (GET_CODE (op))
3485 {
3486 case MULT:
3487 /* Modern CPUs have the same latency for HImode and SImode multiplies,
3488 but the 386 and 486 do HImode multiplies faster. */
3489 return ix86_cpu > PROCESSOR_I486;
3490 case PLUS:
3491 case AND:
3492 case IOR:
3493 case XOR:
3494 case ASHIFT:
3495 return 1;
3496 default:
3497 return 0;
3498 }
3499}
3500
e075ae69
RH
3501/* Nearly general operand, but accept any const_double, since we wish
3502 to be able to drop them into memory rather than have them get pulled
3503 into registers. */
b840bfb0 3504
2a2ab3f9 3505int
e075ae69
RH
3506cmp_fp_expander_operand (op, mode)
3507 register rtx op;
3508 enum machine_mode mode;
2a2ab3f9 3509{
e075ae69 3510 if (mode != VOIDmode && mode != GET_MODE (op))
0b6b2900 3511 return 0;
e075ae69 3512 if (GET_CODE (op) == CONST_DOUBLE)
2a2ab3f9 3513 return 1;
e075ae69 3514 return general_operand (op, mode);
2a2ab3f9
JVA
3515}
3516
e075ae69 3517/* Match an SI or HImode register for a zero_extract. */
2a2ab3f9
JVA
3518
3519int
e075ae69 3520ext_register_operand (op, mode)
2a2ab3f9 3521 register rtx op;
bb5177ac 3522 enum machine_mode mode ATTRIBUTE_UNUSED;
2a2ab3f9 3523{
3522082b 3524 int regno;
0d7d98ee
JH
3525 if ((!TARGET_64BIT || GET_MODE (op) != DImode)
3526 && GET_MODE (op) != SImode && GET_MODE (op) != HImode)
e075ae69 3527 return 0;
3522082b
JH
3528
3529 if (!register_operand (op, VOIDmode))
3530 return 0;
3531
3532 /* Be careful to accept only registers having upper parts. */
3533 regno = REG_P (op) ? REGNO (op) : REGNO (SUBREG_REG (op));
3534 return (regno > LAST_VIRTUAL_REGISTER || regno < 4);
e075ae69
RH
3535}
3536
3537/* Return 1 if this is a valid binary floating-point operation.
0f290768 3538 OP is the expression matched, and MODE is its mode. */
e075ae69
RH
3539
3540int
3541binary_fp_operator (op, mode)
3542 register rtx op;
3543 enum machine_mode mode;
3544{
3545 if (mode != VOIDmode && mode != GET_MODE (op))
3546 return 0;
3547
2a2ab3f9
JVA
3548 switch (GET_CODE (op))
3549 {
e075ae69
RH
3550 case PLUS:
3551 case MINUS:
3552 case MULT:
3553 case DIV:
3554 return GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT;
e9a25f70 3555
2a2ab3f9
JVA
3556 default:
3557 return 0;
3558 }
3559}
fee2770d 3560
e075ae69 3561int
b531087a 3562mult_operator (op, mode)
e075ae69
RH
3563 register rtx op;
3564 enum machine_mode mode ATTRIBUTE_UNUSED;
3565{
3566 return GET_CODE (op) == MULT;
3567}
3568
3569int
b531087a 3570div_operator (op, mode)
e075ae69
RH
3571 register rtx op;
3572 enum machine_mode mode ATTRIBUTE_UNUSED;
3573{
3574 return GET_CODE (op) == DIV;
3575}
0a726ef1
JL
3576
3577int
e075ae69
RH
3578arith_or_logical_operator (op, mode)
3579 rtx op;
3580 enum machine_mode mode;
0a726ef1 3581{
e075ae69
RH
3582 return ((mode == VOIDmode || GET_MODE (op) == mode)
3583 && (GET_RTX_CLASS (GET_CODE (op)) == 'c'
3584 || GET_RTX_CLASS (GET_CODE (op)) == '2'));
0a726ef1
JL
3585}
3586
e075ae69 3587/* Returns 1 if OP is a memory operand with a displacement. */
fee2770d
RS
3588
3589int
e075ae69
RH
3590memory_displacement_operand (op, mode)
3591 register rtx op;
3592 enum machine_mode mode;
4f2c8ebb 3593{
e075ae69 3594 struct ix86_address parts;
e9a25f70 3595
e075ae69
RH
3596 if (! memory_operand (op, mode))
3597 return 0;
3598
3599 if (! ix86_decompose_address (XEXP (op, 0), &parts))
3600 abort ();
3601
3602 return parts.disp != NULL_RTX;
4f2c8ebb
RS
3603}
3604
16189740 3605/* To avoid problems when jump re-emits comparisons like testqi_ext_ccno_0,
e075ae69
RH
3606 re-recognize the operand to avoid a copy_to_mode_reg that will fail.
3607
3608 ??? It seems likely that this will only work because cmpsi is an
3609 expander, and no actual insns use this. */
4f2c8ebb
RS
3610
3611int
e075ae69
RH
3612cmpsi_operand (op, mode)
3613 rtx op;
3614 enum machine_mode mode;
fee2770d 3615{
b9b2c339 3616 if (nonimmediate_operand (op, mode))
e075ae69
RH
3617 return 1;
3618
3619 if (GET_CODE (op) == AND
3620 && GET_MODE (op) == SImode
3621 && GET_CODE (XEXP (op, 0)) == ZERO_EXTRACT
3622 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
3623 && GET_CODE (XEXP (XEXP (op, 0), 2)) == CONST_INT
3624 && INTVAL (XEXP (XEXP (op, 0), 1)) == 8
3625 && INTVAL (XEXP (XEXP (op, 0), 2)) == 8
3626 && GET_CODE (XEXP (op, 1)) == CONST_INT)
fee2770d 3627 return 1;
e9a25f70 3628
fee2770d
RS
3629 return 0;
3630}
d784886d 3631
e075ae69
RH
3632/* Returns 1 if OP is a memory operand that cannot be represented by the
3633 ModR/M byte alone. */
d784886d
RK
3634
3635int
e075ae69 3636long_memory_operand (op, mode)
d784886d
RK
3637 register rtx op;
3638 enum machine_mode mode;
3639{
e075ae69 3640 if (! memory_operand (op, mode))
d784886d
RK
3641 return 0;
3642
e075ae69 3643 return memory_address_length (op) != 0;
d784886d 3644}
2247f6ed
JH
3645
3646/* Return nonzero if the rtx is known to be aligned. */
3647
3648int
3649aligned_operand (op, mode)
3650 rtx op;
3651 enum machine_mode mode;
3652{
3653 struct ix86_address parts;
3654
3655 if (!general_operand (op, mode))
3656 return 0;
3657
0f290768 3658 /* Registers and immediate operands are always "aligned". */
2247f6ed
JH
3659 if (GET_CODE (op) != MEM)
3660 return 1;
3661
0f290768 3662 /* Don't even try to do any aligned optimizations with volatiles. */
2247f6ed
JH
3663 if (MEM_VOLATILE_P (op))
3664 return 0;
3665
3666 op = XEXP (op, 0);
3667
3668 /* Pushes and pops are only valid on the stack pointer. */
3669 if (GET_CODE (op) == PRE_DEC
3670 || GET_CODE (op) == POST_INC)
3671 return 1;
3672
3673 /* Decode the address. */
3674 if (! ix86_decompose_address (op, &parts))
3675 abort ();
3676
1540f9eb
JH
3677 if (parts.base && GET_CODE (parts.base) == SUBREG)
3678 parts.base = SUBREG_REG (parts.base);
3679 if (parts.index && GET_CODE (parts.index) == SUBREG)
3680 parts.index = SUBREG_REG (parts.index);
3681
2247f6ed
JH
3682 /* Look for some component that isn't known to be aligned. */
3683 if (parts.index)
3684 {
3685 if (parts.scale < 4
bdb429a5 3686 && REGNO_POINTER_ALIGN (REGNO (parts.index)) < 32)
2247f6ed
JH
3687 return 0;
3688 }
3689 if (parts.base)
3690 {
bdb429a5 3691 if (REGNO_POINTER_ALIGN (REGNO (parts.base)) < 32)
2247f6ed
JH
3692 return 0;
3693 }
3694 if (parts.disp)
3695 {
3696 if (GET_CODE (parts.disp) != CONST_INT
3697 || (INTVAL (parts.disp) & 3) != 0)
3698 return 0;
3699 }
3700
3701 /* Didn't find one -- this must be an aligned address. */
3702 return 1;
3703}
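A worked example of the checks above (an illustrative assumption, not from this file):

/* With %esp known to be 4-byte (32-bit) aligned, "4(%esp)" decomposes
   to base = %esp, disp = 4, so both the base and displacement tests
   pass and the operand counts as aligned.  "3(%eax)" fails the
   (INTVAL (parts.disp) & 3) test, and "(%eax,%ebx,2)" fails because
   scale < 4 while %ebx has no known pointer alignment.  */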
e075ae69
RH
3704\f
3705/* Return true if the constant is something that can be loaded with
3706 a special instruction. Only handle 0.0 and 1.0; others are less
3707 worthwhile. */
57dbca5e
BS
3708
3709int
e075ae69
RH
3710standard_80387_constant_p (x)
3711 rtx x;
57dbca5e 3712{
2b04e52b 3713 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
e075ae69 3714 return -1;
2b04e52b
JH
3715 /* Note that the 80387 can also load other constants, such as pi, that
3716 we should perhaps support too. On some machines these are much slower
3717 to load as a standard constant than to load from doubles in memory. */
3718 if (x == CONST0_RTX (GET_MODE (x)))
3719 return 1;
3720 if (x == CONST1_RTX (GET_MODE (x)))
3721 return 2;
e075ae69 3722 return 0;
57dbca5e
BS
3723}
3724
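How the two nonzero return values are consumed (an assumption based on the usual handling in the move patterns, which are not shown here):

/* A return of 1 (the constant 0.0) lets the output templates emit
   "fldz", and a return of 2 (the constant 1.0) "fld1", instead of
   loading the value from memory.  */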
2b04e52b
JH
3725 /* Return 1 if X is an FP constant we can load into an SSE register
3726 without using memory. */
3727int
3728standard_sse_constant_p (x)
3729 rtx x;
3730{
3731 if (GET_CODE (x) != CONST_DOUBLE)
3732 return -1;
3733 return (x == CONST0_RTX (GET_MODE (x)));
3734}
3735
2a2ab3f9
JVA
3736/* Returns 1 if OP contains a symbol reference. */
3737
3738int
3739symbolic_reference_mentioned_p (op)
3740 rtx op;
3741{
6f7d635c 3742 register const char *fmt;
2a2ab3f9
JVA
3743 register int i;
3744
3745 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3746 return 1;
3747
3748 fmt = GET_RTX_FORMAT (GET_CODE (op));
3749 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3750 {
3751 if (fmt[i] == 'E')
3752 {
3753 register int j;
3754
3755 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3756 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3757 return 1;
3758 }
e9a25f70 3759
2a2ab3f9
JVA
3760 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3761 return 1;
3762 }
3763
3764 return 0;
3765}
e075ae69
RH
3766
3767/* Return 1 if it is appropriate to emit `ret' instructions in the
3768 body of a function. Do this only if the epilogue is simple, needing a
3769 couple of insns. Prior to reloading, we can't tell how many registers
3770 must be saved, so return 0 then. Return 0 if there is no frame
3771 marker to de-allocate.
3772
3773 If NON_SAVING_SETJMP is defined and true, then it is not possible
3774 for the epilogue to be simple, so return 0. This is a special case
3775 since NON_SAVING_SETJMP will not cause regs_ever_live to change
3776 until final, but jump_optimize may need to know sooner if a
3777 `return' is OK. */
32b5b1aa
SC
3778
3779int
e075ae69 3780ix86_can_use_return_insn_p ()
32b5b1aa 3781{
4dd2ac2c 3782 struct ix86_frame frame;
9a7372d6 3783
e075ae69
RH
3784#ifdef NON_SAVING_SETJMP
3785 if (NON_SAVING_SETJMP && current_function_calls_setjmp)
3786 return 0;
3787#endif
9a7372d6
RH
3788
3789 if (! reload_completed || frame_pointer_needed)
3790 return 0;
32b5b1aa 3791
9a7372d6
RH
3792 /* Don't allow more than 32K bytes of arguments to be popped, since
3793 that's all we can do with one instruction. */
3794 if (current_function_pops_args
3795 && current_function_args_size >= 32768)
e075ae69 3796 return 0;
32b5b1aa 3797
4dd2ac2c
JH
3798 ix86_compute_frame_layout (&frame);
3799 return frame.to_allocate == 0 && frame.nregs == 0;
e075ae69 3800}
6189a572
JH
3801\f
3802/* Return 1 if VALUE can be stored in the sign extended immediate field. */
3803int
3804x86_64_sign_extended_value (value)
3805 rtx value;
3806{
3807 switch (GET_CODE (value))
3808 {
3809 /* CONST_DOUBLES never match, since HOST_BITS_PER_WIDE_INT is known
3810 to be at least 32, and thus all acceptable constants are
3811 represented as CONST_INT. */
3812 case CONST_INT:
3813 if (HOST_BITS_PER_WIDE_INT == 32)
3814 return 1;
3815 else
3816 {
3817 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (value), DImode);
fa9f36a1 3818 return trunc_int_for_mode (val, SImode) == val;
6189a572
JH
3819 }
3820 break;
3821
3822 /* For certain code models, the symbolic references are known to fit. */
3823 case SYMBOL_REF:
3824 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_KERNEL;
3825
3826 /* For certain code models, the code is near as well. */
3827 case LABEL_REF:
3828 return ix86_cmodel != CM_LARGE && ix86_cmodel != CM_SMALL_PIC;
3829
3830 /* We also may accept the offsetted memory references in certain special
3831 cases. */
3832 case CONST:
3833 if (GET_CODE (XEXP (value, 0)) == UNSPEC
8ee41eaf 3834 && XINT (XEXP (value, 0), 1) == UNSPEC_GOTPCREL)
6189a572
JH
3835 return 1;
3836 else if (GET_CODE (XEXP (value, 0)) == PLUS)
3837 {
3838 rtx op1 = XEXP (XEXP (value, 0), 0);
3839 rtx op2 = XEXP (XEXP (value, 0), 1);
3840 HOST_WIDE_INT offset;
3841
3842 if (ix86_cmodel == CM_LARGE)
3843 return 0;
3844 if (GET_CODE (op2) != CONST_INT)
3845 return 0;
3846 offset = trunc_int_for_mode (INTVAL (op2), DImode);
3847 switch (GET_CODE (op1))
3848 {
3849 case SYMBOL_REF:
3850 /* For CM_SMALL, assume that the last object is 1MB before the
3851 end of the 31-bit boundary. We may also accept pretty
3852 large negative constants, knowing that all objects are
3853 in the positive half of the address space. */
3854 if (ix86_cmodel == CM_SMALL
3855 && offset < 1024*1024*1024
3856 && trunc_int_for_mode (offset, SImode) == offset)
3857 return 1;
3858 /* For CM_KERNEL we know that all objects reside in the
3859 negative half of the 32-bit address space. We may not
3860 accept negative offsets, since they may be just out of
d6a7951f 3861 range, but we may accept pretty large positive ones. */
6189a572
JH
3862 if (ix86_cmodel == CM_KERNEL
3863 && offset > 0
3864 && trunc_int_for_mode (offset, SImode) == offset)
3865 return 1;
3866 break;
3867 case LABEL_REF:
3868 /* These conditions are similar to SYMBOL_REF ones, just the
3869 constraints for code models differ. */
3870 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
3871 && offset < 1024*1024*1024
3872 && trunc_int_for_mode (offset, SImode) == offset)
3873 return 1;
3874 if (ix86_cmodel == CM_KERNEL
3875 && offset > 0
3876 && trunc_int_for_mode (offset, SImode) == offset)
3877 return 1;
3878 break;
3879 default:
3880 return 0;
3881 }
3882 }
3883 return 0;
3884 default:
3885 return 0;
3886 }
3887}
3888
3889/* Return 1 if VALUE can be stored in the zero extended immediate field. */
3890int
3891x86_64_zero_extended_value (value)
3892 rtx value;
3893{
3894 switch (GET_CODE (value))
3895 {
3896 case CONST_DOUBLE:
3897 if (HOST_BITS_PER_WIDE_INT == 32)
3898 return (GET_MODE (value) == VOIDmode
3899 && !CONST_DOUBLE_HIGH (value));
3900 else
3901 return 0;
3902 case CONST_INT:
3903 if (HOST_BITS_PER_WIDE_INT == 32)
3904 return INTVAL (value) >= 0;
3905 else
b531087a 3906 return !(INTVAL (value) & ~(HOST_WIDE_INT) 0xffffffff);
6189a572
JH
3907 break;
3908
3909 /* For certain code models, the symbolic references are known to fit. */
3910 case SYMBOL_REF:
3911 return ix86_cmodel == CM_SMALL;
3912
3913 /* For certain code models, the code is near as well. */
3914 case LABEL_REF:
3915 return ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM;
3916
3917 /* We also may accept the offsetted memory references in certain special
3918 cases. */
3919 case CONST:
3920 if (GET_CODE (XEXP (value, 0)) == PLUS)
3921 {
3922 rtx op1 = XEXP (XEXP (value, 0), 0);
3923 rtx op2 = XEXP (XEXP (value, 0), 1);
3924
3925 if (ix86_cmodel == CM_LARGE)
3926 return 0;
3927 switch (GET_CODE (op1))
3928 {
3929 case SYMBOL_REF:
3930 return 0;
d6a7951f 3931 /* For small code model we may accept pretty large positive
6189a572
JH
3932 offsets, since one bit is available for free. Negative
3933 offsets are limited by the size of the NULL pointer area
3934 specified by the ABI. */
3935 if (ix86_cmodel == CM_SMALL
3936 && GET_CODE (op2) == CONST_INT
3937 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
3938 && (trunc_int_for_mode (INTVAL (op2), SImode)
3939 == INTVAL (op2)))
3940 return 1;
3941 /* ??? For the kernel, we may accept adjustment of
3942 -0x10000000, since we know that it will just convert
d6a7951f 3943 negative address space to positive, but perhaps this
6189a572
JH
3944 is not worthwhile. */
3945 break;
3946 case LABEL_REF:
3947 /* These conditions are similar to SYMBOL_REF ones, just the
3948 constraints for code models differ. */
3949 if ((ix86_cmodel == CM_SMALL || ix86_cmodel == CM_MEDIUM)
3950 && GET_CODE (op2) == CONST_INT
3951 && trunc_int_for_mode (INTVAL (op2), DImode) > -0x10000
3952 && (trunc_int_for_mode (INTVAL (op2), SImode)
3953 == INTVAL (op2)))
3954 return 1;
3955 break;
3956 default:
3957 return 0;
3958 }
3959 }
3960 return 0;
3961 default:
3962 return 0;
3963 }
3964}
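Stripped of the code-model cases, the two CONST_INT checks above reduce to the following hypothetical helpers (a sketch assuming a 64-bit HOST_WIDE_INT; neither function exists in this file):

static int
fits_signed_imm32 (val)
     HOST_WIDE_INT val;
{
  /* Mirrors the CONST_INT case of x86_64_sign_extended_value:
     -0x80000000 <= val <= 0x7fffffff.  */
  return trunc_int_for_mode (val, SImode) == val;
}

static int
fits_unsigned_imm32 (val)
     HOST_WIDE_INT val;
{
  /* Mirrors the CONST_INT case of x86_64_zero_extended_value:
     0 <= val <= 0xffffffff.  */
  return !(val & ~(HOST_WIDE_INT) 0xffffffff);
}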
6fca22eb
RH
3965
3966/* Value should be nonzero if functions must have frame pointers.
3967 Zero means the frame pointer need not be set up (and parms may
3968 be accessed via the stack pointer) in functions that seem suitable. */
3969
3970int
3971ix86_frame_pointer_required ()
3972{
3973 /* If we accessed previous frames, then the generated code expects
3974 to be able to access the saved ebp value in our frame. */
3975 if (cfun->machine->accesses_prev_frame)
3976 return 1;
a4f31c00 3977
6fca22eb
RH
3978 /* Several x86 OSes need a frame pointer for other reasons,
3979 usually pertaining to setjmp. */
3980 if (SUBTARGET_FRAME_POINTER_REQUIRED)
3981 return 1;
3982
3983 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
3984 the frame pointer by default. Turn it back on now if we've not
3985 got a leaf function. */
a7943381
RH
3986 if (TARGET_OMIT_LEAF_FRAME_POINTER
3987 && (!current_function_is_leaf || current_function_profile))
6fca22eb
RH
3988 return 1;
3989
3990 return 0;
3991}
3992
3993/* Record that the current function accesses previous call frames. */
3994
3995void
3996ix86_setup_frame_addresses ()
3997{
3998 cfun->machine->accesses_prev_frame = 1;
3999}
e075ae69 4000\f
145aacc2
RH
4001#if defined(HAVE_GAS_HIDDEN) && defined(SUPPORTS_ONE_ONLY)
4002# define USE_HIDDEN_LINKONCE 1
4003#else
4004# define USE_HIDDEN_LINKONCE 0
4005#endif
4006
bd09bdeb 4007static int pic_labels_used;
e9a25f70 4008
145aacc2
RH
4009/* Fills in the label name that should be used for a pc thunk for
4010 the given register. */
4011
4012static void
4013get_pc_thunk_name (name, regno)
4014 char name[32];
4015 unsigned int regno;
4016{
4017 if (USE_HIDDEN_LINKONCE)
4018 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
4019 else
4020 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
4021}
4022
4023
e075ae69
RH
4024/* This function generates the pc thunks used by -fpic code: each one
4025 loads its register with the return address of the caller and then returns. */
4026
4027void
4cf12e7e 4028ix86_asm_file_end (file)
e075ae69 4029 FILE *file;
e075ae69
RH
4030{
4031 rtx xops[2];
bd09bdeb 4032 int regno;
32b5b1aa 4033
bd09bdeb 4034 for (regno = 0; regno < 8; ++regno)
7c262518 4035 {
145aacc2
RH
4036 char name[32];
4037
bd09bdeb
RH
4038 if (! ((pic_labels_used >> regno) & 1))
4039 continue;
4040
145aacc2 4041 get_pc_thunk_name (name, regno);
bd09bdeb 4042
145aacc2
RH
4043 if (USE_HIDDEN_LINKONCE)
4044 {
4045 tree decl;
4046
4047 decl = build_decl (FUNCTION_DECL, get_identifier (name),
4048 error_mark_node);
4049 TREE_PUBLIC (decl) = 1;
4050 TREE_STATIC (decl) = 1;
4051 DECL_ONE_ONLY (decl) = 1;
4052
4053 (*targetm.asm_out.unique_section) (decl, 0);
4054 named_section (decl, NULL, 0);
4055
5eb99654 4056 (*targetm.asm_out.globalize_label) (file, name);
145aacc2
RH
4057 fputs ("\t.hidden\t", file);
4058 assemble_name (file, name);
4059 fputc ('\n', file);
4060 ASM_DECLARE_FUNCTION_NAME (file, name, decl);
4061 }
4062 else
4063 {
4064 text_section ();
4065 ASM_OUTPUT_LABEL (file, name);
4066 }
bd09bdeb
RH
4067
4068 xops[0] = gen_rtx_REG (SImode, regno);
4069 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
4070 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
4071 output_asm_insn ("ret", xops);
7c262518 4072 }
32b5b1aa 4073}
32b5b1aa 4074
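For a concrete picture, the USE_HIDDEN_LINKONCE branch above emits, for %ebx, assembly equivalent to the following (a sketch; the exact section directives come from targetm.asm_out.unique_section and are omitted):

	.globl	__i686.get_pc_thunk.bx
	.hidden	__i686.get_pc_thunk.bx
__i686.get_pc_thunk.bx:
	movl	(%esp), %ebx
	ret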
c8c03509 4075/* Emit code for the SET_GOT patterns. */
32b5b1aa 4076
c8c03509
RH
4077const char *
4078output_set_got (dest)
4079 rtx dest;
4080{
4081 rtx xops[3];
0d7d98ee 4082
c8c03509 4083 xops[0] = dest;
5fc0e5df 4084 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
32b5b1aa 4085
c8c03509 4086 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
32b5b1aa 4087 {
c8c03509
RH
4088 xops[2] = gen_rtx_LABEL_REF (Pmode, gen_label_rtx ());
4089
4090 if (!flag_pic)
4091 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
4092 else
4093 output_asm_insn ("call\t%a2", xops);
4094
b069de3b
SS
4095#if TARGET_MACHO
4096 /* Output the "canonical" label name ("Lxx$pb") here too. This
4097 is what will be referred to by the Mach-O PIC subsystem. */
4098 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
4099#endif
c8c03509
RH
4100 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
4101 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
4102
4103 if (flag_pic)
4104 output_asm_insn ("pop{l}\t%0", xops);
32b5b1aa 4105 }
e075ae69 4106 else
e5cb57e8 4107 {
145aacc2
RH
4108 char name[32];
4109 get_pc_thunk_name (name, REGNO (dest));
bd09bdeb 4110 pic_labels_used |= 1 << REGNO (dest);
f996902d 4111
145aacc2 4112 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
c8c03509
RH
4113 xops[2] = gen_rtx_MEM (QImode, xops[2]);
4114 output_asm_insn ("call\t%X2", xops);
e5cb57e8 4115 }
e5cb57e8 4116
c8c03509
RH
4117 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
4118 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
b069de3b 4119 else if (!TARGET_MACHO)
8e9fadc3 4120 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %a1+(.-%a2)}", xops);
79325812 4121
c8c03509 4122 return "";
e9a25f70 4123}
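The two shapes this emits for, say, %ebx look roughly like the following (a sketch assuming ELF, -fpic, and that GOT_SYMBOL_NAME expands to _GLOBAL_OFFSET_TABLE_):

	/* !TARGET_DEEP_BRANCH_PREDICTION: inline label + pop.  */
	call	.L2
.L2:	popl	%ebx
	addl	$_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx

	/* TARGET_DEEP_BRANCH_PREDICTION: call the pc thunk.  */
	call	__i686.get_pc_thunk.bx
	addl	$_GLOBAL_OFFSET_TABLE_, %ebx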
8dfe5673 4124
0d7d98ee 4125/* Generate a "push" pattern for input ARG. */
e9a25f70 4126
e075ae69
RH
4127static rtx
4128gen_push (arg)
4129 rtx arg;
e9a25f70 4130{
c5c76735 4131 return gen_rtx_SET (VOIDmode,
0d7d98ee
JH
4132 gen_rtx_MEM (Pmode,
4133 gen_rtx_PRE_DEC (Pmode,
c5c76735
JL
4134 stack_pointer_rtx)),
4135 arg);
e9a25f70
JL
4136}
4137
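In other words, for the 32-bit (Pmode == SImode) case this builds the familiar push RTL:

	(set (mem:SI (pre_dec:SI (reg:SI sp)))
	     <ARG>)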
bd09bdeb
RH
4138/* Return >= 0 if there is an unused call-clobbered register available
4139 for the entire function. */
4140
4141static unsigned int
4142ix86_select_alt_pic_regnum ()
4143{
4144 if (current_function_is_leaf && !current_function_profile)
4145 {
4146 int i;
4147 for (i = 2; i >= 0; --i)
4148 if (!regs_ever_live[i])
4149 return i;
4150 }
4151
4152 return INVALID_REGNUM;
4153}
fce5a9f2 4154
4dd2ac2c
JH
4155/* Return 1 if we need to save REGNO. */
4156static int
1020a5ab 4157ix86_save_reg (regno, maybe_eh_return)
9b690711 4158 unsigned int regno;
37a58036 4159 int maybe_eh_return;
1020a5ab 4160{
bd09bdeb
RH
4161 if (pic_offset_table_rtx
4162 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
4163 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
66edd3b4 4164 || current_function_profile
1020a5ab 4165 || current_function_calls_eh_return))
bd09bdeb
RH
4166 {
4167 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
4168 return 0;
4169 return 1;
4170 }
1020a5ab
RH
4171
4172 if (current_function_calls_eh_return && maybe_eh_return)
4173 {
4174 unsigned i;
4175 for (i = 0; ; i++)
4176 {
b531087a 4177 unsigned test = EH_RETURN_DATA_REGNO (i);
1020a5ab
RH
4178 if (test == INVALID_REGNUM)
4179 break;
9b690711 4180 if (test == regno)
1020a5ab
RH
4181 return 1;
4182 }
4183 }
4dd2ac2c 4184
1020a5ab
RH
4185 return (regs_ever_live[regno]
4186 && !call_used_regs[regno]
4187 && !fixed_regs[regno]
4188 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
4dd2ac2c
JH
4189}
4190
0903fcab
JH
4191/* Return number of registers to be saved on the stack. */
4192
4193static int
4194ix86_nsaved_regs ()
4195{
4196 int nregs = 0;
0903fcab
JH
4197 int regno;
4198
4dd2ac2c 4199 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
1020a5ab 4200 if (ix86_save_reg (regno, true))
4dd2ac2c 4201 nregs++;
0903fcab
JH
4202 return nregs;
4203}
4204
4205/* Return the offset between two registers, one to be eliminated, and the other
4206 its replacement, at the start of a routine. */
4207
4208HOST_WIDE_INT
4209ix86_initial_elimination_offset (from, to)
4210 int from;
4211 int to;
4212{
4dd2ac2c
JH
4213 struct ix86_frame frame;
4214 ix86_compute_frame_layout (&frame);
564d80f4
JH
4215
4216 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
4dd2ac2c 4217 return frame.hard_frame_pointer_offset;
564d80f4
JH
4218 else if (from == FRAME_POINTER_REGNUM
4219 && to == HARD_FRAME_POINTER_REGNUM)
4dd2ac2c 4220 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
0903fcab
JH
4221 else
4222 {
564d80f4
JH
4223 if (to != STACK_POINTER_REGNUM)
4224 abort ();
4225 else if (from == ARG_POINTER_REGNUM)
4dd2ac2c 4226 return frame.stack_pointer_offset;
564d80f4
JH
4227 else if (from != FRAME_POINTER_REGNUM)
4228 abort ();
0903fcab 4229 else
4dd2ac2c 4230 return frame.stack_pointer_offset - frame.frame_pointer_offset;
0903fcab
JH
4231 }
4232}
4233
4dd2ac2c 4234/* Fill structure ix86_frame about frame of currently computed function. */
0f290768 4235
4dd2ac2c
JH
4236static void
4237ix86_compute_frame_layout (frame)
4238 struct ix86_frame *frame;
65954bd8 4239{
65954bd8 4240 HOST_WIDE_INT total_size;
564d80f4 4241 int stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
44affdae
JH
4242 int offset;
4243 int preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
4dd2ac2c 4244 HOST_WIDE_INT size = get_frame_size ();
65954bd8 4245
4dd2ac2c 4246 frame->nregs = ix86_nsaved_regs ();
564d80f4 4247 total_size = size;
65954bd8 4248
9ba81eaa 4249 /* Skip return address and saved base pointer. */
4dd2ac2c
JH
4250 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
4251
4252 frame->hard_frame_pointer_offset = offset;
564d80f4 4253
fcbfaa65
RK
4254 /* Do some sanity checking of stack_alignment_needed and
4255 preferred_alignment, since the i386 port is the only one using those features
f710504c 4256 that may break easily. */
564d80f4 4257
44affdae
JH
4258 if (size && !stack_alignment_needed)
4259 abort ();
44affdae
JH
4260 if (preferred_alignment < STACK_BOUNDARY / BITS_PER_UNIT)
4261 abort ();
4262 if (preferred_alignment > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4263 abort ();
4264 if (stack_alignment_needed > PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
4265 abort ();
564d80f4 4266
4dd2ac2c
JH
4267 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
4268 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
564d80f4 4269
4dd2ac2c
JH
4270 /* Register save area */
4271 offset += frame->nregs * UNITS_PER_WORD;
65954bd8 4272
8362f420
JH
4273 /* Va-arg area */
4274 if (ix86_save_varrargs_registers)
4275 {
4276 offset += X86_64_VARARGS_SIZE;
4277 frame->va_arg_size = X86_64_VARARGS_SIZE;
4278 }
4279 else
4280 frame->va_arg_size = 0;
4281
4dd2ac2c
JH
4282 /* Align start of frame for local function. */
4283 frame->padding1 = ((offset + stack_alignment_needed - 1)
4284 & -stack_alignment_needed) - offset;
f73ad30e 4285
4dd2ac2c 4286 offset += frame->padding1;
65954bd8 4287
4dd2ac2c
JH
4288 /* Frame pointer points here. */
4289 frame->frame_pointer_offset = offset;
54ff41b7 4290
4dd2ac2c 4291 offset += size;
65954bd8 4292
0b7ae565
RH
4293 /* Add outgoing arguments area. Can be skipped if we eliminated
4294 all the function calls as dead code. */
4295 if (ACCUMULATE_OUTGOING_ARGS && !current_function_is_leaf)
4dd2ac2c
JH
4296 {
4297 offset += current_function_outgoing_args_size;
4298 frame->outgoing_arguments_size = current_function_outgoing_args_size;
4299 }
4300 else
4301 frame->outgoing_arguments_size = 0;
564d80f4 4302
002ff5bc
RH
4303 /* Align stack boundary. Only needed if we're calling another function
4304 or using alloca. */
4305 if (!current_function_is_leaf || current_function_calls_alloca)
0b7ae565
RH
4306 frame->padding2 = ((offset + preferred_alignment - 1)
4307 & -preferred_alignment) - offset;
4308 else
4309 frame->padding2 = 0;
4dd2ac2c
JH
4310
4311 offset += frame->padding2;
4312
4313 /* We've reached end of stack frame. */
4314 frame->stack_pointer_offset = offset;
4315
4316 /* Size prologue needs to allocate. */
4317 frame->to_allocate =
4318 (size + frame->padding1 + frame->padding2
8362f420 4319 + frame->outgoing_arguments_size + frame->va_arg_size);
4dd2ac2c 4320
8362f420
JH
4321 if (TARGET_64BIT && TARGET_RED_ZONE && current_function_sp_is_unchanging
4322 && current_function_is_leaf)
4323 {
4324 frame->red_zone_size = frame->to_allocate;
4325 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
4326 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
4327 }
4328 else
4329 frame->red_zone_size = 0;
4330 frame->to_allocate -= frame->red_zone_size;
4331 frame->stack_pointer_offset -= frame->red_zone_size;
4dd2ac2c
JH
4332#if 0
4333 fprintf (stderr, "nregs: %i\n", frame->nregs);
4334 fprintf (stderr, "size: %i\n", size);
4335 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
4336 fprintf (stderr, "padding1: %i\n", frame->padding1);
8362f420 4337 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
4dd2ac2c
JH
4338 fprintf (stderr, "padding2: %i\n", frame->padding2);
4339 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
8362f420 4340 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
4dd2ac2c
JH
4341 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
4342 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
4343 frame->hard_frame_pointer_offset);
4344 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
4345#endif
65954bd8
JL
4346}
4347
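The offsets computed above describe a frame laid out as follows, from higher toward lower addresses (a sketch; the red-zone adjustment at the end shrinks to_allocate and stack_pointer_offset for 64-bit leaf functions):

	<return address>
	<saved %ebp, if frame_pointer_needed>	<- hard_frame_pointer_offset
	<saved registers>	nregs * UNITS_PER_WORD
	<va-arg save area>	va_arg_size
	<padding1>
	<local variables>	<- frame_pointer_offset
	<outgoing arguments>	outgoing_arguments_size
	<padding2>		<- stack_pointer_offset at the end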
0903fcab
JH
4348/* Emit code to save registers in the prologue. */
4349
4350static void
4351ix86_emit_save_regs ()
4352{
4353 register int regno;
0903fcab 4354 rtx insn;
0903fcab 4355
4dd2ac2c 4356 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
1020a5ab 4357 if (ix86_save_reg (regno, true))
0903fcab 4358 {
0d7d98ee 4359 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
0903fcab
JH
4360 RTX_FRAME_RELATED_P (insn) = 1;
4361 }
4362}
4363
c6036a37
JH
4364/* Emit code to save registers using MOV insns. First register
4365 is saved at POINTER + OFFSET. */
4366static void
4367ix86_emit_save_regs_using_mov (pointer, offset)
b72f00af
RK
4368 rtx pointer;
4369 HOST_WIDE_INT offset;
c6036a37
JH
4370{
4371 int regno;
4372 rtx insn;
4373
4374 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
4375 if (ix86_save_reg (regno, true))
4376 {
b72f00af
RK
4377 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
4378 Pmode, offset),
c6036a37
JH
4379 gen_rtx_REG (Pmode, regno));
4380 RTX_FRAME_RELATED_P (insn) = 1;
4381 offset += UNITS_PER_WORD;
4382 }
4383}
4384
0f290768 4385/* Expand the prologue into a bunch of separate insns. */
e075ae69
RH
4386
4387void
4388ix86_expand_prologue ()
2a2ab3f9 4389{
564d80f4 4390 rtx insn;
bd09bdeb 4391 bool pic_reg_used;
4dd2ac2c 4392 struct ix86_frame frame;
6ab16dd9 4393 int use_mov = 0;
c6036a37 4394 HOST_WIDE_INT allocate;
4dd2ac2c 4395
2ab0437e 4396 if (!optimize_size)
6ab16dd9
JH
4397 {
4398 use_fast_prologue_epilogue
4399 = !expensive_function_p (FAST_PROLOGUE_INSN_COUNT);
2ab0437e
JH
4400 if (TARGET_PROLOGUE_USING_MOVE)
4401 use_mov = use_fast_prologue_epilogue;
6ab16dd9 4402 }
4dd2ac2c 4403 ix86_compute_frame_layout (&frame);
79325812 4404
e075ae69
RH
4405 /* Note: AT&T enter does NOT have reversed args. Enter is probably
4406 slower on all targets. Also sdb doesn't like it. */
e9a25f70 4407
2a2ab3f9
JVA
4408 if (frame_pointer_needed)
4409 {
564d80f4 4410 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
e075ae69 4411 RTX_FRAME_RELATED_P (insn) = 1;
e9a25f70 4412
564d80f4 4413 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
e075ae69 4414 RTX_FRAME_RELATED_P (insn) = 1;
2a2ab3f9
JVA
4415 }
4416
c6036a37
JH
4417 allocate = frame.to_allocate;
4418 /* In case we are dealing only with a single register and an empty frame,
4419 a push is equivalent to the mov+add sequence. */
4420 if (allocate == 0 && frame.nregs <= 1)
4421 use_mov = 0;
4422
4423 if (!use_mov)
4424 ix86_emit_save_regs ();
4425 else
4426 allocate += frame.nregs * UNITS_PER_WORD;
564d80f4 4427
c6036a37 4428 if (allocate == 0)
8dfe5673 4429 ;
e323735c 4430 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
469ac993 4431 {
f2042df3
RH
4432 insn = emit_insn (gen_pro_epilogue_adjust_stack
4433 (stack_pointer_rtx, stack_pointer_rtx,
e323735c 4434 GEN_INT (-allocate)));
e075ae69 4435 RTX_FRAME_RELATED_P (insn) = 1;
469ac993 4436 }
79325812 4437 else
8dfe5673 4438 {
e075ae69 4439 /* ??? Is this only valid for Win32? */
e9a25f70 4440
e075ae69 4441 rtx arg0, sym;
e9a25f70 4442
8362f420 4443 if (TARGET_64BIT)
b531087a 4444 abort ();
8362f420 4445
e075ae69 4446 arg0 = gen_rtx_REG (SImode, 0);
c6036a37 4447 emit_move_insn (arg0, GEN_INT (allocate));
77a989d1 4448
e075ae69
RH
4449 sym = gen_rtx_MEM (FUNCTION_MODE,
4450 gen_rtx_SYMBOL_REF (Pmode, "_alloca"));
32ee7d1d 4451 insn = emit_call_insn (gen_call (sym, const0_rtx, constm1_rtx));
e075ae69
RH
4452
4453 CALL_INSN_FUNCTION_USAGE (insn)
276ab4a4
RH
4454 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_USE (VOIDmode, arg0),
4455 CALL_INSN_FUNCTION_USAGE (insn));
e075ae69 4456 }
c6036a37
JH
4457 if (use_mov)
4458 {
4459 if (!frame_pointer_needed || !frame.to_allocate)
4460 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
4461 else
4462 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
4463 -frame.nregs * UNITS_PER_WORD);
4464 }
e9a25f70 4465
84530511
SC
4466#ifdef SUBTARGET_PROLOGUE
4467 SUBTARGET_PROLOGUE;
0f290768 4468#endif
84530511 4469
bd09bdeb
RH
4470 pic_reg_used = false;
4471 if (pic_offset_table_rtx
4472 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
4473 || current_function_profile))
4474 {
4475 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
4476
4477 if (alt_pic_reg_used != INVALID_REGNUM)
4478 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
4479
4480 pic_reg_used = true;
4481 }
4482
e9a25f70 4483 if (pic_reg_used)
c8c03509
RH
4484 {
4485 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
4486
66edd3b4
RH
4487 /* Even with accurate pre-reload life analysis, we can wind up
4488 deleting all references to the pic register after reload.
4489 Consider what happens if cross-jumping unifies two sides of a branch
4490 controlled by a comparison against the only read from a global.
4491 In that case, allow the set_got to be deleted, though we're
4492 too late to do anything about the ebx save in the prologue. */
c8c03509
RH
4493 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
4494 }
77a989d1 4495
66edd3b4
RH
4496 /* Prevent function calls from being scheduled before the call to mcount.
4497 In the pic_reg_used case, make sure that the got load isn't deleted. */
4498 if (current_function_profile)
4499 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
77a989d1
SC
4500}
4501
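Putting the pieces together, a typical -fpic prologue with a frame pointer produced by this expander corresponds to (a sketch; N stands for the computed allocation):

	pushl	%ebp
	movl	%esp, %ebp
	pushl	%ebx			; ix86_emit_save_regs (or mov-based
					; saves after the sub when use_mov)
	subl	$N, %esp		; pro_epilogue_adjust_stack
	call	__i686.get_pc_thunk.bx	; set_got, when pic_reg_used
	addl	$_GLOBAL_OFFSET_TABLE_, %ebx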
da2d1d3a
JH
4502/* Emit code to restore saved registers using MOV insns. First register
4503 is restored from POINTER + OFFSET. */
4504static void
1020a5ab
RH
4505ix86_emit_restore_regs_using_mov (pointer, offset, maybe_eh_return)
4506 rtx pointer;
4507 int offset;
37a58036 4508 int maybe_eh_return;
da2d1d3a
JH
4509{
4510 int regno;
da2d1d3a 4511
4dd2ac2c 4512 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
1020a5ab 4513 if (ix86_save_reg (regno, maybe_eh_return))
da2d1d3a 4514 {
4dd2ac2c 4515 emit_move_insn (gen_rtx_REG (Pmode, regno),
b72f00af
RK
4516 adjust_address (gen_rtx_MEM (Pmode, pointer),
4517 Pmode, offset));
4dd2ac2c 4518 offset += UNITS_PER_WORD;
da2d1d3a
JH
4519 }
4520}
4521
0f290768 4522/* Restore function stack, frame, and registers. */
e9a25f70 4523
2a2ab3f9 4524void
1020a5ab
RH
4525ix86_expand_epilogue (style)
4526 int style;
2a2ab3f9 4527{
1c71e60e 4528 int regno;
fdb8a883 4529 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
4dd2ac2c 4530 struct ix86_frame frame;
65954bd8 4531 HOST_WIDE_INT offset;
4dd2ac2c
JH
4532
4533 ix86_compute_frame_layout (&frame);
2a2ab3f9 4534
a4f31c00 4535 /* Calculate start of saved registers relative to ebp. Special care
84e306b4
RH
4536 must be taken for the normal return case of a function using
4537 eh_return: the eax and edx registers are marked as saved, but not
4538 restored along this path. */
4539 offset = frame.nregs;
4540 if (current_function_calls_eh_return && style != 2)
4541 offset -= 2;
4542 offset *= -UNITS_PER_WORD;
2a2ab3f9 4543
fdb8a883
JW
4544 /* If we're only restoring one register and sp is not valid, then
4545 use a move instruction to restore the register, since it's
4546 less work than reloading sp and popping the register.
da2d1d3a
JH
4547
4548 The default code results in a stack adjustment using an add/lea instruction,
4549 while this code results in a LEAVE instruction (or discrete equivalent),
4550 so it is profitable in some other cases as well, especially when there
4551 are no registers to restore. We also use this code when TARGET_USE_LEAVE
4552 and there is exactly one register to pop. This heuristic may need some
4553 tuning in future. */
4dd2ac2c 4554 if ((!sp_valid && frame.nregs <= 1)
2ab0437e 4555 || (TARGET_EPILOGUE_USING_MOVE
6ab16dd9 4556 && use_fast_prologue_epilogue
c6036a37 4557 && (frame.nregs > 1 || frame.to_allocate))
4dd2ac2c 4558 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
2ab0437e 4559 || (frame_pointer_needed && TARGET_USE_LEAVE
6ab16dd9 4560 && use_fast_prologue_epilogue && frame.nregs == 1)
2ab0437e 4561 || current_function_calls_eh_return)
2a2ab3f9 4562 {
da2d1d3a
JH
4563 /* Restore registers. We can use ebp or esp to address the memory
4564 locations. If both are available, default to ebp, since offsets
4565 are known to be small. The only exception is esp pointing directly to
4566 the end of the block of saved registers, where we may simplify the
4567 addressing mode. */
4568
4dd2ac2c 4569 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
1020a5ab
RH
4570 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
4571 frame.to_allocate, style == 2);
da2d1d3a 4572 else
1020a5ab
RH
4573 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
4574 offset, style == 2);
4575
4576 /* eh_return epilogues need %ecx added to the stack pointer. */
4577 if (style == 2)
4578 {
4579 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
2a2ab3f9 4580
1020a5ab
RH
4581 if (frame_pointer_needed)
4582 {
4583 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
4584 tmp = plus_constant (tmp, UNITS_PER_WORD);
4585 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
4586
4587 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
4588 emit_move_insn (hard_frame_pointer_rtx, tmp);
4589
4590 emit_insn (gen_pro_epilogue_adjust_stack
f2042df3 4591 (stack_pointer_rtx, sa, const0_rtx));
1020a5ab
RH
4592 }
4593 else
4594 {
4595 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
4596 tmp = plus_constant (tmp, (frame.to_allocate
4597 + frame.nregs * UNITS_PER_WORD));
4598 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
4599 }
4600 }
4601 else if (!frame_pointer_needed)
f2042df3
RH
4602 emit_insn (gen_pro_epilogue_adjust_stack
4603 (stack_pointer_rtx, stack_pointer_rtx,
4604 GEN_INT (frame.to_allocate
4605 + frame.nregs * UNITS_PER_WORD)));
0f290768 4606 /* If not an i386, mov & pop is faster than "leave". */
6ab16dd9 4607 else if (TARGET_USE_LEAVE || optimize_size || !use_fast_prologue_epilogue)
8362f420 4608 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
c8c5cb99 4609 else
2a2ab3f9 4610 {
1c71e60e
JH
4611 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4612 hard_frame_pointer_rtx,
f2042df3 4613 const0_rtx));
8362f420
JH
4614 if (TARGET_64BIT)
4615 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4616 else
4617 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
e9a25f70
JL
4618 }
4619 }
1c71e60e 4620 else
68f654ec 4621 {
1c71e60e
JH
4622 /* First step is to deallocate the stack frame so that we can
4623 pop the registers. */
4624 if (!sp_valid)
4625 {
4626 if (!frame_pointer_needed)
4627 abort ();
4628 emit_insn (gen_pro_epilogue_adjust_stack (stack_pointer_rtx,
4629 hard_frame_pointer_rtx,
f2042df3 4630 GEN_INT (offset)));
1c71e60e 4631 }
4dd2ac2c 4632 else if (frame.to_allocate)
f2042df3
RH
4633 emit_insn (gen_pro_epilogue_adjust_stack
4634 (stack_pointer_rtx, stack_pointer_rtx,
4635 GEN_INT (frame.to_allocate)));
1c71e60e 4636
4dd2ac2c 4637 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
1020a5ab 4638 if (ix86_save_reg (regno, false))
8362f420
JH
4639 {
4640 if (TARGET_64BIT)
4641 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
4642 else
4643 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
4644 }
4dd2ac2c 4645 if (frame_pointer_needed)
8362f420 4646 {
f5143c46 4647 /* "leave" results in shorter dependency chains on CPUs that are
2ab0437e
JH
4648 able to grok it fast. */
4649 if (TARGET_USE_LEAVE)
4650 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
4651 else if (TARGET_64BIT)
8362f420
JH
4652 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
4653 else
4654 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
4655 }
68f654ec 4656 }
68f654ec 4657
cbbf65e0 4658 /* Sibcall epilogues don't want a return instruction. */
1020a5ab 4659 if (style == 0)
cbbf65e0
RH
4660 return;
4661
2a2ab3f9
JVA
4662 if (current_function_pops_args && current_function_args_size)
4663 {
e075ae69 4664 rtx popc = GEN_INT (current_function_pops_args);
2a2ab3f9 4665
b8c752c8
UD
4666 /* i386 can only pop 64K bytes. If asked to pop more, pop the
4667 return address, do an explicit add, and jump indirectly to the
0f290768 4668 caller. */
2a2ab3f9 4669
b8c752c8 4670 if (current_function_pops_args >= 65536)
2a2ab3f9 4671 {
e075ae69 4672 rtx ecx = gen_rtx_REG (SImode, 2);
e9a25f70 4673
8362f420
JH
4674 /* There is no "pascal" calling convention in the 64-bit ABI. */
4675 if (TARGET_64BIT)
b531087a 4676 abort ();
8362f420 4677
e075ae69
RH
4678 emit_insn (gen_popsi1 (ecx));
4679 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
11837777 4680 emit_jump_insn (gen_return_indirect_internal (ecx));
e9a25f70 4681 }
79325812 4682 else
e075ae69
RH
4683 emit_jump_insn (gen_return_pop_internal (popc));
4684 }
4685 else
4686 emit_jump_insn (gen_return_internal ());
4687}
bd09bdeb
RH
4688
4689/* Reset from the function's potential modifications. */
4690
4691static void
4692ix86_output_function_epilogue (file, size)
4693 FILE *file ATTRIBUTE_UNUSED;
4694 HOST_WIDE_INT size ATTRIBUTE_UNUSED;
4695{
4696 if (pic_offset_table_rtx)
4697 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
4698}
e075ae69
RH
4699\f
4700/* Extract the parts of an RTL expression that is a valid memory address
b446e5a2
JH
4701 for an instruction. Return 0 if the structure of the address is
4702 grossly off. Return -1 if the address contains ASHIFT, so it is not
4703 strictly valid but is still used for computing the length of the lea
4704 instruction. */
e075ae69
RH
4705
4706static int
4707ix86_decompose_address (addr, out)
4708 register rtx addr;
4709 struct ix86_address *out;
4710{
4711 rtx base = NULL_RTX;
4712 rtx index = NULL_RTX;
4713 rtx disp = NULL_RTX;
4714 HOST_WIDE_INT scale = 1;
4715 rtx scale_rtx = NULL_RTX;
b446e5a2 4716 int retval = 1;
e075ae69 4717
1540f9eb 4718 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
e075ae69
RH
4719 base = addr;
4720 else if (GET_CODE (addr) == PLUS)
4721 {
4722 rtx op0 = XEXP (addr, 0);
4723 rtx op1 = XEXP (addr, 1);
4724 enum rtx_code code0 = GET_CODE (op0);
4725 enum rtx_code code1 = GET_CODE (op1);
4726
4727 if (code0 == REG || code0 == SUBREG)
4728 {
4729 if (code1 == REG || code1 == SUBREG)
4730 index = op0, base = op1; /* index + base */
4731 else
4732 base = op0, disp = op1; /* base + displacement */
4733 }
4734 else if (code0 == MULT)
e9a25f70 4735 {
e075ae69
RH
4736 index = XEXP (op0, 0);
4737 scale_rtx = XEXP (op0, 1);
4738 if (code1 == REG || code1 == SUBREG)
4739 base = op1; /* index*scale + base */
e9a25f70 4740 else
e075ae69
RH
4741 disp = op1; /* index*scale + disp */
4742 }
4743 else if (code0 == PLUS && GET_CODE (XEXP (op0, 0)) == MULT)
4744 {
4745 index = XEXP (XEXP (op0, 0), 0); /* index*scale + base + disp */
4746 scale_rtx = XEXP (XEXP (op0, 0), 1);
4747 base = XEXP (op0, 1);
4748 disp = op1;
2a2ab3f9 4749 }
e075ae69
RH
4750 else if (code0 == PLUS)
4751 {
4752 index = XEXP (op0, 0); /* index + base + disp */
4753 base = XEXP (op0, 1);
4754 disp = op1;
4755 }
4756 else
b446e5a2 4757 return 0;
e075ae69
RH
4758 }
4759 else if (GET_CODE (addr) == MULT)
4760 {
4761 index = XEXP (addr, 0); /* index*scale */
4762 scale_rtx = XEXP (addr, 1);
4763 }
4764 else if (GET_CODE (addr) == ASHIFT)
4765 {
4766 rtx tmp;
4767
4768 /* We're called for lea too, which implements ashift on occasion. */
4769 index = XEXP (addr, 0);
4770 tmp = XEXP (addr, 1);
4771 if (GET_CODE (tmp) != CONST_INT)
b446e5a2 4772 return 0;
e075ae69
RH
4773 scale = INTVAL (tmp);
4774 if ((unsigned HOST_WIDE_INT) scale > 3)
b446e5a2 4775 return 0;
e075ae69 4776 scale = 1 << scale;
b446e5a2 4777 retval = -1;
2a2ab3f9 4778 }
2a2ab3f9 4779 else
e075ae69
RH
4780 disp = addr; /* displacement */
4781
4782 /* Extract the integral value of scale. */
4783 if (scale_rtx)
e9a25f70 4784 {
e075ae69 4785 if (GET_CODE (scale_rtx) != CONST_INT)
b446e5a2 4786 return 0;
e075ae69 4787 scale = INTVAL (scale_rtx);
e9a25f70 4788 }
3b3c6a3f 4789
e075ae69
RH
4790 /* Allow arg pointer and stack pointer as index if there is no scaling. */
4791 if (base && index && scale == 1
564d80f4
JH
4792 && (index == arg_pointer_rtx || index == frame_pointer_rtx
4793 || index == stack_pointer_rtx))
e075ae69
RH
4794 {
4795 rtx tmp = base;
4796 base = index;
4797 index = tmp;
4798 }
4799
4800 /* Special case: %ebp cannot be encoded as a base without a displacement. */
564d80f4
JH
4801 if ((base == hard_frame_pointer_rtx
4802 || base == frame_pointer_rtx
4803 || base == arg_pointer_rtx) && !disp)
e075ae69
RH
4804 disp = const0_rtx;
4805
4806 /* Special case: on K6, [%esi] makes the instruction vector decoded.
4807 Avoid this by transforming to [%esi+0]. */
4808 if (ix86_cpu == PROCESSOR_K6 && !optimize_size
4809 && base && !index && !disp
329e1d01 4810 && REG_P (base)
e075ae69
RH
4811 && REGNO_REG_CLASS (REGNO (base)) == SIREG)
4812 disp = const0_rtx;
4813
4814 /* Special case: encode reg+reg instead of reg*2. */
4815 if (!base && index && scale && scale == 2)
4816 base = index, scale = 1;
0f290768 4817
e075ae69
RH
4818 /* Special case: scaling cannot be encoded without base or displacement. */
4819 if (!base && !disp && index && scale != 1)
4820 disp = const0_rtx;
4821
4822 out->base = base;
4823 out->index = index;
4824 out->disp = disp;
4825 out->scale = scale;
3b3c6a3f 4826
b446e5a2 4827 return retval;
e075ae69 4828}
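A worked example of the decomposition (sketch): the address "8(%ebx,%ecx,4)" arrives as

	(plus:SI (plus:SI (mult:SI (reg:SI %ecx) (const_int 4))
			  (reg:SI %ebx))
		 (const_int 8))

and the code0 == PLUS, XEXP (op0, 0) == MULT case above fills in index = %ecx, scale = 4, base = %ebx, disp = 8.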
01329426
JH
4829\f
4830/* Return cost of the memory address x.
4831 For i386, it is better to use a complex address than let gcc copy
4832 the address into a reg and make a new pseudo. But not if the address
4833 requires two regs - that would mean more pseudos with longer
4834 lifetimes. */
4835int
4836ix86_address_cost (x)
4837 rtx x;
4838{
4839 struct ix86_address parts;
4840 int cost = 1;
3b3c6a3f 4841
01329426
JH
4842 if (!ix86_decompose_address (x, &parts))
4843 abort ();
4844
1540f9eb
JH
4845 if (parts.base && GET_CODE (parts.base) == SUBREG)
4846 parts.base = SUBREG_REG (parts.base);
4847 if (parts.index && GET_CODE (parts.index) == SUBREG)
4848 parts.index = SUBREG_REG (parts.index);
4849
01329426
JH
4850 /* More complex memory references are better. */
4851 if (parts.disp && parts.disp != const0_rtx)
4852 cost--;
4853
4854 /* Attempt to minimize number of registers in the address. */
4855 if ((parts.base
4856 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
4857 || (parts.index
4858 && (!REG_P (parts.index)
4859 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
4860 cost++;
4861
4862 if (parts.base
4863 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
4864 && parts.index
4865 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
4866 && parts.base != parts.index)
4867 cost++;
4868
4869 /* The AMD K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
4870 since its predecode logic can't detect the length of such instructions
4871 and decoding degenerates to the vector decoder. Increase cost of such
4872 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
0f290768 4873 to split such addresses or even refuse such addresses at all.
01329426
JH
4874
4875 The following addressing modes are affected:
4876 [base+scale*index]
4877 [scale*index+disp]
4878 [base+index]
0f290768 4879
01329426
JH
4880 The first and last cases may be avoidable by explicitly coding the zero in
4881 the memory address, but I don't have an AMD K6 machine handy to check this
4882 theory. */
4883
4884 if (TARGET_K6
4885 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
4886 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
4887 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
4888 cost += 10;
0f290768 4889
01329426
JH
4890 return cost;
4891}
4892\f
b949ea8b
JW
4893/* If X is a machine specific address (i.e. a symbol or label being
4894 referenced as a displacement from the GOT implemented using an
4895 UNSPEC), then return the base term. Otherwise return X. */
4896
4897rtx
4898ix86_find_base_term (x)
4899 rtx x;
4900{
4901 rtx term;
4902
6eb791fc
JH
4903 if (TARGET_64BIT)
4904 {
4905 if (GET_CODE (x) != CONST)
4906 return x;
4907 term = XEXP (x, 0);
4908 if (GET_CODE (term) == PLUS
4909 && (GET_CODE (XEXP (term, 1)) == CONST_INT
4910 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
4911 term = XEXP (term, 0);
4912 if (GET_CODE (term) != UNSPEC
8ee41eaf 4913 || XINT (term, 1) != UNSPEC_GOTPCREL)
6eb791fc
JH
4914 return x;
4915
4916 term = XVECEXP (term, 0, 0);
4917
4918 if (GET_CODE (term) != SYMBOL_REF
4919 && GET_CODE (term) != LABEL_REF)
4920 return x;
4921
4922 return term;
4923 }
4924
b949ea8b
JW
4925 if (GET_CODE (x) != PLUS
4926 || XEXP (x, 0) != pic_offset_table_rtx
4927 || GET_CODE (XEXP (x, 1)) != CONST)
4928 return x;
4929
4930 term = XEXP (XEXP (x, 1), 0);
4931
4932 if (GET_CODE (term) == PLUS && GET_CODE (XEXP (term, 1)) == CONST_INT)
4933 term = XEXP (term, 0);
4934
4935 if (GET_CODE (term) != UNSPEC
8ee41eaf 4936 || XINT (term, 1) != UNSPEC_GOTOFF)
b949ea8b
JW
4937 return x;
4938
4939 term = XVECEXP (term, 0, 0);
4940
4941 if (GET_CODE (term) != SYMBOL_REF
4942 && GET_CODE (term) != LABEL_REF)
4943 return x;
4944
4945 return term;
4946}
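/* Illustrative example (not from the original source): in 64-bit mode
   (const (plus (unspec [(symbol_ref "x")] UNSPEC_GOTPCREL) (const_int 8)))
   returns (symbol_ref "x") as the base term; anything that is not one of
   our GOT unspecs comes back unchanged. */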
4947\f
4948/* Determine if a given RTX is a valid constant. We already know this
4949 satisfies CONSTANT_P. */
4950
4951bool
4952legitimate_constant_p (x)
4953 rtx x;
4954{
4955 rtx inner;
4956
4957 switch (GET_CODE (x))
4958 {
4959 case SYMBOL_REF:
4960 /* TLS symbols are not constant. */
4961 if (tls_symbolic_operand (x, Pmode))
4962 return false;
4963 break;
4964
4965 case CONST:
4966 inner = XEXP (x, 0);
4967
4968 /* Offsets of TLS symbols are never valid.
4969 Discourage CSE from creating them. */
4970 if (GET_CODE (inner) == PLUS
4971 && tls_symbolic_operand (XEXP (inner, 0), Pmode))
4972 return false;
4973
4974 /* Only some unspecs are valid as "constants". */
4975 if (GET_CODE (inner) == UNSPEC)
4976 switch (XINT (inner, 1))
4977 {
4978 case UNSPEC_TPOFF:
4979 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
4980 default:
4981 return false;
4982 }
4983 break;
4984
4985 default:
4986 break;
4987 }
4988
4989 /* Otherwise we handle everything else in the move patterns. */
4990 return true;
4991}
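/* A few worked cases (illustrative): a plain (symbol_ref "foo") is
   accepted when "foo" is not thread-local; a TLS symbol is rejected in
   the SYMBOL_REF case; and (const (plus (symbol_ref <tls>) (const_int 4)))
   is rejected so that CSE is discouraged from building TLS offsets. */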
4992
4993/* Determine if a given RTX is a valid constant address. */
4994
4995bool
4996constant_address_p (x)
4997 rtx x;
4998{
4999 switch (GET_CODE (x))
5000 {
5001 case LABEL_REF:
5002 case CONST_INT:
5003 return true;
5004
5005 case CONST_DOUBLE:
5006 return TARGET_64BIT;
5007
5008 case CONST:
5009 /* For Mach-O, really believe the CONST. */
5010 if (TARGET_MACHO)
5011 return true;
5012 /* Otherwise fall through. */
5013 case SYMBOL_REF:
5014 return !flag_pic && legitimate_constant_p (x);
5015
5016 default:
5017 return false;
5018 }
5019}
5020
5021/* Nonzero if the constant value X is a legitimate general operand
5022 when generating PIC code. It is given that flag_pic is on and
5023 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
5024
5025bool
5026legitimate_pic_operand_p (x)
5027 rtx x;
5028{
5029 rtx inner;
5030
5031 switch (GET_CODE (x))
5032 {
5033 case CONST:
5034 inner = XEXP (x, 0);
5035
5036 /* Only some unspecs are valid as "constants". */
5037 if (GET_CODE (inner) == UNSPEC)
5038 switch (XINT (inner, 1))
5039 {
5040 case UNSPEC_TPOFF:
5041 return local_exec_symbolic_operand (XVECEXP (inner, 0, 0), Pmode);
5042 default:
5043 return false;
5044 }
5045 /* FALLTHRU */
5046
5047 case SYMBOL_REF:
5048 case LABEL_REF:
5049 return legitimate_pic_address_disp_p (x);
5050
5051 default:
5052 return true;
5053 }
5054}
5055
5056/* Determine if a given CONST RTX is a valid memory displacement
5057 in PIC mode. */
5058
5059int
5060legitimate_pic_address_disp_p (disp)
5061 register rtx disp;
5062{
5063 bool saw_plus;
5064
5065 /* In 64bit mode we can allow direct addresses of symbols and labels
5066 when they are not dynamic symbols. */
5067 if (TARGET_64BIT)
5068 {
5069 rtx x = disp;
5070 if (GET_CODE (disp) == CONST)
5071 x = XEXP (disp, 0);
5072 /* ??? Handle PIC code models */
5073 if (GET_CODE (x) == PLUS
5074 && (GET_CODE (XEXP (x, 1)) == CONST_INT
5075 && ix86_cmodel == CM_SMALL_PIC
5076 && INTVAL (XEXP (x, 1)) < 1024*1024*1024
5077 && INTVAL (XEXP (x, 1)) > -1024*1024*1024))
5078 x = XEXP (x, 0);
5079 if (local_symbolic_operand (x, Pmode))
5080 return 1;
5081 }
5082 if (GET_CODE (disp) != CONST)
5083 return 0;
5084 disp = XEXP (disp, 0);
5085
5086 if (TARGET_64BIT)
5087 {
5088 /* It is unsafe to allow PLUS expressions here; that would exceed the
5089 allowed distance of GOT tables. We should not need these anyway. */
5090 if (GET_CODE (disp) != UNSPEC
5091 || XINT (disp, 1) != UNSPEC_GOTPCREL)
5092 return 0;
5093
5094 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
5095 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
5096 return 0;
5097 return 1;
5098 }
5099
5100 saw_plus = false;
5101 if (GET_CODE (disp) == PLUS)
5102 {
5103 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
5104 return 0;
5105 disp = XEXP (disp, 0);
5106 saw_plus = true;
5107 }
5108
5109 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O. */
5110 if (TARGET_MACHO && GET_CODE (disp) == MINUS)
5111 {
5112 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
5113 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
5114 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
5115 {
5116 const char *sym_name = XSTR (XEXP (disp, 1), 0);
5117 if (strstr (sym_name, "$pb") != 0)
5118 return 1;
5119 }
5120 }
5121
5122 if (GET_CODE (disp) != UNSPEC)
5123 return 0;
5124
5125 switch (XINT (disp, 1))
5126 {
5127 case UNSPEC_GOT:
5128 if (saw_plus)
5129 return false;
5130 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
5131 case UNSPEC_GOTOFF:
5132 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5133 case UNSPEC_GOTTPOFF:
5134 case UNSPEC_GOTNTPOFF:
5135 case UNSPEC_INDNTPOFF:
5136 if (saw_plus)
5137 return false;
5138 return initial_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5139 case UNSPEC_NTPOFF:
5140 return local_exec_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5141 case UNSPEC_DTPOFF:
5142 return local_dynamic_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
5143 }
5144
5145 return 0;
5146}
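/* Sketch of the accepted shapes (illustrative): in 32-bit PIC code
   (const (unspec [(symbol_ref "x")] UNSPEC_GOTOFF)) is valid for a local
   "x", while (const (plus (unspec [...] UNSPEC_GOT) (const_int 4))) is
   rejected because saw_plus is set when the UNSPEC_GOT case is reached. */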
5147
5148/* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
5149 memory address for an instruction. The MODE argument is the machine mode
5150 for the MEM expression that wants to use this address.
5151
5152 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
5153 convert common non-canonical forms to canonical form so that they will
5154 be recognized. */
5155
5156int
5157legitimate_address_p (mode, addr, strict)
5158 enum machine_mode mode;
5159 register rtx addr;
5160 int strict;
5161{
5162 struct ix86_address parts;
5163 rtx base, index, disp;
5164 HOST_WIDE_INT scale;
5165 const char *reason = NULL;
5166 rtx reason_rtx = NULL_RTX;
5167
5168 if (TARGET_DEBUG_ADDR)
5169 {
5170 fprintf (stderr,
5171 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
5172 GET_MODE_NAME (mode), strict);
5173 debug_rtx (addr);
5174 }
5175
5176 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
5177 {
5178 if (TARGET_DEBUG_ADDR)
5179 fprintf (stderr, "Success.\n");
5180 return TRUE;
5181 }
5182
5183 if (ix86_decompose_address (addr, &parts) <= 0)
5184 {
5185 reason = "decomposition failed";
5186 goto report_error;
5187 }
5188
5189 base = parts.base;
5190 index = parts.index;
5191 disp = parts.disp;
5192 scale = parts.scale;
5193
5194 /* Validate base register.
5195
5196 Don't allow SUBREGs here; that can lead to spill failures when the base
5197 is one word out of a two word structure, which is represented internally
5198 as a DImode int. */
5199
5200 if (base)
5201 {
5202 rtx reg;
5203 reason_rtx = base;
5204
5205 if (GET_CODE (base) == SUBREG)
5206 reg = SUBREG_REG (base);
5207 else
5208 reg = base;
5209
5210 if (GET_CODE (reg) != REG)
5211 {
5212 reason = "base is not a register";
5213 goto report_error;
5214 }
5215
5216 if (GET_MODE (base) != Pmode)
5217 {
5218 reason = "base is not in Pmode";
5219 goto report_error;
5220 }
5221
5222 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
5223 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
5224 {
5225 reason = "base is not valid";
5226 goto report_error;
5227 }
5228 }
5229
5230 /* Validate index register.
5231
5232 Don't allow SUBREGs here; that can lead to spill failures when the index
5233 is one word out of a two word structure, which is represented internally
5234 as a DImode int. */
5235
5236 if (index)
5237 {
5238 rtx reg;
5239 reason_rtx = index;
5240
5241 if (GET_CODE (index) == SUBREG)
5242 reg = SUBREG_REG (index);
5243 else
5244 reg = index;
5245
5246 if (GET_CODE (reg) != REG)
5247 {
5248 reason = "index is not a register";
5249 goto report_error;
5250 }
5251
5252 if (GET_MODE (index) != Pmode)
5253 {
5254 reason = "index is not in Pmode";
5255 goto report_error;
5256 }
5257
5258 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
5259 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
5260 {
5261 reason = "index is not valid";
5262 goto report_error;
5263 }
5264 }
5265
5266 /* Validate scale factor. */
5267 if (scale != 1)
5268 {
5269 reason_rtx = GEN_INT (scale);
5270 if (!index)
5271 {
5272 reason = "scale without index";
5273 goto report_error;
5274 }
5275
5276 if (scale != 2 && scale != 4 && scale != 8)
5277 {
5278 reason = "scale is not a valid multiplier";
5279 goto report_error;
5280 }
5281 }
5282
5283 /* Validate displacement. */
5284 if (disp)
5285 {
5286 reason_rtx = disp;
5287
5288 if (TARGET_64BIT)
5289 {
5290 if (!x86_64_sign_extended_value (disp))
5291 {
5292 reason = "displacement is out of range";
5293 goto report_error;
5294 }
5295 }
5296 else
5297 {
5298 if (GET_CODE (disp) == CONST_DOUBLE)
5299 {
5300 reason = "displacement is a const_double";
5301 goto report_error;
5302 }
5303 }
5304
5305 if (GET_CODE (disp) == CONST
5306 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
5307 switch (XINT (XEXP (disp, 0), 1))
5308 {
5309 case UNSPEC_GOT:
5310 case UNSPEC_GOTOFF:
5311 case UNSPEC_GOTPCREL:
5312 if (!flag_pic)
5313 abort ();
5314 goto is_legitimate_pic;
5315
5316 case UNSPEC_GOTTPOFF:
5317 case UNSPEC_GOTNTPOFF:
5318 case UNSPEC_INDNTPOFF:
5319 case UNSPEC_NTPOFF:
5320 case UNSPEC_DTPOFF:
5321 break;
5322
5323 default:
5324 reason = "invalid address unspec";
5325 goto report_error;
5326 }
5327
5328 else if (flag_pic && (SYMBOLIC_CONST (disp)
5329#if TARGET_MACHO
5330 && !machopic_operand_p (disp)
5331#endif
5332 ))
5333 {
5334 is_legitimate_pic:
5335 if (TARGET_64BIT && (index || base))
5336 {
5337 reason = "non-constant pic memory reference";
5338 goto report_error;
5339 }
5340 if (! legitimate_pic_address_disp_p (disp))
5341 {
5342 reason = "displacement is an invalid pic construct";
5343 goto report_error;
5344 }
5345
5346 /* This code used to verify that a symbolic pic displacement
5347 includes the pic_offset_table_rtx register.
5348
5349 While this is a good idea, unfortunately these constructs may
5350 be created by the "adds using lea" optimization for incorrect
5351 code like:
5352
5353 int a;
5354 int foo(int i)
5355 {
5356 return *(&a+i);
5357 }
5358
5359 This code is nonsensical, but results in addressing the
5360 GOT table with a pic_offset_table_rtx base. We can't
5361 just refuse it easily, since it gets matched by the
5362 "addsi3" pattern, which later gets split to an lea when
5363 the output register differs from the input. While this
5364 could be handled by a separate addsi pattern for this case
5365 that never results in an lea, disabling this test seems to
5366 be the easier and correct fix for the crash. */
5367 }
5368 else if (!CONSTANT_ADDRESS_P (disp))
5369 {
5370 reason = "displacement is not constant";
5371 goto report_error;
5372 }
5373 }
5374
5375 /* Everything looks valid. */
5376 if (TARGET_DEBUG_ADDR)
5377 fprintf (stderr, "Success.\n");
5378 return TRUE;
5379
5380 report_error:
5381 if (TARGET_DEBUG_ADDR)
5382 {
5383 fprintf (stderr, "Error: %s\n", reason);
5384 debug_rtx (reason_rtx);
5385 }
5386 return FALSE;
5387}
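/* Illustrative examples (not from the original source): the canonical
   (plus (reg) (mult (reg) (const_int 4))) form, with or without a valid
   constant displacement, passes the checks above, while a scale of 3, a
   scale without an index, or (in 32-bit mode) a CONST_DOUBLE displacement
   all end up in report_error. */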
5388\f
5389/* Return a unique alias set for the GOT. */
5390
5391static HOST_WIDE_INT
5392ix86_GOT_alias_set ()
5393{
5394 static HOST_WIDE_INT set = -1;
5395 if (set == -1)
5396 set = new_alias_set ();
5397 return set;
5398}
5399
5400/* Return a legitimate reference for ORIG (an address) using the
5401 register REG. If REG is 0, a new pseudo is generated.
5402
91bb873f 5403 There are two types of references that must be handled:
3b3c6a3f
MM
5404
5405 1. Global data references must load the address from the GOT, via
5406 the PIC reg. An insn is emitted to do this load, and the reg is
5407 returned.
5408
5409 2. Static data references, constant pool addresses, and code labels
5410 compute the address as an offset from the GOT, whose base is in
5411 the PIC reg. Static data objects have SYMBOL_REF_FLAG set to
5412 differentiate them from global data objects. The returned
5413 address is the PIC reg + an unspec constant.
5414
5415 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
5416 reg also appears in the address. */
5417
5418rtx
5419legitimize_pic_address (orig, reg)
5420 rtx orig;
5421 rtx reg;
5422{
5423 rtx addr = orig;
5424 rtx new = orig;
5425 rtx base;
5426
5427#if TARGET_MACHO
5428 if (reg == 0)
5429 reg = gen_reg_rtx (Pmode);
5430 /* Use the generic Mach-O PIC machinery. */
5431 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
5432#endif
5433
5434 if (local_symbolic_operand (addr, Pmode))
5435 {
5436 /* In 64bit mode we can address such objects directly. */
5437 if (TARGET_64BIT)
5438 new = addr;
5439 else
5440 {
5441 /* This symbol may be referenced via a displacement from the PIC
5442 base address (@GOTOFF). */
5443
5444 if (reload_in_progress)
5445 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5446 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
5447 new = gen_rtx_CONST (Pmode, new);
5448 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5449
5450 if (reg != 0)
5451 {
5452 emit_move_insn (reg, new);
5453 new = reg;
5454 }
5455 }
5456 }
5457 else if (GET_CODE (addr) == SYMBOL_REF)
5458 {
5459 if (TARGET_64BIT)
5460 {
5461 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
5462 new = gen_rtx_CONST (Pmode, new);
5463 new = gen_rtx_MEM (Pmode, new);
5464 RTX_UNCHANGING_P (new) = 1;
5465 set_mem_alias_set (new, ix86_GOT_alias_set ());
5466
5467 if (reg == 0)
5468 reg = gen_reg_rtx (Pmode);
5469 /* Use directly gen_movsi, otherwise the address is loaded
5470 into register for CSE. We don't want to CSE this addresses,
5471 instead we CSE addresses from the GOT table, so skip this. */
5472 emit_insn (gen_movsi (reg, new));
5473 new = reg;
5474 }
5475 else
5476 {
5477 /* This symbol must be referenced via a load from the
5478 Global Offset Table (@GOT). */
5479
5480 if (reload_in_progress)
5481 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
8ee41eaf 5482 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
14f73b5a
JH
5483 new = gen_rtx_CONST (Pmode, new);
5484 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
5485 new = gen_rtx_MEM (Pmode, new);
5486 RTX_UNCHANGING_P (new) = 1;
5487 set_mem_alias_set (new, ix86_GOT_alias_set ());
3b3c6a3f 5488
14f73b5a
JH
5489 if (reg == 0)
5490 reg = gen_reg_rtx (Pmode);
5491 emit_move_insn (reg, new);
5492 new = reg;
5493 }
0f290768 5494 }
91bb873f
RH
5495 else
5496 {
5497 if (GET_CODE (addr) == CONST)
3b3c6a3f 5498 {
91bb873f 5499 addr = XEXP (addr, 0);
e3c8ea67
RH
5500
5501 /* We must match stuff we generate before. Assume the only
5502 unspecs that can get here are ours. Not that we could do
5503 anything with them anyway... */
5504 if (GET_CODE (addr) == UNSPEC
5505 || (GET_CODE (addr) == PLUS
5506 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
5507 return orig;
5508 if (GET_CODE (addr) != PLUS)
564d80f4 5509 abort ();
3b3c6a3f 5510 }
91bb873f
RH
5511 if (GET_CODE (addr) == PLUS)
5512 {
5513 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
e9a25f70 5514
91bb873f
RH
5515 /* Check first to see if this is a constant offset from a @GOTOFF
5516 symbol reference. */
623fe810 5517 if (local_symbolic_operand (op0, Pmode)
91bb873f
RH
5518 && GET_CODE (op1) == CONST_INT)
5519 {
6eb791fc
JH
5520 if (!TARGET_64BIT)
5521 {
66edd3b4
RH
5522 if (reload_in_progress)
5523 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
8ee41eaf
RH
5524 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
5525 UNSPEC_GOTOFF);
6eb791fc
JH
5526 new = gen_rtx_PLUS (Pmode, new, op1);
5527 new = gen_rtx_CONST (Pmode, new);
5528 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
91bb873f 5529
6eb791fc
JH
5530 if (reg != 0)
5531 {
5532 emit_move_insn (reg, new);
5533 new = reg;
5534 }
5535 }
5536 else
91bb873f 5537 {
6eb791fc 5538 /* ??? We need to limit offsets here. */
91bb873f
RH
5539 }
5540 }
5541 else
5542 {
5543 base = legitimize_pic_address (XEXP (addr, 0), reg);
5544 new = legitimize_pic_address (XEXP (addr, 1),
5545 base == reg ? NULL_RTX : reg);
5546
5547 if (GET_CODE (new) == CONST_INT)
5548 new = plus_constant (base, INTVAL (new));
5549 else
5550 {
5551 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
5552 {
5553 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
5554 new = XEXP (new, 1);
5555 }
5556 new = gen_rtx_PLUS (Pmode, base, new);
5557 }
5558 }
5559 }
3b3c6a3f
MM
5560 }
5561 return new;
5562}
fb49053f 5563
fb49053f 5564static void
f996902d 5565ix86_encode_section_info (decl, first)
fb49053f
RH
5566 tree decl;
5567 int first ATTRIBUTE_UNUSED;
5568{
f996902d
RH
5569 bool local_p = (*targetm.binds_local_p) (decl);
5570 rtx rtl, symbol;
5571
5572 rtl = DECL_P (decl) ? DECL_RTL (decl) : TREE_CST_RTL (decl);
5573 if (GET_CODE (rtl) != MEM)
5574 return;
5575 symbol = XEXP (rtl, 0);
5576 if (GET_CODE (symbol) != SYMBOL_REF)
5577 return;
5578
5579 /* For basic x86, if using PIC, mark a SYMBOL_REF for a non-global
5580 symbol so that we may access it directly in the GOT. */
5581
5582 if (flag_pic)
5583 SYMBOL_REF_FLAG (symbol) = local_p;
5584
5585 /* For ELF, encode thread-local data with %[GLil] for "global dynamic",
5586 "local dynamic", "initial exec" or "local exec" TLS models
5587 respectively. */
5588
5589 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL (decl))
5590 {
5591 const char *symbol_str;
5592 char *newstr;
5593 size_t len;
5594 enum tls_model kind = decl_tls_model (decl);
5595
5596 symbol_str = XSTR (symbol, 0);
5597
5598 if (symbol_str[0] == '%')
5599 {
5600 if (symbol_str[1] == tls_model_chars[kind])
5601 return;
5602 symbol_str += 2;
5603 }
5604 len = strlen (symbol_str) + 1;
5605 newstr = alloca (len + 2);
5606
5607 newstr[0] = '%';
5608 newstr[1] = tls_model_chars[kind];
5609 memcpy (newstr + 2, symbol_str, len);
5610
5611 XSTR (symbol, 0) = ggc_alloc_string (newstr, len + 2 - 1);
5612 }
5613}
5614
5615/* Undo the above when printing symbol names. */
5616
5617static const char *
5618ix86_strip_name_encoding (str)
5619 const char *str;
5620{
5621 if (str[0] == '%')
5622 str += 2;
5623 if (str [0] == '*')
5624 str += 1;
5625 return str;
5626}
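/* Round-trip example (illustrative, assuming tls_model_chars maps the
   local-exec model to 'l' as the %[GLil] comment above indicates): a
   local-exec TLS variable "foo" is encoded as "%lfoo" by
   ix86_encode_section_info, and this function strips it back to "foo". */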
5627\f
5628/* Load the thread pointer into a register. */
5629
5630static rtx
5631get_thread_pointer ()
5632{
5633 rtx tp;
5634
5635 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
5636 tp = gen_rtx_MEM (Pmode, tp);
5637 RTX_UNCHANGING_P (tp) = 1;
5638 set_mem_alias_set (tp, ix86_GOT_alias_set ());
5639 tp = force_reg (Pmode, tp);
5640
5641 return tp;
5642}
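/* For reference: print_operand_address below renders this UNSPEC_TP
   memory as %gs:0, so the load emitted here reads the thread pointer
   through the %gs segment register. */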
5643
5644/* Try machine-dependent ways of modifying an illegitimate address
5645 to be legitimate. If we find one, return the new, valid address.
5646 This macro is used in only one place: `memory_address' in explow.c.
5647
5648 OLDX is the address as it was before break_out_memory_refs was called.
5649 In some cases it is useful to look at this to decide what needs to be done.
5650
5651 MODE and WIN are passed so that this macro can use
5652 GO_IF_LEGITIMATE_ADDRESS.
5653
5654 It is always safe for this macro to do nothing. It exists to recognize
5655 opportunities to optimize the output.
5656
5657 For the 80386, we handle X+REG by loading X into a register R and
5658 using R+REG. R will go in a general reg and indexing will be used.
5659 However, if REG is a broken-out memory address or multiplication,
5660 nothing needs to be done because REG can certainly go in a general reg.
5661
5662 When -fpic is used, special handling is needed for symbolic references.
5663 See comments by legitimize_pic_address in i386.c for details. */
5664
5665rtx
5666legitimize_address (x, oldx, mode)
5667 register rtx x;
bb5177ac 5668 register rtx oldx ATTRIBUTE_UNUSED;
3b3c6a3f
MM
5669 enum machine_mode mode;
5670{
5671 int changed = 0;
5672 unsigned log;
5673
5674 if (TARGET_DEBUG_ADDR)
5675 {
e9a25f70
JL
5676 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
5677 GET_MODE_NAME (mode));
3b3c6a3f
MM
5678 debug_rtx (x);
5679 }
5680
f996902d
RH
5681 log = tls_symbolic_operand (x, mode);
5682 if (log)
5683 {
5684 rtx dest, base, off, pic;
5685
755ac5d4 5686 switch (log)
f996902d
RH
5687 {
5688 case TLS_MODEL_GLOBAL_DYNAMIC:
5689 dest = gen_reg_rtx (Pmode);
5690 emit_insn (gen_tls_global_dynamic (dest, x));
5691 break;
5692
5693 case TLS_MODEL_LOCAL_DYNAMIC:
5694 base = gen_reg_rtx (Pmode);
5695 emit_insn (gen_tls_local_dynamic_base (base));
5696
5697 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
5698 off = gen_rtx_CONST (Pmode, off);
5699
5700 return gen_rtx_PLUS (Pmode, base, off);
5701
5702 case TLS_MODEL_INITIAL_EXEC:
5703 if (flag_pic)
5704 {
66edd3b4
RH
5705 if (reload_in_progress)
5706 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
f996902d
RH
5707 pic = pic_offset_table_rtx;
5708 }
dea73790 5709 else if (!TARGET_GNU_TLS)
f996902d
RH
5710 {
5711 pic = gen_reg_rtx (Pmode);
5712 emit_insn (gen_set_got (pic));
5713 }
dea73790
JJ
5714 else
5715 pic = NULL;
f996902d
RH
5716
5717 base = get_thread_pointer ();
5718
dea73790
JJ
5719 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
5720 !TARGET_GNU_TLS
5721 ? UNSPEC_GOTTPOFF
5722 : flag_pic ? UNSPEC_GOTNTPOFF
5723 : UNSPEC_INDNTPOFF);
f996902d 5724 off = gen_rtx_CONST (Pmode, off);
dea73790
JJ
5725 if (flag_pic || !TARGET_GNU_TLS)
5726 off = gen_rtx_PLUS (Pmode, pic, off);
f996902d
RH
5727 off = gen_rtx_MEM (Pmode, off);
5728 RTX_UNCHANGING_P (off) = 1;
5729 set_mem_alias_set (off, ix86_GOT_alias_set ());
f996902d 5730 dest = gen_reg_rtx (Pmode);
dea73790
JJ
5731
5732 if (TARGET_GNU_TLS)
5733 {
5734 emit_move_insn (dest, off);
5735 return gen_rtx_PLUS (Pmode, base, dest);
5736 }
5737 else
5738 emit_insn (gen_subsi3 (dest, base, off));
f996902d
RH
5739 break;
5740
5741 case TLS_MODEL_LOCAL_EXEC:
5742 base = get_thread_pointer ();
5743
5744 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
5745 TARGET_GNU_TLS ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
5746 off = gen_rtx_CONST (Pmode, off);
5747
5748 if (TARGET_GNU_TLS)
5749 return gen_rtx_PLUS (Pmode, base, off);
5750 else
5751 {
5752 dest = gen_reg_rtx (Pmode);
5753 emit_insn (gen_subsi3 (dest, base, off));
5754 }
5755 break;
5756
5757 default:
5758 abort ();
5759 }
5760
5761 return dest;
5762 }
5763
5764 if (flag_pic && SYMBOLIC_CONST (x))
5765 return legitimize_pic_address (x, 0);
5766
5767 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
5768 if (GET_CODE (x) == ASHIFT
5769 && GET_CODE (XEXP (x, 1)) == CONST_INT
5770 && (log = (unsigned) exact_log2 (INTVAL (XEXP (x, 1)))) < 4)
5771 {
5772 changed = 1;
5773 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
5774 GEN_INT (1 << log));
5775 }
5776
5777 if (GET_CODE (x) == PLUS)
5778 {
5779 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
5780
5781 if (GET_CODE (XEXP (x, 0)) == ASHIFT
5782 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
5783 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 0), 1)))) < 4)
5784 {
5785 changed = 1;
5786 XEXP (x, 0) = gen_rtx_MULT (Pmode,
5787 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
5788 GEN_INT (1 << log));
5789 }
5790
5791 if (GET_CODE (XEXP (x, 1)) == ASHIFT
5792 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
5793 && (log = (unsigned) exact_log2 (INTVAL (XEXP (XEXP (x, 1), 1)))) < 4)
5794 {
5795 changed = 1;
5796 XEXP (x, 1) = gen_rtx_MULT (Pmode,
5797 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
5798 GEN_INT (1 << log));
5799 }
5800
5801 /* Put multiply first if it isn't already. */
5802 if (GET_CODE (XEXP (x, 1)) == MULT)
5803 {
5804 rtx tmp = XEXP (x, 0);
5805 XEXP (x, 0) = XEXP (x, 1);
5806 XEXP (x, 1) = tmp;
5807 changed = 1;
5808 }
5809
5810 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
5811 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
5812 created by virtual register instantiation, register elimination, and
5813 similar optimizations. */
5814 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
5815 {
5816 changed = 1;
5817 x = gen_rtx_PLUS (Pmode,
5818 gen_rtx_PLUS (Pmode, XEXP (x, 0),
5819 XEXP (XEXP (x, 1), 0)),
5820 XEXP (XEXP (x, 1), 1));
5821 }
5822
5823 /* Canonicalize
5824 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
5825 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
5826 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
5827 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
5828 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
5829 && CONSTANT_P (XEXP (x, 1)))
5830 {
5831 rtx constant;
5832 rtx other = NULL_RTX;
5833
5834 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5835 {
5836 constant = XEXP (x, 1);
5837 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
5838 }
5839 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
5840 {
5841 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
5842 other = XEXP (x, 1);
5843 }
5844 else
5845 constant = 0;
5846
5847 if (constant)
5848 {
5849 changed = 1;
5850 x = gen_rtx_PLUS (Pmode,
5851 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
5852 XEXP (XEXP (XEXP (x, 0), 1), 0)),
5853 plus_constant (other, INTVAL (constant)));
5854 }
5855 }
5856
5857 if (changed && legitimate_address_p (mode, x, FALSE))
5858 return x;
5859
5860 if (GET_CODE (XEXP (x, 0)) == MULT)
5861 {
5862 changed = 1;
5863 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
5864 }
5865
5866 if (GET_CODE (XEXP (x, 1)) == MULT)
5867 {
5868 changed = 1;
5869 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
5870 }
5871
5872 if (changed
5873 && GET_CODE (XEXP (x, 1)) == REG
5874 && GET_CODE (XEXP (x, 0)) == REG)
5875 return x;
5876
5877 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
5878 {
5879 changed = 1;
5880 x = legitimize_pic_address (x, 0);
5881 }
5882
5883 if (changed && legitimate_address_p (mode, x, FALSE))
5884 return x;
5885
5886 if (GET_CODE (XEXP (x, 0)) == REG)
5887 {
5888 register rtx temp = gen_reg_rtx (Pmode);
5889 register rtx val = force_operand (XEXP (x, 1), temp);
5890 if (val != temp)
5891 emit_move_insn (temp, val);
5892
5893 XEXP (x, 1) = temp;
5894 return x;
5895 }
5896
5897 else if (GET_CODE (XEXP (x, 1)) == REG)
5898 {
5899 register rtx temp = gen_reg_rtx (Pmode);
5900 register rtx val = force_operand (XEXP (x, 0), temp);
5901 if (val != temp)
5902 emit_move_insn (temp, val);
5903
5904 XEXP (x, 0) = temp;
5905 return x;
5906 }
5907 }
5908
5909 return x;
5910}
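/* A small worked example of the canonicalizations above (illustrative):
   (plus (ashift (reg) (const_int 2)) (reg)) is first rewritten into
   (plus (mult (reg) (const_int 4)) (reg)), which matches the
   base + index*scale form that legitimate_address_p accepts. */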
5911\f
5912/* Print an integer constant expression in assembler syntax. Addition
5913 and subtraction are the only arithmetic that may appear in these
5914 expressions. FILE is the stdio stream to write to, X is the rtx, and
5915 CODE is the operand print code from the output string. */
5916
5917static void
5918output_pic_addr_const (file, x, code)
5919 FILE *file;
5920 rtx x;
5921 int code;
5922{
5923 char buf[256];
5924
5925 switch (GET_CODE (x))
5926 {
5927 case PC:
5928 if (flag_pic)
5929 putc ('.', file);
5930 else
5931 abort ();
5932 break;
5933
5934 case SYMBOL_REF:
5935 assemble_name (file, XSTR (x, 0));
5936 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_FLAG (x))
5937 fputs ("@PLT", file);
5938 break;
5939
5940 case LABEL_REF:
5941 x = XEXP (x, 0);
5942 /* FALLTHRU */
5943 case CODE_LABEL:
5944 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
5945 assemble_name (asm_out_file, buf);
5946 break;
5947
5948 case CONST_INT:
5949 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
5950 break;
5951
5952 case CONST:
5953 /* This used to output parentheses around the expression,
5954 but that does not work on the 386 (either ATT or BSD assembler). */
5955 output_pic_addr_const (file, XEXP (x, 0), code);
5956 break;
5957
5958 case CONST_DOUBLE:
5959 if (GET_MODE (x) == VOIDmode)
5960 {
5961 /* We can use %d if the number is <32 bits and positive. */
5962 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
5963 fprintf (file, "0x%lx%08lx",
5964 (unsigned long) CONST_DOUBLE_HIGH (x),
5965 (unsigned long) CONST_DOUBLE_LOW (x));
5966 else
5967 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
5968 }
5969 else
5970 /* We can't handle floating point constants;
5971 PRINT_OPERAND must handle them. */
5972 output_operand_lossage ("floating constant misused");
5973 break;
5974
5975 case PLUS:
5976 /* Some assemblers need integer constants to appear first. */
5977 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
5978 {
5979 output_pic_addr_const (file, XEXP (x, 0), code);
5980 putc ('+', file);
5981 output_pic_addr_const (file, XEXP (x, 1), code);
5982 }
5983 else if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5984 {
5985 output_pic_addr_const (file, XEXP (x, 1), code);
5986 putc ('+', file);
5987 output_pic_addr_const (file, XEXP (x, 0), code);
5988 }
5989 else
5990 abort ();
5991 break;
5992
5993 case MINUS:
5994 if (!TARGET_MACHO)
5995 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
5996 output_pic_addr_const (file, XEXP (x, 0), code);
5997 putc ('-', file);
5998 output_pic_addr_const (file, XEXP (x, 1), code);
5999 if (!TARGET_MACHO)
6000 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
6001 break;
6002
6003 case UNSPEC:
6004 if (XVECLEN (x, 0) != 1)
6005 abort ();
6006 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
6007 switch (XINT (x, 1))
6008 {
6009 case UNSPEC_GOT:
6010 fputs ("@GOT", file);
6011 break;
6012 case UNSPEC_GOTOFF:
6013 fputs ("@GOTOFF", file);
6014 break;
6015 case UNSPEC_GOTPCREL:
6016 fputs ("@GOTPCREL(%rip)", file);
6017 break;
6018 case UNSPEC_GOTTPOFF:
6019 /* FIXME: This might be @TPOFF in Sun ld too. */
6020 fputs ("@GOTTPOFF", file);
6021 break;
6022 case UNSPEC_TPOFF:
6023 fputs ("@TPOFF", file);
6024 break;
6025 case UNSPEC_NTPOFF:
6026 fputs ("@NTPOFF", file);
6027 break;
6028 case UNSPEC_DTPOFF:
6029 fputs ("@DTPOFF", file);
6030 break;
6031 case UNSPEC_GOTNTPOFF:
6032 fputs ("@GOTNTPOFF", file);
6033 break;
6034 case UNSPEC_INDNTPOFF:
6035 fputs ("@INDNTPOFF", file);
6036 break;
6037 default:
6038 output_operand_lossage ("invalid UNSPEC as operand");
6039 break;
6040 }
6041 break;
6042
6043 default:
6044 output_operand_lossage ("invalid expression as operand");
6045 }
6046}
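/* Example outputs (illustrative): (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)
   prints as "foo@GOTOFF", and in 64-bit PIC code an UNSPEC_GOTPCREL
   reference prints as "foo@GOTPCREL(%rip)", per the cases above. */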
6047
6048/* This is called from dwarfout.c via ASM_OUTPUT_DWARF_ADDR_CONST.
6049 We need to handle our special PIC relocations. */
6050
6051void
6052i386_dwarf_output_addr_const (file, x)
6053 FILE *file;
6054 rtx x;
6055{
6056#ifdef ASM_QUAD
6057 fprintf (file, "%s", TARGET_64BIT ? ASM_QUAD : ASM_LONG);
6058#else
6059 if (TARGET_64BIT)
6060 abort ();
6061 fprintf (file, "%s", ASM_LONG);
6062#endif
6063 if (flag_pic)
6064 output_pic_addr_const (file, x, '\0');
6065 else
6066 output_addr_const (file, x);
6067 fputc ('\n', file);
6068}
6069
6070/* This is called from dwarf2out.c via ASM_OUTPUT_DWARF_DTPREL.
6071 We need to emit DTP-relative relocations. */
6072
6073void
6074i386_output_dwarf_dtprel (file, size, x)
6075 FILE *file;
6076 int size;
6077 rtx x;
6078{
6079 switch (size)
6080 {
6081 case 4:
6082 fputs (ASM_LONG, file);
6083 break;
6084 case 8:
6085#ifdef ASM_QUAD
6086 fputs (ASM_QUAD, file);
6087 break;
6088#endif
6089 default:
6090 abort ();
6091 }
6092
6093 output_addr_const (file, x);
6094 fputs ("@DTPOFF", file);
6095}
6096
6097/* In the name of slightly smaller debug output, and to cater to
6098 general assembler lossage, recognize PIC+GOTOFF and turn it back
6099 into a direct symbol reference. */
6100
6101rtx
6102i386_simplify_dwarf_addr (orig_x)
6103 rtx orig_x;
6104{
6105 rtx x = orig_x, y;
6106
6107 if (GET_CODE (x) == MEM)
6108 x = XEXP (x, 0);
6109
6110 if (TARGET_64BIT)
6111 {
6112 if (GET_CODE (x) != CONST
6113 || GET_CODE (XEXP (x, 0)) != UNSPEC
6114 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
6115 || GET_CODE (orig_x) != MEM)
6116 return orig_x;
6117 return XVECEXP (XEXP (x, 0), 0, 0);
6118 }
6119
6120 if (GET_CODE (x) != PLUS
6121 || GET_CODE (XEXP (x, 1)) != CONST)
6122 return orig_x;
6123
6124 if (GET_CODE (XEXP (x, 0)) == REG
6125 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
6126 /* %ebx + GOT/GOTOFF */
6127 y = NULL;
6128 else if (GET_CODE (XEXP (x, 0)) == PLUS)
6129 {
6130 /* %ebx + %reg * scale + GOT/GOTOFF */
6131 y = XEXP (x, 0);
6132 if (GET_CODE (XEXP (y, 0)) == REG
6133 && REGNO (XEXP (y, 0)) == PIC_OFFSET_TABLE_REGNUM)
6134 y = XEXP (y, 1);
6135 else if (GET_CODE (XEXP (y, 1)) == REG
6136 && REGNO (XEXP (y, 1)) == PIC_OFFSET_TABLE_REGNUM)
6137 y = XEXP (y, 0);
6138 else
6139 return orig_x;
6140 if (GET_CODE (y) != REG
6141 && GET_CODE (y) != MULT
6142 && GET_CODE (y) != ASHIFT)
6143 return orig_x;
6144 }
6145 else
6146 return orig_x;
6147
6148 x = XEXP (XEXP (x, 1), 0);
6149 if (GET_CODE (x) == UNSPEC
6150 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6151 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
6152 {
6153 if (y)
6154 return gen_rtx_PLUS (Pmode, y, XVECEXP (x, 0, 0));
6155 return XVECEXP (x, 0, 0);
6156 }
6157
6158 if (GET_CODE (x) == PLUS
6159 && GET_CODE (XEXP (x, 0)) == UNSPEC
6160 && GET_CODE (XEXP (x, 1)) == CONST_INT
6161 && ((XINT (XEXP (x, 0), 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
6162 || (XINT (XEXP (x, 0), 1) == UNSPEC_GOTOFF
6163 && GET_CODE (orig_x) != MEM)))
6164 {
6165 x = gen_rtx_PLUS (VOIDmode, XVECEXP (XEXP (x, 0), 0, 0), XEXP (x, 1));
6166 if (y)
6167 return gen_rtx_PLUS (Pmode, y, x);
6168 return x;
6169 }
6170
6171 return orig_x;
6172}
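/* Illustrative simplification (not from the original source): a GOT load
   such as (mem (plus (reg %ebx) (const (unspec [(symbol_ref "x")]
   UNSPEC_GOT)))) collapses back to (symbol_ref "x") for debug output. */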
6173\f
6174static void
6175put_condition_code (code, mode, reverse, fp, file)
6176 enum rtx_code code;
6177 enum machine_mode mode;
6178 int reverse, fp;
6179 FILE *file;
6180{
6181 const char *suffix;
6182
6183 if (mode == CCFPmode || mode == CCFPUmode)
6184 {
6185 enum rtx_code second_code, bypass_code;
6186 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
6187 if (bypass_code != NIL || second_code != NIL)
6188 abort ();
6189 code = ix86_fp_compare_code_to_integer (code);
6190 mode = CCmode;
6191 }
6192 if (reverse)
6193 code = reverse_condition (code);
6194
6195 switch (code)
6196 {
6197 case EQ:
6198 suffix = "e";
6199 break;
6200 case NE:
6201 suffix = "ne";
6202 break;
6203 case GT:
6204 if (mode != CCmode && mode != CCNOmode && mode != CCGCmode)
6205 abort ();
6206 suffix = "g";
6207 break;
6208 case GTU:
6209 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
6210 Those same assemblers have the same but opposite lossage on cmov. */
6211 if (mode != CCmode)
6212 abort ();
6213 suffix = fp ? "nbe" : "a";
6214 break;
6215 case LT:
6216 if (mode == CCNOmode || mode == CCGOCmode)
6217 suffix = "s";
6218 else if (mode == CCmode || mode == CCGCmode)
6219 suffix = "l";
6220 else
6221 abort ();
6222 break;
6223 case LTU:
6224 if (mode != CCmode)
6225 abort ();
6226 suffix = "b";
6227 break;
6228 case GE:
6229 if (mode == CCNOmode || mode == CCGOCmode)
6230 suffix = "ns";
6231 else if (mode == CCmode || mode == CCGCmode)
6232 suffix = "ge";
6233 else
6234 abort ();
6235 break;
6236 case GEU:
6237 /* ??? As above. */
6238 if (mode != CCmode)
6239 abort ();
6240 suffix = fp ? "nb" : "ae";
6241 break;
6242 case LE:
6243 if (mode != CCmode && mode != CCGCmode && mode != CCNOmode)
6244 abort ();
6245 suffix = "le";
6246 break;
6247 case LEU:
6248 if (mode != CCmode)
6249 abort ();
6250 suffix = "be";
6251 break;
6252 case UNORDERED:
6253 suffix = fp ? "u" : "p";
6254 break;
6255 case ORDERED:
6256 suffix = fp ? "nu" : "np";
6257 break;
6258 default:
6259 abort ();
6260 }
6261 fputs (suffix, file);
6262}
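/* Examples of the mapping above (illustrative): EQ yields "e", GTU in
   CCmode yields "a" for integer tests but "nbe" when FP is set, and
   UNORDERED yields "p" (or "u" for FP), producing suffixes as in sete,
   seta and setp. */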
6263
6264void
6265print_reg (x, code, file)
6266 rtx x;
6267 int code;
6268 FILE *file;
6269{
6270 if (REGNO (x) == ARG_POINTER_REGNUM
6271 || REGNO (x) == FRAME_POINTER_REGNUM
6272 || REGNO (x) == FLAGS_REG
6273 || REGNO (x) == FPSR_REG)
6274 abort ();
6275
6276 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6277 putc ('%', file);
6278
6279 if (code == 'w' || MMX_REG_P (x))
6280 code = 2;
6281 else if (code == 'b')
6282 code = 1;
6283 else if (code == 'k')
6284 code = 4;
6285 else if (code == 'q')
6286 code = 8;
6287 else if (code == 'y')
6288 code = 3;
6289 else if (code == 'h')
6290 code = 0;
6291 else
6292 code = GET_MODE_SIZE (GET_MODE (x));
6293
6294 /* Irritatingly, AMD extended registers use a different naming convention
6295 from the normal registers. */
6296 if (REX_INT_REG_P (x))
6297 {
6298 if (!TARGET_64BIT)
6299 abort ();
6300 switch (code)
6301 {
6302 case 0:
6303 error ("extended registers have no high halves");
6304 break;
6305 case 1:
6306 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
6307 break;
6308 case 2:
6309 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
6310 break;
6311 case 4:
6312 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
6313 break;
6314 case 8:
6315 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
6316 break;
6317 default:
6318 error ("unsupported operand size for extended register");
6319 break;
6320 }
6321 return;
6322 }
6323 switch (code)
6324 {
6325 case 3:
6326 if (STACK_TOP_P (x))
6327 {
6328 fputs ("st(0)", file);
6329 break;
6330 }
6331 /* FALLTHRU */
6332 case 8:
6333 case 4:
6334 case 12:
6335 if (! ANY_FP_REG_P (x))
6336 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
6337 /* FALLTHRU */
6338 case 16:
6339 case 2:
6340 fputs (hi_reg_name[REGNO (x)], file);
6341 break;
6342 case 1:
6343 fputs (qi_reg_name[REGNO (x)], file);
6344 break;
6345 case 0:
6346 fputs (qi_high_reg_name[REGNO (x)], file);
6347 break;
6348 default:
6349 abort ();
6350 }
6351}
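/* For instance (illustrative): given the SImode register %eax, code 'w'
   prints "ax" and code 'b' prints "al", while an AMD extended register
   with code 'k' (size 4) prints names such as "r8d" via the REX branch
   above. */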
6352
6353/* Locate some local-dynamic symbol still in use by this function
6354 so that we can print its name in some tls_local_dynamic_base
6355 pattern. */
6356
6357static const char *
6358get_some_local_dynamic_name ()
6359{
6360 rtx insn;
6361
6362 if (cfun->machine->some_ld_name)
6363 return cfun->machine->some_ld_name;
6364
6365 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
6366 if (INSN_P (insn)
6367 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
6368 return cfun->machine->some_ld_name;
6369
6370 abort ();
6371}
6372
6373static int
6374get_some_local_dynamic_name_1 (px, data)
6375 rtx *px;
6376 void *data ATTRIBUTE_UNUSED;
6377{
6378 rtx x = *px;
6379
6380 if (GET_CODE (x) == SYMBOL_REF
6381 && local_dynamic_symbolic_operand (x, Pmode))
6382 {
6383 cfun->machine->some_ld_name = XSTR (x, 0);
6384 return 1;
6385 }
6386
6387 return 0;
6388}
6389
6390/* Meaning of CODE:
6391 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
6392 C -- print opcode suffix for set/cmov insn.
6393 c -- like C, but print reversed condition
6394 F,f -- likewise, but for floating-point.
6395 O -- if CMOV_SUN_AS_SYNTAX, expand to "w.", "l." or "q.", otherwise
6396 nothing
6397 R -- print the prefix for register names.
6398 z -- print the opcode suffix for the size of the current operand.
6399 * -- print a star (in certain assembler syntax)
6400 A -- print an absolute memory reference.
6401 w -- print the operand as if it's a "word" (HImode) even if it isn't.
6402 s -- print a shift double count, followed by the assembler's argument
6403 delimiter.
6404 b -- print the QImode name of the register for the indicated operand.
6405 %b0 would print %al if operands[0] is reg 0.
6406 w -- likewise, print the HImode name of the register.
6407 k -- likewise, print the SImode name of the register.
6408 q -- likewise, print the DImode name of the register.
6409 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
6410 y -- print "st(0)" instead of "st" as a register.
6411 D -- print condition for SSE cmp instruction.
6412 P -- if PIC, print an @PLT suffix.
6413 X -- don't print any sort of PIC '@' suffix for a symbol.
6414 & -- print some in-use local-dynamic symbol name.
6415 */
6416
6417void
6418print_operand (file, x, code)
6419 FILE *file;
6420 rtx x;
6421 int code;
6422{
6423 if (code)
6424 {
6425 switch (code)
6426 {
6427 case '*':
6428 if (ASSEMBLER_DIALECT == ASM_ATT)
6429 putc ('*', file);
6430 return;
6431
6432 case '&':
6433 assemble_name (file, get_some_local_dynamic_name ());
6434 return;
6435
6436 case 'A':
6437 if (ASSEMBLER_DIALECT == ASM_ATT)
6438 putc ('*', file);
6439 else if (ASSEMBLER_DIALECT == ASM_INTEL)
6440 {
6441 /* Intel syntax. For absolute addresses, registers should not
6442 be surrounded by braces. */
6443 if (GET_CODE (x) != REG)
6444 {
6445 putc ('[', file);
6446 PRINT_OPERAND (file, x, 0);
6447 putc (']', file);
6448 return;
6449 }
6450 }
6451 else
6452 abort ();
6453
6454 PRINT_OPERAND (file, x, 0);
6455 return;
6456
6457
6458 case 'L':
6459 if (ASSEMBLER_DIALECT == ASM_ATT)
6460 putc ('l', file);
6461 return;
6462
6463 case 'W':
6464 if (ASSEMBLER_DIALECT == ASM_ATT)
6465 putc ('w', file);
6466 return;
6467
6468 case 'B':
6469 if (ASSEMBLER_DIALECT == ASM_ATT)
6470 putc ('b', file);
6471 return;
6472
6473 case 'Q':
6474 if (ASSEMBLER_DIALECT == ASM_ATT)
6475 putc ('l', file);
6476 return;
6477
6478 case 'S':
6479 if (ASSEMBLER_DIALECT == ASM_ATT)
6480 putc ('s', file);
6481 return;
6482
6483 case 'T':
6484 if (ASSEMBLER_DIALECT == ASM_ATT)
6485 putc ('t', file);
6486 return;
6487
6488 case 'z':
6489 /* 387 opcodes don't get size suffixes if the operands are
6490 registers. */
6491 if (STACK_REG_P (x))
6492 return;
6493
6494 /* Likewise if using Intel opcodes. */
6495 if (ASSEMBLER_DIALECT == ASM_INTEL)
6496 return;
6497
6498 /* Derive the size suffix from the size of the operand. */
6499 switch (GET_MODE_SIZE (GET_MODE (x)))
6500 {
6501 case 2:
6502#ifdef HAVE_GAS_FILDS_FISTS
6503 putc ('s', file);
6504#endif
6505 return;
6506
6507 case 4:
6508 if (GET_MODE (x) == SFmode)
6509 {
6510 putc ('s', file);
6511 return;
6512 }
6513 else
6514 putc ('l', file);
6515 return;
6516
6517 case 12:
6518 case 16:
6519 putc ('t', file);
6520 return;
6521
6522 case 8:
6523 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
6524 {
6525#ifdef GAS_MNEMONICS
e075ae69 6526 putc ('q', file);
6527#else
6528 putc ('l', file);
6529 putc ('l', file);
6530#endif
6531 }
6532 else
6533 putc ('l', file);
6534 return;
6535
6536 default:
6537 abort ();
6538 }
6539
6540 case 'b':
6541 case 'w':
6542 case 'k':
6543 case 'q':
6544 case 'h':
6545 case 'y':
6546 case 'X':
6547 case 'P':
6548 break;
6549
6550 case 's':
6551 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
6552 {
6553 PRINT_OPERAND (file, x, 0);
6554 putc (',', file);
6555 }
6556 return;
6557
6558 case 'D':
6559 /* A little bit of braindamage here. The SSE compare instructions
6560 use completely different names for the comparisons than the
6561 fp conditional moves do. */
6562 switch (GET_CODE (x))
6563 {
6564 case EQ:
6565 case UNEQ:
6566 fputs ("eq", file);
6567 break;
6568 case LT:
6569 case UNLT:
6570 fputs ("lt", file);
6571 break;
6572 case LE:
6573 case UNLE:
6574 fputs ("le", file);
6575 break;
6576 case UNORDERED:
6577 fputs ("unord", file);
6578 break;
6579 case NE:
6580 case LTGT:
6581 fputs ("neq", file);
6582 break;
6583 case UNGE:
6584 case GE:
6585 fputs ("nlt", file);
6586 break;
6587 case UNGT:
6588 case GT:
6589 fputs ("nle", file);
6590 break;
6591 case ORDERED:
6592 fputs ("ord", file);
6593 break;
6594 default:
6595 abort ();
6596 break;
6597 }
6598 return;
6599 case 'O':
6600#ifdef CMOV_SUN_AS_SYNTAX
6601 if (ASSEMBLER_DIALECT == ASM_ATT)
6602 {
6603 switch (GET_MODE (x))
6604 {
6605 case HImode: putc ('w', file); break;
6606 case SImode:
6607 case SFmode: putc ('l', file); break;
6608 case DImode:
6609 case DFmode: putc ('q', file); break;
6610 default: abort ();
6611 }
6612 putc ('.', file);
6613 }
6614#endif
6615 return;
6616 case 'C':
6617 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
6618 return;
6619 case 'F':
6620#ifdef CMOV_SUN_AS_SYNTAX
6621 if (ASSEMBLER_DIALECT == ASM_ATT)
6622 putc ('.', file);
6623#endif
6624 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
6625 return;
6626
6627 /* Like above, but with the reversed condition. */
6628 case 'c':
6629 /* Check to see if the argument to %c is really a constant
6630 and not a condition code which needs to be reversed. */
6631 if (GET_RTX_CLASS (GET_CODE (x)) != '<')
6632 {
6633 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
6634 return;
6635 }
6636 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
6637 return;
6638 case 'f':
6639#ifdef CMOV_SUN_AS_SYNTAX
6640 if (ASSEMBLER_DIALECT == ASM_ATT)
6641 putc ('.', file);
6642#endif
6643 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
6644 return;
6645 case '+':
6646 {
6647 rtx x;
6648
6649 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
6650 return;
6651
6652 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
6653 if (x)
6654 {
6655 int pred_val = INTVAL (XEXP (x, 0));
6656
6657 if (pred_val < REG_BR_PROB_BASE * 45 / 100
6658 || pred_val > REG_BR_PROB_BASE * 55 / 100)
6659 {
6660 int taken = pred_val > REG_BR_PROB_BASE / 2;
6661 int cputaken = final_forward_branch_p (current_output_insn) == 0;
6662
6663 /* Emit hints only in the case the default branch prediction
6664 heuristics would fail. */
6665 if (taken != cputaken)
6666 {
6667 /* We use 3e (DS) prefix for taken branches and
6668 2e (CS) prefix for not taken branches. */
6669 if (taken)
6670 fputs ("ds ; ", file);
6671 else
6672 fputs ("cs ; ", file);
6673 }
6674 }
6675 }
6676 return;
6677 }
6678 default:
6679 output_operand_lossage ("invalid operand code `%c'", code);
6680 }
6681 }
6682
6683 if (GET_CODE (x) == REG)
6684 {
6685 PRINT_REG (x, code, file);
6686 }
6687
6688 else if (GET_CODE (x) == MEM)
6689 {
6690 /* No `byte ptr' prefix for call instructions. */
6691 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
6692 {
6693 const char * size;
6694 switch (GET_MODE_SIZE (GET_MODE (x)))
6695 {
6696 case 1: size = "BYTE"; break;
6697 case 2: size = "WORD"; break;
6698 case 4: size = "DWORD"; break;
6699 case 8: size = "QWORD"; break;
6700 case 12: size = "XWORD"; break;
6701 case 16: size = "XMMWORD"; break;
6702 default:
6703 abort ();
6704 }
6705
6706 /* Check for explicit size override (codes 'b', 'w' and 'k'). */
6707 if (code == 'b')
6708 size = "BYTE";
6709 else if (code == 'w')
6710 size = "WORD";
6711 else if (code == 'k')
6712 size = "DWORD";
6713
6714 fputs (size, file);
6715 fputs (" PTR ", file);
6716 }
6717
6718 x = XEXP (x, 0);
6719 if (flag_pic && CONSTANT_ADDRESS_P (x))
6720 output_pic_addr_const (file, x, code);
6721 /* Avoid (%rip) for call operands. */
6722 else if (CONSTANT_ADDRESS_P (x) && code == 'P'
6723 && GET_CODE (x) != CONST_INT)
6724 output_addr_const (file, x);
6725 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
6726 output_operand_lossage ("invalid constraints for operand");
6727 else
6728 output_address (x);
6729 }
6730
6731 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
6732 {
6733 REAL_VALUE_TYPE r;
6734 long l;
6735
6736 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6737 REAL_VALUE_TO_TARGET_SINGLE (r, l);
6738
6739 if (ASSEMBLER_DIALECT == ASM_ATT)
6740 putc ('$', file);
6741 fprintf (file, "0x%lx", l);
6742 }
6743
6744 /* These float cases don't actually occur as immediate operands. */
6745 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
6746 {
6747 REAL_VALUE_TYPE r;
6748 char dstr[30];
6749
6750 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6751 REAL_VALUE_TO_DECIMAL (r, dstr, -1);
6752 fprintf (file, "%s", dstr);
6753 }
6754
6755 else if (GET_CODE (x) == CONST_DOUBLE
6756 && (GET_MODE (x) == XFmode || GET_MODE (x) == TFmode))
6757 {
6758 REAL_VALUE_TYPE r;
6759 char dstr[30];
6760
6761 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
6762 REAL_VALUE_TO_DECIMAL (r, dstr, -1);
6763 fprintf (file, "%s", dstr);
6764 }
6765
6766 else
6767 {
6768 if (code != 'P')
6769 {
6770 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
6771 {
6772 if (ASSEMBLER_DIALECT == ASM_ATT)
6773 putc ('$', file);
6774 }
6775 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
6776 || GET_CODE (x) == LABEL_REF)
6777 {
6778 if (ASSEMBLER_DIALECT == ASM_ATT)
6779 putc ('$', file);
6780 else
6781 fputs ("OFFSET FLAT:", file);
6782 }
6783 }
6784 if (GET_CODE (x) == CONST_INT)
6785 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
6786 else if (flag_pic)
6787 output_pic_addr_const (file, x, code);
6788 else
6789 output_addr_const (file, x);
6790 }
6791}
6792\f
6793/* Print a memory operand whose address is ADDR. */
6794
6795void
6796print_operand_address (file, addr)
6797 FILE *file;
6798 register rtx addr;
6799{
6800 struct ix86_address parts;
6801 rtx base, index, disp;
6802 int scale;
6803
6804 if (GET_CODE (addr) == UNSPEC && XINT (addr, 1) == UNSPEC_TP)
6805 {
6806 if (ASSEMBLER_DIALECT == ASM_INTEL)
6807 fputs ("DWORD PTR ", file);
6808 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
6809 putc ('%', file);
6810 fputs ("gs:0", file);
6811 return;
6812 }
6813
6814 if (! ix86_decompose_address (addr, &parts))
6815 abort ();
6816
6817 base = parts.base;
6818 index = parts.index;
6819 disp = parts.disp;
6820 scale = parts.scale;
6821
6822 if (!base && !index)
6823 {
6824 /* Displacement only requires special attention. */
6825
6826 if (GET_CODE (disp) == CONST_INT)
6827 {
6828 if (ASSEMBLER_DIALECT == ASM_INTEL)
6829 {
6830 if (USER_LABEL_PREFIX[0] == 0)
6831 putc ('%', file);
6832 fputs ("ds:", file);
6833 }
6834 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (addr));
6835 }
6836 else if (flag_pic)
6837 output_pic_addr_const (file, addr, 0);
6838 else
6839 output_addr_const (file, addr);
6840
6841 /* Use one byte shorter RIP relative addressing for 64bit mode. */
6842 if (TARGET_64BIT
6843 && (GET_CODE (addr) == SYMBOL_REF
6844 || GET_CODE (addr) == LABEL_REF
6845 || (GET_CODE (addr) == CONST
6846 && GET_CODE (XEXP (addr, 0)) == PLUS
6847 && (GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
6848 || GET_CODE (XEXP (XEXP (addr, 0), 0)) == LABEL_REF)
6849 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)))
6850 fputs ("(%rip)", file);
6851 }
6852 else
6853 {
6854 if (ASSEMBLER_DIALECT == ASM_ATT)
6855 {
6856 if (disp)
6857 {
6858 if (flag_pic)
6859 output_pic_addr_const (file, disp, 0);
6860 else if (GET_CODE (disp) == LABEL_REF)
6861 output_asm_label (disp);
6862 else
6863 output_addr_const (file, disp);
6864 }
6865
6866 putc ('(', file);
6867 if (base)
6868 PRINT_REG (base, 0, file);
6869 if (index)
6870 {
6871 putc (',', file);
6872 PRINT_REG (index, 0, file);
6873 if (scale != 1)
6874 fprintf (file, ",%d", scale);
6875 }
6876 putc (')', file);
6877 }
6878 else
6879 {
6880 rtx offset = NULL_RTX;
6881
6882 if (disp)
6883 {
6884 /* Pull out the offset of a symbol; print any symbol itself. */
6885 if (GET_CODE (disp) == CONST
6886 && GET_CODE (XEXP (disp, 0)) == PLUS
6887 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
6888 {
6889 offset = XEXP (XEXP (disp, 0), 1);
6890 disp = gen_rtx_CONST (VOIDmode,
6891 XEXP (XEXP (disp, 0), 0));
6892 }
6893
6894 if (flag_pic)
6895 output_pic_addr_const (file, disp, 0);
6896 else if (GET_CODE (disp) == LABEL_REF)
6897 output_asm_label (disp);
6898 else if (GET_CODE (disp) == CONST_INT)
6899 offset = disp;
6900 else
6901 output_addr_const (file, disp);
6902 }
e9a25f70 6903
e075ae69
RH
6904 putc ('[', file);
6905 if (base)
a8620236 6906 {
e075ae69
RH
6907 PRINT_REG (base, 0, file);
6908 if (offset)
6909 {
6910 if (INTVAL (offset) >= 0)
6911 putc ('+', file);
6912 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
6913 }
a8620236 6914 }
e075ae69
RH
6915 else if (offset)
6916 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
2247a58c 6917 else
e075ae69 6918 putc ('0', file);
e9a25f70 6919
e075ae69
RH
6920 if (index)
6921 {
6922 putc ('+', file);
6923 PRINT_REG (index, 0, file);
6924 if (scale != 1)
6925 fprintf (file, "*%d", scale);
6926 }
6927 putc (']', file);
6928 }
2a2ab3f9
JVA
6929 }
6930}
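/* As an illustration of the two dialects handled above: an address with
 base %ebp, index %eax, scale 4 and a displacement of 16 is printed as
 "16(%ebp,%eax,4)" in AT&T syntax and as "[ebp+16+eax*4]" in Intel
 syntax (modulo any size or segment decorations added by the callers). */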
f996902d
RH
6931
6932bool
6933output_addr_const_extra (file, x)
6934 FILE *file;
6935 rtx x;
6936{
6937 rtx op;
6938
6939 if (GET_CODE (x) != UNSPEC)
6940 return false;
6941
6942 op = XVECEXP (x, 0, 0);
6943 switch (XINT (x, 1))
6944 {
6945 case UNSPEC_GOTTPOFF:
6946 output_addr_const (file, op);
dea73790 6947 /* FIXME: This might be @TPOFF in Sun ld. */
f996902d
RH
6948 fputs ("@GOTTPOFF", file);
6949 break;
6950 case UNSPEC_TPOFF:
6951 output_addr_const (file, op);
6952 fputs ("@TPOFF", file);
6953 break;
6954 case UNSPEC_NTPOFF:
6955 output_addr_const (file, op);
6956 fputs ("@NTPOFF", file);
6957 break;
6958 case UNSPEC_DTPOFF:
6959 output_addr_const (file, op);
6960 fputs ("@DTPOFF", file);
6961 break;
dea73790
JJ
6962 case UNSPEC_GOTNTPOFF:
6963 output_addr_const (file, op);
6964 fputs ("@GOTNTPOFF", file);
6965 break;
6966 case UNSPEC_INDNTPOFF:
6967 output_addr_const (file, op);
6968 fputs ("@INDNTPOFF", file);
6969 break;
f996902d
RH
6970
6971 default:
6972 return false;
6973 }
6974
6975 return true;
6976}
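/* For example, the UNSPEC_GOTTPOFF wrapping of a thread-local symbol
 "foo" (initial-exec model) comes out as "foo@GOTTPOFF", which the
 assembler turns into the matching TLS relocation -- roughly, a
 reference to the GOT slot holding foo's offset from the thread
 pointer. */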
2a2ab3f9
JVA
6977\f
6978/* Split one or more DImode RTL references into pairs of SImode
6979 references. The RTL can be REG, offsettable MEM, integer constant, or
6980 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
6981 split and "num" is its length. lo_half and hi_half are output arrays
0f290768 6982 that parallel "operands". */
2a2ab3f9
JVA
6983
6984void
6985split_di (operands, num, lo_half, hi_half)
6986 rtx operands[];
6987 int num;
6988 rtx lo_half[], hi_half[];
6989{
6990 while (num--)
6991 {
57dbca5e 6992 rtx op = operands[num];
b932f770
JH
6993
6994 /* simplify_subreg refuses to split volatile memory addresses,
6995 but we still have to handle them. */
6996 if (GET_CODE (op) == MEM)
2a2ab3f9 6997 {
f4ef873c 6998 lo_half[num] = adjust_address (op, SImode, 0);
b72f00af 6999 hi_half[num] = adjust_address (op, SImode, 4);
2a2ab3f9
JVA
7000 }
7001 else
b932f770 7002 {
38ca929b
JH
7003 lo_half[num] = simplify_gen_subreg (SImode, op,
7004 GET_MODE (op) == VOIDmode
7005 ? DImode : GET_MODE (op), 0);
7006 hi_half[num] = simplify_gen_subreg (SImode, op,
7007 GET_MODE (op) == VOIDmode
7008 ? DImode : GET_MODE (op), 4);
b932f770 7009 }
2a2ab3f9
JVA
7010 }
7011}
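/* A worked example: splitting the single DImode constant 0x100000002
 yields lo_half[0] = (const_int 2) and hi_half[0] = (const_int 1),
 matching the little-endian layout of the two SImode words in
 memory. */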
44cf5b6a
JH
7012/* Split one or more TImode RTL references into pairs of DImode
7013 references. The RTL can be REG, offsettable MEM, integer constant, or
7014 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
7015 split and "num" is its length. lo_half and hi_half are output arrays
7016 that parallel "operands". */
7017
7018void
7019split_ti (operands, num, lo_half, hi_half)
7020 rtx operands[];
7021 int num;
7022 rtx lo_half[], hi_half[];
7023{
7024 while (num--)
7025 {
7026 rtx op = operands[num];
b932f770
JH
7027
7028 /* simplify_subreg refuses to split volatile memory addresses, but we
7029 still have to handle them. */
7030 if (GET_CODE (op) == MEM)
44cf5b6a
JH
7031 {
7032 lo_half[num] = adjust_address (op, DImode, 0);
7033 hi_half[num] = adjust_address (op, DImode, 8);
7034 }
7035 else
b932f770
JH
7036 {
7037 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
7038 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
7039 }
44cf5b6a
JH
7040 }
7041}
2a2ab3f9 7042\f
2a2ab3f9
JVA
7043/* Output code to perform a 387 binary operation in INSN, one of PLUS,
7044 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
7045 is the expression of the binary operation. The output may either be
7046 emitted here, or returned to the caller, like all output_* functions.
7047
7048 There is no guarantee that the operands are the same mode, as they
0f290768 7049 might be within FLOAT or FLOAT_EXTEND expressions. */
2a2ab3f9 7050
e3c2afab
AM
7051#ifndef SYSV386_COMPAT
7052/* Set to 1 for compatibility with brain-damaged assemblers. No-one
7053 wants to fix the assemblers because that causes incompatibility
7054 with gcc. No-one wants to fix gcc because that causes
7055 incompatibility with assemblers... You can use -DSYSV386_COMPAT=0
7056 if you recompile both gcc and gas that way. */
7057#define SYSV386_COMPAT 1
7058#endif
7059
69ddee61 7060const char *
2a2ab3f9
JVA
7061output_387_binary_op (insn, operands)
7062 rtx insn;
7063 rtx *operands;
7064{
e3c2afab 7065 static char buf[30];
69ddee61 7066 const char *p;
1deaa899
JH
7067 const char *ssep;
7068 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]) | SSE_REG_P (operands[2]);
2a2ab3f9 7069
e3c2afab
AM
7070#ifdef ENABLE_CHECKING
7071 /* Even if we do not want to check the inputs, this documents the input
7072 constraints, which helps in understanding the following code. */
7073 if (STACK_REG_P (operands[0])
7074 && ((REG_P (operands[1])
7075 && REGNO (operands[0]) == REGNO (operands[1])
7076 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
7077 || (REG_P (operands[2])
7078 && REGNO (operands[0]) == REGNO (operands[2])
7079 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
7080 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
7081 ; /* ok */
1deaa899 7082 else if (!is_sse)
e3c2afab
AM
7083 abort ();
7084#endif
7085
2a2ab3f9
JVA
7086 switch (GET_CODE (operands[3]))
7087 {
7088 case PLUS:
e075ae69
RH
7089 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7090 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7091 p = "fiadd";
7092 else
7093 p = "fadd";
1deaa899 7094 ssep = "add";
2a2ab3f9
JVA
7095 break;
7096
7097 case MINUS:
e075ae69
RH
7098 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7099 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7100 p = "fisub";
7101 else
7102 p = "fsub";
1deaa899 7103 ssep = "sub";
2a2ab3f9
JVA
7104 break;
7105
7106 case MULT:
e075ae69
RH
7107 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7108 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7109 p = "fimul";
7110 else
7111 p = "fmul";
1deaa899 7112 ssep = "mul";
2a2ab3f9
JVA
7113 break;
7114
7115 case DIV:
e075ae69
RH
7116 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
7117 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
7118 p = "fidiv";
7119 else
7120 p = "fdiv";
1deaa899 7121 ssep = "div";
2a2ab3f9
JVA
7122 break;
7123
7124 default:
7125 abort ();
7126 }
7127
1deaa899
JH
7128 if (is_sse)
7129 {
7130 strcpy (buf, ssep);
7131 if (GET_MODE (operands[0]) == SFmode)
7132 strcat (buf, "ss\t{%2, %0|%0, %2}");
7133 else
7134 strcat (buf, "sd\t{%2, %0|%0, %2}");
7135 return buf;
7136 }
e075ae69 7137 strcpy (buf, p);
2a2ab3f9
JVA
7138
7139 switch (GET_CODE (operands[3]))
7140 {
7141 case MULT:
7142 case PLUS:
7143 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
7144 {
e3c2afab 7145 rtx temp = operands[2];
2a2ab3f9
JVA
7146 operands[2] = operands[1];
7147 operands[1] = temp;
7148 }
7149
e3c2afab
AM
7150 /* We know operands[0] == operands[1]. */
7151
2a2ab3f9 7152 if (GET_CODE (operands[2]) == MEM)
e075ae69
RH
7153 {
7154 p = "%z2\t%2";
7155 break;
7156 }
2a2ab3f9
JVA
7157
7158 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6b28fd63
JL
7159 {
7160 if (STACK_TOP_P (operands[0]))
e3c2afab
AM
7161 /* How is it that we are storing to a dead operand[2]?
7162 Well, presumably operands[1] is dead too. We can't
7163 store the result to st(0) as st(0) gets popped on this
7164 instruction. Instead store to operands[2] (which I
7165 think has to be st(1)). st(1) will be popped later.
7166 gcc <= 2.8.1 didn't have this check and generated
7167 assembly code that the Unixware assembler rejected. */
7168 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6b28fd63 7169 else
e3c2afab 7170 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
e075ae69 7171 break;
6b28fd63 7172 }
2a2ab3f9
JVA
7173
7174 if (STACK_TOP_P (operands[0]))
e3c2afab 7175 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
2a2ab3f9 7176 else
e3c2afab 7177 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
e075ae69 7178 break;
2a2ab3f9
JVA
7179
7180 case MINUS:
7181 case DIV:
7182 if (GET_CODE (operands[1]) == MEM)
e075ae69
RH
7183 {
7184 p = "r%z1\t%1";
7185 break;
7186 }
2a2ab3f9
JVA
7187
7188 if (GET_CODE (operands[2]) == MEM)
e075ae69
RH
7189 {
7190 p = "%z2\t%2";
7191 break;
7192 }
2a2ab3f9 7193
2a2ab3f9 7194 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
6b28fd63 7195 {
e3c2afab
AM
7196#if SYSV386_COMPAT
7197 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
7198 derived assemblers, confusingly reverse the direction of
7199 the operation for fsub{r} and fdiv{r} when the
7200 destination register is not st(0). The Intel assembler
7201 doesn't have this brain damage. Read !SYSV386_COMPAT to
7202 figure out what the hardware really does. */
7203 if (STACK_TOP_P (operands[0]))
7204 p = "{p\t%0, %2|rp\t%2, %0}";
7205 else
7206 p = "{rp\t%2, %0|p\t%0, %2}";
7207#else
6b28fd63 7208 if (STACK_TOP_P (operands[0]))
e3c2afab
AM
7209 /* As above for fmul/fadd, we can't store to st(0). */
7210 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
6b28fd63 7211 else
e3c2afab
AM
7212 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
7213#endif
e075ae69 7214 break;
6b28fd63 7215 }
2a2ab3f9
JVA
7216
7217 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
6b28fd63 7218 {
e3c2afab 7219#if SYSV386_COMPAT
6b28fd63 7220 if (STACK_TOP_P (operands[0]))
e3c2afab 7221 p = "{rp\t%0, %1|p\t%1, %0}";
6b28fd63 7222 else
e3c2afab
AM
7223 p = "{p\t%1, %0|rp\t%0, %1}";
7224#else
7225 if (STACK_TOP_P (operands[0]))
7226 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
7227 else
7228 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
7229#endif
e075ae69 7230 break;
6b28fd63 7231 }
2a2ab3f9
JVA
7232
7233 if (STACK_TOP_P (operands[0]))
7234 {
7235 if (STACK_TOP_P (operands[1]))
e3c2afab 7236 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
2a2ab3f9 7237 else
e3c2afab 7238 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
e075ae69 7239 break;
2a2ab3f9
JVA
7240 }
7241 else if (STACK_TOP_P (operands[1]))
e3c2afab
AM
7242 {
7243#if SYSV386_COMPAT
7244 p = "{\t%1, %0|r\t%0, %1}";
7245#else
7246 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
7247#endif
7248 }
2a2ab3f9 7249 else
e3c2afab
AM
7250 {
7251#if SYSV386_COMPAT
7252 p = "{r\t%2, %0|\t%0, %2}";
7253#else
7254 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
7255#endif
7256 }
e075ae69 7257 break;
2a2ab3f9
JVA
7258
7259 default:
7260 abort ();
7261 }
e075ae69
RH
7262
7263 strcat (buf, p);
7264 return buf;
2a2ab3f9 7265}
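/* A worked example of the PLUS case above: for "x = x + y" with x in
 st(1) and y dying in st(0), the code picks "p\t{%2, %0|%0, %2}",
 giving "faddp\t{%2, %0|%0, %2}", i.e. "faddp %st, %st(1)" in AT&T
 output: st(1) = st(1) + st(0), pop. */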
e075ae69 7266
a4f31c00 7267/* Output code to initialize control word copies used by
7a2e09f4
JH
7268 trunc?f?i patterns. NORMAL is set to the current control word, while
7269 ROUND_DOWN is set to a control word that rounds toward zero. */
7270void
7271emit_i387_cw_initialization (normal, round_down)
7272 rtx normal, round_down;
7273{
7274 rtx reg = gen_reg_rtx (HImode);
7275
7276 emit_insn (gen_x86_fnstcw_1 (normal));
7277 emit_move_insn (reg, normal);
7278 if (!TARGET_PARTIAL_REG_STALL && !optimize_size
7279 && !TARGET_64BIT)
7280 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
7281 else
7282 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0xc00)));
7283 emit_move_insn (round_down, reg);
7284}
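/* The rounding-control field is bits 10-11 of the 387 control word,
 and RC = 11b means round toward zero (truncate), which is what the
 trunc?f?i patterns need; the iorhi3 path turns e.g. 0x037f into
 0x0f7f. The movsi_insv_1 path stores 0x0c straight into the high
 byte (a single "mov $0xc, %ah" style insn) and is used only on
 targets without partial-register stalls; it also clears the
 precision-control bits, which is harmless for integer stores. */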
7285
2a2ab3f9 7286/* Output code for INSN to convert a float to a signed int. OPERANDS
46d21d2c 7287 are the insn operands. The output may be [HSD]Imode and the input
e075ae69 7288 operand may be [SDX]Fmode. */
2a2ab3f9 7289
69ddee61 7290const char *
2a2ab3f9
JVA
7291output_fix_trunc (insn, operands)
7292 rtx insn;
7293 rtx *operands;
7294{
7295 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
e075ae69 7296 int dimode_p = GET_MODE (operands[0]) == DImode;
2a2ab3f9 7297
e075ae69
RH
7298 /* Jump through a hoop or two for DImode, since the hardware has no
7299 non-popping instruction. We used to do this a different way, but
7300 that was somewhat fragile and broke with post-reload splitters. */
a05924f9
JH
7301 if (dimode_p && !stack_top_dies)
7302 output_asm_insn ("fld\t%y1", operands);
e075ae69 7303
7a2e09f4 7304 if (!STACK_TOP_P (operands[1]))
10195bd8
JW
7305 abort ();
7306
e075ae69 7307 if (GET_CODE (operands[0]) != MEM)
7a2e09f4 7308 abort ();
e9a25f70 7309
7a2e09f4 7310 output_asm_insn ("fldcw\t%3", operands);
e075ae69 7311 if (stack_top_dies || dimode_p)
7a2e09f4 7312 output_asm_insn ("fistp%z0\t%0", operands);
10195bd8 7313 else
7a2e09f4 7314 output_asm_insn ("fist%z0\t%0", operands);
e075ae69 7315 output_asm_insn ("fldcw\t%2", operands);
10195bd8 7316
e075ae69 7317 return "";
2a2ab3f9 7318}
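/* For SImode output with a dying input the emitted sequence is just:

	fldcw	%3	# install the truncating control word
	fistpl	%0	# convert to integer and pop
	fldcw	%2	# restore the caller's control word

 where operands 2 and 3 are the control word copies prepared by
 emit_i387_cw_initialization above. */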
cda749b1 7319
e075ae69
RH
7320/* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
7321 should be used and 2 when fnstsw should be used. UNORDERED_P is true
7322 when fucom should be used. */
7323
69ddee61 7324const char *
e075ae69 7325output_fp_compare (insn, operands, eflags_p, unordered_p)
cda749b1
JW
7326 rtx insn;
7327 rtx *operands;
e075ae69 7328 int eflags_p, unordered_p;
cda749b1 7329{
e075ae69
RH
7330 int stack_top_dies;
7331 rtx cmp_op0 = operands[0];
7332 rtx cmp_op1 = operands[1];
0644b628 7333 int is_sse = SSE_REG_P (operands[0]) | SSE_REG_P (operands[1]);
e075ae69
RH
7334
7335 if (eflags_p == 2)
7336 {
7337 cmp_op0 = cmp_op1;
7338 cmp_op1 = operands[2];
7339 }
0644b628
JH
7340 if (is_sse)
7341 {
7342 if (GET_MODE (operands[0]) == SFmode)
7343 if (unordered_p)
7344 return "ucomiss\t{%1, %0|%0, %1}";
7345 else
7346 return "comiss\t{%1, %0|%0, %y}";
7347 else
7348 if (unordered_p)
7349 return "ucomisd\t{%1, %0|%0, %1}";
7350 else
7351 return "comisd\t{%1, %0|%0, %y}";
7352 }
cda749b1 7353
e075ae69 7354 if (! STACK_TOP_P (cmp_op0))
cda749b1
JW
7355 abort ();
7356
e075ae69 7357 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
cda749b1 7358
e075ae69
RH
7359 if (STACK_REG_P (cmp_op1)
7360 && stack_top_dies
7361 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
7362 && REGNO (cmp_op1) != FIRST_STACK_REG)
cda749b1 7363 {
e075ae69
RH
7364 /* If the top of the 387 stack dies, and the other operand
7365 is also a stack register that dies, then this must be a
7366 `fcompp' float compare. */
7367
7368 if (eflags_p == 1)
7369 {
7370 /* There is no double popping fcomi variant. Fortunately,
7371 eflags is immune from the fstp's cc clobbering. */
7372 if (unordered_p)
7373 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
7374 else
7375 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
7376 return "fstp\t%y0";
7377 }
7378 else
cda749b1 7379 {
e075ae69
RH
7380 if (eflags_p == 2)
7381 {
7382 if (unordered_p)
7383 return "fucompp\n\tfnstsw\t%0";
7384 else
7385 return "fcompp\n\tfnstsw\t%0";
7386 }
cda749b1
JW
7387 else
7388 {
e075ae69
RH
7389 if (unordered_p)
7390 return "fucompp";
7391 else
7392 return "fcompp";
cda749b1
JW
7393 }
7394 }
cda749b1
JW
7395 }
7396 else
7397 {
e075ae69 7398 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
cda749b1 7399
0f290768 7400 static const char * const alt[24] =
e075ae69
RH
7401 {
7402 "fcom%z1\t%y1",
7403 "fcomp%z1\t%y1",
7404 "fucom%z1\t%y1",
7405 "fucomp%z1\t%y1",
0f290768 7406
e075ae69
RH
7407 "ficom%z1\t%y1",
7408 "ficomp%z1\t%y1",
7409 NULL,
7410 NULL,
7411
7412 "fcomi\t{%y1, %0|%0, %y1}",
7413 "fcomip\t{%y1, %0|%0, %y1}",
7414 "fucomi\t{%y1, %0|%0, %y1}",
7415 "fucomip\t{%y1, %0|%0, %y1}",
7416
7417 NULL,
7418 NULL,
7419 NULL,
7420 NULL,
7421
7422 "fcom%z2\t%y2\n\tfnstsw\t%0",
7423 "fcomp%z2\t%y2\n\tfnstsw\t%0",
7424 "fucom%z2\t%y2\n\tfnstsw\t%0",
7425 "fucomp%z2\t%y2\n\tfnstsw\t%0",
0f290768 7426
e075ae69
RH
7427 "ficom%z2\t%y2\n\tfnstsw\t%0",
7428 "ficomp%z2\t%y2\n\tfnstsw\t%0",
7429 NULL,
7430 NULL
7431 };
7432
7433 int mask;
69ddee61 7434 const char *ret;
e075ae69
RH
7435
7436 mask = eflags_p << 3;
7437 mask |= (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT) << 2;
7438 mask |= unordered_p << 1;
7439 mask |= stack_top_dies;
7440
7441 if (mask >= 24)
7442 abort ();
7443 ret = alt[mask];
7444 if (ret == NULL)
7445 abort ();
cda749b1 7446
e075ae69 7447 return ret;
cda749b1
JW
7448 }
7449}
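/* A worked example of the mask encoding above: fcomi usable
 (eflags_p == 1), a floating point operand (the int-mode bit clear),
 an ordered compare (unordered_p == 0) and a dying stack top give
 mask = (1 << 3) | 1 = 9, selecting "fcomip\t{%y1, %0|%0, %y1}". */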
2a2ab3f9 7450
f88c65f7
RH
7451void
7452ix86_output_addr_vec_elt (file, value)
7453 FILE *file;
7454 int value;
7455{
7456 const char *directive = ASM_LONG;
7457
7458 if (TARGET_64BIT)
7459 {
7460#ifdef ASM_QUAD
7461 directive = ASM_QUAD;
7462#else
7463 abort ();
7464#endif
7465 }
7466
7467 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
7468}
7469
7470void
7471ix86_output_addr_diff_elt (file, value, rel)
7472 FILE *file;
7473 int value, rel;
7474{
7475 if (TARGET_64BIT)
74411039 7476 fprintf (file, "%s%s%d-%s%d\n",
f88c65f7
RH
7477 ASM_LONG, LPREFIX, value, LPREFIX, rel);
7478 else if (HAVE_AS_GOTOFF_IN_DATA)
7479 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
b069de3b
SS
7480#if TARGET_MACHO
7481 else if (TARGET_MACHO)
7482 fprintf (file, "%s%s%d-%s\n", ASM_LONG, LPREFIX, value,
7483 machopic_function_base_name () + 1);
7484#endif
f88c65f7 7485 else
5fc0e5df
KW
7486 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
7487 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
f88c65f7 7488}
32b5b1aa 7489\f
a8bac9ab
RH
7490/* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
7491 for the target. */
7492
7493void
7494ix86_expand_clear (dest)
7495 rtx dest;
7496{
7497 rtx tmp;
7498
7499 /* We play register width games, which are only valid after reload. */
7500 if (!reload_completed)
7501 abort ();
7502
7503 /* Avoid HImode and its attendant prefix byte. */
7504 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
7505 dest = gen_rtx_REG (SImode, REGNO (dest));
7506
7507 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
7508
7509 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
7510 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
7511 {
7512 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
7513 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
7514 }
7515
7516 emit_insn (tmp);
7517}
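/* "xor reg, reg" is two bytes against five for "mov $0, reg" and is
 recognized as a zeroing idiom, but it clobbers the flags, hence the
 explicit CLOBBER attached above; the plain mov is kept for targets
 where TARGET_USE_MOV0 says it is the cheaper choice. */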
7518
f996902d
RH
7519/* X is an unchanging MEM. If it is a constant pool reference, return
7520 the constant pool rtx, else NULL. */
7521
7522static rtx
7523maybe_get_pool_constant (x)
7524 rtx x;
7525{
7526 x = XEXP (x, 0);
7527
7528 if (flag_pic)
7529 {
7530 if (GET_CODE (x) != PLUS)
7531 return NULL_RTX;
7532 if (XEXP (x, 0) != pic_offset_table_rtx)
7533 return NULL_RTX;
7534 x = XEXP (x, 1);
7535 if (GET_CODE (x) != CONST)
7536 return NULL_RTX;
7537 x = XEXP (x, 0);
7538 if (GET_CODE (x) != UNSPEC)
7539 return NULL_RTX;
7540 if (XINT (x, 1) != UNSPEC_GOTOFF)
7541 return NULL_RTX;
7542 x = XVECEXP (x, 0, 0);
7543 }
7544
7545 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7546 return get_pool_constant (x);
7547
7548 return NULL_RTX;
7549}
7550
79325812 7551void
e075ae69
RH
7552ix86_expand_move (mode, operands)
7553 enum machine_mode mode;
7554 rtx operands[];
32b5b1aa 7555{
e075ae69 7556 int strict = (reload_in_progress || reload_completed);
f996902d
RH
7557 rtx insn, op0, op1, tmp;
7558
7559 op0 = operands[0];
7560 op1 = operands[1];
7561
7562 /* ??? We have a slight problem. We need to say that tls symbols are
7563 not legitimate constants so that reload does not helpfully reload
7564 these constants from a REG_EQUIV, which we cannot handle. (Recall
7565 that general- and local-dynamic address resolution requires a
7566 function call.)
e9a25f70 7567
f996902d
RH
7568 However, if we say that tls symbols are not legitimate constants,
7569 then emit_move_insn helpfully drops them into the constant pool.
7570
7571 It is far easier to work around emit_move_insn than reload. Recognize
7572 the MEM that we would have created and extract the symbol_ref. */
7573
7574 if (mode == Pmode
7575 && GET_CODE (op1) == MEM
7576 && RTX_UNCHANGING_P (op1))
32b5b1aa 7577 {
f996902d
RH
7578 tmp = maybe_get_pool_constant (op1);
7579 /* Note that we only care about symbolic constants here, which
7580 unlike CONST_INT will always have a proper mode. */
7581 if (tmp && GET_MODE (tmp) == Pmode)
7582 op1 = tmp;
7583 }
e9a25f70 7584
f996902d
RH
7585 if (tls_symbolic_operand (op1, Pmode))
7586 {
7587 op1 = legitimize_address (op1, op1, VOIDmode);
7588 if (GET_CODE (op0) == MEM)
7589 {
7590 tmp = gen_reg_rtx (mode);
7591 emit_insn (gen_rtx_SET (VOIDmode, tmp, op1));
7592 op1 = tmp;
7593 }
7594 }
7595 else if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
7596 {
b069de3b
SS
7597#if TARGET_MACHO
7598 if (MACHOPIC_PURE)
7599 {
7600 rtx temp = ((reload_in_progress
7601 || ((op0 && GET_CODE (op0) == REG)
7602 && mode == Pmode))
7603 ? op0 : gen_reg_rtx (Pmode));
7604 op1 = machopic_indirect_data_reference (op1, temp);
7605 op1 = machopic_legitimize_pic_address (op1, mode,
7606 temp == op1 ? 0 : temp);
7607 }
7608 else
7609 {
7610 if (MACHOPIC_INDIRECT)
7611 op1 = machopic_indirect_data_reference (op1, 0);
7612 }
7613 if (op0 != op1)
7614 {
7615 insn = gen_rtx_SET (VOIDmode, op0, op1);
7616 emit_insn (insn);
7617 }
7618 return;
7619#endif /* TARGET_MACHO */
f996902d
RH
7620 if (GET_CODE (op0) == MEM)
7621 op1 = force_reg (Pmode, op1);
e075ae69 7622 else
32b5b1aa 7623 {
f996902d 7624 rtx temp = op0;
e075ae69
RH
7625 if (GET_CODE (temp) != REG)
7626 temp = gen_reg_rtx (Pmode);
f996902d
RH
7627 temp = legitimize_pic_address (op1, temp);
7628 if (temp == op0)
e075ae69 7629 return;
f996902d 7630 op1 = temp;
32b5b1aa 7631 }
e075ae69
RH
7632 }
7633 else
7634 {
f996902d 7635 if (GET_CODE (op0) == MEM
44cf5b6a 7636 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
f996902d
RH
7637 || !push_operand (op0, mode))
7638 && GET_CODE (op1) == MEM)
7639 op1 = force_reg (mode, op1);
e9a25f70 7640
f996902d
RH
7641 if (push_operand (op0, mode)
7642 && ! general_no_elim_operand (op1, mode))
7643 op1 = copy_to_mode_reg (mode, op1);
2c5a510c 7644
44cf5b6a
JH
7645 /* Force large constants in 64bit compilation into a register
7646 to get them CSEed. */
7647 if (TARGET_64BIT && mode == DImode
f996902d
RH
7648 && immediate_operand (op1, mode)
7649 && !x86_64_zero_extended_value (op1)
7650 && !register_operand (op0, mode)
44cf5b6a 7651 && optimize && !reload_completed && !reload_in_progress)
f996902d 7652 op1 = copy_to_mode_reg (mode, op1);
44cf5b6a 7653
e075ae69 7654 if (FLOAT_MODE_P (mode))
32b5b1aa 7655 {
d7a29404
JH
7656 /* If we are loading a floating point constant to a register,
7657 force the value to memory now, since we'll get better code
7658 out of the back end. */
e075ae69
RH
7659
7660 if (strict)
7661 ;
f996902d
RH
7662 else if (GET_CODE (op1) == CONST_DOUBLE
7663 && register_operand (op0, mode))
7664 op1 = validize_mem (force_const_mem (mode, op1));
32b5b1aa 7665 }
32b5b1aa 7666 }
e9a25f70 7667
f996902d 7668 insn = gen_rtx_SET (VOIDmode, op0, op1);
e9a25f70 7669
e075ae69
RH
7670 emit_insn (insn);
7671}
e9a25f70 7672
e37af218
RH
7673void
7674ix86_expand_vector_move (mode, operands)
7675 enum machine_mode mode;
7676 rtx operands[];
7677{
7678 /* Force constants other than zero into memory. We do not know how
7679 the instructions used to build constants modify the upper 64 bits
7680 of the register; once we have that information we may be able
7681 to handle some of them more efficiently. */
7682 if ((reload_in_progress | reload_completed) == 0
7683 && register_operand (operands[0], mode)
7684 && CONSTANT_P (operands[1]))
7685 {
7686 rtx addr = gen_reg_rtx (Pmode);
7687 emit_move_insn (addr, XEXP (force_const_mem (mode, operands[1]), 0));
7688 operands[1] = gen_rtx_MEM (mode, addr);
7689 }
7690
7691 /* If neither operand is a register, force operand1 into a register. */
7692 if ((reload_in_progress | reload_completed) == 0
7693 && !register_operand (operands[0], mode)
7694 && !register_operand (operands[1], mode)
7695 && operands[1] != CONST0_RTX (mode))
7696 {
59bef189 7697 rtx temp = force_reg (GET_MODE (operands[1]), operands[1]);
e37af218
RH
7698 emit_move_insn (operands[0], temp);
7699 return;
7700 }
7701
7702 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
fce5a9f2 7703}
e37af218 7704
e075ae69
RH
7705/* Attempt to expand a binary operator. Make the expansion closer to the
7706 actual machine, than just general_operand, which will allow 3 separate
9d81fc27 7707 memory references (one output, two input) in a single insn. */
e9a25f70 7708
e075ae69
RH
7709void
7710ix86_expand_binary_operator (code, mode, operands)
7711 enum rtx_code code;
7712 enum machine_mode mode;
7713 rtx operands[];
7714{
7715 int matching_memory;
7716 rtx src1, src2, dst, op, clob;
7717
7718 dst = operands[0];
7719 src1 = operands[1];
7720 src2 = operands[2];
7721
7722 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
7723 if (GET_RTX_CLASS (code) == 'c'
7724 && (rtx_equal_p (dst, src2)
7725 || immediate_operand (src1, mode)))
7726 {
7727 rtx temp = src1;
7728 src1 = src2;
7729 src2 = temp;
32b5b1aa 7730 }
e9a25f70 7731
e075ae69
RH
7732 /* If the destination is memory, and we do not have matching source
7733 operands, do things in registers. */
7734 matching_memory = 0;
7735 if (GET_CODE (dst) == MEM)
32b5b1aa 7736 {
e075ae69
RH
7737 if (rtx_equal_p (dst, src1))
7738 matching_memory = 1;
7739 else if (GET_RTX_CLASS (code) == 'c'
7740 && rtx_equal_p (dst, src2))
7741 matching_memory = 2;
7742 else
7743 dst = gen_reg_rtx (mode);
7744 }
0f290768 7745
e075ae69
RH
7746 /* Both source operands cannot be in memory. */
7747 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
7748 {
7749 if (matching_memory != 2)
7750 src2 = force_reg (mode, src2);
7751 else
7752 src1 = force_reg (mode, src1);
32b5b1aa 7753 }
e9a25f70 7754
06a964de
JH
7755 /* If the operation is not commutative, source 1 cannot be a constant
7756 or non-matching memory. */
0f290768 7757 if ((CONSTANT_P (src1)
06a964de
JH
7758 || (!matching_memory && GET_CODE (src1) == MEM))
7759 && GET_RTX_CLASS (code) != 'c')
e075ae69 7760 src1 = force_reg (mode, src1);
0f290768 7761
e075ae69 7762 /* If optimizing, copy to regs to improve CSE */
fe577e58 7763 if (optimize && ! no_new_pseudos)
32b5b1aa 7764 {
e075ae69
RH
7765 if (GET_CODE (dst) == MEM)
7766 dst = gen_reg_rtx (mode);
7767 if (GET_CODE (src1) == MEM)
7768 src1 = force_reg (mode, src1);
7769 if (GET_CODE (src2) == MEM)
7770 src2 = force_reg (mode, src2);
32b5b1aa 7771 }
e9a25f70 7772
e075ae69
RH
7773 /* Emit the instruction. */
7774
7775 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
7776 if (reload_in_progress)
7777 {
7778 /* Reload doesn't know about the flags register, and doesn't know that
7779 it doesn't want to clobber it. We can only do this with PLUS. */
7780 if (code != PLUS)
7781 abort ();
7782 emit_insn (op);
7783 }
7784 else
32b5b1aa 7785 {
e075ae69
RH
7786 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7787 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
32b5b1aa 7788 }
e9a25f70 7789
e075ae69
RH
7790 /* Fix up the destination if needed. */
7791 if (dst != operands[0])
7792 emit_move_insn (operands[0], dst);
7793}
7794
7795/* Return TRUE or FALSE depending on whether the binary operator meets the
7796 appropriate constraints. */
7797
7798int
7799ix86_binary_operator_ok (code, mode, operands)
7800 enum rtx_code code;
7801 enum machine_mode mode ATTRIBUTE_UNUSED;
7802 rtx operands[3];
7803{
7804 /* Both source operands cannot be in memory. */
7805 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
7806 return 0;
7807 /* If the operation is not commutative, source 1 cannot be a constant. */
7808 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != 'c')
7809 return 0;
7810 /* If the destination is memory, we must have a matching source operand. */
7811 if (GET_CODE (operands[0]) == MEM
7812 && ! (rtx_equal_p (operands[0], operands[1])
7813 || (GET_RTX_CLASS (code) == 'c'
7814 && rtx_equal_p (operands[0], operands[2]))))
7815 return 0;
06a964de 7816 /* If the operation is not commutative and source 1 is memory, we must
d6a7951f 7817 have a matching destination. */
06a964de
JH
7818 if (GET_CODE (operands[1]) == MEM
7819 && GET_RTX_CLASS (code) != 'c'
7820 && ! rtx_equal_p (operands[0], operands[1]))
7821 return 0;
e075ae69
RH
7822 return 1;
7823}
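/* These restrictions mirror the hardware: an ia32 arithmetic
 instruction encodes at most one memory operand, and when the
 destination is memory the insn is a read-modify-write of that same
 location. So "add mem1, mem2" can never match, while e.g.
 "addl $1, mem" (destination matching source 1) is fine. */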
7824
7825/* Attempt to expand a unary operator. Make the expansion closer to the
7826 actual machine, than just general_operand, which will allow 2 separate
9d81fc27 7827 memory references (one output, one input) in a single insn. */
e075ae69 7828
9d81fc27 7829void
e075ae69
RH
7830ix86_expand_unary_operator (code, mode, operands)
7831 enum rtx_code code;
7832 enum machine_mode mode;
7833 rtx operands[];
7834{
06a964de
JH
7835 int matching_memory;
7836 rtx src, dst, op, clob;
7837
7838 dst = operands[0];
7839 src = operands[1];
e075ae69 7840
06a964de
JH
7841 /* If the destination is memory, and we do not have matching source
7842 operands, do things in registers. */
7843 matching_memory = 0;
7844 if (GET_CODE (dst) == MEM)
32b5b1aa 7845 {
06a964de
JH
7846 if (rtx_equal_p (dst, src))
7847 matching_memory = 1;
e075ae69 7848 else
06a964de 7849 dst = gen_reg_rtx (mode);
32b5b1aa 7850 }
e9a25f70 7851
06a964de
JH
7852 /* When the source operand is memory, the destination must match. */
7853 if (!matching_memory && GET_CODE (src) == MEM)
7854 src = force_reg (mode, src);
0f290768 7855
06a964de 7856 /* If optimizing, copy to regs to improve CSE */
fe577e58 7857 if (optimize && ! no_new_pseudos)
06a964de
JH
7858 {
7859 if (GET_CODE (dst) == MEM)
7860 dst = gen_reg_rtx (mode);
7861 if (GET_CODE (src) == MEM)
7862 src = force_reg (mode, src);
7863 }
7864
7865 /* Emit the instruction. */
7866
7867 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
7868 if (reload_in_progress || code == NOT)
7869 {
7870 /* Reload doesn't know about the flags register, and doesn't know that
7871 it doesn't want to clobber it. */
7872 if (code != NOT)
7873 abort ();
7874 emit_insn (op);
7875 }
7876 else
7877 {
7878 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
7879 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
7880 }
7881
7882 /* Fix up the destination if needed. */
7883 if (dst != operands[0])
7884 emit_move_insn (operands[0], dst);
e075ae69
RH
7885}
7886
7887/* Return TRUE or FALSE depending on whether the unary operator meets the
7888 appropriate constraints. */
7889
7890int
7891ix86_unary_operator_ok (code, mode, operands)
7892 enum rtx_code code ATTRIBUTE_UNUSED;
7893 enum machine_mode mode ATTRIBUTE_UNUSED;
7894 rtx operands[2] ATTRIBUTE_UNUSED;
7895{
06a964de
JH
7896 /* If one of the operands is memory, source and destination must match. */
7897 if ((GET_CODE (operands[0]) == MEM
7898 || GET_CODE (operands[1]) == MEM)
7899 && ! rtx_equal_p (operands[0], operands[1]))
7900 return FALSE;
e075ae69
RH
7901 return TRUE;
7902}
7903
16189740
RH
7904/* Return TRUE or FALSE depending on whether the first SET in INSN
7905 has source and destination with matching CC modes, and whether the
7906 CC mode is at least as constrained as REQ_MODE. */
7907
7908int
7909ix86_match_ccmode (insn, req_mode)
7910 rtx insn;
7911 enum machine_mode req_mode;
7912{
7913 rtx set;
7914 enum machine_mode set_mode;
7915
7916 set = PATTERN (insn);
7917 if (GET_CODE (set) == PARALLEL)
7918 set = XVECEXP (set, 0, 0);
7919 if (GET_CODE (set) != SET)
7920 abort ();
9076b9c1
JH
7921 if (GET_CODE (SET_SRC (set)) != COMPARE)
7922 abort ();
16189740
RH
7923
7924 set_mode = GET_MODE (SET_DEST (set));
7925 switch (set_mode)
7926 {
9076b9c1
JH
7927 case CCNOmode:
7928 if (req_mode != CCNOmode
7929 && (req_mode != CCmode
7930 || XEXP (SET_SRC (set), 1) != const0_rtx))
7931 return 0;
7932 break;
16189740 7933 case CCmode:
9076b9c1 7934 if (req_mode == CCGCmode)
16189740
RH
7935 return 0;
7936 /* FALLTHRU */
9076b9c1
JH
7937 case CCGCmode:
7938 if (req_mode == CCGOCmode || req_mode == CCNOmode)
7939 return 0;
7940 /* FALLTHRU */
7941 case CCGOCmode:
16189740
RH
7942 if (req_mode == CCZmode)
7943 return 0;
7944 /* FALLTHRU */
7945 case CCZmode:
7946 break;
7947
7948 default:
7949 abort ();
7950 }
7951
7952 return (GET_MODE (SET_SRC (set)) == set_mode);
7953}
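/* Concretely: a SET in CCZmode passes for any REQ_MODE; plain CCmode
 passes only when CCmode itself was requested; and CCNOmode also
 satisfies a CCmode request when the comparison is against zero, as
 the fall-through chain above encodes. */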
7954
e075ae69
RH
7955/* Generate insn patterns to do an integer compare of OPERANDS. */
7956
7957static rtx
7958ix86_expand_int_compare (code, op0, op1)
7959 enum rtx_code code;
7960 rtx op0, op1;
7961{
7962 enum machine_mode cmpmode;
7963 rtx tmp, flags;
7964
7965 cmpmode = SELECT_CC_MODE (code, op0, op1);
7966 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
7967
7968 /* This is very simple, but making the interface the same as in the
7969 FP case makes the rest of the code easier. */
7970 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
7971 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
7972
7973 /* Return the test that should be put into the flags user, i.e.
7974 the bcc, scc, or cmov instruction. */
7975 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
7976}
7977
3a3677ff
RH
7978/* Figure out whether to use ordered or unordered fp comparisons.
7979 Return the appropriate mode to use. */
e075ae69 7980
b1cdafbb 7981enum machine_mode
3a3677ff 7982ix86_fp_compare_mode (code)
8752c357 7983 enum rtx_code code ATTRIBUTE_UNUSED;
e075ae69 7984{
9e7adcb3
JH
7985 /* ??? In order to make all comparisons reversible, we do all comparisons
7986 non-trapping when compiling for IEEE. Once gcc is able to distinguish
7987 between trapping and nontrapping forms of comparisons, we can make
7988 inequality comparisons trapping again, since that results in better code
7989 when using FCOM based compares. */
7990 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
3a3677ff
RH
7991}
7992
9076b9c1
JH
7993enum machine_mode
7994ix86_cc_mode (code, op0, op1)
7995 enum rtx_code code;
7996 rtx op0, op1;
7997{
7998 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
7999 return ix86_fp_compare_mode (code);
8000 switch (code)
8001 {
8002 /* Only zero flag is needed. */
8003 case EQ: /* ZF=0 */
8004 case NE: /* ZF!=0 */
8005 return CCZmode;
8006 /* Codes needing carry flag. */
265dab10
JH
8007 case GEU: /* CF=0 */
8008 case GTU: /* CF=0 & ZF=0 */
7e08e190
JH
8009 case LTU: /* CF=1 */
8010 case LEU: /* CF=1 | ZF=1 */
265dab10 8011 return CCmode;
9076b9c1
JH
8012 /* Codes possibly doable only with sign flag when
8013 comparing against zero. */
8014 case GE: /* SF=OF or SF=0 */
7e08e190 8015 case LT: /* SF<>OF or SF=1 */
9076b9c1
JH
8016 if (op1 == const0_rtx)
8017 return CCGOCmode;
8018 else
8019 /* For other cases Carry flag is not required. */
8020 return CCGCmode;
8021 /* Codes doable only with the sign flag when comparing
8022 against zero, but we lack a jump instruction for it,
8023 so we need to use relational tests against overflow,
8024 which thus needs to be zero. */
8025 case GT: /* ZF=0 & SF=OF */
8026 case LE: /* ZF=1 | SF<>OF */
8027 if (op1 == const0_rtx)
8028 return CCNOmode;
8029 else
8030 return CCGCmode;
7fcd7218
JH
8031 /* The strcmp pattern does (use flags), and combine may ask us for the
8032 proper mode. */
8033 case USE:
8034 return CCmode;
9076b9c1 8035 default:
0f290768 8036 abort ();
9076b9c1
JH
8037 }
8038}
8039
3a3677ff
RH
8040/* Return true if we should use an FCOMI instruction for this fp comparison. */
8041
a940d8bd 8042int
3a3677ff 8043ix86_use_fcomi_compare (code)
9e7adcb3 8044 enum rtx_code code ATTRIBUTE_UNUSED;
3a3677ff 8045{
9e7adcb3
JH
8046 enum rtx_code swapped_code = swap_condition (code);
8047 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
8048 || (ix86_fp_comparison_cost (swapped_code)
8049 == ix86_fp_comparison_fcomi_cost (swapped_code)));
3a3677ff
RH
8050}
8051
0f290768 8052/* Swap, force into registers, or otherwise massage the two operands
3a3677ff
RH
8053 to a fp comparison. The operands are updated in place; the new
8054 comparison code is returned. */
8055
8056static enum rtx_code
8057ix86_prepare_fp_compare_args (code, pop0, pop1)
8058 enum rtx_code code;
8059 rtx *pop0, *pop1;
8060{
8061 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
8062 rtx op0 = *pop0, op1 = *pop1;
8063 enum machine_mode op_mode = GET_MODE (op0);
0644b628 8064 int is_sse = SSE_REG_P (op0) | SSE_REG_P (op1);
3a3677ff 8065
e075ae69 8066 /* All of the unordered compare instructions only work on registers.
3a3677ff
RH
8067 The same is true of the XFmode compare instructions. The same is
8068 true of the fcomi compare instructions. */
8069
0644b628
JH
8070 if (!is_sse
8071 && (fpcmp_mode == CCFPUmode
8072 || op_mode == XFmode
8073 || op_mode == TFmode
8074 || ix86_use_fcomi_compare (code)))
e075ae69 8075 {
3a3677ff
RH
8076 op0 = force_reg (op_mode, op0);
8077 op1 = force_reg (op_mode, op1);
e075ae69
RH
8078 }
8079 else
8080 {
8081 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
8082 things around if they appear profitable, otherwise force op0
8083 into a register. */
8084
8085 if (standard_80387_constant_p (op0) == 0
8086 || (GET_CODE (op0) == MEM
8087 && ! (standard_80387_constant_p (op1) == 0
8088 || GET_CODE (op1) == MEM)))
32b5b1aa 8089 {
e075ae69
RH
8090 rtx tmp;
8091 tmp = op0, op0 = op1, op1 = tmp;
8092 code = swap_condition (code);
8093 }
8094
8095 if (GET_CODE (op0) != REG)
3a3677ff 8096 op0 = force_reg (op_mode, op0);
e075ae69
RH
8097
8098 if (CONSTANT_P (op1))
8099 {
8100 if (standard_80387_constant_p (op1))
3a3677ff 8101 op1 = force_reg (op_mode, op1);
e075ae69 8102 else
3a3677ff 8103 op1 = validize_mem (force_const_mem (op_mode, op1));
32b5b1aa
SC
8104 }
8105 }
e9a25f70 8106
9e7adcb3
JH
8107 /* Try to rearrange the comparison to make it cheaper. */
8108 if (ix86_fp_comparison_cost (code)
8109 > ix86_fp_comparison_cost (swap_condition (code))
558740bf 8110 && (GET_CODE (op1) == REG || !no_new_pseudos))
9e7adcb3
JH
8111 {
8112 rtx tmp;
8113 tmp = op0, op0 = op1, op1 = tmp;
8114 code = swap_condition (code);
8115 if (GET_CODE (op0) != REG)
8116 op0 = force_reg (op_mode, op0);
8117 }
8118
3a3677ff
RH
8119 *pop0 = op0;
8120 *pop1 = op1;
8121 return code;
8122}
8123
c0c102a9
JH
8124/* Convert comparison codes we use to represent FP comparison to integer
8125 code that will result in proper branch. Return UNKNOWN if no such code
8126 is available. */
8127static enum rtx_code
8128ix86_fp_compare_code_to_integer (code)
8129 enum rtx_code code;
8130{
8131 switch (code)
8132 {
8133 case GT:
8134 return GTU;
8135 case GE:
8136 return GEU;
8137 case ORDERED:
8138 case UNORDERED:
8139 return code;
8140 break;
8141 case UNEQ:
8142 return EQ;
8143 break;
8144 case UNLT:
8145 return LTU;
8146 break;
8147 case UNLE:
8148 return LEU;
8149 break;
8150 case LTGT:
8151 return NE;
8152 break;
8153 default:
8154 return UNKNOWN;
8155 }
8156}
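/* This mapping works because fcomi (and fnstsw+sahf) leave the fp
 relation in CF/ZF exactly as an unsigned integer compare would; see
 the flags table in ix86_fp_comparison_codes below. E.g. "above"
 (CF=0 and ZF=0) is fp ">", so GT maps to GTU. */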
8157
8158/* Split comparison code CODE into comparisons we can do using branch
8159 instructions. BYPASS_CODE is the comparison code for a branch that will
8160 branch around FIRST_CODE and SECOND_CODE. If one of the branches
8161 is not required, its value is set to NIL.
8162 We never require more than two branches. */
8163static void
8164ix86_fp_comparison_codes (code, bypass_code, first_code, second_code)
8165 enum rtx_code code, *bypass_code, *first_code, *second_code;
8166{
8167 *first_code = code;
8168 *bypass_code = NIL;
8169 *second_code = NIL;
8170
8171 /* The fcomi comparison sets flags as follows:
8172
8173 cmp ZF PF CF
8174 > 0 0 0
8175 < 0 0 1
8176 = 1 0 0
8177 un 1 1 1 */
8178
8179 switch (code)
8180 {
8181 case GT: /* GTU - CF=0 & ZF=0 */
8182 case GE: /* GEU - CF=0 */
8183 case ORDERED: /* PF=0 */
8184 case UNORDERED: /* PF=1 */
8185 case UNEQ: /* EQ - ZF=1 */
8186 case UNLT: /* LTU - CF=1 */
8187 case UNLE: /* LEU - CF=1 | ZF=1 */
8188 case LTGT: /* EQ - ZF=0 */
8189 break;
8190 case LT: /* LTU - CF=1 - fails on unordered */
8191 *first_code = UNLT;
8192 *bypass_code = UNORDERED;
8193 break;
8194 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
8195 *first_code = UNLE;
8196 *bypass_code = UNORDERED;
8197 break;
8198 case EQ: /* EQ - ZF=1 - fails on unordered */
8199 *first_code = UNEQ;
8200 *bypass_code = UNORDERED;
8201 break;
8202 case NE: /* NE - ZF=0 - fails on unordered */
8203 *first_code = LTGT;
8204 *second_code = UNORDERED;
8205 break;
8206 case UNGE: /* GEU - CF=0 - fails on unordered */
8207 *first_code = GE;
8208 *second_code = UNORDERED;
8209 break;
8210 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
8211 *first_code = GT;
8212 *second_code = UNORDERED;
8213 break;
8214 default:
8215 abort ();
8216 }
8217 if (!TARGET_IEEE_FP)
8218 {
8219 *second_code = NIL;
8220 *bypass_code = NIL;
8221 }
8222}
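/* Example: with TARGET_IEEE_FP, "a < b" yields first_code = UNLT with
 bypass_code = UNORDERED, i.e. branch around the "jb" when the
 operands are unordered, because CF=1 alone cannot tell "<" from
 "unordered" (see the flags table above). */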
8223
9e7adcb3
JH
8224/* Return the cost of a comparison done with fcom + arithmetic operations on AX.
8225 All following functions use the number of instructions as a cost metric.
8226 In the future this should be tweaked to compute bytes for optimize_size and
8227 take into account the performance of various instructions on various CPUs. */
8228static int
8229ix86_fp_comparison_arithmetics_cost (code)
8230 enum rtx_code code;
8231{
8232 if (!TARGET_IEEE_FP)
8233 return 4;
8234 /* The cost of code output by ix86_expand_fp_compare. */
8235 switch (code)
8236 {
8237 case UNLE:
8238 case UNLT:
8239 case LTGT:
8240 case GT:
8241 case GE:
8242 case UNORDERED:
8243 case ORDERED:
8244 case UNEQ:
8245 return 4;
8246 break;
8247 case LT:
8248 case NE:
8249 case EQ:
8250 case UNGE:
8251 return 5;
8252 break;
8253 case LE:
8254 case UNGT:
8255 return 6;
8256 break;
8257 default:
8258 abort ();
8259 }
8260}
8261
8262/* Return cost of comparison done using fcomi operation.
8263 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8264static int
8265ix86_fp_comparison_fcomi_cost (code)
8266 enum rtx_code code;
8267{
8268 enum rtx_code bypass_code, first_code, second_code;
8269 /* Return an arbitrarily high cost when the instruction is not supported -
8270 this prevents gcc from using it. */
8271 if (!TARGET_CMOVE)
8272 return 1024;
8273 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8274 return (bypass_code != NIL || second_code != NIL) + 2;
8275}
8276
8277/* Return cost of comparison done using sahf operation.
8278 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8279static int
8280ix86_fp_comparison_sahf_cost (code)
8281 enum rtx_code code;
8282{
8283 enum rtx_code bypass_code, first_code, second_code;
8284 /* Return an arbitrarily high cost when the instruction is not preferred -
8285 this keeps gcc from using it. */
8286 if (!TARGET_USE_SAHF && !optimize_size)
8287 return 1024;
8288 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8289 return (bypass_code != NIL || second_code != NIL) + 3;
8290}
8291
8292/* Compute cost of the comparison done using any method.
8293 See ix86_fp_comparison_arithmetics_cost for the metrics. */
8294static int
8295ix86_fp_comparison_cost (code)
8296 enum rtx_code code;
8297{
8298 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
8299 int min;
8300
8301 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
8302 sahf_cost = ix86_fp_comparison_sahf_cost (code);
8303
8304 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
8305 if (min > sahf_cost)
8306 min = sahf_cost;
8307 if (min > fcomi_cost)
8308 min = fcomi_cost;
8309 return min;
8310}
c0c102a9 8311
3a3677ff
RH
8312/* Generate insn patterns to do a floating point compare of OPERANDS. */
8313
9e7adcb3
JH
8314static rtx
8315ix86_expand_fp_compare (code, op0, op1, scratch, second_test, bypass_test)
3a3677ff
RH
8316 enum rtx_code code;
8317 rtx op0, op1, scratch;
9e7adcb3
JH
8318 rtx *second_test;
8319 rtx *bypass_test;
3a3677ff
RH
8320{
8321 enum machine_mode fpcmp_mode, intcmp_mode;
c0c102a9 8322 rtx tmp, tmp2;
9e7adcb3 8323 int cost = ix86_fp_comparison_cost (code);
c0c102a9 8324 enum rtx_code bypass_code, first_code, second_code;
3a3677ff
RH
8325
8326 fpcmp_mode = ix86_fp_compare_mode (code);
8327 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
8328
9e7adcb3
JH
8329 if (second_test)
8330 *second_test = NULL_RTX;
8331 if (bypass_test)
8332 *bypass_test = NULL_RTX;
8333
c0c102a9
JH
8334 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8335
9e7adcb3
JH
8336 /* Do fcomi/sahf based test when profitable. */
8337 if ((bypass_code == NIL || bypass_test)
8338 && (second_code == NIL || second_test)
8339 && ix86_fp_comparison_arithmetics_cost (code) > cost)
32b5b1aa 8340 {
c0c102a9
JH
8341 if (TARGET_CMOVE)
8342 {
8343 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8344 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
8345 tmp);
8346 emit_insn (tmp);
8347 }
8348 else
8349 {
8350 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8ee41eaf 8351 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
bf71a4f8
JH
8352 if (!scratch)
8353 scratch = gen_reg_rtx (HImode);
c0c102a9
JH
8354 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
8355 emit_insn (gen_x86_sahf_1 (scratch));
8356 }
e075ae69
RH
8357
8358 /* The FP codes work out to act like unsigned. */
9a915772 8359 intcmp_mode = fpcmp_mode;
9e7adcb3
JH
8360 code = first_code;
8361 if (bypass_code != NIL)
8362 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
8363 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8364 const0_rtx);
8365 if (second_code != NIL)
8366 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
8367 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8368 const0_rtx);
e075ae69
RH
8369 }
8370 else
8371 {
8372 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
e075ae69 8373 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
8ee41eaf 8374 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
bf71a4f8
JH
8375 if (!scratch)
8376 scratch = gen_reg_rtx (HImode);
3a3677ff 8377 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
e075ae69 8378
9a915772
JH
8379 /* In the unordered case, we have to check C2 for NaN's, which
8380 doesn't happen to work out to anything nice combination-wise.
8381 So do some bit twiddling on the value we've got in AH to come
8382 up with an appropriate set of condition codes. */
e075ae69 8383
9a915772
JH
8384 intcmp_mode = CCNOmode;
8385 switch (code)
32b5b1aa 8386 {
9a915772
JH
8387 case GT:
8388 case UNGT:
8389 if (code == GT || !TARGET_IEEE_FP)
32b5b1aa 8390 {
3a3677ff 8391 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
e075ae69 8392 code = EQ;
9a915772
JH
8393 }
8394 else
8395 {
8396 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8397 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8398 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
8399 intcmp_mode = CCmode;
8400 code = GEU;
8401 }
8402 break;
8403 case LT:
8404 case UNLT:
8405 if (code == LT && TARGET_IEEE_FP)
8406 {
3a3677ff
RH
8407 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8408 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
e075ae69
RH
8409 intcmp_mode = CCmode;
8410 code = EQ;
9a915772
JH
8411 }
8412 else
8413 {
8414 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
8415 code = NE;
8416 }
8417 break;
8418 case GE:
8419 case UNGE:
8420 if (code == GE || !TARGET_IEEE_FP)
8421 {
3a3677ff 8422 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
e075ae69 8423 code = EQ;
9a915772
JH
8424 }
8425 else
8426 {
8427 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8428 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8429 GEN_INT (0x01)));
8430 code = NE;
8431 }
8432 break;
8433 case LE:
8434 case UNLE:
8435 if (code == LE && TARGET_IEEE_FP)
8436 {
3a3677ff
RH
8437 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8438 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
8439 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
e075ae69
RH
8440 intcmp_mode = CCmode;
8441 code = LTU;
9a915772
JH
8442 }
8443 else
8444 {
8445 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
8446 code = NE;
8447 }
8448 break;
8449 case EQ:
8450 case UNEQ:
8451 if (code == EQ && TARGET_IEEE_FP)
8452 {
3a3677ff
RH
8453 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
8454 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
e075ae69
RH
8455 intcmp_mode = CCmode;
8456 code = EQ;
9a915772
JH
8457 }
8458 else
8459 {
3a3677ff
RH
8460 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8461 code = NE;
8462 break;
9a915772
JH
8463 }
8464 break;
8465 case NE:
8466 case LTGT:
8467 if (code == NE && TARGET_IEEE_FP)
8468 {
3a3677ff 8469 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
9a915772
JH
8470 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
8471 GEN_INT (0x40)));
3a3677ff 8472 code = NE;
9a915772
JH
8473 }
8474 else
8475 {
3a3677ff
RH
8476 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
8477 code = EQ;
32b5b1aa 8478 }
9a915772
JH
8479 break;
8480
8481 case UNORDERED:
8482 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8483 code = NE;
8484 break;
8485 case ORDERED:
8486 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
8487 code = EQ;
8488 break;
8489
8490 default:
8491 abort ();
32b5b1aa 8492 }
32b5b1aa 8493 }
e075ae69
RH
8494
8495 /* Return the test that should be put into the flags user, i.e.
8496 the bcc, scc, or cmov instruction. */
8497 return gen_rtx_fmt_ee (code, VOIDmode,
8498 gen_rtx_REG (intcmp_mode, FLAGS_REG),
8499 const0_rtx);
8500}
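/* The magic constants above come from where fnstsw lands the
 condition codes once the status word is in AX: in AH, C0 is bit 0,
 C2 is bit 2 and C3 is bit 6, hence 0x45 = C0|C2|C3 and 0x40 = C3.
 The IEEE EQ case is thus roughly "and $0x45, %ah; cmp $0x40, %ah",
 after which the flags user tests EQ (je/sete/cmove): equal iff C3
 is set while C0 and C2 are clear. */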
8501
9e3e266c 8502rtx
a1b8572c 8503ix86_expand_compare (code, second_test, bypass_test)
e075ae69 8504 enum rtx_code code;
a1b8572c 8505 rtx *second_test, *bypass_test;
e075ae69
RH
8506{
8507 rtx op0, op1, ret;
8508 op0 = ix86_compare_op0;
8509 op1 = ix86_compare_op1;
8510
a1b8572c
JH
8511 if (second_test)
8512 *second_test = NULL_RTX;
8513 if (bypass_test)
8514 *bypass_test = NULL_RTX;
8515
e075ae69 8516 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_FLOAT)
bf71a4f8 8517 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
77ebd435 8518 second_test, bypass_test);
32b5b1aa 8519 else
e075ae69
RH
8520 ret = ix86_expand_int_compare (code, op0, op1);
8521
8522 return ret;
8523}
8524
03598dea
JH
8525/* Return true if the CODE will result in nontrivial jump sequence. */
8526bool
8527ix86_fp_jump_nontrivial_p (code)
8528 enum rtx_code code;
8529{
8530 enum rtx_code bypass_code, first_code, second_code;
8531 if (!TARGET_CMOVE)
8532 return true;
8533 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8534 return bypass_code != NIL || second_code != NIL;
8535}
8536
e075ae69 8537void
3a3677ff 8538ix86_expand_branch (code, label)
e075ae69 8539 enum rtx_code code;
e075ae69
RH
8540 rtx label;
8541{
3a3677ff 8542 rtx tmp;
e075ae69 8543
3a3677ff 8544 switch (GET_MODE (ix86_compare_op0))
32b5b1aa 8545 {
3a3677ff
RH
8546 case QImode:
8547 case HImode:
8548 case SImode:
0d7d98ee 8549 simple:
a1b8572c 8550 tmp = ix86_expand_compare (code, NULL, NULL);
e075ae69
RH
8551 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8552 gen_rtx_LABEL_REF (VOIDmode, label),
8553 pc_rtx);
8554 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
32b5b1aa 8555 return;
e075ae69 8556
3a3677ff
RH
8557 case SFmode:
8558 case DFmode:
0f290768 8559 case XFmode:
2b589241 8560 case TFmode:
3a3677ff
RH
8561 {
8562 rtvec vec;
8563 int use_fcomi;
03598dea 8564 enum rtx_code bypass_code, first_code, second_code;
3a3677ff
RH
8565
8566 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
8567 &ix86_compare_op1);
fce5a9f2 8568
03598dea
JH
8569 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
8570
8571 /* Check whether we will use the natural sequence with one jump. If
8572 so, we can expand jump early. Otherwise delay expansion by
8573 creating compound insn to not confuse optimizers. */
8574 if (bypass_code == NIL && second_code == NIL
8575 && TARGET_CMOVE)
8576 {
8577 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
8578 gen_rtx_LABEL_REF (VOIDmode, label),
8579 pc_rtx, NULL_RTX);
8580 }
8581 else
8582 {
8583 tmp = gen_rtx_fmt_ee (code, VOIDmode,
8584 ix86_compare_op0, ix86_compare_op1);
8585 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
8586 gen_rtx_LABEL_REF (VOIDmode, label),
8587 pc_rtx);
8588 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
8589
8590 use_fcomi = ix86_use_fcomi_compare (code);
8591 vec = rtvec_alloc (3 + !use_fcomi);
8592 RTVEC_ELT (vec, 0) = tmp;
8593 RTVEC_ELT (vec, 1)
8594 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
8595 RTVEC_ELT (vec, 2)
8596 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
8597 if (! use_fcomi)
8598 RTVEC_ELT (vec, 3)
8599 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
8600
8601 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
8602 }
3a3677ff
RH
8603 return;
8604 }
32b5b1aa 8605
3a3677ff 8606 case DImode:
0d7d98ee
JH
8607 if (TARGET_64BIT)
8608 goto simple;
3a3677ff
RH
8609 /* Expand DImode branch into multiple compare+branch. */
8610 {
8611 rtx lo[2], hi[2], label2;
8612 enum rtx_code code1, code2, code3;
32b5b1aa 8613
3a3677ff
RH
8614 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
8615 {
8616 tmp = ix86_compare_op0;
8617 ix86_compare_op0 = ix86_compare_op1;
8618 ix86_compare_op1 = tmp;
8619 code = swap_condition (code);
8620 }
8621 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
8622 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
32b5b1aa 8623
3a3677ff
RH
8624 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
8625 avoid two branches. This costs one extra insn, so disable when
8626 optimizing for size. */
32b5b1aa 8627
3a3677ff
RH
8628 if ((code == EQ || code == NE)
8629 && (!optimize_size
8630 || hi[1] == const0_rtx || lo[1] == const0_rtx))
8631 {
8632 rtx xor0, xor1;
32b5b1aa 8633
3a3677ff
RH
8634 xor1 = hi[0];
8635 if (hi[1] != const0_rtx)
8636 xor1 = expand_binop (SImode, xor_optab, xor1, hi[1],
8637 NULL_RTX, 0, OPTAB_WIDEN);
32b5b1aa 8638
3a3677ff
RH
8639 xor0 = lo[0];
8640 if (lo[1] != const0_rtx)
8641 xor0 = expand_binop (SImode, xor_optab, xor0, lo[1],
8642 NULL_RTX, 0, OPTAB_WIDEN);
e075ae69 8643
3a3677ff
RH
8644 tmp = expand_binop (SImode, ior_optab, xor1, xor0,
8645 NULL_RTX, 0, OPTAB_WIDEN);
32b5b1aa 8646
3a3677ff
RH
8647 ix86_compare_op0 = tmp;
8648 ix86_compare_op1 = const0_rtx;
8649 ix86_expand_branch (code, label);
8650 return;
8651 }
e075ae69 8652
1f9124e4
JJ
8653 /* Otherwise, if we are doing less-than or greater-or-equal-than,
8654 op1 is a constant and the low word is zero, then we can just
8655 examine the high word. */
32b5b1aa 8656
1f9124e4
JJ
8657 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
8658 switch (code)
8659 {
8660 case LT: case LTU: case GE: case GEU:
8661 ix86_compare_op0 = hi[0];
8662 ix86_compare_op1 = hi[1];
8663 ix86_expand_branch (code, label);
8664 return;
8665 default:
8666 break;
8667 }
e075ae69 8668
3a3677ff 8669 /* Otherwise, we need two or three jumps. */
e075ae69 8670
3a3677ff 8671 label2 = gen_label_rtx ();
e075ae69 8672
3a3677ff
RH
8673 code1 = code;
8674 code2 = swap_condition (code);
8675 code3 = unsigned_condition (code);
e075ae69 8676
3a3677ff
RH
8677 switch (code)
8678 {
8679 case LT: case GT: case LTU: case GTU:
8680 break;
e075ae69 8681
3a3677ff
RH
8682 case LE: code1 = LT; code2 = GT; break;
8683 case GE: code1 = GT; code2 = LT; break;
8684 case LEU: code1 = LTU; code2 = GTU; break;
8685 case GEU: code1 = GTU; code2 = LTU; break;
e075ae69 8686
3a3677ff
RH
8687 case EQ: code1 = NIL; code2 = NE; break;
8688 case NE: code2 = NIL; break;
e075ae69 8689
3a3677ff
RH
8690 default:
8691 abort ();
8692 }
e075ae69 8693
3a3677ff
RH
8694 /*
8695 * a < b =>
8696 * if (hi(a) < hi(b)) goto true;
8697 * if (hi(a) > hi(b)) goto false;
8698 * if (lo(a) < lo(b)) goto true;
8699 * false:
8700 */
8701
8702 ix86_compare_op0 = hi[0];
8703 ix86_compare_op1 = hi[1];
8704
8705 if (code1 != NIL)
8706 ix86_expand_branch (code1, label);
8707 if (code2 != NIL)
8708 ix86_expand_branch (code2, label2);
8709
8710 ix86_compare_op0 = lo[0];
8711 ix86_compare_op1 = lo[1];
8712 ix86_expand_branch (code3, label);
8713
8714 if (code2 != NIL)
8715 emit_label (label2);
8716 return;
8717 }
e075ae69 8718
3a3677ff
RH
8719 default:
8720 abort ();
8721 }
32b5b1aa 8722}
e075ae69 8723
9e7adcb3
JH
8724/* Split branch based on floating point condition. */
8725void
03598dea
JH
8726ix86_split_fp_branch (code, op1, op2, target1, target2, tmp)
8727 enum rtx_code code;
8728 rtx op1, op2, target1, target2, tmp;
9e7adcb3
JH
8729{
8730 rtx second, bypass;
8731 rtx label = NULL_RTX;
03598dea 8732 rtx condition;
6b24c259
JH
8733 int bypass_probability = -1, second_probability = -1, probability = -1;
8734 rtx i;
9e7adcb3
JH
8735
8736 if (target2 != pc_rtx)
8737 {
8738 rtx tmp = target2;
8739 code = reverse_condition_maybe_unordered (code);
8740 target2 = target1;
8741 target1 = tmp;
8742 }
8743
8744 condition = ix86_expand_fp_compare (code, op1, op2,
8745 tmp, &second, &bypass);
6b24c259
JH
8746
8747 if (split_branch_probability >= 0)
8748 {
8749 /* Distribute the probabilities across the jumps.
8750 Assume that BYPASS and SECOND always test
8751 for UNORDERED. */
8752 probability = split_branch_probability;
8753
d6a7951f 8754 /* A value of 1 is low enough that the probability need not
6b24c259
JH
8755 be updated. Later we may run some experiments and see
8756 whether unordered values are more frequent in practice. */
8757 if (bypass)
8758 bypass_probability = 1;
8759 if (second)
8760 second_probability = 1;
8761 }
9e7adcb3
JH
8762 if (bypass != NULL_RTX)
8763 {
8764 label = gen_label_rtx ();
6b24c259
JH
8765 i = emit_jump_insn (gen_rtx_SET
8766 (VOIDmode, pc_rtx,
8767 gen_rtx_IF_THEN_ELSE (VOIDmode,
8768 bypass,
8769 gen_rtx_LABEL_REF (VOIDmode,
8770 label),
8771 pc_rtx)));
8772 if (bypass_probability >= 0)
8773 REG_NOTES (i)
8774 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8775 GEN_INT (bypass_probability),
8776 REG_NOTES (i));
8777 }
8778 i = emit_jump_insn (gen_rtx_SET
9e7adcb3
JH
8779 (VOIDmode, pc_rtx,
8780 gen_rtx_IF_THEN_ELSE (VOIDmode,
6b24c259
JH
8781 condition, target1, target2)));
8782 if (probability >= 0)
8783 REG_NOTES (i)
8784 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8785 GEN_INT (probability),
8786 REG_NOTES (i));
8787 if (second != NULL_RTX)
9e7adcb3 8788 {
6b24c259
JH
8789 i = emit_jump_insn (gen_rtx_SET
8790 (VOIDmode, pc_rtx,
8791 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
8792 target2)));
8793 if (second_probability >= 0)
8794 REG_NOTES (i)
8795 = gen_rtx_EXPR_LIST (REG_BR_PROB,
8796 GEN_INT (second_probability),
8797 REG_NOTES (i));
9e7adcb3 8798 }
9e7adcb3
JH
8799 if (label != NULL_RTX)
8800 emit_label (label);
8801}
8802
32b5b1aa 8803int
3a3677ff 8804ix86_expand_setcc (code, dest)
e075ae69 8805 enum rtx_code code;
e075ae69 8806 rtx dest;
32b5b1aa 8807{
a1b8572c
JH
8808 rtx ret, tmp, tmpreg;
8809 rtx second_test, bypass_test;
e075ae69 8810
885a70fd
JH
8811 if (GET_MODE (ix86_compare_op0) == DImode
8812 && !TARGET_64BIT)
e075ae69
RH
8813 return 0; /* FAIL */
8814
b932f770
JH
8815 if (GET_MODE (dest) != QImode)
8816 abort ();
e075ae69 8817
a1b8572c 8818 ret = ix86_expand_compare (code, &second_test, &bypass_test);
e075ae69
RH
8819 PUT_MODE (ret, QImode);
8820
8821 tmp = dest;
a1b8572c 8822 tmpreg = dest;
32b5b1aa 8823
e075ae69 8824 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
a1b8572c
JH
8825 if (bypass_test || second_test)
8826 {
8827 rtx test = second_test;
8828 int bypass = 0;
8829 rtx tmp2 = gen_reg_rtx (QImode);
8830 if (bypass_test)
8831 {
8832 if (second_test)
b531087a 8833 abort ();
a1b8572c
JH
8834 test = bypass_test;
8835 bypass = 1;
8836 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
8837 }
8838 PUT_MODE (test, QImode);
8839 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
8840
8841 if (bypass)
8842 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
8843 else
8844 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
8845 }
e075ae69 8846
e075ae69 8847 return 1; /* DONE */
32b5b1aa 8848}
e075ae69 8849
32b5b1aa 8850int
e075ae69
RH
8851ix86_expand_int_movcc (operands)
8852 rtx operands[];
32b5b1aa 8853{
e075ae69
RH
8854 enum rtx_code code = GET_CODE (operands[1]), compare_code;
8855 rtx compare_seq, compare_op;
a1b8572c 8856 rtx second_test, bypass_test;
635559ab 8857 enum machine_mode mode = GET_MODE (operands[0]);
32b5b1aa 8858
36583fea
JH
8859 /* When the compare code is not LTU or GEU, we cannot use the sbbl case.
8860 In case the comparison is done with an immediate, we can convert it to
8861 LTU or GEU by altering the integer. */
8862
8863 if ((code == LEU || code == GTU)
8864 && GET_CODE (ix86_compare_op1) == CONST_INT
635559ab 8865 && mode != HImode
261376e7
RH
8866 && INTVAL (ix86_compare_op1) != -1
8867 /* For x86-64, the immediate field in the instruction is 32-bit
8868 signed, so we can't increment a DImode value above 0x7fffffff. */
74411039
JH
8869 && (!TARGET_64BIT
8870 || GET_MODE (ix86_compare_op0) != DImode
261376e7 8871 || INTVAL (ix86_compare_op1) != 0x7fffffff)
0f290768 8872 && GET_CODE (operands[2]) == CONST_INT
36583fea
JH
8873 && GET_CODE (operands[3]) == CONST_INT)
8874 {
8875 if (code == LEU)
8876 code = LTU;
8877 else
8878 code = GEU;
261376e7
RH
8879 ix86_compare_op1 = gen_int_mode (INTVAL (ix86_compare_op1) + 1,
8880 GET_MODE (ix86_compare_op0));
36583fea 8881 }
3a3677ff 8882
e075ae69 8883 start_sequence ();
a1b8572c 8884 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
2f937369 8885 compare_seq = get_insns ();
e075ae69
RH
8886 end_sequence ();
8887
8888 compare_code = GET_CODE (compare_op);
8889
8890 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
8891 HImode insns, we'd be swallowed in word prefix ops. */
8892
635559ab
JH
8893 if (mode != HImode
8894 && (mode != DImode || TARGET_64BIT)
0f290768 8895 && GET_CODE (operands[2]) == CONST_INT
e075ae69
RH
8896 && GET_CODE (operands[3]) == CONST_INT)
8897 {
8898 rtx out = operands[0];
8899 HOST_WIDE_INT ct = INTVAL (operands[2]);
8900 HOST_WIDE_INT cf = INTVAL (operands[3]);
8901 HOST_WIDE_INT diff;
8902
a1b8572c
JH
8903 if ((compare_code == LTU || compare_code == GEU)
8904 && !second_test && !bypass_test)
e075ae69 8905 {
e075ae69
RH
8906 /* Detect overlap between destination and compare sources. */
8907 rtx tmp = out;
8908
0f290768 8909 /* To simplify rest of code, restrict to the GEU case. */
36583fea
JH
8910 if (compare_code == LTU)
8911 {
8912 int tmp = ct;
8913 ct = cf;
8914 cf = tmp;
8915 compare_code = reverse_condition (compare_code);
8916 code = reverse_condition (code);
8917 }
8918 diff = ct - cf;
8919
e075ae69 8920 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
a500c31b 8921 || reg_overlap_mentioned_p (out, ix86_compare_op1))
635559ab 8922 tmp = gen_reg_rtx (mode);
e075ae69
RH
8923
8924 emit_insn (compare_seq);
635559ab 8925 if (mode == DImode)
14f73b5a
JH
8926 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp));
8927 else
8928 emit_insn (gen_x86_movsicc_0_m1 (tmp));
e075ae69 8929
36583fea
JH
8930 if (diff == 1)
8931 {
8932 /*
8933 * cmpl op0,op1
8934 * sbbl dest,dest
8935 * [addl dest, ct]
8936 *
8937 * Size 5 - 8.
8938 */
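/* Editor's note, added for clarity (not part of the original source):
   the sbbl materializes the carry flag as an all-zero/all-one mask, so
   after the canonicalization to GEU above and with ct == cf + 1:

     mask   = condition holds ? 0 : -1;   -- sbbl dest,dest
     result = ct + mask;                  -- ct when true, ct - 1 == cf when false

   which is why the diff == 1 case needs no masking step at all.  */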
8939 if (ct)
635559ab
JH
8940 tmp = expand_simple_binop (mode, PLUS,
8941 tmp, GEN_INT (ct),
8942 tmp, 1, OPTAB_DIRECT);
36583fea
JH
8943 }
8944 else if (cf == -1)
8945 {
8946 /*
8947 * cmpl op0,op1
8948 * sbbl dest,dest
8949 * orl $ct, dest
8950 *
8951 * Size 8.
8952 */
635559ab
JH
8953 tmp = expand_simple_binop (mode, IOR,
8954 tmp, GEN_INT (ct),
8955 tmp, 1, OPTAB_DIRECT);
36583fea
JH
8956 }
8957 else if (diff == -1 && ct)
8958 {
8959 /*
8960 * cmpl op0,op1
8961 * sbbl dest,dest
06ec023f 8962 * notl dest
36583fea
JH
8963 * [addl dest, cf]
8964 *
8965 * Size 8 - 11.
8966 */
635559ab
JH
8967 tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
8968 if (cf)
8969 tmp = expand_simple_binop (mode, PLUS,
8970 tmp, GEN_INT (cf),
8971 tmp, 1, OPTAB_DIRECT);
36583fea
JH
8972 }
8973 else
8974 {
8975 /*
8976 * cmpl op0,op1
8977 * sbbl dest,dest
06ec023f 8978 * [notl dest]
36583fea
JH
8979 * andl cf - ct, dest
8980 * [addl dest, ct]
8981 *
8982 * Size 8 - 11.
8983 */
06ec023f
RB
8984
8985 if (cf == 0)
8986 {
8987 cf = ct;
8988 ct = 0;
8989 tmp = expand_simple_unop (mode, NOT, tmp, tmp, 1);
8990 }
8991
635559ab
JH
8992 tmp = expand_simple_binop (mode, AND,
8993 tmp,
d8bf17f9 8994 gen_int_mode (cf - ct, mode),
635559ab
JH
8995 tmp, 1, OPTAB_DIRECT);
8996 if (ct)
8997 tmp = expand_simple_binop (mode, PLUS,
8998 tmp, GEN_INT (ct),
8999 tmp, 1, OPTAB_DIRECT);
36583fea 9000 }
e075ae69
RH
9001
9002 if (tmp != out)
9003 emit_move_insn (out, tmp);
9004
9005 return 1; /* DONE */
9006 }
9007
9008 diff = ct - cf;
9009 if (diff < 0)
9010 {
9011 HOST_WIDE_INT tmp;
9012 tmp = ct, ct = cf, cf = tmp;
9013 diff = -diff;
734dba19
JH
9014 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
9015 {
9016 /* We may be reversing an unordered compare to a normal compare, which
9017 is not valid in general (we may convert a non-trapping condition
9018 into a trapping one); however, on i386 we currently emit all
9019 comparisons unordered. */
9020 compare_code = reverse_condition_maybe_unordered (compare_code);
9021 code = reverse_condition_maybe_unordered (code);
9022 }
9023 else
9024 {
9025 compare_code = reverse_condition (compare_code);
9026 code = reverse_condition (code);
9027 }
e075ae69 9028 }
0f2a3457
JJ
9029
9030 compare_code = NIL;
9031 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
9032 && GET_CODE (ix86_compare_op1) == CONST_INT)
9033 {
9034 if (ix86_compare_op1 == const0_rtx
9035 && (code == LT || code == GE))
9036 compare_code = code;
9037 else if (ix86_compare_op1 == constm1_rtx)
9038 {
9039 if (code == LE)
9040 compare_code = LT;
9041 else if (code == GT)
9042 compare_code = GE;
9043 }
9044 }
9045
9046 /* Optimize dest = (op0 < 0) ? -1 : cf. */
9047 if (compare_code != NIL
9048 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
9049 && (cf == -1 || ct == -1))
9050 {
9051 /* If the lea code below could be used, only optimize
9052 if it results in a 2-insn sequence. */
9053
9054 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
9055 || diff == 3 || diff == 5 || diff == 9)
9056 || (compare_code == LT && ct == -1)
9057 || (compare_code == GE && cf == -1))
9058 {
9059 /*
9060 * notl op1 (if necessary)
9061 * sarl $31, op1
9062 * orl cf, op1
9063 */
9064 if (ct != -1)
9065 {
9066 cf = ct;
9067 ct = -1;
9068 code = reverse_condition (code);
9069 }
9070
9071 out = emit_store_flag (out, code, ix86_compare_op0,
9072 ix86_compare_op1, VOIDmode, 0, -1);
9073
9074 out = expand_simple_binop (mode, IOR,
9075 out, GEN_INT (cf),
9076 out, 1, OPTAB_DIRECT);
9077 if (out != operands[0])
9078 emit_move_insn (operands[0], out);
9079
9080 return 1; /* DONE */
9081 }
9082 }
9083
635559ab
JH
9084 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
9085 || diff == 3 || diff == 5 || diff == 9)
9086 && (mode != DImode || x86_64_sign_extended_value (GEN_INT (cf))))
e075ae69
RH
9087 {
9088 /*
9089 * xorl dest,dest
9090 * cmpl op1,op2
9091 * setcc dest
9092 * lea cf(dest*(ct-cf)),dest
9093 *
9094 * Size 14.
9095 *
9096 * This also catches the degenerate setcc-only case.
9097 */
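/* Editor's sketch, added for clarity (not part of the original source):
   with flag = setcc result in {0, 1}, the lea computes

     result = cf + flag * (ct - cf);

   in a single address calculation, which is why diff == ct - cf is
   limited to what an address mode can scale: 1, 2, 4, 8, plus the
   base-equals-index forms 3, 5 and 9.  */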
9098
9099 rtx tmp;
9100 int nops;
9101
9102 out = emit_store_flag (out, code, ix86_compare_op0,
9103 ix86_compare_op1, VOIDmode, 0, 1);
9104
9105 nops = 0;
97f51ac4
RB
9106 /* On x86_64 the lea instruction operates on Pmode, so we need
9107 to get the arithmetic done in the proper mode to match. */
e075ae69 9108 if (diff == 1)
14f73b5a 9109 tmp = out;
e075ae69
RH
9110 else
9111 {
885a70fd 9112 rtx out1;
14f73b5a 9113 out1 = out;
635559ab 9114 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
e075ae69
RH
9115 nops++;
9116 if (diff & 1)
9117 {
635559ab 9118 tmp = gen_rtx_PLUS (mode, tmp, out1);
e075ae69
RH
9119 nops++;
9120 }
9121 }
9122 if (cf != 0)
9123 {
635559ab 9124 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
e075ae69
RH
9125 nops++;
9126 }
885a70fd
JH
9127 if (tmp != out
9128 && (GET_CODE (tmp) != SUBREG || SUBREG_REG (tmp) != out))
e075ae69 9129 {
14f73b5a 9130 if (nops == 1)
e075ae69
RH
9131 {
9132 rtx clob;
9133
9134 clob = gen_rtx_REG (CCmode, FLAGS_REG);
9135 clob = gen_rtx_CLOBBER (VOIDmode, clob);
9136
9137 tmp = gen_rtx_SET (VOIDmode, out, tmp);
9138 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
9139 emit_insn (tmp);
9140 }
9141 else
9142 emit_insn (gen_rtx_SET (VOIDmode, out, tmp));
9143 }
9144 if (out != operands[0])
1985ef90 9145 emit_move_insn (operands[0], copy_rtx (out));
e075ae69
RH
9146
9147 return 1; /* DONE */
9148 }
9149
9150 /*
9151 * General case: Jumpful:
9152 * xorl dest,dest cmpl op1, op2
9153 * cmpl op1, op2 movl ct, dest
9154 * setcc dest jcc 1f
9155 * decl dest movl cf, dest
9156 * andl (cf-ct),dest 1:
9157 * addl ct,dest
0f290768 9158 *
e075ae69
RH
9159 * Size 20. Size 14.
9160 *
9161 * This is reasonably steep, but branch mispredict costs are
9162 * high on modern cpus, so consider failing only if optimizing
9163 * for space.
9164 *
9165 * %%% Parameterize branch_cost on the tuning architecture, then
9166 * use that. The 80386 couldn't care less about mispredicts.
9167 */
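/* Editor's sketch, added for clarity (not part of the original source):
   with flag = setcc result in {0, 1}, the jumpless column computes

     t = flag - 1;     -- 0 when true, -1 when false  (setcc; decl)
     t &= cf - ct;     -- 0 when true, cf - ct when false
     t += ct;          -- ct when true, cf when false

   trading a few extra bytes of code for the avoided mispredict.  */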
9168
9169 if (!optimize_size && !TARGET_CMOVE)
9170 {
97f51ac4 9171 if (cf == 0)
e075ae69 9172 {
97f51ac4
RB
9173 cf = ct;
9174 ct = 0;
734dba19 9175 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
0f2a3457
JJ
9176 /* We may be reversing an unordered compare to a normal compare,
9177 which is not valid in general (we may convert a non-trapping
9178 condition into a trapping one); however, on i386 we currently
9179 emit all comparisons unordered. */
9180 code = reverse_condition_maybe_unordered (code);
9181 else
9182 {
9183 code = reverse_condition (code);
9184 if (compare_code != NIL)
9185 compare_code = reverse_condition (compare_code);
9186 }
9187 }
9188
9189 if (compare_code != NIL)
9190 {
9191 /* notl op1 (if needed)
9192 sarl $31, op1
9193 andl (cf-ct), op1
9194 addl ct, op1
9195
9196 For x < 0 (resp. x <= -1) there will be no notl,
9197 so if possible swap the constants to get rid of the
9198 complement.
9199 True/false will be -1/0 while code below (store flag
9200 followed by decrement) is 0/-1, so the constants need
9201 to be exchanged once more. */
9202
9203 if (compare_code == GE || !cf)
734dba19 9204 {
0f2a3457
JJ
9205 code = reverse_condition (code);
9206 compare_code = LT;
734dba19
JH
9207 }
9208 else
9209 {
0f2a3457
JJ
9210 HOST_WIDE_INT tmp = cf;
9211 cf = ct;
9212 ct = tmp;
734dba19 9213 }
0f2a3457
JJ
9214
9215 out = emit_store_flag (out, code, ix86_compare_op0,
9216 ix86_compare_op1, VOIDmode, 0, -1);
e075ae69 9217 }
0f2a3457
JJ
9218 else
9219 {
9220 out = emit_store_flag (out, code, ix86_compare_op0,
9221 ix86_compare_op1, VOIDmode, 0, 1);
e075ae69 9222
97f51ac4 9223 out = expand_simple_binop (mode, PLUS, out, constm1_rtx,
0f2a3457
JJ
9224 out, 1, OPTAB_DIRECT);
9225 }
e075ae69 9226
97f51ac4 9227 out = expand_simple_binop (mode, AND, out,
d8bf17f9 9228 gen_int_mode (cf - ct, mode),
635559ab 9229 out, 1, OPTAB_DIRECT);
97f51ac4
RB
9230 if (ct)
9231 out = expand_simple_binop (mode, PLUS, out, GEN_INT (ct),
9232 out, 1, OPTAB_DIRECT);
e075ae69
RH
9233 if (out != operands[0])
9234 emit_move_insn (operands[0], out);
9235
9236 return 1; /* DONE */
9237 }
9238 }
9239
9240 if (!TARGET_CMOVE)
9241 {
9242 /* Try a few things more with specific constants and a variable. */
9243
78a0d70c 9244 optab op;
e075ae69
RH
9245 rtx var, orig_out, out, tmp;
9246
9247 if (optimize_size)
9248 return 0; /* FAIL */
9249
0f290768 9250 /* If one of the two operands is an interesting constant, load a
e075ae69 9251 constant with the above and mask it in with a logical operation. */
0f290768 9252
e075ae69
RH
9253 if (GET_CODE (operands[2]) == CONST_INT)
9254 {
9255 var = operands[3];
9256 if (INTVAL (operands[2]) == 0)
9257 operands[3] = constm1_rtx, op = and_optab;
9258 else if (INTVAL (operands[2]) == -1)
9259 operands[3] = const0_rtx, op = ior_optab;
78a0d70c
ZW
9260 else
9261 return 0; /* FAIL */
e075ae69
RH
9262 }
9263 else if (GET_CODE (operands[3]) == CONST_INT)
9264 {
9265 var = operands[2];
9266 if (INTVAL (operands[3]) == 0)
9267 operands[2] = constm1_rtx, op = and_optab;
9268 else if (INTVAL (operands[3]) == -1)
9269 operands[2] = const0_rtx, op = ior_optab;
78a0d70c
ZW
9270 else
9271 return 0; /* FAIL */
e075ae69 9272 }
78a0d70c 9273 else
e075ae69
RH
9274 return 0; /* FAIL */
9275
9276 orig_out = operands[0];
635559ab 9277 tmp = gen_reg_rtx (mode);
e075ae69
RH
9278 operands[0] = tmp;
9279
9280 /* Recurse to get the constant loaded. */
9281 if (ix86_expand_int_movcc (operands) == 0)
9282 return 0; /* FAIL */
9283
9284 /* Mask in the interesting variable. */
635559ab 9285 out = expand_binop (mode, op, var, tmp, orig_out, 0,
e075ae69
RH
9286 OPTAB_WIDEN);
9287 if (out != orig_out)
9288 emit_move_insn (orig_out, out);
9289
9290 return 1; /* DONE */
9291 }
9292
9293 /*
9294 * For comparison with above,
9295 *
9296 * movl cf,dest
9297 * movl ct,tmp
9298 * cmpl op1,op2
9299 * cmovcc tmp,dest
9300 *
9301 * Size 15.
9302 */
9303
635559ab
JH
9304 if (! nonimmediate_operand (operands[2], mode))
9305 operands[2] = force_reg (mode, operands[2]);
9306 if (! nonimmediate_operand (operands[3], mode))
9307 operands[3] = force_reg (mode, operands[3]);
e075ae69 9308
a1b8572c
JH
9309 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9310 {
635559ab 9311 rtx tmp = gen_reg_rtx (mode);
a1b8572c
JH
9312 emit_move_insn (tmp, operands[3]);
9313 operands[3] = tmp;
9314 }
9315 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9316 {
635559ab 9317 rtx tmp = gen_reg_rtx (mode);
a1b8572c
JH
9318 emit_move_insn (tmp, operands[2]);
9319 operands[2] = tmp;
9320 }
c9682caf
JH
9321 if (! register_operand (operands[2], VOIDmode)
9322 && ! register_operand (operands[3], VOIDmode))
635559ab 9323 operands[2] = force_reg (mode, operands[2]);
a1b8572c 9324
e075ae69
RH
9325 emit_insn (compare_seq);
9326 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
635559ab 9327 gen_rtx_IF_THEN_ELSE (mode,
e075ae69
RH
9328 compare_op, operands[2],
9329 operands[3])));
a1b8572c
JH
9330 if (bypass_test)
9331 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
635559ab 9332 gen_rtx_IF_THEN_ELSE (mode,
a1b8572c
JH
9333 bypass_test,
9334 operands[3],
9335 operands[0])));
9336 if (second_test)
9337 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
635559ab 9338 gen_rtx_IF_THEN_ELSE (mode,
a1b8572c
JH
9339 second_test,
9340 operands[2],
9341 operands[0])));
e075ae69
RH
9342
9343 return 1; /* DONE */
e9a25f70 9344}
e075ae69 9345
32b5b1aa 9346int
e075ae69
RH
9347ix86_expand_fp_movcc (operands)
9348 rtx operands[];
32b5b1aa 9349{
e075ae69 9350 enum rtx_code code;
e075ae69 9351 rtx tmp;
a1b8572c 9352 rtx compare_op, second_test, bypass_test;
32b5b1aa 9353
0073023d
JH
9354 /* For SF/DFmode conditional moves based on comparisons
9355 in the same mode, we may want to use SSE min/max instructions. */
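/* Editor's note, added for clarity (not part of the original source):
   the recognition below matches the C idioms

     x = (a < b) ? a : b;   -- minss/minsd
     x = (a > b) ? a : b;   -- maxss/maxsd

   once the comparison operands and the move arms have been lined up.  */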
965f5423
JH
9356 if (((TARGET_SSE_MATH && GET_MODE (operands[0]) == SFmode)
9357 || (TARGET_SSE2 && TARGET_SSE_MATH && GET_MODE (operands[0]) == DFmode))
0073023d 9358 && GET_MODE (ix86_compare_op0) == GET_MODE (operands[0])
fa9f36a1
JH
9359 /* The SSE comparisons do not support the LTGT/UNEQ pair. */
9360 && (!TARGET_IEEE_FP
9361 || (GET_CODE (operands[1]) != LTGT && GET_CODE (operands[1]) != UNEQ))
0073023d
JH
9362 /* We may be called from the post-reload splitter. */
9363 && (!REG_P (operands[0])
9364 || SSE_REG_P (operands[0])
52a661a6 9365 || REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER))
0073023d
JH
9366 {
9367 rtx op0 = ix86_compare_op0, op1 = ix86_compare_op1;
9368 code = GET_CODE (operands[1]);
9369
9370 /* See if we have (cross) match between comparison operands and
9371 conditional move operands. */
9372 if (rtx_equal_p (operands[2], op1))
9373 {
9374 rtx tmp = op0;
9375 op0 = op1;
9376 op1 = tmp;
9377 code = reverse_condition_maybe_unordered (code);
9378 }
9379 if (rtx_equal_p (operands[2], op0) && rtx_equal_p (operands[3], op1))
9380 {
9381 /* Check for min operation. */
9382 if (code == LT)
9383 {
9384 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9385 if (memory_operand (op0, VOIDmode))
9386 op0 = force_reg (GET_MODE (operands[0]), op0);
9387 if (GET_MODE (operands[0]) == SFmode)
9388 emit_insn (gen_minsf3 (operands[0], op0, op1));
9389 else
9390 emit_insn (gen_mindf3 (operands[0], op0, op1));
9391 return 1;
9392 }
9393 /* Check for max operation. */
9394 if (code == GT)
9395 {
9396 operands[0] = force_reg (GET_MODE (operands[0]), operands[0]);
9397 if (memory_operand (op0, VOIDmode))
9398 op0 = force_reg (GET_MODE (operands[0]), op0);
9399 if (GET_MODE (operands[0]) == SFmode)
9400 emit_insn (gen_maxsf3 (operands[0], op0, op1));
9401 else
9402 emit_insn (gen_maxdf3 (operands[0], op0, op1));
9403 return 1;
9404 }
9405 }
9406 /* Arrange for the condition to be an sse_comparison_operator. In case
9407 we are in non-IEEE mode, try to canonicalize the destination operand
9408 to be first in the comparison - this helps reload avoid extra
9409 moves. */
9410 if (!sse_comparison_operator (operands[1], VOIDmode)
9411 || (rtx_equal_p (operands[0], ix86_compare_op1) && !TARGET_IEEE_FP))
9412 {
9413 rtx tmp = ix86_compare_op0;
9414 ix86_compare_op0 = ix86_compare_op1;
9415 ix86_compare_op1 = tmp;
9416 operands[1] = gen_rtx_fmt_ee (swap_condition (GET_CODE (operands[1])),
9417 VOIDmode, ix86_compare_op0,
9418 ix86_compare_op1);
9419 }
9420 /* Similarly, try to arrange for the result to be the first operand of
fa9f36a1
JH
9421 the conditional move. We also don't support the NE comparison on SSE,
9422 so try to avoid it. */
037f20f1
JH
9423 if ((rtx_equal_p (operands[0], operands[3])
9424 && (!TARGET_IEEE_FP || GET_CODE (operands[1]) != EQ))
9425 || (GET_CODE (operands[1]) == NE && TARGET_IEEE_FP))
0073023d
JH
9426 {
9427 rtx tmp = operands[2];
9428 operands[2] = operands[3];
92d0fb09 9429 operands[3] = tmp;
0073023d
JH
9430 operands[1] = gen_rtx_fmt_ee (reverse_condition_maybe_unordered
9431 (GET_CODE (operands[1])),
9432 VOIDmode, ix86_compare_op0,
9433 ix86_compare_op1);
9434 }
9435 if (GET_MODE (operands[0]) == SFmode)
9436 emit_insn (gen_sse_movsfcc (operands[0], operands[1],
9437 operands[2], operands[3],
9438 ix86_compare_op0, ix86_compare_op1));
9439 else
9440 emit_insn (gen_sse_movdfcc (operands[0], operands[1],
9441 operands[2], operands[3],
9442 ix86_compare_op0, ix86_compare_op1));
9443 return 1;
9444 }
9445
e075ae69 9446 /* The floating point conditional move instructions don't directly
0f290768 9447 support conditions resulting from a signed integer comparison. */
32b5b1aa 9448
e075ae69 9449 code = GET_CODE (operands[1]);
a1b8572c 9450 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9e7adcb3
JH
9451
9452 /* The floating point conditional move instructions don't directly
9453 support signed integer comparisons. */
9454
a1b8572c 9455 if (!fcmov_comparison_operator (compare_op, VOIDmode))
e075ae69 9456 {
a1b8572c 9457 if (second_test != NULL || bypass_test != NULL)
b531087a 9458 abort ();
e075ae69 9459 tmp = gen_reg_rtx (QImode);
3a3677ff 9460 ix86_expand_setcc (code, tmp);
e075ae69
RH
9461 code = NE;
9462 ix86_compare_op0 = tmp;
9463 ix86_compare_op1 = const0_rtx;
a1b8572c
JH
9464 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
9465 }
9466 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
9467 {
9468 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9469 emit_move_insn (tmp, operands[3]);
9470 operands[3] = tmp;
9471 }
9472 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
9473 {
9474 tmp = gen_reg_rtx (GET_MODE (operands[0]));
9475 emit_move_insn (tmp, operands[2]);
9476 operands[2] = tmp;
e075ae69 9477 }
e9a25f70 9478
e075ae69
RH
9479 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9480 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9e7adcb3 9481 compare_op,
e075ae69
RH
9482 operands[2],
9483 operands[3])));
a1b8572c
JH
9484 if (bypass_test)
9485 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9486 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9487 bypass_test,
9488 operands[3],
9489 operands[0])));
9490 if (second_test)
9491 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
9492 gen_rtx_IF_THEN_ELSE (GET_MODE (operands[0]),
9493 second_test,
9494 operands[2],
9495 operands[0])));
32b5b1aa 9496
e075ae69 9497 return 1;
32b5b1aa
SC
9498}
9499
2450a057
JH
9500/* Split operands 0 and 1 into SImode parts. Similar to split_di, but
9501 works for floating point parameters and nonoffsettable memories.
9502 For pushes, it returns just stack offsets; the values will be saved
9503 in the right order. At most three parts are generated. */
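/* Editor's sketch, added for clarity (not part of the original source):
   on a 32-bit target a DImode value V yields

     parts[0] = (unsigned int) V;           -- low word
     parts[1] = (unsigned int) (V >> 32);   -- high word

   and XFmode/TFmode contribute a third SImode part for the top bits.  */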
9504
2b589241 9505static int
2450a057
JH
9506ix86_split_to_parts (operand, parts, mode)
9507 rtx operand;
9508 rtx *parts;
9509 enum machine_mode mode;
32b5b1aa 9510{
26e5b205
JH
9511 int size;
9512
9513 if (!TARGET_64BIT)
9514 size = mode == TFmode ? 3 : (GET_MODE_SIZE (mode) / 4);
9515 else
9516 size = (GET_MODE_SIZE (mode) + 4) / 8;
2450a057 9517
a7180f70
BS
9518 if (GET_CODE (operand) == REG && MMX_REGNO_P (REGNO (operand)))
9519 abort ();
2450a057
JH
9520 if (size < 2 || size > 3)
9521 abort ();
9522
f996902d
RH
9523 /* Optimize constant pool references to immediates. This is used by fp
9524 moves, which force all constants to memory to allow combining. */
9525 if (GET_CODE (operand) == MEM && RTX_UNCHANGING_P (operand))
9526 {
9527 rtx tmp = maybe_get_pool_constant (operand);
9528 if (tmp)
9529 operand = tmp;
9530 }
d7a29404 9531
2450a057 9532 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
e075ae69 9533 {
2450a057
JH
9534 /* The only non-offsettable memories we handle are pushes. */
9535 if (! push_operand (operand, VOIDmode))
9536 abort ();
9537
26e5b205
JH
9538 operand = copy_rtx (operand);
9539 PUT_MODE (operand, Pmode);
2450a057
JH
9540 parts[0] = parts[1] = parts[2] = operand;
9541 }
26e5b205 9542 else if (!TARGET_64BIT)
2450a057
JH
9543 {
9544 if (mode == DImode)
9545 split_di (&operand, 1, &parts[0], &parts[1]);
9546 else
e075ae69 9547 {
2450a057
JH
9548 if (REG_P (operand))
9549 {
9550 if (!reload_completed)
9551 abort ();
9552 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
9553 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
9554 if (size == 3)
9555 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
9556 }
9557 else if (offsettable_memref_p (operand))
9558 {
f4ef873c 9559 operand = adjust_address (operand, SImode, 0);
2450a057 9560 parts[0] = operand;
b72f00af 9561 parts[1] = adjust_address (operand, SImode, 4);
2450a057 9562 if (size == 3)
b72f00af 9563 parts[2] = adjust_address (operand, SImode, 8);
2450a057
JH
9564 }
9565 else if (GET_CODE (operand) == CONST_DOUBLE)
9566 {
9567 REAL_VALUE_TYPE r;
2b589241 9568 long l[4];
2450a057
JH
9569
9570 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9571 switch (mode)
9572 {
9573 case XFmode:
2b589241 9574 case TFmode:
2450a057 9575 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
d8bf17f9 9576 parts[2] = gen_int_mode (l[2], SImode);
2450a057
JH
9577 break;
9578 case DFmode:
9579 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
9580 break;
9581 default:
9582 abort ();
9583 }
d8bf17f9
LB
9584 parts[1] = gen_int_mode (l[1], SImode);
9585 parts[0] = gen_int_mode (l[0], SImode);
2450a057
JH
9586 }
9587 else
9588 abort ();
e075ae69 9589 }
2450a057 9590 }
26e5b205
JH
9591 else
9592 {
44cf5b6a
JH
9593 if (mode == TImode)
9594 split_ti (&operand, 1, &parts[0], &parts[1]);
26e5b205
JH
9595 if (mode == XFmode || mode == TFmode)
9596 {
9597 if (REG_P (operand))
9598 {
9599 if (!reload_completed)
9600 abort ();
9601 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
9602 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
9603 }
9604 else if (offsettable_memref_p (operand))
9605 {
b72f00af 9606 operand = adjust_address (operand, DImode, 0);
26e5b205 9607 parts[0] = operand;
b72f00af 9608 parts[1] = adjust_address (operand, SImode, 8);
26e5b205
JH
9609 }
9610 else if (GET_CODE (operand) == CONST_DOUBLE)
9611 {
9612 REAL_VALUE_TYPE r;
9613 long l[3];
9614
9615 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
9616 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
9617 /* Do not use shift by 32 to avoid warning on 32bit systems. */
9618 if (HOST_BITS_PER_WIDE_INT >= 64)
523fbd9d 9619 parts[0]
d8bf17f9 9620 = gen_int_mode
44cf5b6a 9621 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
b531087a 9622 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
d8bf17f9 9623 DImode);
26e5b205
JH
9624 else
9625 parts[0] = immed_double_const (l[0], l[1], DImode);
d8bf17f9 9626 parts[1] = gen_int_mode (l[2], SImode);
26e5b205
JH
9627 }
9628 else
9629 abort ();
9630 }
9631 }
2450a057 9632
2b589241 9633 return size;
2450a057
JH
9634}
9635
9636/* Emit insns to perform a move or push of DI, DF, and XF values.
9637 Return false when normal moves are needed; true when all required
9638 insns have been emitted. Operands 2-4 contain the input values
9639 in the correct order; operands 5-7 contain the output values. */
9640
26e5b205
JH
9641void
9642ix86_split_long_move (operands)
9643 rtx operands[];
2450a057
JH
9644{
9645 rtx part[2][3];
26e5b205 9646 int nparts;
2450a057
JH
9647 int push = 0;
9648 int collisions = 0;
26e5b205
JH
9649 enum machine_mode mode = GET_MODE (operands[0]);
9650
9651 /* The DFmode expanders may ask us to move a double.
9652 For a 64-bit target this is a single move. By hiding the fact
9653 here we simplify the i386.md splitters. */
9654 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
9655 {
8cdfa312
RH
9656 /* Optimize constant pool references to immediates. This is used by
9657 fp moves, which force all constants to memory to allow combining. */
26e5b205
JH
9658
9659 if (GET_CODE (operands[1]) == MEM
9660 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
9661 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
9662 operands[1] = get_pool_constant (XEXP (operands[1], 0));
9663 if (push_operand (operands[0], VOIDmode))
b47b4f21
JH
9664 {
9665 operands[0] = copy_rtx (operands[0]);
9666 PUT_MODE (operands[0], Pmode);
9667 }
26e5b205
JH
9668 else
9669 operands[0] = gen_lowpart (DImode, operands[0]);
9670 operands[1] = gen_lowpart (DImode, operands[1]);
9671 emit_move_insn (operands[0], operands[1]);
9672 return;
9673 }
2450a057 9674
2450a057
JH
9675 /* The only non-offsettable memory we handle is push. */
9676 if (push_operand (operands[0], VOIDmode))
9677 push = 1;
9678 else if (GET_CODE (operands[0]) == MEM
9679 && ! offsettable_memref_p (operands[0]))
9680 abort ();
9681
26e5b205
JH
9682 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
9683 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
2450a057
JH
9684
9685 /* When emitting push, take care for source operands on the stack. */
9686 if (push && GET_CODE (operands[1]) == MEM
9687 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
9688 {
26e5b205 9689 if (nparts == 3)
886cbb88
JH
9690 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
9691 XEXP (part[1][2], 0));
9692 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
9693 XEXP (part[1][1], 0));
2450a057
JH
9694 }
9695
0f290768 9696 /* We need to do the copy in the right order in case an address register
2450a057
JH
9697 of the source overlaps the destination. */
9698 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
9699 {
9700 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
9701 collisions++;
9702 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
9703 collisions++;
26e5b205 9704 if (nparts == 3
2450a057
JH
9705 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
9706 collisions++;
9707
9708 /* Collision in the middle part can be handled by reordering. */
26e5b205 9709 if (collisions == 1 && nparts == 3
2450a057 9710 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
e075ae69 9711 {
2450a057
JH
9712 rtx tmp;
9713 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
9714 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
9715 }
e075ae69 9716
2450a057
JH
9717 /* If there are more collisions, we can't handle it by reordering.
9718 Do an lea to the last part and use only one colliding move. */
9719 else if (collisions > 1)
9720 {
9721 collisions = 1;
26e5b205 9722 emit_insn (gen_rtx_SET (VOIDmode, part[0][nparts - 1],
2450a057 9723 XEXP (part[1][0], 0)));
26e5b205
JH
9724 part[1][0] = change_address (part[1][0],
9725 TARGET_64BIT ? DImode : SImode,
9726 part[0][nparts - 1]);
b72f00af 9727 part[1][1] = adjust_address (part[1][0], VOIDmode, UNITS_PER_WORD);
26e5b205 9728 if (nparts == 3)
b72f00af 9729 part[1][2] = adjust_address (part[1][0], VOIDmode, 8);
2450a057
JH
9730 }
9731 }
9732
9733 if (push)
9734 {
26e5b205 9735 if (!TARGET_64BIT)
2b589241 9736 {
26e5b205
JH
9737 if (nparts == 3)
9738 {
9739 /* We use only the first 12 bytes of a TFmode value, but for pushing we
9740 are required to adjust the stack as if we were pushing a real 16-byte
9741 value. */
9742 if (mode == TFmode && !TARGET_64BIT)
9743 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
9744 GEN_INT (-4)));
9745 emit_move_insn (part[0][2], part[1][2]);
9746 }
2b589241 9747 }
26e5b205
JH
9748 else
9749 {
9750 /* In 64-bit mode we don't have a 32-bit push available. In case this is
9751 a register, it is OK - we will just use the larger counterpart. We also
9752 retype the memory - this comes from an attempt to avoid the REX prefix
9753 on moving the second half of a TFmode value. */
9754 if (GET_MODE (part[1][1]) == SImode)
9755 {
9756 if (GET_CODE (part[1][1]) == MEM)
f4ef873c 9757 part[1][1] = adjust_address (part[1][1], DImode, 0);
26e5b205
JH
9758 else if (REG_P (part[1][1]))
9759 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
9760 else
b531087a 9761 abort ();
886cbb88
JH
9762 if (GET_MODE (part[1][0]) == SImode)
9763 part[1][0] = part[1][1];
26e5b205
JH
9764 }
9765 }
9766 emit_move_insn (part[0][1], part[1][1]);
9767 emit_move_insn (part[0][0], part[1][0]);
9768 return;
2450a057
JH
9769 }
9770
9771 /* Choose the correct order so we do not overwrite the source before it is copied. */
9772 if ((REG_P (part[0][0])
9773 && REG_P (part[1][1])
9774 && (REGNO (part[0][0]) == REGNO (part[1][1])
26e5b205 9775 || (nparts == 3
2450a057
JH
9776 && REGNO (part[0][0]) == REGNO (part[1][2]))))
9777 || (collisions > 0
9778 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
9779 {
26e5b205 9780 if (nparts == 3)
2450a057 9781 {
26e5b205
JH
9782 operands[2] = part[0][2];
9783 operands[3] = part[0][1];
9784 operands[4] = part[0][0];
9785 operands[5] = part[1][2];
9786 operands[6] = part[1][1];
9787 operands[7] = part[1][0];
2450a057
JH
9788 }
9789 else
9790 {
26e5b205
JH
9791 operands[2] = part[0][1];
9792 operands[3] = part[0][0];
9793 operands[5] = part[1][1];
9794 operands[6] = part[1][0];
2450a057
JH
9795 }
9796 }
9797 else
9798 {
26e5b205 9799 if (nparts == 3)
2450a057 9800 {
26e5b205
JH
9801 operands[2] = part[0][0];
9802 operands[3] = part[0][1];
9803 operands[4] = part[0][2];
9804 operands[5] = part[1][0];
9805 operands[6] = part[1][1];
9806 operands[7] = part[1][2];
2450a057
JH
9807 }
9808 else
9809 {
26e5b205
JH
9810 operands[2] = part[0][0];
9811 operands[3] = part[0][1];
9812 operands[5] = part[1][0];
9813 operands[6] = part[1][1];
e075ae69
RH
9814 }
9815 }
26e5b205
JH
9816 emit_move_insn (operands[2], operands[5]);
9817 emit_move_insn (operands[3], operands[6]);
9818 if (nparts == 3)
9819 emit_move_insn (operands[4], operands[7]);
32b5b1aa 9820
26e5b205 9821 return;
32b5b1aa 9822}
32b5b1aa 9823
e075ae69
RH
9824void
9825ix86_split_ashldi (operands, scratch)
9826 rtx *operands, scratch;
32b5b1aa 9827{
e075ae69
RH
9828 rtx low[2], high[2];
9829 int count;
b985a30f 9830
e075ae69
RH
9831 if (GET_CODE (operands[2]) == CONST_INT)
9832 {
9833 split_di (operands, 2, low, high);
9834 count = INTVAL (operands[2]) & 63;
32b5b1aa 9835
e075ae69
RH
9836 if (count >= 32)
9837 {
9838 emit_move_insn (high[0], low[1]);
9839 emit_move_insn (low[0], const0_rtx);
b985a30f 9840
e075ae69
RH
9841 if (count > 32)
9842 emit_insn (gen_ashlsi3 (high[0], high[0], GEN_INT (count - 32)));
9843 }
9844 else
9845 {
9846 if (!rtx_equal_p (operands[0], operands[1]))
9847 emit_move_insn (operands[0], operands[1]);
9848 emit_insn (gen_x86_shld_1 (high[0], low[0], GEN_INT (count)));
9849 emit_insn (gen_ashlsi3 (low[0], low[0], GEN_INT (count)));
9850 }
9851 }
9852 else
9853 {
9854 if (!rtx_equal_p (operands[0], operands[1]))
9855 emit_move_insn (operands[0], operands[1]);
b985a30f 9856
e075ae69 9857 split_di (operands, 1, low, high);
b985a30f 9858
e075ae69
RH
9859 emit_insn (gen_x86_shld_1 (high[0], low[0], operands[2]));
9860 emit_insn (gen_ashlsi3 (low[0], low[0], operands[2]));
32b5b1aa 9861
fe577e58 9862 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
e075ae69 9863 {
fe577e58 9864 if (! no_new_pseudos)
e075ae69
RH
9865 scratch = force_reg (SImode, const0_rtx);
9866 else
9867 emit_move_insn (scratch, const0_rtx);
9868
9869 emit_insn (gen_x86_shift_adj_1 (high[0], low[0], operands[2],
9870 scratch));
9871 }
9872 else
9873 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
9874 }
e9a25f70 9875}
32b5b1aa 9876
e075ae69
RH
9877void
9878ix86_split_ashrdi (operands, scratch)
9879 rtx *operands, scratch;
32b5b1aa 9880{
e075ae69
RH
9881 rtx low[2], high[2];
9882 int count;
32b5b1aa 9883
e075ae69
RH
9884 if (GET_CODE (operands[2]) == CONST_INT)
9885 {
9886 split_di (operands, 2, low, high);
9887 count = INTVAL (operands[2]) & 63;
32b5b1aa 9888
e075ae69
RH
9889 if (count >= 32)
9890 {
9891 emit_move_insn (low[0], high[1]);
32b5b1aa 9892
e075ae69
RH
9893 if (! reload_completed)
9894 emit_insn (gen_ashrsi3 (high[0], low[0], GEN_INT (31)));
9895 else
9896 {
9897 emit_move_insn (high[0], low[0]);
9898 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (31)));
9899 }
9900
9901 if (count > 32)
9902 emit_insn (gen_ashrsi3 (low[0], low[0], GEN_INT (count - 32)));
9903 }
9904 else
9905 {
9906 if (!rtx_equal_p (operands[0], operands[1]))
9907 emit_move_insn (operands[0], operands[1]);
9908 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
9909 emit_insn (gen_ashrsi3 (high[0], high[0], GEN_INT (count)));
9910 }
9911 }
9912 else
32b5b1aa 9913 {
e075ae69
RH
9914 if (!rtx_equal_p (operands[0], operands[1]))
9915 emit_move_insn (operands[0], operands[1]);
9916
9917 split_di (operands, 1, low, high);
9918
9919 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
9920 emit_insn (gen_ashrsi3 (high[0], high[0], operands[2]));
9921
fe577e58 9922 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
e075ae69 9923 {
fe577e58 9924 if (! no_new_pseudos)
e075ae69
RH
9925 scratch = gen_reg_rtx (SImode);
9926 emit_move_insn (scratch, high[0]);
9927 emit_insn (gen_ashrsi3 (scratch, scratch, GEN_INT (31)));
9928 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
9929 scratch));
9930 }
9931 else
9932 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
32b5b1aa 9933 }
e075ae69 9934}
32b5b1aa 9935
e075ae69
RH
9936void
9937ix86_split_lshrdi (operands, scratch)
9938 rtx *operands, scratch;
9939{
9940 rtx low[2], high[2];
9941 int count;
32b5b1aa 9942
e075ae69 9943 if (GET_CODE (operands[2]) == CONST_INT)
32b5b1aa 9944 {
e075ae69
RH
9945 split_di (operands, 2, low, high);
9946 count = INTVAL (operands[2]) & 63;
9947
9948 if (count >= 32)
c7271385 9949 {
e075ae69
RH
9950 emit_move_insn (low[0], high[1]);
9951 emit_move_insn (high[0], const0_rtx);
32b5b1aa 9952
e075ae69
RH
9953 if (count > 32)
9954 emit_insn (gen_lshrsi3 (low[0], low[0], GEN_INT (count - 32)));
9955 }
9956 else
9957 {
9958 if (!rtx_equal_p (operands[0], operands[1]))
9959 emit_move_insn (operands[0], operands[1]);
9960 emit_insn (gen_x86_shrd_1 (low[0], high[0], GEN_INT (count)));
9961 emit_insn (gen_lshrsi3 (high[0], high[0], GEN_INT (count)));
9962 }
32b5b1aa 9963 }
e075ae69
RH
9964 else
9965 {
9966 if (!rtx_equal_p (operands[0], operands[1]))
9967 emit_move_insn (operands[0], operands[1]);
32b5b1aa 9968
e075ae69
RH
9969 split_di (operands, 1, low, high);
9970
9971 emit_insn (gen_x86_shrd_1 (low[0], high[0], operands[2]));
9972 emit_insn (gen_lshrsi3 (high[0], high[0], operands[2]));
9973
9974 /* Heh. By reversing the arguments, we can reuse this pattern. */
fe577e58 9975 if (TARGET_CMOVE && (! no_new_pseudos || scratch))
e075ae69 9976 {
fe577e58 9977 if (! no_new_pseudos)
e075ae69
RH
9978 scratch = force_reg (SImode, const0_rtx);
9979 else
9980 emit_move_insn (scratch, const0_rtx);
9981
9982 emit_insn (gen_x86_shift_adj_1 (low[0], high[0], operands[2],
9983 scratch));
9984 }
9985 else
9986 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
9987 }
32b5b1aa 9988}
3f803cd9 9989
0407c02b 9990/* Helper function for the string operations below. Test whether VARIABLE
0945b39d
JH
9991 is aligned to VALUE bytes; if it is, jump to the label. */
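/* Editor's note, added for clarity (not part of the original source):
   the emitted test branches to the returned label when the bit is clear,

     if ((variable & value) == 0)
       goto label;   -- already aligned, skip the fix-up move

   so callers place the unaligned fix-up between the call and the label.  */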
9992static rtx
9993ix86_expand_aligntest (variable, value)
9994 rtx variable;
9995 int value;
9996{
9997 rtx label = gen_label_rtx ();
9998 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
9999 if (GET_MODE (variable) == DImode)
10000 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
10001 else
10002 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
10003 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
d43e0b7d 10004 1, label);
0945b39d
JH
10005 return label;
10006}
10007
10008/* Decrement COUNTREG by VALUE. */
10009static void
10010ix86_adjust_counter (countreg, value)
10011 rtx countreg;
10012 HOST_WIDE_INT value;
10013{
10014 if (GET_MODE (countreg) == DImode)
10015 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
10016 else
10017 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
10018}
10019
10020/* Zero extend a possibly SImode EXP to a Pmode register. */
d24b3457 10021rtx
0945b39d
JH
10022ix86_zero_extend_to_Pmode (exp)
10023 rtx exp;
10024{
10025 rtx r;
10026 if (GET_MODE (exp) == VOIDmode)
10027 return force_reg (Pmode, exp);
10028 if (GET_MODE (exp) == Pmode)
10029 return copy_to_mode_reg (Pmode, exp);
10030 r = gen_reg_rtx (Pmode);
10031 emit_insn (gen_zero_extendsidi2 (r, exp));
10032 return r;
10033}
10034
10035/* Expand string move (memcpy) operation. Use i386 string operations when
10036 profitable. expand_clrstr contains similar code. */
10037int
10038ix86_expand_movstr (dst, src, count_exp, align_exp)
10039 rtx dst, src, count_exp, align_exp;
10040{
10041 rtx srcreg, destreg, countreg;
10042 enum machine_mode counter_mode;
10043 HOST_WIDE_INT align = 0;
10044 unsigned HOST_WIDE_INT count = 0;
10045 rtx insns;
10046
10047 start_sequence ();
10048
10049 if (GET_CODE (align_exp) == CONST_INT)
10050 align = INTVAL (align_exp);
10051
5519a4f9 10052 /* This simple hack avoids all inlining code and simplifies code below. */
0945b39d
JH
10053 if (!TARGET_ALIGN_STRINGOPS)
10054 align = 64;
10055
10056 if (GET_CODE (count_exp) == CONST_INT)
10057 count = INTVAL (count_exp);
10058
10059 /* Figure out the proper mode for the counter. For 32 bits it is always
10060 SImode; for 64 bits use SImode when possible, otherwise DImode.
10061 Set count to the number of bytes copied when known at compile time. */
10062 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10063 || x86_64_zero_extended_value (count_exp))
10064 counter_mode = SImode;
10065 else
10066 counter_mode = DImode;
10067
10068 if (counter_mode != SImode && counter_mode != DImode)
10069 abort ();
10070
10071 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
10072 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10073
10074 emit_insn (gen_cld ());
10075
10076 /* When optimizing for size, emit a simple rep ; movsb instruction for
10077 counts not divisible by 4. */
10078
10079 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10080 {
10081 countreg = ix86_zero_extend_to_Pmode (count_exp);
10082 if (TARGET_64BIT)
10083 emit_insn (gen_rep_movqi_rex64 (destreg, srcreg, countreg,
10084 destreg, srcreg, countreg));
10085 else
10086 emit_insn (gen_rep_movqi (destreg, srcreg, countreg,
10087 destreg, srcreg, countreg));
10088 }
10089
10090 /* For constant aligned (or small unaligned) copies use rep movsl
10091 followed by code copying the rest. For PentiumPro ensure 8 byte
10092 alignment to allow rep movsl acceleration. */
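/* Editor's sketch, added for clarity (not part of the original source):
   for a known COUNT this path amounts to

     rep movsl on count >> 2 words (rep movsq on count >> 3 for 64-bit),
     if (count & 4) copy one word      -- 64-bit case only
     if (count & 2) copy one halfword
     if (count & 1) copy one byte

   so at most three straight-line moves follow the string operation.  */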
10093
10094 else if (count != 0
10095 && (align >= 8
10096 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
b531087a 10097 || optimize_size || count < (unsigned int) 64))
0945b39d
JH
10098 {
10099 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10100 if (count & ~(size - 1))
10101 {
10102 countreg = copy_to_mode_reg (counter_mode,
10103 GEN_INT ((count >> (size == 4 ? 2 : 3))
10104 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10105 countreg = ix86_zero_extend_to_Pmode (countreg);
10106 if (size == 4)
10107 {
10108 if (TARGET_64BIT)
10109 emit_insn (gen_rep_movsi_rex64 (destreg, srcreg, countreg,
10110 destreg, srcreg, countreg));
10111 else
10112 emit_insn (gen_rep_movsi (destreg, srcreg, countreg,
10113 destreg, srcreg, countreg));
10114 }
10115 else
10116 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg,
10117 destreg, srcreg, countreg));
10118 }
10119 if (size == 8 && (count & 0x04))
10120 emit_insn (gen_strmovsi (destreg, srcreg));
10121 if (count & 0x02)
10122 emit_insn (gen_strmovhi (destreg, srcreg));
10123 if (count & 0x01)
10124 emit_insn (gen_strmovqi (destreg, srcreg));
10125 }
10126 /* The generic code based on the glibc implementation:
10127 - align destination to 4 bytes (8 byte alignment is used for PentiumPro
10128 allowing accelerated copying there)
10129 - copy the data using rep movsl
10130 - copy the rest. */
10131 else
10132 {
10133 rtx countreg2;
10134 rtx label = NULL;
37ad04a5
JH
10135 int desired_alignment = (TARGET_PENTIUMPRO
10136 && (count == 0 || count >= (unsigned int) 260)
10137 ? 8 : UNITS_PER_WORD);
0945b39d
JH
10138
10139 /* In case we don't know anything about the alignment, default to the
10140 library version, since it is usually equally fast and results in
10141 shorter code. */
10142 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
10143 {
10144 end_sequence ();
10145 return 0;
10146 }
10147
10148 if (TARGET_SINGLE_STRINGOP)
10149 emit_insn (gen_cld ());
10150
10151 countreg2 = gen_reg_rtx (Pmode);
10152 countreg = copy_to_mode_reg (counter_mode, count_exp);
10153
10154 /* We don't use loops to align destination and to copy parts smaller
10155 than 4 bytes, because gcc is able to optimize such code better (in
10156 the case the destination or the count really is aligned, gcc is often
10157 able to predict the branches) and also it is friendlier to the
a4f31c00 10158 hardware branch prediction.
0945b39d
JH
10159
10160 Using loops is beneficial for the generic case, because we can
10161 handle small counts using the loops. Many CPUs (such as Athlon)
10162 have large REP prefix setup costs.
10163
10164 This is quite costly. Maybe we can revisit this decision later or
10165 add some customizability to this code. */
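/* Editor's sketch, added for clarity (not part of the original source):
   the alignment prologue emitted below peels leading bytes, roughly

     if (dest & 1) { copy 1 byte;  count -= 1; }
     if (dest & 2) { copy 2 bytes; count -= 2; }
     if (dest & 4) { copy 4 bytes; count -= 4; }  -- only when 8-byte alignment is desired

   before the rep-prefixed instruction runs on the aligned bulk.  */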
10166
37ad04a5 10167 if (count == 0 && align < desired_alignment)
0945b39d
JH
10168 {
10169 label = gen_label_rtx ();
aaae0bb9 10170 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
d43e0b7d 10171 LEU, 0, counter_mode, 1, label);
0945b39d
JH
10172 }
10173 if (align <= 1)
10174 {
10175 rtx label = ix86_expand_aligntest (destreg, 1);
10176 emit_insn (gen_strmovqi (destreg, srcreg));
10177 ix86_adjust_counter (countreg, 1);
10178 emit_label (label);
10179 LABEL_NUSES (label) = 1;
10180 }
10181 if (align <= 2)
10182 {
10183 rtx label = ix86_expand_aligntest (destreg, 2);
10184 emit_insn (gen_strmovhi (destreg, srcreg));
10185 ix86_adjust_counter (countreg, 2);
10186 emit_label (label);
10187 LABEL_NUSES (label) = 1;
10188 }
37ad04a5 10189 if (align <= 4 && desired_alignment > 4)
0945b39d
JH
10190 {
10191 rtx label = ix86_expand_aligntest (destreg, 4);
10192 emit_insn (gen_strmovsi (destreg, srcreg));
10193 ix86_adjust_counter (countreg, 4);
10194 emit_label (label);
10195 LABEL_NUSES (label) = 1;
10196 }
10197
37ad04a5
JH
10198 if (label && desired_alignment > 4 && !TARGET_64BIT)
10199 {
10200 emit_label (label);
10201 LABEL_NUSES (label) = 1;
10202 label = NULL_RTX;
10203 }
0945b39d
JH
10204 if (!TARGET_SINGLE_STRINGOP)
10205 emit_insn (gen_cld ());
10206 if (TARGET_64BIT)
10207 {
10208 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10209 GEN_INT (3)));
10210 emit_insn (gen_rep_movdi_rex64 (destreg, srcreg, countreg2,
10211 destreg, srcreg, countreg2));
10212 }
10213 else
10214 {
10215 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
10216 emit_insn (gen_rep_movsi (destreg, srcreg, countreg2,
10217 destreg, srcreg, countreg2));
10218 }
10219
10220 if (label)
10221 {
10222 emit_label (label);
10223 LABEL_NUSES (label) = 1;
10224 }
10225 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10226 emit_insn (gen_strmovsi (destreg, srcreg));
10227 if ((align <= 4 || count == 0) && TARGET_64BIT)
10228 {
10229 rtx label = ix86_expand_aligntest (countreg, 4);
10230 emit_insn (gen_strmovsi (destreg, srcreg));
10231 emit_label (label);
10232 LABEL_NUSES (label) = 1;
10233 }
10234 if (align > 2 && count != 0 && (count & 2))
10235 emit_insn (gen_strmovhi (destreg, srcreg));
10236 if (align <= 2 || count == 0)
10237 {
10238 rtx label = ix86_expand_aligntest (countreg, 2);
10239 emit_insn (gen_strmovhi (destreg, srcreg));
10240 emit_label (label);
10241 LABEL_NUSES (label) = 1;
10242 }
10243 if (align > 1 && count != 0 && (count & 1))
10244 emit_insn (gen_strmovqi (destreg, srcreg));
10245 if (align <= 1 || count == 0)
10246 {
10247 rtx label = ix86_expand_aligntest (countreg, 1);
10248 emit_insn (gen_strmovqi (destreg, srcreg));
10249 emit_label (label);
10250 LABEL_NUSES (label) = 1;
10251 }
10252 }
10253
10254 insns = get_insns ();
10255 end_sequence ();
10256
10257 ix86_set_move_mem_attrs (insns, dst, src, destreg, srcreg);
2f937369 10258 emit_insn (insns);
0945b39d
JH
10259 return 1;
10260}
10261
10262/* Expand string clear operation (bzero). Use i386 string operations when
10263 profitable. expand_movstr contains similar code. */
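/* Editor's note, added for clarity (not part of the original source):
   the expansion mirrors ix86_expand_movstr with stores instead of copies:
   rep stosl with a zeroed register covers count >> 2 words, then a zero
   halfword handles (count & 2) and a zero byte handles (count & 1).  */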
10264int
10265ix86_expand_clrstr (src, count_exp, align_exp)
10266 rtx src, count_exp, align_exp;
10267{
10268 rtx destreg, zeroreg, countreg;
10269 enum machine_mode counter_mode;
10270 HOST_WIDE_INT align = 0;
10271 unsigned HOST_WIDE_INT count = 0;
10272
10273 if (GET_CODE (align_exp) == CONST_INT)
10274 align = INTVAL (align_exp);
10275
5519a4f9 10276 /* This simple hack avoids all inlining code and simplifies code below. */
0945b39d
JH
10277 if (!TARGET_ALIGN_STRINGOPS)
10278 align = 32;
10279
10280 if (GET_CODE (count_exp) == CONST_INT)
10281 count = INTVAL (count_exp);
10282 /* Figure out the proper mode for the counter. For 32 bits it is always
10283 SImode; for 64 bits use SImode when possible, otherwise DImode.
10284 Set count to the number of bytes cleared when known at compile time. */
10285 if (!TARGET_64BIT || GET_MODE (count_exp) == SImode
10286 || x86_64_zero_extended_value (count_exp))
10287 counter_mode = SImode;
10288 else
10289 counter_mode = DImode;
10290
10291 destreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
10292
10293 emit_insn (gen_cld ());
10294
10295 /* When optimizing for size, emit a simple rep ; stosb instruction for
10296 counts not divisible by 4. */
10297
10298 if ((!optimize || optimize_size) && (count == 0 || (count & 0x03)))
10299 {
10300 countreg = ix86_zero_extend_to_Pmode (count_exp);
10301 zeroreg = copy_to_mode_reg (QImode, const0_rtx);
10302 if (TARGET_64BIT)
10303 emit_insn (gen_rep_stosqi_rex64 (destreg, countreg, zeroreg,
10304 destreg, countreg));
10305 else
10306 emit_insn (gen_rep_stosqi (destreg, countreg, zeroreg,
10307 destreg, countreg));
10308 }
10309 else if (count != 0
10310 && (align >= 8
10311 || (!TARGET_PENTIUMPRO && !TARGET_64BIT && align >= 4)
b531087a 10312 || optimize_size || count < (unsigned int) 64))
0945b39d
JH
10313 {
10314 int size = TARGET_64BIT && !optimize_size ? 8 : 4;
10315 zeroreg = copy_to_mode_reg (size == 4 ? SImode : DImode, const0_rtx);
10316 if (count & ~(size - 1))
10317 {
10318 countreg = copy_to_mode_reg (counter_mode,
10319 GEN_INT ((count >> (size == 4 ? 2 : 3))
10320 & (TARGET_64BIT ? -1 : 0x3fffffff)));
10321 countreg = ix86_zero_extend_to_Pmode (countreg);
10322 if (size == 4)
10323 {
10324 if (TARGET_64BIT)
10325 emit_insn (gen_rep_stossi_rex64 (destreg, countreg, zeroreg,
10326 destreg, countreg));
10327 else
10328 emit_insn (gen_rep_stossi (destreg, countreg, zeroreg,
10329 destreg, countreg));
10330 }
10331 else
10332 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg, zeroreg,
10333 destreg, countreg));
10334 }
10335 if (size == 8 && (count & 0x04))
10336 emit_insn (gen_strsetsi (destreg,
10337 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10338 if (count & 0x02)
10339 emit_insn (gen_strsethi (destreg,
10340 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10341 if (count & 0x01)
10342 emit_insn (gen_strsetqi (destreg,
10343 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10344 }
10345 else
10346 {
10347 rtx countreg2;
10348 rtx label = NULL;
37ad04a5
JH
10349 /* Compute desired alignment of the string operation. */
10350 int desired_alignment = (TARGET_PENTIUMPRO
10351 && (count == 0 || count >= (unsigned int) 260)
10352 ? 8 : UNITS_PER_WORD);
0945b39d
JH
10353
10354 /* In case we don't know anything about the alignment, default to the
10355 library version, since it is usually equally fast and results in
10356 shorter code. */
10357 if (!TARGET_INLINE_ALL_STRINGOPS && align < UNITS_PER_WORD)
10358 return 0;
10359
10360 if (TARGET_SINGLE_STRINGOP)
10361 emit_insn (gen_cld ());
10362
10363 countreg2 = gen_reg_rtx (Pmode);
10364 countreg = copy_to_mode_reg (counter_mode, count_exp);
10365 zeroreg = copy_to_mode_reg (Pmode, const0_rtx);
10366
37ad04a5 10367 if (count == 0 && align < desired_alignment)
0945b39d
JH
10368 {
10369 label = gen_label_rtx ();
37ad04a5 10370 emit_cmp_and_jump_insns (countreg, GEN_INT (desired_alignment - 1),
d43e0b7d 10371 LEU, 0, counter_mode, 1, label);
0945b39d
JH
10372 }
10373 if (align <= 1)
10374 {
10375 rtx label = ix86_expand_aligntest (destreg, 1);
10376 emit_insn (gen_strsetqi (destreg,
10377 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10378 ix86_adjust_counter (countreg, 1);
10379 emit_label (label);
10380 LABEL_NUSES (label) = 1;
10381 }
10382 if (align <= 2)
10383 {
10384 rtx label = ix86_expand_aligntest (destreg, 2);
10385 emit_insn (gen_strsethi (destreg,
10386 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10387 ix86_adjust_counter (countreg, 2);
10388 emit_label (label);
10389 LABEL_NUSES (label) = 1;
10390 }
37ad04a5 10391 if (align <= 4 && desired_alignment > 4)
0945b39d
JH
10392 {
10393 rtx label = ix86_expand_aligntest (destreg, 4);
10394 emit_insn (gen_strsetsi (destreg, (TARGET_64BIT
10395 ? gen_rtx_SUBREG (SImode, zeroreg, 0)
10396 : zeroreg)));
10397 ix86_adjust_counter (countreg, 4);
10398 emit_label (label);
10399 LABEL_NUSES (label) = 1;
10400 }
10401
37ad04a5
JH
10402 if (label && desired_alignment > 4 && !TARGET_64BIT)
10403 {
10404 emit_label (label);
10405 LABEL_NUSES (label) = 1;
10406 label = NULL_RTX;
10407 }
10408
0945b39d
JH
10409 if (!TARGET_SINGLE_STRINGOP)
10410 emit_insn (gen_cld ());
10411 if (TARGET_64BIT)
10412 {
10413 emit_insn (gen_lshrdi3 (countreg2, ix86_zero_extend_to_Pmode (countreg),
10414 GEN_INT (3)));
10415 emit_insn (gen_rep_stosdi_rex64 (destreg, countreg2, zeroreg,
10416 destreg, countreg2));
10417 }
10418 else
10419 {
10420 emit_insn (gen_lshrsi3 (countreg2, countreg, GEN_INT (2)));
10421 emit_insn (gen_rep_stossi (destreg, countreg2, zeroreg,
10422 destreg, countreg2));
10423 }
0945b39d
JH
10424 if (label)
10425 {
10426 emit_label (label);
10427 LABEL_NUSES (label) = 1;
10428 }
37ad04a5 10429
0945b39d
JH
10430 if (TARGET_64BIT && align > 4 && count != 0 && (count & 4))
10431 emit_insn (gen_strsetsi (destreg,
10432 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10433 if (TARGET_64BIT && (align <= 4 || count == 0))
10434 {
79258dce 10435 rtx label = ix86_expand_aligntest (countreg, 4);
0945b39d
JH
10436 emit_insn (gen_strsetsi (destreg,
10437 gen_rtx_SUBREG (SImode, zeroreg, 0)));
10438 emit_label (label);
10439 LABEL_NUSES (label) = 1;
10440 }
10441 if (align > 2 && count != 0 && (count & 2))
10442 emit_insn (gen_strsethi (destreg,
10443 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10444 if (align <= 2 || count == 0)
10445 {
74411039 10446 rtx label = ix86_expand_aligntest (countreg, 2);
0945b39d
JH
10447 emit_insn (gen_strsethi (destreg,
10448 gen_rtx_SUBREG (HImode, zeroreg, 0)));
10449 emit_label (label);
10450 LABEL_NUSES (label) = 1;
10451 }
10452 if (align > 1 && count != 0 && (count & 1))
10453 emit_insn (gen_strsetqi (destreg,
10454 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10455 if (align <= 1 || count == 0)
10456 {
74411039 10457 rtx label = ix86_expand_aligntest (countreg, 1);
0945b39d
JH
10458 emit_insn (gen_strsetqi (destreg,
10459 gen_rtx_SUBREG (QImode, zeroreg, 0)));
10460 emit_label (label);
10461 LABEL_NUSES (label) = 1;
10462 }
10463 }
10464 return 1;
10465}
10466/* Expand strlen. */
10467int
10468ix86_expand_strlen (out, src, eoschar, align)
10469 rtx out, src, eoschar, align;
10470{
10471 rtx addr, scratch1, scratch2, scratch3, scratch4;
10472
10473 /* The generic case of the strlen expander is long. Avoid its
10474 expansion unless TARGET_INLINE_ALL_STRINGOPS. */
10475
10476 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10477 && !TARGET_INLINE_ALL_STRINGOPS
10478 && !optimize_size
10479 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
10480 return 0;
10481
10482 addr = force_reg (Pmode, XEXP (src, 0));
10483 scratch1 = gen_reg_rtx (Pmode);
10484
10485 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
10486 && !optimize_size)
10487 {
10488 /* Well it seems that some optimizer does not combine a call like
10489 foo(strlen(bar), strlen(bar));
10490 when the move and the subtraction are done here. It does calculate
10491 the length just once when these instructions are done inside of
10492 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
10493 often used and I use one fewer register for the lifetime of
10494 output_strlen_unroll() this is better. */
10495
10496 emit_move_insn (out, addr);
10497
10498 ix86_expand_strlensi_unroll_1 (out, align);
10499
10500 /* strlensi_unroll_1 returns the address of the zero at the end of
10501 the string, like memchr(), so compute the length by subtracting
10502 the start address. */
10503 if (TARGET_64BIT)
10504 emit_insn (gen_subdi3 (out, out, addr));
10505 else
10506 emit_insn (gen_subsi3 (out, out, addr));
10507 }
10508 else
10509 {
10510 scratch2 = gen_reg_rtx (Pmode);
10511 scratch3 = gen_reg_rtx (Pmode);
10512 scratch4 = force_reg (Pmode, constm1_rtx);
10513
10514 emit_move_insn (scratch3, addr);
10515 eoschar = force_reg (QImode, eoschar);
10516
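 /* repnz scas decrements the count register once per byte scanned,
    terminator included: starting from -1 it ends at -strlen-2, so the
    length is recovered below as (~count) + (-1). */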
10517 emit_insn (gen_cld ());
10518 if (TARGET_64BIT)
10519 {
10520 emit_insn (gen_strlenqi_rex_1 (scratch1, scratch3, eoschar,
10521 align, scratch4, scratch3));
10522 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
10523 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
10524 }
10525 else
10526 {
10527 emit_insn (gen_strlenqi_1 (scratch1, scratch3, eoschar,
10528 align, scratch4, scratch3));
10529 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
10530 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
10531 }
10532 }
10533 return 1;
10534}
10535
e075ae69
RH
10536/* Expand the appropriate insns for doing strlen if not just doing
10537 repnz; scasb
10538
10539 out = result, initialized with the start address
10540 align_rtx = alignment of the address.
10541 scratch = scratch register, initialized with the start address when
77ebd435 10542 not aligned, otherwise undefined
3f803cd9
SC
10543
10544 This is just the body. It needs the initializations mentioned above and
10545 some address computation at the end. These things are done in i386.md. */
10546
0945b39d
JH
10547static void
10548ix86_expand_strlensi_unroll_1 (out, align_rtx)
10549 rtx out, align_rtx;
3f803cd9 10550{
e075ae69
RH
10551 int align;
10552 rtx tmp;
10553 rtx align_2_label = NULL_RTX;
10554 rtx align_3_label = NULL_RTX;
10555 rtx align_4_label = gen_label_rtx ();
10556 rtx end_0_label = gen_label_rtx ();
e075ae69 10557 rtx mem;
e2e52e1b 10558 rtx tmpreg = gen_reg_rtx (SImode);
0945b39d 10559 rtx scratch = gen_reg_rtx (SImode);
e075ae69
RH
10560
10561 align = 0;
10562 if (GET_CODE (align_rtx) == CONST_INT)
10563 align = INTVAL (align_rtx);
3f803cd9 10564
e9a25f70 10565 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
3f803cd9 10566
e9a25f70 10567 /* Is there a known alignment and is it less than 4? */
e075ae69 10568 if (align < 4)
3f803cd9 10569 {
0945b39d
JH
10570 rtx scratch1 = gen_reg_rtx (Pmode);
10571 emit_move_insn (scratch1, out);
e9a25f70 10572 /* Is there a known alignment and is it not 2? */
e075ae69 10573 if (align != 2)
3f803cd9 10574 {
e075ae69
RH
10575 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
10576 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
10577
10578 /* Leave just the 3 lower bits. */
0945b39d 10579 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
e075ae69
RH
10580 NULL_RTX, 0, OPTAB_WIDEN);
10581
9076b9c1 10582 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
d43e0b7d 10583 Pmode, 1, align_4_label);
9076b9c1 10584 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), EQ, NULL,
d43e0b7d 10585 Pmode, 1, align_2_label);
9076b9c1 10586 emit_cmp_and_jump_insns (align_rtx, GEN_INT (2), GTU, NULL,
d43e0b7d 10587 Pmode, 1, align_3_label);
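 /* The three tests above dispatch on (address & 3): 0 jumps straight
    to the aligned loop, 2 skips ahead to the two-byte prologue, 3 to
    the one-byte prologue, and 1 falls through so that up to three
    bytes are checked before the word loop. */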
3f803cd9
SC
10588 }
10589 else
10590 {
e9a25f70
JL
10591 /* Since the alignment is 2, we have to check 2 or 0 bytes;
10592 check whether it is aligned to a 4-byte boundary. */
e9a25f70 10593
0945b39d 10594 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (2),
e075ae69
RH
10595 NULL_RTX, 0, OPTAB_WIDEN);
10596
9076b9c1 10597 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
d43e0b7d 10598 Pmode, 1, align_4_label);
3f803cd9
SC
10599 }
10600
e075ae69 10601 mem = gen_rtx_MEM (QImode, out);
e9a25f70 10602
e075ae69 10603 /* Now compare the bytes. */
e9a25f70 10604
0f290768 10605 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
9076b9c1 10606 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
d43e0b7d 10607 QImode, 1, end_0_label);
3f803cd9 10608
0f290768 10609 /* Increment the address. */
0945b39d
JH
10610 if (TARGET_64BIT)
10611 emit_insn (gen_adddi3 (out, out, const1_rtx));
10612 else
10613 emit_insn (gen_addsi3 (out, out, const1_rtx));
e9a25f70 10614
e075ae69
RH
10615 /* Not needed with an alignment of 2. */
10616 if (align != 2)
10617 {
10618 emit_label (align_2_label);
3f803cd9 10619
d43e0b7d
RK
10620 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
10621 end_0_label);
e075ae69 10622
0945b39d
JH
10623 if (TARGET_64BIT)
10624 emit_insn (gen_adddi3 (out, out, const1_rtx));
10625 else
10626 emit_insn (gen_addsi3 (out, out, const1_rtx));
e075ae69
RH
10627
10628 emit_label (align_3_label);
10629 }
10630
d43e0b7d
RK
10631 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
10632 end_0_label);
e075ae69 10633
0945b39d
JH
10634 if (TARGET_64BIT)
10635 emit_insn (gen_adddi3 (out, out, const1_rtx));
10636 else
10637 emit_insn (gen_addsi3 (out, out, const1_rtx));
3f803cd9
SC
10638 }
10639
e075ae69
RH
10640 /* Generate a loop to check 4 bytes at a time. It is not a good idea
10641 to align this loop: it only makes the code larger and does not
10642 speed it up. */
10643 emit_label (align_4_label);
3f803cd9 10644
e075ae69
RH
10645 mem = gen_rtx_MEM (SImode, out);
10646 emit_move_insn (scratch, mem);
0945b39d
JH
10647 if (TARGET_64BIT)
10648 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
10649 else
10650 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
e075ae69 10651
e2e52e1b
JH
10652 /* This formula yields a nonzero result iff one of the bytes is zero.
10653 This saves three branches inside the loop and many cycles. */
10654
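 /* A worked example with illustrative values: for scratch = 0x40404000
    the sequence below computes ((scratch - 0x01010101) & ~scratch)
    & 0x80808080 = 0x00000080, flagging the zero in the low byte; for
    scratch = 0x40404040 the result is 0 and the loop continues. */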
10655 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
10656 emit_insn (gen_one_cmplsi2 (scratch, scratch));
10657 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
7471a1f0 10658 emit_insn (gen_andsi3 (tmpreg, tmpreg,
d8bf17f9 10659 gen_int_mode (0x80808080, SImode)));
d43e0b7d
RK
10660 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
10661 align_4_label);
e2e52e1b
JH
10662
10663 if (TARGET_CMOVE)
10664 {
10665 rtx reg = gen_reg_rtx (SImode);
0945b39d 10666 rtx reg2 = gen_reg_rtx (Pmode);
e2e52e1b
JH
10667 emit_move_insn (reg, tmpreg);
10668 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
10669
0f290768 10670 /* If zero is not in the first two bytes, move two bytes forward. */
16189740 10671 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
e2e52e1b
JH
10672 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10673 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
10674 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
10675 gen_rtx_IF_THEN_ELSE (SImode, tmp,
77ebd435
AJ
10676 reg,
10677 tmpreg)));
e2e52e1b 10678 /* Emit the lea manually to avoid clobbering the flags. */
0945b39d
JH
10679 emit_insn (gen_rtx_SET (SImode, reg2,
10680 gen_rtx_PLUS (Pmode, out, GEN_INT (2))));
e2e52e1b
JH
10681
10682 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10683 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
10684 emit_insn (gen_rtx_SET (VOIDmode, out,
0945b39d 10685 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
a4f31c00
AJ
10686 reg2,
10687 out)));
e2e52e1b
JH
10688
10689 }
10690 else
10691 {
10692 rtx end_2_label = gen_label_rtx ();
10693 /* Is zero in the first two bytes? */
10694
16189740 10695 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
e2e52e1b
JH
10696 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
10697 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
10698 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10699 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
10700 pc_rtx);
10701 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
10702 JUMP_LABEL (tmp) = end_2_label;
10703
0f290768 10704 /* Not in the first two. Move two bytes forward. */
e2e52e1b 10705 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
0945b39d
JH
10706 if (TARGET_64BIT)
10707 emit_insn (gen_adddi3 (out, out, GEN_INT (2)));
10708 else
10709 emit_insn (gen_addsi3 (out, out, GEN_INT (2)));
e2e52e1b
JH
10710
10711 emit_label (end_2_label);
10712
10713 }
10714
0f290768 10715 /* Avoid branch in fixing the byte. */
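 /* OUT is now 3 or 4 bytes past the zero byte, and bit 7 of tmpreg's
    low byte is set iff the zero is the earlier byte of the remaining
    pair. Doubling tmpreg moves that bit into the carry flag, so the
    subtract-with-borrow below takes either 3 or 4 off OUT without a
    branch. */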
e2e52e1b 10716 tmpreg = gen_lowpart (QImode, tmpreg);
7e08e190 10717 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
0945b39d
JH
10718 if (TARGET_64BIT)
10719 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3)));
10720 else
10721 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3)));
e075ae69
RH
10722
10723 emit_label (end_0_label);
10724}
0e07aff3
RH
10725
10726void
10727ix86_expand_call (retval, fnaddr, callarg1, callarg2, pop)
10728 rtx retval, fnaddr, callarg1, callarg2, pop;
10729{
10730 rtx use = NULL, call;
10731
10732 if (pop == const0_rtx)
10733 pop = NULL;
10734 if (TARGET_64BIT && pop)
10735 abort ();
10736
b069de3b
SS
10737#if TARGET_MACHO
10738 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
10739 fnaddr = machopic_indirect_call_target (fnaddr);
10740#else
0e07aff3
RH
10741 /* Static functions and indirect calls don't need the pic register. */
10742 if (! TARGET_64BIT && flag_pic
10743 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
10744 && ! SYMBOL_REF_FLAG (XEXP (fnaddr, 0)))
66edd3b4 10745 use_reg (&use, pic_offset_table_rtx);
0e07aff3
RH
10746
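 /* For 64-bit varargs calls the psABI expects %al to hold an upper
    bound on the number of SSE registers used; CALLARG2 carries that
    count. */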
10747 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
10748 {
10749 rtx al = gen_rtx_REG (QImode, 0);
10750 emit_move_insn (al, callarg2);
10751 use_reg (&use, al);
10752 }
b069de3b 10753#endif /* TARGET_MACHO */
0e07aff3
RH
10754
10755 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
10756 {
10757 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
10758 fnaddr = gen_rtx_MEM (QImode, fnaddr);
10759 }
10760
10761 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
10762 if (retval)
10763 call = gen_rtx_SET (VOIDmode, retval, call);
10764 if (pop)
10765 {
10766 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
10767 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
10768 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
10769 }
10770
10771 call = emit_call_insn (call);
10772 if (use)
10773 CALL_INSN_FUNCTION_USAGE (call) = use;
10774}
fce5a9f2 10775
e075ae69 10776\f
e075ae69
RH
10777/* Clear stack slot assignments remembered from previous functions.
10778 This is called from INIT_EXPANDERS once before RTL is emitted for each
10779 function. */
10780
e2500fed
GK
10781static struct machine_function *
10782ix86_init_machine_status ()
37b15744 10783{
e2500fed 10784 return ggc_alloc_cleared (sizeof (struct machine_function));
1526a060
BS
10785}
10786
e075ae69
RH
10787/* Return a MEM corresponding to a stack slot with mode MODE.
10788 Allocate a new slot if necessary.
10789
10790 The RTL for a function can have several slots available: N is
10791 which slot to use. */
10792
10793rtx
10794assign_386_stack_local (mode, n)
10795 enum machine_mode mode;
10796 int n;
10797{
10798 if (n < 0 || n >= MAX_386_STACK_LOCALS)
10799 abort ();
10800
10801 if (ix86_stack_locals[(int) mode][n] == NULL_RTX)
10802 ix86_stack_locals[(int) mode][n]
10803 = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
10804
10805 return ix86_stack_locals[(int) mode][n];
10806}
f996902d
RH
10807
10808/* Construct the SYMBOL_REF for the tls_get_addr function. */
10809
e2500fed 10810static GTY(()) rtx ix86_tls_symbol;
f996902d
RH
10811rtx
10812ix86_tls_get_addr ()
10813{
f996902d 10814
e2500fed 10815 if (!ix86_tls_symbol)
f996902d 10816 {
e2500fed 10817 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, (TARGET_GNU_TLS
f996902d
RH
10818 ? "___tls_get_addr"
10819 : "__tls_get_addr"));
f996902d
RH
10820 }
10821
e2500fed 10822 return ix86_tls_symbol;
f996902d 10823}
e075ae69
RH
10824\f
10825/* Calculate the length of the memory address in the instruction
10826 encoding. Does not include the one-byte modrm, opcode, or prefix. */
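/* For example (illustrative cases): (%eax) contributes 0; (%esp) needs
   the two-byte modrm form and contributes 1; 8(%ebp) contributes 1 for
   its 8-bit displacement; and foo(,%ebx,4) contributes 4 for the 32-bit
   displacement plus 1 for the SIB byte. */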
10827
10828static int
10829memory_address_length (addr)
10830 rtx addr;
10831{
10832 struct ix86_address parts;
10833 rtx base, index, disp;
10834 int len;
10835
10836 if (GET_CODE (addr) == PRE_DEC
0d7d98ee
JH
10837 || GET_CODE (addr) == POST_INC
10838 || GET_CODE (addr) == PRE_MODIFY
10839 || GET_CODE (addr) == POST_MODIFY)
e075ae69 10840 return 0;
3f803cd9 10841
e075ae69
RH
10842 if (! ix86_decompose_address (addr, &parts))
10843 abort ();
3f803cd9 10844
e075ae69
RH
10845 base = parts.base;
10846 index = parts.index;
10847 disp = parts.disp;
10848 len = 0;
3f803cd9 10849
e075ae69
RH
10850 /* Register Indirect. */
10851 if (base && !index && !disp)
10852 {
10853 /* Special cases: ebp and esp need the two-byte modrm form. */
10854 if (addr == stack_pointer_rtx
10855 || addr == arg_pointer_rtx
564d80f4
JH
10856 || addr == frame_pointer_rtx
10857 || addr == hard_frame_pointer_rtx)
e075ae69 10858 len = 1;
3f803cd9 10859 }
e9a25f70 10860
e075ae69
RH
10861 /* Direct Addressing. */
10862 else if (disp && !base && !index)
10863 len = 4;
10864
3f803cd9
SC
10865 else
10866 {
e075ae69
RH
10867 /* Find the length of the displacement constant. */
10868 if (disp)
10869 {
10870 if (GET_CODE (disp) == CONST_INT
10871 && CONST_OK_FOR_LETTER_P (INTVAL (disp), 'K'))
10872 len = 1;
10873 else
10874 len = 4;
10875 }
3f803cd9 10876
e075ae69
RH
10877 /* An index requires the two-byte modrm form. */
10878 if (index)
10879 len += 1;
3f803cd9
SC
10880 }
10881
e075ae69
RH
10882 return len;
10883}
79325812 10884
5bf0ebab
RH
10885/* Compute default value for "length_immediate" attribute. When SHORTFORM
10886 is set, expect that the insn has an 8-bit immediate alternative. */
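/* E.g. with SHORTFORM set, an operand like $5 that fits in a signed
   byte counts as 1 byte, while $500 still counts as the full 4 bytes
   of an SImode immediate (illustrative values). */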
e075ae69 10887int
6ef67412 10888ix86_attr_length_immediate_default (insn, shortform)
e075ae69 10889 rtx insn;
6ef67412 10890 int shortform;
e075ae69 10891{
6ef67412
JH
10892 int len = 0;
10893 int i;
6c698a6d 10894 extract_insn_cached (insn);
6ef67412
JH
10895 for (i = recog_data.n_operands - 1; i >= 0; --i)
10896 if (CONSTANT_P (recog_data.operand[i]))
3071fab5 10897 {
6ef67412 10898 if (len)
3071fab5 10899 abort ();
6ef67412
JH
10900 if (shortform
10901 && GET_CODE (recog_data.operand[i]) == CONST_INT
10902 && CONST_OK_FOR_LETTER_P (INTVAL (recog_data.operand[i]), 'K'))
10903 len = 1;
10904 else
10905 {
10906 switch (get_attr_mode (insn))
10907 {
10908 case MODE_QI:
10909 len += 1;
10910 break;
10911 case MODE_HI:
10912 len += 2;
10913 break;
10914 case MODE_SI:
10915 len += 4;
10916 break;
14f73b5a
JH
10917 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
10918 case MODE_DI:
10919 len += 4;
10920 break;
6ef67412 10921 default:
c725bd79 10922 fatal_insn ("unknown insn mode", insn);
6ef67412
JH
10923 }
10924 }
3071fab5 10925 }
6ef67412
JH
10926 return len;
10927}
10928/* Compute default value for "length_address" attribute. */
10929int
10930ix86_attr_length_address_default (insn)
10931 rtx insn;
10932{
10933 int i;
6c698a6d 10934 extract_insn_cached (insn);
1ccbefce
RH
10935 for (i = recog_data.n_operands - 1; i >= 0; --i)
10936 if (GET_CODE (recog_data.operand[i]) == MEM)
6ef67412 10938 return memory_address_length (XEXP (recog_data.operand[i], 0));
6ef67412 10941 return 0;
3f803cd9 10942}
e075ae69
RH
10943\f
10944/* Return the maximum number of instructions a cpu can issue. */
b657fc39 10945
c237e94a 10946static int
e075ae69 10947ix86_issue_rate ()
b657fc39 10948{
e075ae69 10949 switch (ix86_cpu)
b657fc39 10950 {
e075ae69
RH
10951 case PROCESSOR_PENTIUM:
10952 case PROCESSOR_K6:
10953 return 2;
79325812 10954
e075ae69 10955 case PROCESSOR_PENTIUMPRO:
b4e89e2d
JH
10956 case PROCESSOR_PENTIUM4:
10957 case PROCESSOR_ATHLON:
e075ae69 10958 return 3;
b657fc39 10959
b657fc39 10960 default:
e075ae69 10961 return 1;
b657fc39 10962 }
b657fc39
L
10963}
10964
e075ae69
RH
10965/* A subroutine of ix86_adjust_cost -- return true iff INSN reads the flags
10966 set by DEP_INSN and reads nothing else that DEP_INSN sets. */
b657fc39 10967
e075ae69
RH
10968static int
10969ix86_flags_dependant (insn, dep_insn, insn_type)
10970 rtx insn, dep_insn;
10971 enum attr_type insn_type;
10972{
10973 rtx set, set2;
b657fc39 10974
e075ae69
RH
10975 /* Simplify the test for uninteresting insns. */
10976 if (insn_type != TYPE_SETCC
10977 && insn_type != TYPE_ICMOV
10978 && insn_type != TYPE_FCMOV
10979 && insn_type != TYPE_IBR)
10980 return 0;
b657fc39 10981
e075ae69
RH
10982 if ((set = single_set (dep_insn)) != 0)
10983 {
10984 set = SET_DEST (set);
10985 set2 = NULL_RTX;
10986 }
10987 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
10988 && XVECLEN (PATTERN (dep_insn), 0) == 2
10989 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
10990 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
10991 {
10992 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
10993 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
10994 }
78a0d70c
ZW
10995 else
10996 return 0;
b657fc39 10997
78a0d70c
ZW
10998 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
10999 return 0;
b657fc39 11000
f5143c46 11001 /* This test is true if the dependent insn reads the flags but
78a0d70c
ZW
11002 not any other potentially set register. */
11003 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
11004 return 0;
11005
11006 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
11007 return 0;
11008
11009 return 1;
e075ae69 11010}
b657fc39 11011
e075ae69
RH
11012/* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
11013 address with operands set by DEP_INSN. */
11014
11015static int
11016ix86_agi_dependant (insn, dep_insn, insn_type)
11017 rtx insn, dep_insn;
11018 enum attr_type insn_type;
11019{
11020 rtx addr;
11021
6ad48e84
JH
11022 if (insn_type == TYPE_LEA
11023 && TARGET_PENTIUM)
5fbdde42
RH
11024 {
11025 addr = PATTERN (insn);
11026 if (GET_CODE (addr) == SET)
11027 ;
11028 else if (GET_CODE (addr) == PARALLEL
11029 && GET_CODE (XVECEXP (addr, 0, 0)) == SET)
11030 addr = XVECEXP (addr, 0, 0);
11031 else
11032 abort ();
11033 addr = SET_SRC (addr);
11034 }
e075ae69
RH
11035 else
11036 {
11037 int i;
6c698a6d 11038 extract_insn_cached (insn);
1ccbefce
RH
11039 for (i = recog_data.n_operands - 1; i >= 0; --i)
11040 if (GET_CODE (recog_data.operand[i]) == MEM)
e075ae69 11041 {
1ccbefce 11042 addr = XEXP (recog_data.operand[i], 0);
e075ae69
RH
11043 goto found;
11044 }
11045 return 0;
11046 found:;
b657fc39
L
11047 }
11048
e075ae69 11049 return modified_in_p (addr, dep_insn);
b657fc39 11050}
a269a03c 11051
c237e94a 11052static int
e075ae69 11053ix86_adjust_cost (insn, link, dep_insn, cost)
a269a03c
JC
11054 rtx insn, link, dep_insn;
11055 int cost;
11056{
e075ae69 11057 enum attr_type insn_type, dep_insn_type;
6ad48e84 11058 enum attr_memory memory, dep_memory;
e075ae69 11059 rtx set, set2;
9b00189f 11060 int dep_insn_code_number;
a269a03c 11061
309ada50 11062 /* Anti and output dependencies have zero cost on all CPUs. */
e075ae69 11063 if (REG_NOTE_KIND (link) != 0)
309ada50 11064 return 0;
a269a03c 11065
9b00189f
JH
11066 dep_insn_code_number = recog_memoized (dep_insn);
11067
e075ae69 11068 /* If we can't recognize the insns, we can't really do anything. */
9b00189f 11069 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
e075ae69 11070 return cost;
a269a03c 11071
1c71e60e
JH
11072 insn_type = get_attr_type (insn);
11073 dep_insn_type = get_attr_type (dep_insn);
9b00189f 11074
a269a03c
JC
11075 switch (ix86_cpu)
11076 {
11077 case PROCESSOR_PENTIUM:
e075ae69
RH
11078 /* Address Generation Interlock adds a cycle of latency. */
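 /* E.g. "addl $4, %esi" immediately followed by "movl (%esi), %eax"
    pays this extra cycle (an illustrative pairing). */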
11079 if (ix86_agi_dependant (insn, dep_insn, insn_type))
11080 cost += 1;
11081
11082 /* ??? Compares pair with jump/setcc. */
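 /* E.g. "cmpl %eax, %ebx" directly followed by "je label" is presumed
    to issue as a pair, making the flags dependence free (illustrative). */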
11083 if (ix86_flags_dependant (insn, dep_insn, insn_type))
11084 cost = 0;
11085
11086 /* Floating point stores require the value to be ready one cycle earlier. */
0f290768 11087 if (insn_type == TYPE_FMOV
e075ae69
RH
11088 && get_attr_memory (insn) == MEMORY_STORE
11089 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11090 cost += 1;
11091 break;
a269a03c 11092
e075ae69 11093 case PROCESSOR_PENTIUMPRO:
6ad48e84
JH
11094 memory = get_attr_memory (insn);
11095 dep_memory = get_attr_memory (dep_insn);
11096
0f290768 11097 /* Since we can't represent delayed latencies of load+operation,
e075ae69
RH
11098 increase the cost here for non-imov insns. */
11099 if (dep_insn_type != TYPE_IMOV
6ad48e84
JH
11100 && dep_insn_type != TYPE_FMOV
11101 && (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH))
e075ae69
RH
11102 cost += 1;
11103
11104 /* INT->FP conversion is expensive. */
11105 if (get_attr_fp_int_src (dep_insn))
11106 cost += 5;
11107
11108 /* There is one cycle extra latency between an FP op and a store. */
11109 if (insn_type == TYPE_FMOV
11110 && (set = single_set (dep_insn)) != NULL_RTX
11111 && (set2 = single_set (insn)) != NULL_RTX
11112 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
11113 && GET_CODE (SET_DEST (set2)) == MEM)
11114 cost += 1;
6ad48e84
JH
11115
11116 /* Show the ability of the reorder buffer to hide the load latency by
11117 executing it in parallel with the previous instruction when the
11118 previous instruction is not needed to compute the address. */
11119 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11120 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11121 {
11122 /* Claim moves to take one cycle, as the core can issue one load
11123 at a time and the next load can start a cycle later. */
11124 if (dep_insn_type == TYPE_IMOV
11125 || dep_insn_type == TYPE_FMOV)
11126 cost = 1;
11127 else if (cost > 1)
11128 cost--;
11129 }
e075ae69 11130 break;
a269a03c 11131
e075ae69 11132 case PROCESSOR_K6:
6ad48e84
JH
11133 memory = get_attr_memory (insn);
11134 dep_memory = get_attr_memory (dep_insn);
e075ae69
RH
11135 /* The esp dependency is resolved before the instruction is really
11136 finished. */
11137 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
11138 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
11139 return 1;
a269a03c 11140
0f290768 11141 /* Since we can't represent delayed latencies of load+operation,
e075ae69 11142 increase the cost here for non-imov insns. */
6ad48e84 11143 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
e075ae69
RH
11144 cost += (dep_insn_type != TYPE_IMOV) ? 2 : 1;
11145
11146 /* INT->FP conversion is expensive. */
11147 if (get_attr_fp_int_src (dep_insn))
11148 cost += 5;
6ad48e84
JH
11149
11150 /* Show the ability of the reorder buffer to hide the load latency by
11151 executing it in parallel with the previous instruction when the
11152 previous instruction is not needed to compute the address. */
11153 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11154 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11155 {
11156 /* Claim moves to take one cycle, as the core can issue one load
11157 at a time and the next load can start a cycle later. */
11158 if (dep_insn_type == TYPE_IMOV
11159 || dep_insn_type == TYPE_FMOV)
11160 cost = 1;
11161 else if (cost > 2)
11162 cost -= 2;
11163 else
11164 cost = 1;
11165 }
a14003ee 11166 break;
e075ae69 11167
309ada50 11168 case PROCESSOR_ATHLON:
6ad48e84
JH
11169 memory = get_attr_memory (insn);
11170 dep_memory = get_attr_memory (dep_insn);
11171
11172 if (dep_memory == MEMORY_LOAD || dep_memory == MEMORY_BOTH)
0b5107cf
JH
11173 {
11174 if (dep_insn_type == TYPE_IMOV || dep_insn_type == TYPE_FMOV)
11175 cost += 2;
11176 else
11177 cost += 3;
11178 }
6ad48e84
JH
11179 /* Show the ability of the reorder buffer to hide the load latency by
11180 executing it in parallel with the previous instruction when the
11181 previous instruction is not needed to compute the address. */
11182 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
11183 && !ix86_agi_dependant (insn, dep_insn, insn_type))
11184 {
11185 /* Claim moves to take one cycle, as the core can issue one load
11186 at a time and the next load can start a cycle later. */
11187 if (dep_insn_type == TYPE_IMOV
11188 || dep_insn_type == TYPE_FMOV)
11189 cost = 0;
11190 else if (cost >= 3)
11191 cost -= 3;
11192 else
11193 cost = 0;
11194 }
309ada50 11195 break;
a269a03c 11196 default:
a269a03c
JC
11197 break;
11198 }
11199
11200 return cost;
11201}
0a726ef1 11202
e075ae69
RH
11203static union
11204{
11205 struct ppro_sched_data
11206 {
11207 rtx decode[3];
11208 int issued_this_cycle;
11209 } ppro;
11210} ix86_sched_data;
0a726ef1 11211
e075ae69
RH
11212static enum attr_ppro_uops
11213ix86_safe_ppro_uops (insn)
11214 rtx insn;
11215{
11216 if (recog_memoized (insn) >= 0)
11217 return get_attr_ppro_uops (insn);
11218 else
11219 return PPRO_UOPS_MANY;
11220}
0a726ef1 11221
e075ae69
RH
11222static void
11223ix86_dump_ppro_packet (dump)
11224 FILE *dump;
0a726ef1 11225{
e075ae69 11226 if (ix86_sched_data.ppro.decode[0])
0a726ef1 11227 {
e075ae69
RH
11228 fprintf (dump, "PPRO packet: %d",
11229 INSN_UID (ix86_sched_data.ppro.decode[0]));
11230 if (ix86_sched_data.ppro.decode[1])
11231 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[1]));
11232 if (ix86_sched_data.ppro.decode[2])
11233 fprintf (dump, " %d", INSN_UID (ix86_sched_data.ppro.decode[2]));
11234 fputc ('\n', dump);
11235 }
11236}
0a726ef1 11237
e075ae69 11238/* We're beginning a new block. Initialize data structures as necessary. */
0a726ef1 11239
c237e94a
ZW
11240static void
11241ix86_sched_init (dump, sched_verbose, veclen)
e075ae69
RH
11242 FILE *dump ATTRIBUTE_UNUSED;
11243 int sched_verbose ATTRIBUTE_UNUSED;
c237e94a 11244 int veclen ATTRIBUTE_UNUSED;
e075ae69
RH
11245{
11246 memset (&ix86_sched_data, 0, sizeof (ix86_sched_data));
11247}
11248
11249/* Shift INSN to SLOT, and shift everything else down. */
11250
11251static void
11252ix86_reorder_insn (insnp, slot)
11253 rtx *insnp, *slot;
11254{
11255 if (insnp != slot)
11256 {
11257 rtx insn = *insnp;
0f290768 11258 do
e075ae69
RH
11259 insnp[0] = insnp[1];
11260 while (++insnp != slot);
11261 *insnp = insn;
0a726ef1 11262 }
e075ae69
RH
11263}
11264
c6991660 11265static void
78a0d70c
ZW
11266ix86_sched_reorder_ppro (ready, e_ready)
11267 rtx *ready;
11268 rtx *e_ready;
11269{
11270 rtx decode[3];
11271 enum attr_ppro_uops cur_uops;
11272 int issued_this_cycle;
11273 rtx *insnp;
11274 int i;
e075ae69 11275
0f290768 11276 /* At this point .ppro.decode contains the state of the three
78a0d70c 11277 decoders from last "cycle". That is, those insns that were
0f290768 11278 actually independent. But here we're scheduling for the
78a0d70c
ZW
11279 decoder, and we may find things that are decodable in the
11280 same cycle. */
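 /* This mirrors the PPro's 4-1-1 decode template: slot 0 can take an
    insn of up to four uops, slots 1 and 2 only single-uop insns (a
    simplified sketch of the hardware this scheduler assumes). */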
e075ae69 11281
0f290768 11282 memcpy (decode, ix86_sched_data.ppro.decode, sizeof (decode));
78a0d70c 11283 issued_this_cycle = 0;
e075ae69 11284
78a0d70c
ZW
11285 insnp = e_ready;
11286 cur_uops = ix86_safe_ppro_uops (*insnp);
0a726ef1 11287
78a0d70c
ZW
11288 /* If the decoders are empty, and we've a complex insn at the
11289 head of the priority queue, let it issue without complaint. */
11290 if (decode[0] == NULL)
11291 {
11292 if (cur_uops == PPRO_UOPS_MANY)
11293 {
11294 decode[0] = *insnp;
11295 goto ppro_done;
11296 }
11297
11298 /* Otherwise, search for a 2-4 uop insn to issue. */
11299 while (cur_uops != PPRO_UOPS_FEW)
11300 {
11301 if (insnp == ready)
11302 break;
11303 cur_uops = ix86_safe_ppro_uops (*--insnp);
11304 }
11305
11306 /* If so, move it to the head of the line. */
11307 if (cur_uops == PPRO_UOPS_FEW)
11308 ix86_reorder_insn (insnp, e_ready);
0a726ef1 11309
78a0d70c
ZW
11310 /* Issue the head of the queue. */
11311 issued_this_cycle = 1;
11312 decode[0] = *e_ready--;
11313 }
fb693d44 11314
78a0d70c
ZW
11315 /* Look for simple insns to fill in the other two slots. */
11316 for (i = 1; i < 3; ++i)
11317 if (decode[i] == NULL)
11318 {
a151daf0 11319 if (ready > e_ready)
78a0d70c 11320 goto ppro_done;
fb693d44 11321
e075ae69
RH
11322 insnp = e_ready;
11323 cur_uops = ix86_safe_ppro_uops (*insnp);
78a0d70c
ZW
11324 while (cur_uops != PPRO_UOPS_ONE)
11325 {
11326 if (insnp == ready)
11327 break;
11328 cur_uops = ix86_safe_ppro_uops (*--insnp);
11329 }
fb693d44 11330
78a0d70c
ZW
11331 /* Found one. Move it to the head of the queue and issue it. */
11332 if (cur_uops == PPRO_UOPS_ONE)
e075ae69 11333 {
78a0d70c
ZW
11334 ix86_reorder_insn (insnp, e_ready);
11335 decode[i] = *e_ready--;
11336 issued_this_cycle++;
11337 continue;
11338 }
fb693d44 11339
78a0d70c
ZW
11340 /* ??? Didn't find one. Ideally, here we would do a lazy split
11341 of 2-uop insns, issue one and queue the other. */
11342 }
fb693d44 11343
78a0d70c
ZW
11344 ppro_done:
11345 if (issued_this_cycle == 0)
11346 issued_this_cycle = 1;
11347 ix86_sched_data.ppro.issued_this_cycle = issued_this_cycle;
11348}
fb693d44 11349
0f290768 11350/* We are about to begin issuing insns for this clock cycle.
78a0d70c 11351 Override the default sort algorithm to better slot instructions. */
c237e94a
ZW
11352static int
11353ix86_sched_reorder (dump, sched_verbose, ready, n_readyp, clock_var)
78a0d70c
ZW
11354 FILE *dump ATTRIBUTE_UNUSED;
11355 int sched_verbose ATTRIBUTE_UNUSED;
11356 rtx *ready;
c237e94a 11357 int *n_readyp;
78a0d70c
ZW
11358 int clock_var ATTRIBUTE_UNUSED;
11359{
c237e94a 11360 int n_ready = *n_readyp;
78a0d70c 11361 rtx *e_ready = ready + n_ready - 1;
fb693d44 11362
fce5a9f2 11363 /* Make sure to initialize the key items in
a151daf0
JL
11364 ix86_sched_data even when we are not going to try to
11365 reorder the ready queue. */
78a0d70c 11366 if (n_ready < 2)
a151daf0
JL
11367 {
11368 ix86_sched_data.ppro.issued_this_cycle = 1;
11369 goto out;
11370 }
e075ae69 11371
78a0d70c
ZW
11372 switch (ix86_cpu)
11373 {
11374 default:
11375 break;
e075ae69 11376
78a0d70c
ZW
11377 case PROCESSOR_PENTIUMPRO:
11378 ix86_sched_reorder_ppro (ready, e_ready);
e075ae69 11379 break;
fb693d44
RH
11380 }
11381
e075ae69
RH
11382out:
11383 return ix86_issue_rate ();
11384}
fb693d44 11385
e075ae69
RH
11386/* We are about to issue INSN. Return the number of insns left on the
11387 ready queue that can be issued this cycle. */
b222082e 11388
c237e94a 11389static int
e075ae69
RH
11390ix86_variable_issue (dump, sched_verbose, insn, can_issue_more)
11391 FILE *dump;
11392 int sched_verbose;
11393 rtx insn;
11394 int can_issue_more;
11395{
11396 int i;
11397 switch (ix86_cpu)
fb693d44 11398 {
e075ae69
RH
11399 default:
11400 return can_issue_more - 1;
fb693d44 11401
e075ae69
RH
11402 case PROCESSOR_PENTIUMPRO:
11403 {
11404 enum attr_ppro_uops uops = ix86_safe_ppro_uops (insn);
fb693d44 11405
e075ae69
RH
11406 if (uops == PPRO_UOPS_MANY)
11407 {
11408 if (sched_verbose)
11409 ix86_dump_ppro_packet (dump);
11410 ix86_sched_data.ppro.decode[0] = insn;
11411 ix86_sched_data.ppro.decode[1] = NULL;
11412 ix86_sched_data.ppro.decode[2] = NULL;
11413 if (sched_verbose)
11414 ix86_dump_ppro_packet (dump);
11415 ix86_sched_data.ppro.decode[0] = NULL;
11416 }
11417 else if (uops == PPRO_UOPS_FEW)
11418 {
11419 if (sched_verbose)
11420 ix86_dump_ppro_packet (dump);
11421 ix86_sched_data.ppro.decode[0] = insn;
11422 ix86_sched_data.ppro.decode[1] = NULL;
11423 ix86_sched_data.ppro.decode[2] = NULL;
11424 }
11425 else
11426 {
11427 for (i = 0; i < 3; ++i)
11428 if (ix86_sched_data.ppro.decode[i] == NULL)
11429 {
11430 ix86_sched_data.ppro.decode[i] = insn;
11431 break;
11432 }
11433 if (i == 3)
11434 abort ();
11435 if (i == 2)
11436 {
11437 if (sched_verbose)
11438 ix86_dump_ppro_packet (dump);
11439 ix86_sched_data.ppro.decode[0] = NULL;
11440 ix86_sched_data.ppro.decode[1] = NULL;
11441 ix86_sched_data.ppro.decode[2] = NULL;
11442 }
11443 }
11444 }
11445 return --ix86_sched_data.ppro.issued_this_cycle;
11446 }
fb693d44 11447}
9b690711
RH
11448
11449static int
11450ia32_use_dfa_pipeline_interface ()
11451{
11452 if (ix86_cpu == PROCESSOR_PENTIUM)
11453 return 1;
11454 return 0;
11455}
11456
11457/* How many alternative schedules to try. This should be as wide as the
11458 scheduling freedom in the DFA, but no wider. Making this value too
11459 large results in extra work for the scheduler. */
11460
11461static int
11462ia32_multipass_dfa_lookahead ()
11463{
11464 if (ix86_cpu == PROCESSOR_PENTIUM)
11465 return 2;
11466 else
11467 return 0;
11468}
11469
a7180f70 11470\f
0e4970d7
RK
11471/* Walk through INSNS and look for MEM references whose address is DSTREG or
11472 SRCREG and set the memory attribute to those of DSTREF and SRCREF, as
11473 appropriate. */
11474
11475void
11476ix86_set_move_mem_attrs (insns, dstref, srcref, dstreg, srcreg)
11477 rtx insns;
11478 rtx dstref, srcref, dstreg, srcreg;
11479{
11480 rtx insn;
11481
11482 for (insn = insns; insn != 0 ; insn = NEXT_INSN (insn))
11483 if (INSN_P (insn))
11484 ix86_set_move_mem_attrs_1 (PATTERN (insn), dstref, srcref,
11485 dstreg, srcreg);
11486}
11487
11488/* Subroutine of above to actually do the updating by recursively walking
11489 the rtx. */
11490
11491static void
11492ix86_set_move_mem_attrs_1 (x, dstref, srcref, dstreg, srcreg)
11493 rtx x;
11494 rtx dstref, srcref, dstreg, srcreg;
11495{
11496 enum rtx_code code = GET_CODE (x);
11497 const char *format_ptr = GET_RTX_FORMAT (code);
11498 int i, j;
11499
11500 if (code == MEM && XEXP (x, 0) == dstreg)
11501 MEM_COPY_ATTRIBUTES (x, dstref);
11502 else if (code == MEM && XEXP (x, 0) == srcreg)
11503 MEM_COPY_ATTRIBUTES (x, srcref);
11504
11505 for (i = 0; i < GET_RTX_LENGTH (code); i++, format_ptr++)
11506 {
11507 if (*format_ptr == 'e')
11508 ix86_set_move_mem_attrs_1 (XEXP (x, i), dstref, srcref,
11509 dstreg, srcreg);
11510 else if (*format_ptr == 'E')
11511 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
397be6cf 11512 ix86_set_move_mem_attrs_1 (XVECEXP (x, i, j), dstref, srcref,
0e4970d7
RK
11513 dstreg, srcreg);
11514 }
11515}
11516\f
a7180f70
BS
11517/* Compute the alignment given to a constant that is being placed in memory.
11518 EXP is the constant and ALIGN is the alignment that the object would
11519 ordinarily have.
11520 The value of this function is used instead of that alignment to align
11521 the object. */
11522
11523int
11524ix86_constant_alignment (exp, align)
11525 tree exp;
11526 int align;
11527{
11528 if (TREE_CODE (exp) == REAL_CST)
11529 {
11530 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
11531 return 64;
11532 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
11533 return 128;
11534 }
11535 else if (TREE_CODE (exp) == STRING_CST && TREE_STRING_LENGTH (exp) >= 31
11536 && align < 256)
11537 return 256;
11538
11539 return align;
11540}
11541
11542/* Compute the alignment for a static variable.
11543 TYPE is the data type, and ALIGN is the alignment that
11544 the object would ordinarily have. The value of this function is used
11545 instead of that alignment to align the object. */
11546
11547int
11548ix86_data_alignment (type, align)
11549 tree type;
11550 int align;
11551{
11552 if (AGGREGATE_TYPE_P (type)
11553 && TYPE_SIZE (type)
11554 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11555 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 256
11556 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 256)
11557 return 256;
11558
0d7d98ee
JH
11559 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
11560 to a 16-byte boundary. */
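 /* For instance, "static char buf[20]" (160 bits) then receives
    128-bit alignment when compiling for x86-64; an illustrative
    consequence of the check below. */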
11561 if (TARGET_64BIT)
11562 {
11563 if (AGGREGATE_TYPE_P (type)
11564 && TYPE_SIZE (type)
11565 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11566 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
11567 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
11568 return 128;
11569 }
11570
a7180f70
BS
11571 if (TREE_CODE (type) == ARRAY_TYPE)
11572 {
11573 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
11574 return 64;
11575 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
11576 return 128;
11577 }
11578 else if (TREE_CODE (type) == COMPLEX_TYPE)
11579 {
0f290768 11580
a7180f70
BS
11581 if (TYPE_MODE (type) == DCmode && align < 64)
11582 return 64;
11583 if (TYPE_MODE (type) == XCmode && align < 128)
11584 return 128;
11585 }
11586 else if ((TREE_CODE (type) == RECORD_TYPE
11587 || TREE_CODE (type) == UNION_TYPE
11588 || TREE_CODE (type) == QUAL_UNION_TYPE)
11589 && TYPE_FIELDS (type))
11590 {
11591 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
11592 return 64;
11593 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
11594 return 128;
11595 }
11596 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
11597 || TREE_CODE (type) == INTEGER_TYPE)
11598 {
11599 if (TYPE_MODE (type) == DFmode && align < 64)
11600 return 64;
11601 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
11602 return 128;
11603 }
11604
11605 return align;
11606}
11607
11608/* Compute the alignment for a local variable.
11609 TYPE is the data type, and ALIGN is the alignment that
11610 the object would ordinarily have. The value of this macro is used
11611 instead of that alignment to align the object. */
11612
11613int
11614ix86_local_alignment (type, align)
11615 tree type;
11616 int align;
11617{
0d7d98ee
JH
11618 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
11619 to a 16-byte boundary. */
11620 if (TARGET_64BIT)
11621 {
11622 if (AGGREGATE_TYPE_P (type)
11623 && TYPE_SIZE (type)
11624 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
11625 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
11626 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
11627 return 128;
11628 }
a7180f70
BS
11629 if (TREE_CODE (type) == ARRAY_TYPE)
11630 {
11631 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
11632 return 64;
11633 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
11634 return 128;
11635 }
11636 else if (TREE_CODE (type) == COMPLEX_TYPE)
11637 {
11638 if (TYPE_MODE (type) == DCmode && align < 64)
11639 return 64;
11640 if (TYPE_MODE (type) == XCmode && align < 128)
11641 return 128;
11642 }
11643 else if ((TREE_CODE (type) == RECORD_TYPE
11644 || TREE_CODE (type) == UNION_TYPE
11645 || TREE_CODE (type) == QUAL_UNION_TYPE)
11646 && TYPE_FIELDS (type))
11647 {
11648 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
11649 return 64;
11650 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
11651 return 128;
11652 }
11653 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
11654 || TREE_CODE (type) == INTEGER_TYPE)
11655 {
0f290768 11656
a7180f70
BS
11657 if (TYPE_MODE (type) == DFmode && align < 64)
11658 return 64;
11659 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
11660 return 128;
11661 }
11662 return align;
11663}
0ed08620
JH
11664\f
11665/* Emit RTL insns to initialize the variable parts of a trampoline.
11666 FNADDR is an RTX for the address of the function's pure code.
11667 CXT is an RTX for the static chain value for the function. */
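/* A sketch of the 32-bit trampoline emitted below (byte layout):
       b9 <cxt:4>     movl $cxt, %ecx
       e9 <disp:4>    jmp  <fnaddr, relative>
   The 64-bit variant instead loads FNADDR into %r11 and CXT into %r10
   with mov immediates and ends with jmp *%r11 (49 ff e3). */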
11668void
11669x86_initialize_trampoline (tramp, fnaddr, cxt)
11670 rtx tramp, fnaddr, cxt;
11671{
11672 if (!TARGET_64BIT)
11673 {
11674 /* Compute offset from the end of the jmp to the target function. */
11675 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
11676 plus_constant (tramp, 10),
11677 NULL_RTX, 1, OPTAB_DIRECT);
11678 emit_move_insn (gen_rtx_MEM (QImode, tramp),
d8bf17f9 11679 gen_int_mode (0xb9, QImode));
0ed08620
JH
11680 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
11681 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
d8bf17f9 11682 gen_int_mode (0xe9, QImode));
0ed08620
JH
11683 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
11684 }
11685 else
11686 {
11687 int offset = 0;
11688 /* Try to load the address using the shorter movl instead of movabs.
11689 We may want to support movq for kernel mode, but the kernel does not
11690 use trampolines at the moment. */
11691 if (x86_64_zero_extended_value (fnaddr))
11692 {
11693 fnaddr = copy_to_mode_reg (DImode, fnaddr);
11694 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
d8bf17f9 11695 gen_int_mode (0xbb41, HImode));
0ed08620
JH
11696 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
11697 gen_lowpart (SImode, fnaddr));
11698 offset += 6;
11699 }
11700 else
11701 {
11702 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
d8bf17f9 11703 gen_int_mode (0xbb49, HImode));
0ed08620
JH
11704 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
11705 fnaddr);
11706 offset += 10;
11707 }
11708 /* Load static chain using movabs to r10. */
11709 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
d8bf17f9 11710 gen_int_mode (0xba49, HImode));
0ed08620
JH
11711 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
11712 cxt);
11713 offset += 10;
11714 /* Jump to r11. */
11715 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
d8bf17f9 11716 gen_int_mode (0xff49, HImode));
0ed08620 11717 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
d8bf17f9 11718 gen_int_mode (0xe3, QImode));
0ed08620
JH
11719 offset += 3;
11720 if (offset > TRAMPOLINE_SIZE)
b531087a 11721 abort ();
0ed08620
JH
11722 }
11723}
eeb06b1b 11724\f
6a2dd09a
RS
11725#define def_builtin(MASK, NAME, TYPE, CODE) \
11726do { \
11727 if ((MASK) & target_flags) \
11728 builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
11729 NULL, NULL_TREE); \
eeb06b1b 11730} while (0)
bd793c65 11731
bd793c65
BS
11732struct builtin_description
11733{
8b60264b
KG
11734 const unsigned int mask;
11735 const enum insn_code icode;
11736 const char *const name;
11737 const enum ix86_builtins code;
11738 const enum rtx_code comparison;
11739 const unsigned int flag;
bd793c65
BS
11740};
11741
fbe5eb6d
BS
11742/* Used for builtins that are enabled both by -msse and -msse2. */
11743#define MASK_SSE1 (MASK_SSE | MASK_SSE2)
11744
8b60264b 11745static const struct builtin_description bdesc_comi[] =
bd793c65 11746{
fbe5eb6d
BS
11747 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, EQ, 0 },
11748 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, LT, 0 },
11749 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, LE, 0 },
11750 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, LT, 1 },
11751 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, LE, 1 },
11752 { MASK_SSE1, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, NE, 0 },
11753 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, EQ, 0 },
11754 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, LT, 0 },
11755 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, LE, 0 },
11756 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, LT, 1 },
11757 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, LE, 1 },
11758 { MASK_SSE1, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, NE, 0 },
11759 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, EQ, 0 },
11760 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, LT, 0 },
11761 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, LE, 0 },
11762 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, LT, 1 },
11763 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, LE, 1 },
11764 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, NE, 0 },
11765 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, EQ, 0 },
11766 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, LT, 0 },
11767 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, LE, 0 },
11768 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, LT, 1 },
11769 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, LE, 1 },
11770 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, NE, 0 },
bd793c65
BS
11771};
11772
8b60264b 11773static const struct builtin_description bdesc_2arg[] =
bd793c65
BS
11774{
11775 /* SSE */
fbe5eb6d
BS
11776 { MASK_SSE1, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
11777 { MASK_SSE1, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
11778 { MASK_SSE1, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
11779 { MASK_SSE1, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
11780 { MASK_SSE1, CODE_FOR_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
11781 { MASK_SSE1, CODE_FOR_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
11782 { MASK_SSE1, CODE_FOR_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
11783 { MASK_SSE1, CODE_FOR_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
11784
11785 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
11786 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
11787 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
11788 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, 1 },
11789 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, 1 },
11790 { MASK_SSE1, CODE_FOR_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
11791 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, EQ, 0 },
11792 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, LT, 0 },
11793 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, LE, 0 },
11794 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, LT, 1 },
11795 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, LE, 1 },
11796 { MASK_SSE1, CODE_FOR_maskncmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, UNORDERED, 0 },
11797 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
11798 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
11799 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
11800 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgtss", IX86_BUILTIN_CMPGTSS, LT, 1 },
11801 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpgess", IX86_BUILTIN_CMPGESS, LE, 1 },
11802 { MASK_SSE1, CODE_FOR_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
11803 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, EQ, 0 },
11804 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, LT, 0 },
11805 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, LE, 0 },
11806 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, LT, 1 },
11807 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, LE, 1 },
11808 { MASK_SSE1, CODE_FOR_vmmaskncmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
11809
11810 { MASK_SSE1, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
11811 { MASK_SSE1, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
11812 { MASK_SSE1, CODE_FOR_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
11813 { MASK_SSE1, CODE_FOR_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
11814
11815 { MASK_SSE1, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
11816 { MASK_SSE1, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
11817 { MASK_SSE1, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
11818 { MASK_SSE1, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
11819 { MASK_SSE1, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
bd793c65
BS
11820
11821 /* MMX */
eeb06b1b
BS
11822 { MASK_MMX, CODE_FOR_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
11823 { MASK_MMX, CODE_FOR_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
11824 { MASK_MMX, CODE_FOR_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
11825 { MASK_MMX, CODE_FOR_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
11826 { MASK_MMX, CODE_FOR_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
11827 { MASK_MMX, CODE_FOR_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
11828
11829 { MASK_MMX, CODE_FOR_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
11830 { MASK_MMX, CODE_FOR_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
11831 { MASK_MMX, CODE_FOR_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
11832 { MASK_MMX, CODE_FOR_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
11833 { MASK_MMX, CODE_FOR_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
11834 { MASK_MMX, CODE_FOR_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
11835 { MASK_MMX, CODE_FOR_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
11836 { MASK_MMX, CODE_FOR_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
11837
11838 { MASK_MMX, CODE_FOR_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
11839 { MASK_MMX, CODE_FOR_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
fbe5eb6d 11840 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
eeb06b1b
BS
11841
11842 { MASK_MMX, CODE_FOR_mmx_anddi3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
11843 { MASK_MMX, CODE_FOR_mmx_nanddi3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
11844 { MASK_MMX, CODE_FOR_mmx_iordi3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
11845 { MASK_MMX, CODE_FOR_mmx_xordi3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
11846
fbe5eb6d
BS
11847 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
11848 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
eeb06b1b
BS
11849
11850 { MASK_MMX, CODE_FOR_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
11851 { MASK_MMX, CODE_FOR_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
11852 { MASK_MMX, CODE_FOR_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
11853 { MASK_MMX, CODE_FOR_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
11854 { MASK_MMX, CODE_FOR_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
11855 { MASK_MMX, CODE_FOR_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
11856
fbe5eb6d
BS
11857 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
11858 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
11859 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
11860 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
eeb06b1b
BS
11861
11862 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
11863 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
11864 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
11865 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
11866 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
11867 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
bd793c65
BS
11868
11869 /* Special. */
eeb06b1b
BS
11870 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
11871 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
11872 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
11873
fbe5eb6d
BS
11874 { MASK_SSE1, CODE_FOR_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
11875 { MASK_SSE1, CODE_FOR_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
eeb06b1b
BS
11876
11877 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
11878 { MASK_MMX, CODE_FOR_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
11879 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
11880 { MASK_MMX, CODE_FOR_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
11881 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
11882 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
11883
11884 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
11885 { MASK_MMX, CODE_FOR_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
11886 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
11887 { MASK_MMX, CODE_FOR_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
11888 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
11889 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
11890
11891 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
11892 { MASK_MMX, CODE_FOR_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
11893 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
11894 { MASK_MMX, CODE_FOR_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
11895
fbe5eb6d
BS
11896 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
11897 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
11898
11899 /* SSE2 */
11900 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
11901 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
11902 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
11903 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
11904 { MASK_SSE2, CODE_FOR_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
11905 { MASK_SSE2, CODE_FOR_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
11906 { MASK_SSE2, CODE_FOR_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
11907 { MASK_SSE2, CODE_FOR_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
11908
11909 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
11910 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
11911 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
11912 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, 1 },
11913 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, 1 },
11914 { MASK_SSE2, CODE_FOR_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
11915 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, EQ, 0 },
11916 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, LT, 0 },
11917 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, LE, 0 },
11918 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, LT, 1 },
11919 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, LE, 1 },
11920 { MASK_SSE2, CODE_FOR_maskncmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, UNORDERED, 0 },
11921 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
11922 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
11923 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
11924 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpgtsd", IX86_BUILTIN_CMPGTSD, LT, 1 },
11925 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpgesd", IX86_BUILTIN_CMPGESD, LE, 1 },
11926 { MASK_SSE2, CODE_FOR_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
11927 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, EQ, 0 },
11928 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, LT, 0 },
11929 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, LE, 0 },
11930 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpngtsd", IX86_BUILTIN_CMPNGTSD, LT, 1 },
11931 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpngesd", IX86_BUILTIN_CMPNGESD, LE, 1 },
11932 { MASK_SSE2, CODE_FOR_vmmaskncmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, UNORDERED, 0 },
11933
11934 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
11935 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
11936 { MASK_SSE2, CODE_FOR_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
11937 { MASK_SSE2, CODE_FOR_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
11938
11939 { MASK_SSE2, CODE_FOR_sse2_anddf3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
11940 { MASK_SSE2, CODE_FOR_sse2_nanddf3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
11941 { MASK_SSE2, CODE_FOR_sse2_iordf3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
11942 { MASK_SSE2, CODE_FOR_sse2_xordf3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
11943
11944 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
11945 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
11946 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
11947
11948 /* SSE2 MMX */
11949 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
11950 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
11951 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
11952 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
11953 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
11954 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
11955 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
11956 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
11957
11958 { MASK_SSE2, CODE_FOR_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
11959 { MASK_SSE2, CODE_FOR_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
11960 { MASK_SSE2, CODE_FOR_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
11961 { MASK_SSE2, CODE_FOR_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
11962 { MASK_SSE2, CODE_FOR_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
11963 { MASK_SSE2, CODE_FOR_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
11964 { MASK_SSE2, CODE_FOR_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
11965 { MASK_SSE2, CODE_FOR_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
11966
11967 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
11968 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
11969 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, 0, 0 },
11970 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, 0, 0 },
11971
916b60b7 11972 { MASK_SSE2, CODE_FOR_sse2_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
11973 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
11974 { MASK_SSE2, CODE_FOR_sse2_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
11975 { MASK_SSE2, CODE_FOR_sse2_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
fbe5eb6d 11976
11977 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
11978 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
11979
11980 { MASK_SSE2, CODE_FOR_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
11981 { MASK_SSE2, CODE_FOR_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
11982 { MASK_SSE2, CODE_FOR_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
11983 { MASK_SSE2, CODE_FOR_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
11984 { MASK_SSE2, CODE_FOR_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
11985 { MASK_SSE2, CODE_FOR_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
11986
11987 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
11988 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
11989 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
11990 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
11991
11992 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
11993 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
11994 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
11995 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
11996 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
11997 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
11998
916b60b7 11999 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
12000 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
12001 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
12002
12003 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
12004 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
12005
12006 { MASK_SSE2, CODE_FOR_ashlv8hi3_ti, 0, IX86_BUILTIN_PSLLW128, 0, 0 },
12007 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
12008 { MASK_SSE2, CODE_FOR_ashlv4si3_ti, 0, IX86_BUILTIN_PSLLD128, 0, 0 },
12009 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
12010 { MASK_SSE2, CODE_FOR_ashlv2di3_ti, 0, IX86_BUILTIN_PSLLQ128, 0, 0 },
12011 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
12012
12013 { MASK_SSE2, CODE_FOR_lshrv8hi3_ti, 0, IX86_BUILTIN_PSRLW128, 0, 0 },
12014 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
12015 { MASK_SSE2, CODE_FOR_lshrv4si3_ti, 0, IX86_BUILTIN_PSRLD128, 0, 0 },
12016 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
12017 { MASK_SSE2, CODE_FOR_lshrv2di3_ti, 0, IX86_BUILTIN_PSRLQ128, 0, 0 },
12018 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
12019
12020 { MASK_SSE2, CODE_FOR_ashrv8hi3_ti, 0, IX86_BUILTIN_PSRAW128, 0, 0 },
12021 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
12022 { MASK_SSE2, CODE_FOR_ashrv4si3_ti, 0, IX86_BUILTIN_PSRAD128, 0, 0 },
12023 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
12024
12025 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
12026
fbe5eb6d 12027 { MASK_SSE2, CODE_FOR_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
12028 { MASK_SSE2, CODE_FOR_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
12029 { MASK_SSE2, CODE_FOR_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 }
bd793c65 12030};
12031
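/* Each entry above pairs one builtin with an existing named insn
   pattern; the expansion itself is done generically by
   ix86_expand_binop_builtin below.  A minimal user-level sketch,
   illustrative only, assuming -msse2 and the __v2df spelling used
   by emmintrin.h of this vintage:

     typedef double __v2df __attribute__ ((mode (V2DF)));

     __v2df
     my_addpd (__v2df a, __v2df b)
     {
       return __builtin_ia32_addpd (a, b);
     }

   which should compile to a single addpd via CODE_FOR_addv2df3.  */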
8b60264b 12032static const struct builtin_description bdesc_1arg[] =
bd793c65 12033{
fbe5eb6d 12034 { MASK_SSE1 | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
12035 { MASK_SSE1, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
12036
12037 { MASK_SSE1, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
12038 { MASK_SSE1, CODE_FOR_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
12039 { MASK_SSE1, CODE_FOR_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
12040
12041 { MASK_SSE1, CODE_FOR_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
12042 { MASK_SSE1, CODE_FOR_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
12043 { MASK_SSE1, CODE_FOR_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
12044 { MASK_SSE1, CODE_FOR_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
12045
12046 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
12047 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
12048 { MASK_SSE2, CODE_FOR_sse2_movq2dq, 0, IX86_BUILTIN_MOVQ2DQ, 0, 0 },
12049
12050 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
12051
12052 { MASK_SSE2, CODE_FOR_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
12053 { MASK_SSE2, CODE_FOR_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
bd793c65 12054
fbe5eb6d 12055 { MASK_SSE2, CODE_FOR_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
12056 { MASK_SSE2, CODE_FOR_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
12057 { MASK_SSE2, CODE_FOR_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
12058 { MASK_SSE2, CODE_FOR_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
12059 { MASK_SSE2, CODE_FOR_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
bd793c65 12060
fbe5eb6d 12061 { MASK_SSE2, CODE_FOR_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
bd793c65 12062
fbe5eb6d 12063 { MASK_SSE2, CODE_FOR_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
12064 { MASK_SSE2, CODE_FOR_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
12065
12066 { MASK_SSE2, CODE_FOR_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
12067 { MASK_SSE2, CODE_FOR_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
12068 { MASK_SSE2, CODE_FOR_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 }
bd793c65 12069};
12070
f6155fda 12071void
12072ix86_init_builtins ()
12073{
12074 if (TARGET_MMX)
12075 ix86_init_mmx_sse_builtins ();
12076}
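/* Each def_builtin performed below is gated on the entry's mask, so
   e.g. plain -mmmx creates __builtin_ia32_paddb but not the
   MASK_SSE2 name __builtin_ia32_addpd; the latter only exists once
   -msse2 is in effect.  */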
12077
12078/* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
bd793c65 12079 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
12080 builtins. */
e37af218 12081static void
f6155fda 12082ix86_init_mmx_sse_builtins ()
bd793c65 12083{
8b60264b 12084 const struct builtin_description * d;
77ebd435 12085 size_t i;
bd793c65 12086
12087 tree pchar_type_node = build_pointer_type (char_type_node);
12088 tree pfloat_type_node = build_pointer_type (float_type_node);
12089 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
916b60b7 12090 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
bd793c65 12091 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
12092
12093 /* Comparisons. */
12094 tree int_ftype_v4sf_v4sf
b4de2f7d 12095 = build_function_type_list (integer_type_node,
12096 V4SF_type_node, V4SF_type_node, NULL_TREE);
bd793c65 12097 tree v4si_ftype_v4sf_v4sf
b4de2f7d 12098 = build_function_type_list (V4SI_type_node,
12099 V4SF_type_node, V4SF_type_node, NULL_TREE);
bd793c65 12100 /* MMX/SSE/integer conversions. */
bd793c65 12101 tree int_ftype_v4sf
b4de2f7d 12102 = build_function_type_list (integer_type_node,
12103 V4SF_type_node, NULL_TREE);
bd793c65 12104 tree int_ftype_v8qi
b4de2f7d 12105 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
bd793c65 12106 tree v4sf_ftype_v4sf_int
b4de2f7d 12107 = build_function_type_list (V4SF_type_node,
12108 V4SF_type_node, integer_type_node, NULL_TREE);
bd793c65 12109 tree v4sf_ftype_v4sf_v2si
b4de2f7d 12110 = build_function_type_list (V4SF_type_node,
12111 V4SF_type_node, V2SI_type_node, NULL_TREE);
bd793c65 12112 tree int_ftype_v4hi_int
b4de2f7d 12113 = build_function_type_list (integer_type_node,
12114 V4HI_type_node, integer_type_node, NULL_TREE);
bd793c65 12115 tree v4hi_ftype_v4hi_int_int
e7a60f56 12116 = build_function_type_list (V4HI_type_node, V4HI_type_node,
b4de2f7d 12117 integer_type_node, integer_type_node,
12118 NULL_TREE);
bd793c65 12119 /* Miscellaneous. */
12120 tree v8qi_ftype_v4hi_v4hi
b4de2f7d 12121 = build_function_type_list (V8QI_type_node,
12122 V4HI_type_node, V4HI_type_node, NULL_TREE);
bd793c65 12123 tree v4hi_ftype_v2si_v2si
b4de2f7d 12124 = build_function_type_list (V4HI_type_node,
12125 V2SI_type_node, V2SI_type_node, NULL_TREE);
bd793c65 12126 tree v4sf_ftype_v4sf_v4sf_int
b4de2f7d 12127 = build_function_type_list (V4SF_type_node,
12128 V4SF_type_node, V4SF_type_node,
12129 integer_type_node, NULL_TREE);
bd793c65 12130 tree v2si_ftype_v4hi_v4hi
b4de2f7d 12131 = build_function_type_list (V2SI_type_node,
12132 V4HI_type_node, V4HI_type_node, NULL_TREE);
bd793c65 12133 tree v4hi_ftype_v4hi_int
b4de2f7d 12134 = build_function_type_list (V4HI_type_node,
e7a60f56 12135 V4HI_type_node, integer_type_node, NULL_TREE);
bd793c65 12136 tree v4hi_ftype_v4hi_di
b4de2f7d 12137 = build_function_type_list (V4HI_type_node,
12138 V4HI_type_node, long_long_unsigned_type_node,
12139 NULL_TREE);
bd793c65 12140 tree v2si_ftype_v2si_di
b4de2f7d 12141 = build_function_type_list (V2SI_type_node,
12142 V2SI_type_node, long_long_unsigned_type_node,
12143 NULL_TREE);
bd793c65 12144 tree void_ftype_void
b4de2f7d 12145 = build_function_type (void_type_node, void_list_node);
bd793c65 12146 tree void_ftype_unsigned
b4de2f7d 12147 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
bd793c65 12148 tree unsigned_ftype_void
b4de2f7d 12149 = build_function_type (unsigned_type_node, void_list_node);
bd793c65 12150 tree di_ftype_void
b4de2f7d 12151 = build_function_type (long_long_unsigned_type_node, void_list_node);
e37af218 12152 tree v4sf_ftype_void
b4de2f7d 12153 = build_function_type (V4SF_type_node, void_list_node);
bd793c65 12154 tree v2si_ftype_v4sf
b4de2f7d 12155 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
bd793c65 12156 /* Loads/stores. */
bd793c65 12157 tree void_ftype_v8qi_v8qi_pchar
b4de2f7d 12158 = build_function_type_list (void_type_node,
12159 V8QI_type_node, V8QI_type_node,
12160 pchar_type_node, NULL_TREE);
bd793c65 12161 tree v4sf_ftype_pfloat
b4de2f7d 12162 = build_function_type_list (V4SF_type_node, pfloat_type_node, NULL_TREE);
bd793c65 12163 /* @@@ the type is bogus */
12164 tree v4sf_ftype_v4sf_pv2si
b4de2f7d 12165 = build_function_type_list (V4SF_type_node,
12166 V4SF_type_node, pv2si_type_node, NULL_TREE);
1255c85c 12167 tree void_ftype_pv2si_v4sf
b4de2f7d 12168 = build_function_type_list (void_type_node,
12169 pv2si_type_node, V4SF_type_node, NULL_TREE);
bd793c65 12170 tree void_ftype_pfloat_v4sf
b4de2f7d 12171 = build_function_type_list (void_type_node,
12172 pfloat_type_node, V4SF_type_node, NULL_TREE);
bd793c65 12173 tree void_ftype_pdi_di
b4de2f7d 12174 = build_function_type_list (void_type_node,
12175 pdi_type_node, long_long_unsigned_type_node,
12176 NULL_TREE);
916b60b7 12177 tree void_ftype_pv2di_v2di
b4de2f7d 12178 = build_function_type_list (void_type_node,
12179 pv2di_type_node, V2DI_type_node, NULL_TREE);
bd793c65 12180 /* Normal vector unops. */
12181 tree v4sf_ftype_v4sf
b4de2f7d 12182 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
0f290768 12183
bd793c65 12184 /* Normal vector binops. */
12185 tree v4sf_ftype_v4sf_v4sf
b4de2f7d 12186 = build_function_type_list (V4SF_type_node,
12187 V4SF_type_node, V4SF_type_node, NULL_TREE);
bd793c65 12188 tree v8qi_ftype_v8qi_v8qi
b4de2f7d 12189 = build_function_type_list (V8QI_type_node,
12190 V8QI_type_node, V8QI_type_node, NULL_TREE);
bd793c65 12191 tree v4hi_ftype_v4hi_v4hi
b4de2f7d 12192 = build_function_type_list (V4HI_type_node,
12193 V4HI_type_node, V4HI_type_node, NULL_TREE);
bd793c65 12194 tree v2si_ftype_v2si_v2si
b4de2f7d 12195 = build_function_type_list (V2SI_type_node,
12196 V2SI_type_node, V2SI_type_node, NULL_TREE);
bd793c65 12197 tree di_ftype_di_di
b4de2f7d 12198 = build_function_type_list (long_long_unsigned_type_node,
12199 long_long_unsigned_type_node,
12200 long_long_unsigned_type_node, NULL_TREE);
bd793c65 12201
47f339cf 12202 tree v2si_ftype_v2sf
ae3aa00d 12203 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
47f339cf 12204 tree v2sf_ftype_v2si
b4de2f7d 12205 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
47f339cf 12206 tree v2si_ftype_v2si
b4de2f7d 12207 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
47f339cf 12208 tree v2sf_ftype_v2sf
b4de2f7d 12209 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
47f339cf 12210 tree v2sf_ftype_v2sf_v2sf
b4de2f7d 12211 = build_function_type_list (V2SF_type_node,
12212 V2SF_type_node, V2SF_type_node, NULL_TREE);
47f339cf 12213 tree v2si_ftype_v2sf_v2sf
b4de2f7d 12214 = build_function_type_list (V2SI_type_node,
12215 V2SF_type_node, V2SF_type_node, NULL_TREE);
fbe5eb6d 12216 tree pint_type_node = build_pointer_type (integer_type_node);
12217 tree pdouble_type_node = build_pointer_type (double_type_node);
12218 tree int_ftype_v2df_v2df
b4de2f7d 12219 = build_function_type_list (integer_type_node,
12220 V2DF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 12221
12222 tree ti_ftype_void
b4de2f7d 12223 = build_function_type (intTI_type_node, void_list_node);
fbe5eb6d 12224 tree ti_ftype_ti_ti
b4de2f7d 12225 = build_function_type_list (intTI_type_node,
12226 intTI_type_node, intTI_type_node, NULL_TREE);
fbe5eb6d 12227 tree void_ftype_pvoid
b4de2f7d 12228 = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
fbe5eb6d 12229 tree v2di_ftype_di
b4de2f7d 12230 = build_function_type_list (V2DI_type_node,
12231 long_long_unsigned_type_node, NULL_TREE);
fbe5eb6d 12232 tree v4sf_ftype_v4si
b4de2f7d 12233 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
fbe5eb6d 12234 tree v4si_ftype_v4sf
b4de2f7d 12235 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
fbe5eb6d 12236 tree v2df_ftype_v4si
b4de2f7d 12237 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
fbe5eb6d 12238 tree v4si_ftype_v2df
b4de2f7d 12239 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 12240 tree v2si_ftype_v2df
b4de2f7d 12241 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 12242 tree v4sf_ftype_v2df
b4de2f7d 12243 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 12244 tree v2df_ftype_v2si
b4de2f7d 12245 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
fbe5eb6d 12246 tree v2df_ftype_v4sf
b4de2f7d 12247 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
fbe5eb6d 12248 tree int_ftype_v2df
b4de2f7d 12249 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 12250 tree v2df_ftype_v2df_int
b4de2f7d 12251 = build_function_type_list (V2DF_type_node,
12252 V2DF_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 12253 tree v4sf_ftype_v4sf_v2df
b4de2f7d 12254 = build_function_type_list (V4SF_type_node,
12255 V4SF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 12256 tree v2df_ftype_v2df_v4sf
b4de2f7d 12257 = build_function_type_list (V2DF_type_node,
12258 V2DF_type_node, V4SF_type_node, NULL_TREE);
fbe5eb6d 12259 tree v2df_ftype_v2df_v2df_int
b4de2f7d 12260 = build_function_type_list (V2DF_type_node,
12261 V2DF_type_node, V2DF_type_node,
12262 integer_type_node,
12263 NULL_TREE);
fbe5eb6d 12264 tree v2df_ftype_v2df_pv2si
b4de2f7d 12265 = build_function_type_list (V2DF_type_node,
12266 V2DF_type_node, pv2si_type_node, NULL_TREE);
fbe5eb6d 12267 tree void_ftype_pv2si_v2df
b4de2f7d 12268 = build_function_type_list (void_type_node,
12269 pv2si_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 12270 tree void_ftype_pdouble_v2df
b4de2f7d 12271 = build_function_type_list (void_type_node,
12272 pdouble_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 12273 tree void_ftype_pint_int
b4de2f7d 12274 = build_function_type_list (void_type_node,
12275 pint_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 12276 tree void_ftype_v16qi_v16qi_pchar
b4de2f7d 12277 = build_function_type_list (void_type_node,
12278 V16QI_type_node, V16QI_type_node,
12279 pchar_type_node, NULL_TREE);
fbe5eb6d 12280 tree v2df_ftype_pdouble
b4de2f7d 12281 = build_function_type_list (V2DF_type_node, pdouble_type_node, NULL_TREE);
fbe5eb6d 12282 tree v2df_ftype_v2df_v2df
b4de2f7d 12283 = build_function_type_list (V2DF_type_node,
12284 V2DF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 12285 tree v16qi_ftype_v16qi_v16qi
b4de2f7d 12286 = build_function_type_list (V16QI_type_node,
12287 V16QI_type_node, V16QI_type_node, NULL_TREE);
fbe5eb6d 12288 tree v8hi_ftype_v8hi_v8hi
b4de2f7d 12289 = build_function_type_list (V8HI_type_node,
12290 V8HI_type_node, V8HI_type_node, NULL_TREE);
fbe5eb6d 12291 tree v4si_ftype_v4si_v4si
b4de2f7d 12292 = build_function_type_list (V4SI_type_node,
12293 V4SI_type_node, V4SI_type_node, NULL_TREE);
fbe5eb6d 12294 tree v2di_ftype_v2di_v2di
b4de2f7d 12295 = build_function_type_list (V2DI_type_node,
12296 V2DI_type_node, V2DI_type_node, NULL_TREE);
fbe5eb6d 12297 tree v2di_ftype_v2df_v2df
b4de2f7d 12298 = build_function_type_list (V2DI_type_node,
12299 V2DF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 12300 tree v2df_ftype_v2df
b4de2f7d 12301 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
fbe5eb6d 12302 tree v2df_ftype_double
b4de2f7d 12303 = build_function_type_list (V2DF_type_node, double_type_node, NULL_TREE);
fbe5eb6d 12304 tree v2df_ftype_double_double
b4de2f7d 12305 = build_function_type_list (V2DF_type_node,
12306 double_type_node, double_type_node, NULL_TREE);
fbe5eb6d 12307 tree int_ftype_v8hi_int
b4de2f7d 12308 = build_function_type_list (integer_type_node,
12309 V8HI_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 12310 tree v8hi_ftype_v8hi_int_int
b4de2f7d 12311 = build_function_type_list (V8HI_type_node,
12312 V8HI_type_node, integer_type_node,
12313 integer_type_node, NULL_TREE);
916b60b7 12314 tree v2di_ftype_v2di_int
b4de2f7d 12315 = build_function_type_list (V2DI_type_node,
12316 V2DI_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 12317 tree v4si_ftype_v4si_int
b4de2f7d 12318 = build_function_type_list (V4SI_type_node,
12319 V4SI_type_node, integer_type_node, NULL_TREE);
fbe5eb6d 12320 tree v8hi_ftype_v8hi_int
b4de2f7d 12321 = build_function_type_list (V8HI_type_node,
12322 V8HI_type_node, integer_type_node, NULL_TREE);
916b60b7 12323 tree v8hi_ftype_v8hi_v2di
b4de2f7d 12324 = build_function_type_list (V8HI_type_node,
12325 V8HI_type_node, V2DI_type_node, NULL_TREE);
916b60b7 12326 tree v4si_ftype_v4si_v2di
b4de2f7d 12327 = build_function_type_list (V4SI_type_node,
12328 V4SI_type_node, V2DI_type_node, NULL_TREE);
916b60b7 12329 tree v4si_ftype_v8hi_v8hi
b4de2f7d 12330 = build_function_type_list (V4SI_type_node,
12331 V8HI_type_node, V8HI_type_node, NULL_TREE);
916b60b7 12332 tree di_ftype_v8qi_v8qi
b4de2f7d 12333 = build_function_type_list (long_long_unsigned_type_node,
12334 V8QI_type_node, V8QI_type_node, NULL_TREE);
916b60b7 12335 tree v2di_ftype_v16qi_v16qi
b4de2f7d 12336 = build_function_type_list (V2DI_type_node,
12337 V16QI_type_node, V16QI_type_node, NULL_TREE);
916b60b7 12338 tree int_ftype_v16qi
b4de2f7d 12339 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
47f339cf 12340
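/* In C prototype terms (hypothetical spellings in the __v4sf style
   of xmmintrin.h), v4sf_ftype_v4sf_v4sf above denotes

     __v4sf f (__v4sf, __v4sf);

   and void_ftype_pfloat_v4sf denotes

     void f (float *, __v4sf);

   with NULL_TREE terminating each build_function_type_list call.  */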
bd793c65 12341 /* Add all builtins that are more or less simple operations on two
12342 operands. */
ca7558fc 12343 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
bd793c65 12344 {
12345 /* Use one of the operands; the target can have a different mode for
12346 mask-generating compares. */
12347 enum machine_mode mode;
12348 tree type;
12349
12350 if (d->name == 0)
12351 continue;
12352 mode = insn_data[d->icode].operand[1].mode;
12353
bd793c65 12354 switch (mode)
12355 {
fbe5eb6d 12356 case V16QImode:
12357 type = v16qi_ftype_v16qi_v16qi;
12358 break;
12359 case V8HImode:
12360 type = v8hi_ftype_v8hi_v8hi;
12361 break;
12362 case V4SImode:
12363 type = v4si_ftype_v4si_v4si;
12364 break;
12365 case V2DImode:
12366 type = v2di_ftype_v2di_v2di;
12367 break;
12368 case V2DFmode:
12369 type = v2df_ftype_v2df_v2df;
12370 break;
12371 case TImode:
12372 type = ti_ftype_ti_ti;
12373 break;
bd793c65 12374 case V4SFmode:
12375 type = v4sf_ftype_v4sf_v4sf;
12376 break;
12377 case V8QImode:
12378 type = v8qi_ftype_v8qi_v8qi;
12379 break;
12380 case V4HImode:
12381 type = v4hi_ftype_v4hi_v4hi;
12382 break;
12383 case V2SImode:
12384 type = v2si_ftype_v2si_v2si;
12385 break;
bd793c65 12386 case DImode:
12387 type = di_ftype_di_di;
12388 break;
12389
12390 default:
12391 abort ();
12392 }
0f290768 12393
bd793c65 12394 /* Override for comparisons. */
12395 if (d->icode == CODE_FOR_maskcmpv4sf3
12396 || d->icode == CODE_FOR_maskncmpv4sf3
12397 || d->icode == CODE_FOR_vmmaskcmpv4sf3
12398 || d->icode == CODE_FOR_vmmaskncmpv4sf3)
12399 type = v4si_ftype_v4sf_v4sf;
12400
fbe5eb6d 12401 if (d->icode == CODE_FOR_maskcmpv2df3
12402 || d->icode == CODE_FOR_maskncmpv2df3
12403 || d->icode == CODE_FOR_vmmaskcmpv2df3
12404 || d->icode == CODE_FOR_vmmaskncmpv2df3)
12405 type = v2di_ftype_v2df_v2df;
12406
eeb06b1b 12407 def_builtin (d->mask, d->name, type, d->code);
bd793c65 12408 }
12409
12410 /* Add the remaining MMX insns with somewhat more complicated types. */
eeb06b1b 12411 def_builtin (MASK_MMX, "__builtin_ia32_mmx_zero", di_ftype_void, IX86_BUILTIN_MMX_ZERO);
12412 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
12413 def_builtin (MASK_SSE1, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
12414 def_builtin (MASK_SSE1, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
12415 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
12416 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
12417 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
12418
12419 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
12420 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
12421 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
12422
12423 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
12424 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
12425
12426 def_builtin (MASK_MMX, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
12427 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
0f290768 12428
bd793c65 12429 /* comi/ucomi insns. */
ca7558fc 12430 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
fbe5eb6d 12431 if (d->mask == MASK_SSE2)
12432 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
12433 else
12434 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
bd793c65 12435
1255c85c 12436 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
12437 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
12438 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
bd793c65 12439
fbe5eb6d 12440 def_builtin (MASK_SSE1, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
12441 def_builtin (MASK_SSE1, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
12442 def_builtin (MASK_SSE1, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
12443 def_builtin (MASK_SSE1, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
12444 def_builtin (MASK_SSE1, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
12445 def_builtin (MASK_SSE1, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
bd793c65 12446
fbe5eb6d 12447 def_builtin (MASK_SSE1, "__builtin_ia32_andps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDPS);
12448 def_builtin (MASK_SSE1, "__builtin_ia32_andnps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ANDNPS);
12449 def_builtin (MASK_SSE1, "__builtin_ia32_orps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ORPS);
12450 def_builtin (MASK_SSE1, "__builtin_ia32_xorps", v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_XORPS);
e37af218 12451
fbe5eb6d 12452 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pextrw", int_ftype_v4hi_int, IX86_BUILTIN_PEXTRW);
12453 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pinsrw", v4hi_ftype_v4hi_int_int, IX86_BUILTIN_PINSRW);
bd793c65 12454
fbe5eb6d 12455 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
bd793c65 12456
fbe5eb6d 12457 def_builtin (MASK_SSE1, "__builtin_ia32_loadaps", v4sf_ftype_pfloat, IX86_BUILTIN_LOADAPS);
12458 def_builtin (MASK_SSE1, "__builtin_ia32_loadups", v4sf_ftype_pfloat, IX86_BUILTIN_LOADUPS);
12459 def_builtin (MASK_SSE1, "__builtin_ia32_loadss", v4sf_ftype_pfloat, IX86_BUILTIN_LOADSS);
12460 def_builtin (MASK_SSE1, "__builtin_ia32_storeaps", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREAPS);
12461 def_builtin (MASK_SSE1, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
12462 def_builtin (MASK_SSE1, "__builtin_ia32_storess", void_ftype_pfloat_v4sf, IX86_BUILTIN_STORESS);
bd793c65 12463
fbe5eb6d 12464 def_builtin (MASK_SSE1, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
12465 def_builtin (MASK_SSE1, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
12466 def_builtin (MASK_SSE1, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
12467 def_builtin (MASK_SSE1, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
bd793c65 12468
fbe5eb6d 12469 def_builtin (MASK_SSE1, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
12470 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
12471 def_builtin (MASK_SSE1, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
12472 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
bd793c65 12473
fbe5eb6d 12474 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
bd793c65 12475
916b60b7 12476 def_builtin (MASK_SSE1 | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
bd793c65 12477
fbe5eb6d 12478 def_builtin (MASK_SSE1, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
12479 def_builtin (MASK_SSE1, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
12480 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
12481 def_builtin (MASK_SSE1, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
12482 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
12483 def_builtin (MASK_SSE1, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
bd793c65 12484
fbe5eb6d 12485 def_builtin (MASK_SSE1, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
bd793c65 12486
47f339cf 12487 /* Original 3DNow! */
12488 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
12489 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
12490 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
12491 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
12492 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
12493 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
12494 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
12495 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
12496 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
12497 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
12498 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
12499 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
12500 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
12501 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
12502 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
12503 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
12504 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
12505 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
12506 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
12507 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
47f339cf 12508
12509 /* 3DNow! extension as used in the Athlon CPU. */
12510 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
12511 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
12512 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
12513 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
12514 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
12515 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
12516
fbe5eb6d 12517 def_builtin (MASK_SSE1, "__builtin_ia32_setzerops", v4sf_ftype_void, IX86_BUILTIN_SSE_ZERO);
12518
12519 /* SSE2 */
12520 def_builtin (MASK_SSE2, "__builtin_ia32_pextrw128", int_ftype_v8hi_int, IX86_BUILTIN_PEXTRW128);
12521 def_builtin (MASK_SSE2, "__builtin_ia32_pinsrw128", v8hi_ftype_v8hi_int_int, IX86_BUILTIN_PINSRW128);
12522
12523 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
12524 def_builtin (MASK_SSE2, "__builtin_ia32_movq2dq", v2di_ftype_di, IX86_BUILTIN_MOVQ2DQ);
12525
12526 def_builtin (MASK_SSE2, "__builtin_ia32_loadapd", v2df_ftype_pdouble, IX86_BUILTIN_LOADAPD);
12527 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pdouble, IX86_BUILTIN_LOADUPD);
12528 def_builtin (MASK_SSE2, "__builtin_ia32_loadsd", v2df_ftype_pdouble, IX86_BUILTIN_LOADSD);
12529 def_builtin (MASK_SSE2, "__builtin_ia32_storeapd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREAPD);
12530 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
12531 def_builtin (MASK_SSE2, "__builtin_ia32_storesd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORESD);
12532
12533 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADHPD);
12534 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pv2si, IX86_BUILTIN_LOADLPD);
12535 def_builtin (MASK_SSE2, "__builtin_ia32_storehpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STOREHPD);
12536 def_builtin (MASK_SSE2, "__builtin_ia32_storelpd", void_ftype_pv2si_v2df, IX86_BUILTIN_STORELPD);
12537
12538 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
916b60b7 12539 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
fbe5eb6d 12540 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
12541 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
916b60b7 12542 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
fbe5eb6d 12543
12544 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
12545 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
12546 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
916b60b7 12547 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
fbe5eb6d 12548
12549 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
12550 def_builtin (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
12551
12552 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
12553
12554 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
916b60b7 12555 def_builtin (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
fbe5eb6d 12556
12557 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
12558 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
12559 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
12560 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
12561 def_builtin (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
12562
12563 def_builtin (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
12564
12565 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
12566 def_builtin (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
12567
12568 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
12569 def_builtin (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
12570 def_builtin (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
12571
12572 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
12573 def_builtin (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
12574 def_builtin (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
12575
12576 def_builtin (MASK_SSE2, "__builtin_ia32_setpd1", v2df_ftype_double, IX86_BUILTIN_SETPD1);
12577 def_builtin (MASK_SSE2, "__builtin_ia32_setpd", v2df_ftype_double_double, IX86_BUILTIN_SETPD);
12578 def_builtin (MASK_SSE2, "__builtin_ia32_setzeropd", ti_ftype_void, IX86_BUILTIN_CLRPD);
12579 def_builtin (MASK_SSE2, "__builtin_ia32_loadpd1", v2df_ftype_pdouble, IX86_BUILTIN_LOADPD1);
12580 def_builtin (MASK_SSE2, "__builtin_ia32_loadrpd", v2df_ftype_pdouble, IX86_BUILTIN_LOADRPD);
12581 def_builtin (MASK_SSE2, "__builtin_ia32_storepd1", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREPD1);
12582 def_builtin (MASK_SSE2, "__builtin_ia32_storerpd", void_ftype_pdouble_v2df, IX86_BUILTIN_STORERPD);
12583
12584 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pvoid, IX86_BUILTIN_CLFLUSH);
12585 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
12586 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
916b60b7 12587
12588 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
12589 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
12590 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
12591
12592 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
12593 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
12594 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
12595
12596 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
12597 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
12598
12599 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
12600 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
12601 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
12602
12603 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
12604 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
12605 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
12606
12607 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
12608 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
12609
12610 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
bd793c65 12611}
12612
12613/* Errors in the source file can cause expand_expr to return const0_rtx
12614 where we expect a vector. To avoid crashing, use one of the vector
12615 clear instructions. */
12616static rtx
12617safe_vector_operand (x, mode)
12618 rtx x;
12619 enum machine_mode mode;
12620{
12621 if (x != const0_rtx)
12622 return x;
12623 x = gen_reg_rtx (mode);
12624
47f339cf 12625 if (VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode))
bd793c65 12626 emit_insn (gen_mmx_clrdi (mode == DImode ? x
12627 : gen_rtx_SUBREG (DImode, x, 0)));
12628 else
e37af218 12629 emit_insn (gen_sse_clrv4sf (mode == V4SFmode ? x
12630 : gen_rtx_SUBREG (V4SFmode, x, 0)));
bd793c65 12631 return x;
12632}
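/* For example, a call such as __builtin_ia32_paddb (erroneous_expr, b)
   may reach the expanders with const0_rtx standing in for a V8QI
   value; substituting a cleared vector register lets expansion
   continue after the error has already been diagnosed.  */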
12633
12634/* Subroutine of ix86_expand_builtin to take care of binop insns. */
12635
12636static rtx
12637ix86_expand_binop_builtin (icode, arglist, target)
12638 enum insn_code icode;
12639 tree arglist;
12640 rtx target;
12641{
12642 rtx pat;
12643 tree arg0 = TREE_VALUE (arglist);
12644 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12645 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12646 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12647 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12648 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12649 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
12650
12651 if (VECTOR_MODE_P (mode0))
12652 op0 = safe_vector_operand (op0, mode0);
12653 if (VECTOR_MODE_P (mode1))
12654 op1 = safe_vector_operand (op1, mode1);
12655
12656 if (! target
12657 || GET_MODE (target) != tmode
12658 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12659 target = gen_reg_rtx (tmode);
12660
12661 /* In case the insn wants input operands in modes different from
12662 the result, abort. */
12663 if (GET_MODE (op0) != mode0 || GET_MODE (op1) != mode1)
12664 abort ();
12665
12666 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12667 op0 = copy_to_mode_reg (mode0, op0);
12668 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12669 op1 = copy_to_mode_reg (mode1, op1);
12670
59bef189 12671 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
12672 yet one of the two must not be a memory. This is normally enforced
12673 by expanders, but we didn't bother to create one here. */
12674 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
12675 op0 = copy_to_mode_reg (mode0, op0);
12676
bd793c65 12677 pat = GEN_FCN (icode) (target, op0, op1);
12678 if (! pat)
12679 return 0;
12680 emit_insn (pat);
12681 return target;
12682}
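/* Illustrative trace, reasoning from the tables above: expanding
   __builtin_ia32_paddw (a, b) looks up CODE_FOR_addv4hi3 in
   bdesc_2arg, arrives here, and emits a single

     (set (reg:V4HI ...) (plus:V4HI ...))

   once at most one operand remains a MEM.  */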
12683
fce5a9f2 12684/* In type_for_mode we restrict the ability to create TImode types
e37af218 12685 to hosts with 64-bit H_W_I. So we've defined the SSE logicals
12686 to have a V4SFmode signature. Convert them in-place to TImode. */
12687
12688static rtx
12689ix86_expand_timode_binop_builtin (icode, arglist, target)
12690 enum insn_code icode;
12691 tree arglist;
12692 rtx target;
12693{
12694 rtx pat;
12695 tree arg0 = TREE_VALUE (arglist);
12696 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12697 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12698 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12699
12700 op0 = gen_lowpart (TImode, op0);
12701 op1 = gen_lowpart (TImode, op1);
12702 target = gen_reg_rtx (TImode);
12703
12704 if (! (*insn_data[icode].operand[1].predicate) (op0, TImode))
12705 op0 = copy_to_mode_reg (TImode, op0);
12706 if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
12707 op1 = copy_to_mode_reg (TImode, op1);
12708
59bef189 12709 /* In the commutative cases, both op0 and op1 are nonimmediate_operand,
12710 yet one of the two must not be a memory. This is normally enforced
12711 by expanders, but we didn't bother to create one here. */
12712 if (GET_CODE (op0) == MEM && GET_CODE (op1) == MEM)
12713 op0 = copy_to_mode_reg (TImode, op0);
12714
e37af218 12715 pat = GEN_FCN (icode) (target, op0, op1);
12716 if (! pat)
12717 return 0;
12718 emit_insn (pat);
12719
12720 return gen_lowpart (V4SFmode, target);
12721}
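/* Sketch of the effect, illustrative only, with __v4sf as in
   xmmintrin.h:

     __v4sf
     apply_mask (__v4sf a, __v4sf b)
     {
       return __builtin_ia32_andps (a, b);
     }

   The V4SF arguments are viewed as TImode, combined by the TImode
   logical pattern, and the result is viewed back as V4SF.  */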
12722
bd793c65 12723/* Subroutine of ix86_expand_builtin to take care of stores. */
12724
12725static rtx
e37af218 12726ix86_expand_store_builtin (icode, arglist)
bd793c65 12727 enum insn_code icode;
12728 tree arglist;
bd793c65 12729{
12730 rtx pat;
12731 tree arg0 = TREE_VALUE (arglist);
12732 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12733 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12734 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12735 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
12736 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
12737
12738 if (VECTOR_MODE_P (mode1))
12739 op1 = safe_vector_operand (op1, mode1);
12740
12741 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
59bef189 12742
12743 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
12744 op1 = copy_to_mode_reg (mode1, op1);
12745
bd793c65 12746 pat = GEN_FCN (icode) (op0, op1);
12747 if (pat)
12748 emit_insn (pat);
12749 return 0;
12750}
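/* E.g. __builtin_ia32_storeaps (p, x) arrives here with the address
   P as operand 0; it is forced into a Pmode register and wrapped in
   a MEM of the pattern's mode, so the store is emitted directly and
   nothing is returned.  */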
12751
12752/* Subroutine of ix86_expand_builtin to take care of unop insns. */
12753
12754static rtx
12755ix86_expand_unop_builtin (icode, arglist, target, do_load)
12756 enum insn_code icode;
12757 tree arglist;
12758 rtx target;
12759 int do_load;
12760{
12761 rtx pat;
12762 tree arg0 = TREE_VALUE (arglist);
12763 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12764 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12765 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12766
12767 if (! target
12768 || GET_MODE (target) != tmode
12769 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12770 target = gen_reg_rtx (tmode);
12771 if (do_load)
12772 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12773 else
12774 {
12775 if (VECTOR_MODE_P (mode0))
12776 op0 = safe_vector_operand (op0, mode0);
12777
12778 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12779 op0 = copy_to_mode_reg (mode0, op0);
12780 }
12781
12782 pat = GEN_FCN (icode) (target, op0);
12783 if (! pat)
12784 return 0;
12785 emit_insn (pat);
12786 return target;
12787}
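/* DO_LOAD distinguishes builtins whose single argument is an address,
   e.g. __builtin_ia32_loadaps, which need the MEM wrapper above, from
   plain register unops such as __builtin_ia32_sqrtps.  */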
12788
12789/* Subroutine of ix86_expand_builtin to take care of three special unop insns:
12790 sqrtss, rsqrtss, rcpss. */
12791
12792static rtx
12793ix86_expand_unop1_builtin (icode, arglist, target)
12794 enum insn_code icode;
12795 tree arglist;
12796 rtx target;
12797{
12798 rtx pat;
12799 tree arg0 = TREE_VALUE (arglist);
59bef189 12800 rtx op1, op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
bd793c65 12801 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12802 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12803
12804 if (! target
12805 || GET_MODE (target) != tmode
12806 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12807 target = gen_reg_rtx (tmode);
12808
12809 if (VECTOR_MODE_P (mode0))
12810 op0 = safe_vector_operand (op0, mode0);
12811
12812 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12813 op0 = copy_to_mode_reg (mode0, op0);
fce5a9f2 12814
59bef189 12815 op1 = op0;
12816 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
12817 op1 = copy_to_mode_reg (mode0, op1);
fce5a9f2 12818
59bef189 12819 pat = GEN_FCN (icode) (target, op0, op1);
bd793c65 12820 if (! pat)
12821 return 0;
12822 emit_insn (pat);
12823 return target;
12824}
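/* These insns take the same value twice because the scalar operation
   replaces only element 0.  Roughly, __builtin_ia32_rcpss (x) becomes

     rcpss x, x

   computing an approximation of 1/x[0] while elements 1..3 pass
   through from the second operand unchanged.  */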
12825
12826/* Subroutine of ix86_expand_builtin to take care of comparison insns. */
12827
12828static rtx
12829ix86_expand_sse_compare (d, arglist, target)
8b60264b 12830 const struct builtin_description *d;
bd793c65 12831 tree arglist;
12832 rtx target;
12833{
12834 rtx pat;
12835 tree arg0 = TREE_VALUE (arglist);
12836 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12837 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12838 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12839 rtx op2;
12840 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
12841 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
12842 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
12843 enum rtx_code comparison = d->comparison;
12844
12845 if (VECTOR_MODE_P (mode0))
12846 op0 = safe_vector_operand (op0, mode0);
12847 if (VECTOR_MODE_P (mode1))
12848 op1 = safe_vector_operand (op1, mode1);
12849
12850 /* Swap operands if we have a comparison that isn't available in
12851 hardware. */
12852 if (d->flag)
12853 {
21e1b5f1 12854 rtx tmp = gen_reg_rtx (mode1);
12855 emit_move_insn (tmp, op1);
bd793c65 12856 op1 = op0;
21e1b5f1 12857 op0 = tmp;
bd793c65 12858 }
21e1b5f1 12859
12860 if (! target
12861 || GET_MODE (target) != tmode
12862 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
bd793c65 12863 target = gen_reg_rtx (tmode);
12864
12865 if (! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
12866 op0 = copy_to_mode_reg (mode0, op0);
12867 if (! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
12868 op1 = copy_to_mode_reg (mode1, op1);
12869
12870 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
12871 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
12872 if (! pat)
12873 return 0;
12874 emit_insn (pat);
12875 return target;
12876}
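/* The swap supplies comparisons the hardware lacks: as the bdesc
   tables above record, __builtin_ia32_cmpgtpd is listed as LT with
   its flag set, so it is emitted as CMPLTPD with the operands
   exchanged; only EQ/LT/LE/UNORD and their negations exist as
   cmp immediates.  */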
12877
12878/* Subroutine of ix86_expand_builtin to take care of comi insns. */
12879
12880static rtx
12881ix86_expand_sse_comi (d, arglist, target)
8b60264b 12882 const struct builtin_description *d;
bd793c65 12883 tree arglist;
12884 rtx target;
12885{
12886 rtx pat;
12887 tree arg0 = TREE_VALUE (arglist);
12888 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12889 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12890 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12891 rtx op2;
12892 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
12893 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
12894 enum rtx_code comparison = d->comparison;
12895
12896 if (VECTOR_MODE_P (mode0))
12897 op0 = safe_vector_operand (op0, mode0);
12898 if (VECTOR_MODE_P (mode1))
12899 op1 = safe_vector_operand (op1, mode1);
12900
12901 /* Swap operands if we have a comparison that isn't available in
12902 hardware. */
12903 if (d->flag)
12904 {
12905 rtx tmp = op1;
12906 op1 = op0;
12907 op0 = tmp;
bd793c65 12908 }
12909
12910 target = gen_reg_rtx (SImode);
12911 emit_move_insn (target, const0_rtx);
12912 target = gen_rtx_SUBREG (QImode, target, 0);
12913
12914 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
12915 op0 = copy_to_mode_reg (mode0, op0);
12916 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
12917 op1 = copy_to_mode_reg (mode1, op1);
12918
12919 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
12920 pat = GEN_FCN (d->icode) (op0, op1, op2);
12921 if (! pat)
12922 return 0;
12923 emit_insn (pat);
29628f27 12924 emit_insn (gen_rtx_SET (VOIDmode,
12925 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
12926 gen_rtx_fmt_ee (comparison, QImode,
12927 gen_rtx_REG (CCmode, FLAGS_REG),
12928 const0_rtx)));
bd793c65 12929
6f1a6c5b 12930 return SUBREG_REG (target);
bd793c65 12931}
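/* E.g. a comi builtin compares the low elements with comiss/comisd,
   then the STRICT_LOW_PART set above materializes the flag condition
   into the QImode low part of a zeroed SImode register, so the
   user-visible result is an int 0 or 1.  */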
12932
12933/* Expand an expression EXP that calls a built-in function,
12934 with result going to TARGET if that's convenient
12935 (and in mode MODE if that's convenient).
12936 SUBTARGET may be used as the target for computing one of EXP's operands.
12937 IGNORE is nonzero if the value is to be ignored. */
12938
12939rtx
12940ix86_expand_builtin (exp, target, subtarget, mode, ignore)
12941 tree exp;
12942 rtx target;
12943 rtx subtarget ATTRIBUTE_UNUSED;
12944 enum machine_mode mode ATTRIBUTE_UNUSED;
12945 int ignore ATTRIBUTE_UNUSED;
12946{
8b60264b 12947 const struct builtin_description *d;
77ebd435 12948 size_t i;
bd793c65 12949 enum insn_code icode;
12950 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
12951 tree arglist = TREE_OPERAND (exp, 1);
e37af218 12952 tree arg0, arg1, arg2;
bd793c65 12953 rtx op0, op1, op2, pat;
12954 enum machine_mode tmode, mode0, mode1, mode2;
8752c357 12955 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
bd793c65 12956
12957 switch (fcode)
12958 {
12959 case IX86_BUILTIN_EMMS:
12960 emit_insn (gen_emms ());
12961 return 0;
12962
12963 case IX86_BUILTIN_SFENCE:
12964 emit_insn (gen_sfence ());
12965 return 0;
12966
bd793c65 12967 case IX86_BUILTIN_PEXTRW:
fbe5eb6d 12968 case IX86_BUILTIN_PEXTRW128:
12969 icode = (fcode == IX86_BUILTIN_PEXTRW
12970 ? CODE_FOR_mmx_pextrw
12971 : CODE_FOR_sse2_pextrw);
bd793c65 12972 arg0 = TREE_VALUE (arglist);
12973 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12974 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12975 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12976 tmode = insn_data[icode].operand[0].mode;
12977 mode0 = insn_data[icode].operand[1].mode;
12978 mode1 = insn_data[icode].operand[2].mode;
12979
12980 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12981 op0 = copy_to_mode_reg (mode0, op0);
12982 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12983 {
12984 /* @@@ better error message */
12985 error ("selector must be an immediate");
6f1a6c5b 12986 return gen_reg_rtx (tmode);
bd793c65 12987 }
12988 if (target == 0
12989 || GET_MODE (target) != tmode
12990 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12991 target = gen_reg_rtx (tmode);
12992 pat = GEN_FCN (icode) (target, op0, op1);
12993 if (! pat)
12994 return 0;
12995 emit_insn (pat);
12996 return target;
12997
12998 case IX86_BUILTIN_PINSRW:
fbe5eb6d 12999 case IX86_BUILTIN_PINSRW128:
13000 icode = (fcode == IX86_BUILTIN_PINSRW
13001 ? CODE_FOR_mmx_pinsrw
13002 : CODE_FOR_sse2_pinsrw);
bd793c65 13003 arg0 = TREE_VALUE (arglist);
13004 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
13005 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
13006 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
13007 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
13008 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
13009 tmode = insn_data[icode].operand[0].mode;
13010 mode0 = insn_data[icode].operand[1].mode;
13011 mode1 = insn_data[icode].operand[2].mode;
13012 mode2 = insn_data[icode].operand[3].mode;
13013
13014 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13015 op0 = copy_to_mode_reg (mode0, op0);
13016 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13017 op1 = copy_to_mode_reg (mode1, op1);
13018 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13019 {
13020 /* @@@ better error message */
13021 error ("selector must be an immediate");
13022 return const0_rtx;
13023 }
13024 if (target == 0
13025 || GET_MODE (target) != tmode
13026 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13027 target = gen_reg_rtx (tmode);
13028 pat = GEN_FCN (icode) (target, op0, op1, op2);
13029 if (! pat)
13030 return 0;
13031 emit_insn (pat);
13032 return target;
13033
13034 case IX86_BUILTIN_MASKMOVQ:
fbe5eb6d 13035 icode = (fcode == IX86_BUILTIN_MASKMOVQ
13036 ? (TARGET_64BIT ? CODE_FOR_mmx_maskmovq_rex : CODE_FOR_mmx_maskmovq)
13037 : CODE_FOR_sse2_maskmovdqu);
bd793c65 13038 /* Note the arg order is different from the operand order. */
      arg1 = TREE_VALUE (arglist);
      arg2 = TREE_VALUE (TREE_CHAIN (arglist));
      arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
        op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
        op2 = copy_to_mode_reg (mode2, op2);
      pat = GEN_FCN (icode) (op0, op1, op2);
      if (! pat)
        return 0;
      emit_insn (pat);
      return 0;

    case IX86_BUILTIN_SQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv4sf2, arglist, target);
    case IX86_BUILTIN_RSQRTSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmrsqrtv4sf2, arglist, target);
    case IX86_BUILTIN_RCPSS:
      return ix86_expand_unop1_builtin (CODE_FOR_vmrcpv4sf2, arglist, target);

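    /* The SSE bitwise builtins below go through the TImode binop
       helper: the underlying and/nand/ior/xor patterns are defined on
       the full 128-bit integer mode, and the V4SF operands pass
       through them bit-for-bit.  */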
    case IX86_BUILTIN_ANDPS:
      return ix86_expand_timode_binop_builtin (CODE_FOR_sse_andti3,
                                               arglist, target);
    case IX86_BUILTIN_ANDNPS:
      return ix86_expand_timode_binop_builtin (CODE_FOR_sse_nandti3,
                                               arglist, target);
    case IX86_BUILTIN_ORPS:
      return ix86_expand_timode_binop_builtin (CODE_FOR_sse_iorti3,
                                               arglist, target);
    case IX86_BUILTIN_XORPS:
      return ix86_expand_timode_binop_builtin (CODE_FOR_sse_xorti3,
                                               arglist, target);

    case IX86_BUILTIN_LOADAPS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_movaps, arglist, target, 1);

    case IX86_BUILTIN_LOADUPS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);

    case IX86_BUILTIN_STOREAPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movaps, arglist);
    case IX86_BUILTIN_STOREUPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);

    case IX86_BUILTIN_LOADSS:
      return ix86_expand_unop_builtin (CODE_FOR_sse_loadss, arglist, target, 1);

    case IX86_BUILTIN_STORESS:
      return ix86_expand_store_builtin (CODE_FOR_sse_storess, arglist);

    case IX86_BUILTIN_LOADHPS:
    case IX86_BUILTIN_LOADLPS:
    case IX86_BUILTIN_LOADHPD:
    case IX86_BUILTIN_LOADLPD:
      icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_movhps
               : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_movlps
               : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_movhpd
               : CODE_FOR_sse2_movlpd);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
      if (target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_STOREHPS:
    case IX86_BUILTIN_STORELPS:
    case IX86_BUILTIN_STOREHPD:
    case IX86_BUILTIN_STORELPD:
      icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_movhps
               : fcode == IX86_BUILTIN_STORELPS ? CODE_FOR_sse_movlps
               : fcode == IX86_BUILTIN_STOREHPD ? CODE_FOR_sse2_movhpd
               : CODE_FOR_sse2_movlpd);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;

      op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
        op1 = copy_to_mode_reg (mode1, op1);

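      /* The movhps/movlps (and movhpd/movlpd) patterns are
         three-operand: the memory location is both the destination
         and the first source, so OP0 is passed twice below.  */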
      pat = GEN_FCN (icode) (op0, op0, op1);
      if (! pat)
        return 0;
      emit_insn (pat);
      return 0;

    case IX86_BUILTIN_MOVNTPS:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
    case IX86_BUILTIN_MOVNTQ:
      return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);

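    /* The ldmxcsr and stmxcsr patterns want a memory operand, so both
       cases below bounce the control/status word through a stack
       slot.  As a user-level sketch (assuming the usual xmmintrin.h
       wrappers _mm_getcsr and _mm_setcsr):

           unsigned int csr = _mm_getcsr ();
           _mm_setcsr (csr | 0x8000);

       both calls funnel through here.  */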
    case IX86_BUILTIN_LDMXCSR:
      op0 = expand_expr (TREE_VALUE (arglist), NULL_RTX, VOIDmode, 0);
      target = assign_386_stack_local (SImode, 0);
      emit_move_insn (target, op0);
      emit_insn (gen_ldmxcsr (target));
      return 0;

    case IX86_BUILTIN_STMXCSR:
      target = assign_386_stack_local (SImode, 0);
      emit_insn (gen_stmxcsr (target));
      return copy_to_mode_reg (SImode, target);

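    /* For the shuffle cases below the mask has to fold to a
       compile-time constant.  As a user-level sketch (assuming the
       xmmintrin.h wrapper), _mm_shuffle_ps (a, b, _MM_SHUFFLE (3, 2,
       1, 0)) is fine, while a mask held in a variable trips the
       "mask must be an immediate" error.  */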
    case IX86_BUILTIN_SHUFPS:
    case IX86_BUILTIN_SHUFPD:
      icode = (fcode == IX86_BUILTIN_SHUFPS
               ? CODE_FOR_sse_shufps
               : CODE_FOR_sse2_shufpd);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode0 = insn_data[icode].operand[1].mode;
      mode1 = insn_data[icode].operand[2].mode;
      mode2 = insn_data[icode].operand[3].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
        op1 = copy_to_mode_reg (mode1, op1);
      if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
        {
          /* @@@ better error message */
          error ("mask must be an immediate");
          return gen_reg_rtx (tmode);
        }
      if (target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1, op2);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_PSHUFW:
    case IX86_BUILTIN_PSHUFD:
    case IX86_BUILTIN_PSHUFHW:
    case IX86_BUILTIN_PSHUFLW:
      icode = (fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
               : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
               : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
               : CODE_FOR_mmx_pshufw);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
      tmode = insn_data[icode].operand[0].mode;
      mode1 = insn_data[icode].operand[1].mode;
      mode2 = insn_data[icode].operand[2].mode;

      if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
        op0 = copy_to_mode_reg (mode1, op0);
      if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
        {
          /* @@@ better error message */
          error ("mask must be an immediate");
          return const0_rtx;
        }
      if (target == 0
          || GET_MODE (target) != tmode
          || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
        target = gen_reg_rtx (tmode);
      pat = GEN_FCN (icode) (target, op0, op1);
      if (! pat)
        return 0;
      emit_insn (pat);
      return target;

    case IX86_BUILTIN_FEMMS:
      emit_insn (gen_femms ());
      return NULL_RTX;

    case IX86_BUILTIN_PAVGUSB:
      return ix86_expand_binop_builtin (CODE_FOR_pavgusb, arglist, target);

    case IX86_BUILTIN_PF2ID:
      return ix86_expand_unop_builtin (CODE_FOR_pf2id, arglist, target, 0);

    case IX86_BUILTIN_PFACC:
      return ix86_expand_binop_builtin (CODE_FOR_pfacc, arglist, target);

    case IX86_BUILTIN_PFADD:
      return ix86_expand_binop_builtin (CODE_FOR_addv2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPEQ:
      return ix86_expand_binop_builtin (CODE_FOR_eqv2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPGE:
      return ix86_expand_binop_builtin (CODE_FOR_gev2sf3, arglist, target);

    case IX86_BUILTIN_PFCMPGT:
      return ix86_expand_binop_builtin (CODE_FOR_gtv2sf3, arglist, target);

    case IX86_BUILTIN_PFMAX:
      return ix86_expand_binop_builtin (CODE_FOR_pfmaxv2sf3, arglist, target);

    case IX86_BUILTIN_PFMIN:
      return ix86_expand_binop_builtin (CODE_FOR_pfminv2sf3, arglist, target);

    case IX86_BUILTIN_PFMUL:
      return ix86_expand_binop_builtin (CODE_FOR_mulv2sf3, arglist, target);

    case IX86_BUILTIN_PFRCP:
      return ix86_expand_unop_builtin (CODE_FOR_pfrcpv2sf2, arglist, target, 0);

    case IX86_BUILTIN_PFRCPIT1:
      return ix86_expand_binop_builtin (CODE_FOR_pfrcpit1v2sf3, arglist, target);

    case IX86_BUILTIN_PFRCPIT2:
      return ix86_expand_binop_builtin (CODE_FOR_pfrcpit2v2sf3, arglist, target);

    case IX86_BUILTIN_PFRSQIT1:
      return ix86_expand_binop_builtin (CODE_FOR_pfrsqit1v2sf3, arglist, target);

    case IX86_BUILTIN_PFRSQRT:
      return ix86_expand_unop_builtin (CODE_FOR_pfrsqrtv2sf2, arglist, target, 0);

    case IX86_BUILTIN_PFSUB:
      return ix86_expand_binop_builtin (CODE_FOR_subv2sf3, arglist, target);

    case IX86_BUILTIN_PFSUBR:
      return ix86_expand_binop_builtin (CODE_FOR_subrv2sf3, arglist, target);

    case IX86_BUILTIN_PI2FD:
      return ix86_expand_unop_builtin (CODE_FOR_floatv2si2, arglist, target, 0);

    case IX86_BUILTIN_PMULHRW:
      return ix86_expand_binop_builtin (CODE_FOR_pmulhrwv4hi3, arglist, target);

    case IX86_BUILTIN_PF2IW:
      return ix86_expand_unop_builtin (CODE_FOR_pf2iw, arglist, target, 0);

    case IX86_BUILTIN_PFNACC:
      return ix86_expand_binop_builtin (CODE_FOR_pfnacc, arglist, target);

    case IX86_BUILTIN_PFPNACC:
      return ix86_expand_binop_builtin (CODE_FOR_pfpnacc, arglist, target);

    case IX86_BUILTIN_PI2FW:
      return ix86_expand_unop_builtin (CODE_FOR_pi2fw, arglist, target, 0);

    case IX86_BUILTIN_PSWAPDSI:
      return ix86_expand_unop_builtin (CODE_FOR_pswapdv2si2, arglist, target, 0);

    case IX86_BUILTIN_PSWAPDSF:
      return ix86_expand_unop_builtin (CODE_FOR_pswapdv2sf2, arglist, target, 0);

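    /* The *_ZERO builtins expand to the dedicated clear patterns
       rather than a move of a zero constant, so the register is
       zeroed with an xor-style idiom instead of a load.  */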
    case IX86_BUILTIN_SSE_ZERO:
      target = gen_reg_rtx (V4SFmode);
      emit_insn (gen_sse_clrv4sf (target));
      return target;

    case IX86_BUILTIN_MMX_ZERO:
      target = gen_reg_rtx (DImode);
      emit_insn (gen_mmx_clrdi (target));
      return target;

    case IX86_BUILTIN_SQRTSD:
      return ix86_expand_unop1_builtin (CODE_FOR_vmsqrtv2df2, arglist, target);
    case IX86_BUILTIN_LOADAPD:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist, target, 1);
    case IX86_BUILTIN_LOADUPD:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);

    case IX86_BUILTIN_STOREAPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
    case IX86_BUILTIN_STOREUPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);

    case IX86_BUILTIN_LOADSD:
      return ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist, target, 1);

    case IX86_BUILTIN_STORESD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_storesd, arglist);

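    /* SETPD1 and SETPD below build a V2DF value by spilling the
       scalar arguments to a stack temporary and loading the vector
       back, rather than by inserting elements register-to-register.  */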
    case IX86_BUILTIN_SETPD1:
      target = assign_386_stack_local (DFmode, 0);
      arg0 = TREE_VALUE (arglist);
      emit_move_insn (adjust_address (target, DFmode, 0),
                      expand_expr (arg0, NULL_RTX, VOIDmode, 0));
      op0 = gen_reg_rtx (V2DFmode);
      emit_insn (gen_sse2_loadsd (op0, adjust_address (target, V2DFmode, 0)));
      emit_insn (gen_sse2_shufpd (op0, op0, op0, GEN_INT (0)));
      return op0;

    case IX86_BUILTIN_SETPD:
      target = assign_386_stack_local (V2DFmode, 0);
      arg0 = TREE_VALUE (arglist);
      arg1 = TREE_VALUE (TREE_CHAIN (arglist));
      emit_move_insn (adjust_address (target, DFmode, 0),
                      expand_expr (arg0, NULL_RTX, VOIDmode, 0));
      emit_move_insn (adjust_address (target, DFmode, 8),
                      expand_expr (arg1, NULL_RTX, VOIDmode, 0));
      op0 = gen_reg_rtx (V2DFmode);
      emit_insn (gen_sse2_movapd (op0, target));
      return op0;

    case IX86_BUILTIN_LOADRPD:
      target = ix86_expand_unop_builtin (CODE_FOR_sse2_movapd, arglist,
                                         gen_reg_rtx (V2DFmode), 1);
      emit_insn (gen_sse2_shufpd (target, target, target, GEN_INT (1)));
      return target;

    case IX86_BUILTIN_LOADPD1:
      target = ix86_expand_unop_builtin (CODE_FOR_sse2_loadsd, arglist,
                                         gen_reg_rtx (V2DFmode), 1);
      emit_insn (gen_sse2_shufpd (target, target, target, const0_rtx));
      return target;

    case IX86_BUILTIN_STOREPD1:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);
    case IX86_BUILTIN_STORERPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movapd, arglist);

    case IX86_BUILTIN_MFENCE:
      emit_insn (gen_sse2_mfence ());
      return 0;
    case IX86_BUILTIN_LFENCE:
      emit_insn (gen_sse2_lfence ());
      return 0;

    case IX86_BUILTIN_CLFLUSH:
      arg0 = TREE_VALUE (arglist);
      op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
      icode = CODE_FOR_sse2_clflush;
      mode0 = insn_data[icode].operand[0].mode;
      if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
        op0 = copy_to_mode_reg (mode0, op0);

      emit_insn (gen_sse2_clflush (op0));
      return 0;

    case IX86_BUILTIN_MOVNTPD:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
    case IX86_BUILTIN_MOVNTDQ:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
    case IX86_BUILTIN_MOVNTI:
      return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);

    default:
      break;
    }

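  /* Everything not special-cased above is driven by the descriptor
     tables: two-operand builtins, one-operand builtins and the
     comi-style comparisons are looked up by code and expanded
     generically.  */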
  for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    if (d->code == fcode)
      {
        /* Compares are treated specially.  */
        if (d->icode == CODE_FOR_maskcmpv4sf3
            || d->icode == CODE_FOR_vmmaskcmpv4sf3
            || d->icode == CODE_FOR_maskncmpv4sf3
            || d->icode == CODE_FOR_vmmaskncmpv4sf3
            || d->icode == CODE_FOR_maskcmpv2df3
            || d->icode == CODE_FOR_vmmaskcmpv2df3
            || d->icode == CODE_FOR_maskncmpv2df3
            || d->icode == CODE_FOR_vmmaskncmpv2df3)
          return ix86_expand_sse_compare (d, arglist, target);

        return ix86_expand_binop_builtin (d->icode, arglist, target);
      }

  for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
    if (d->code == fcode)
      return ix86_expand_unop_builtin (d->icode, arglist, target, 0);

  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    if (d->code == fcode)
      return ix86_expand_sse_comi (d, arglist, target);

  /* @@@ Should really do something sensible here.  */
  return 0;
}

/* Store OPERAND to memory after reload is completed.  This means
   that we can't easily use assign_stack_local.  */
rtx
ix86_force_to_memory (mode, operand)
     enum machine_mode mode;
     rtx operand;
{
  rtx result;
  if (!reload_completed)
    abort ();
  if (TARGET_64BIT && TARGET_RED_ZONE)
    {
      result = gen_rtx_MEM (mode,
                            gen_rtx_PLUS (Pmode,
                                          stack_pointer_rtx,
                                          GEN_INT (-RED_ZONE_SIZE)));
      emit_move_insn (result, operand);
    }
  else if (TARGET_64BIT && !TARGET_RED_ZONE)
    {
      switch (mode)
        {
        case HImode:
        case SImode:
          operand = gen_lowpart (DImode, operand);
          /* FALLTHRU */
        case DImode:
          emit_insn (
            gen_rtx_SET (VOIDmode,
                         gen_rtx_MEM (DImode,
                                      gen_rtx_PRE_DEC (DImode,
                                                       stack_pointer_rtx)),
                         operand));
          break;
        default:
          abort ();
        }
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  else
    {
      switch (mode)
        {
        case DImode:
          {
            rtx operands[2];
            split_di (&operand, 1, operands, operands + 1);
            emit_insn (
              gen_rtx_SET (VOIDmode,
                           gen_rtx_MEM (SImode,
                                        gen_rtx_PRE_DEC (Pmode,
                                                         stack_pointer_rtx)),
                           operands[1]));
            emit_insn (
              gen_rtx_SET (VOIDmode,
                           gen_rtx_MEM (SImode,
                                        gen_rtx_PRE_DEC (Pmode,
                                                         stack_pointer_rtx)),
                           operands[0]));
          }
          break;
        case HImode:
          /* It is better to store HImodes as SImodes.  */
          if (!TARGET_PARTIAL_REG_STALL)
            operand = gen_lowpart (SImode, operand);
          /* FALLTHRU */
        case SImode:
          emit_insn (
            gen_rtx_SET (VOIDmode,
                         gen_rtx_MEM (GET_MODE (operand),
                                      gen_rtx_PRE_DEC (SImode,
                                                       stack_pointer_rtx)),
                         operand));
          break;
        default:
          abort ();
        }
      result = gen_rtx_MEM (mode, stack_pointer_rtx);
    }
  return result;
}
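
/* A typical caller pairs this with ix86_free_from_memory below
   (sketch only; gen_foo stands for some hypothetical consumer of the
   memory operand):

     rtx mem = ix86_force_to_memory (DImode, operand);
     emit_insn (gen_foo (dest, mem));
     ix86_free_from_memory (DImode);  */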

/* Free the operand from memory.  */
void
ix86_free_from_memory (mode)
     enum machine_mode mode;
{
  if (!TARGET_64BIT || !TARGET_RED_ZONE)
    {
      int size;

      if (mode == DImode || TARGET_64BIT)
        size = 8;
      else if (mode == HImode && TARGET_PARTIAL_REG_STALL)
        size = 2;
      else
        size = 4;
      /* Use LEA to deallocate stack space.  In peephole2 it will be
         converted to a pop or add instruction if registers are
         available.  */
      emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
                              gen_rtx_PLUS (Pmode, stack_pointer_rtx,
                                            GEN_INT (size))));
    }
}

/* Put float CONST_DOUBLE in the constant pool instead of fp regs.
   QImode must go into class Q_REGS.
   Narrow ALL_REGS to GENERAL_REGS.  This supports allowing movsf and
   movdf to do mem-to-mem moves through integer regs.  */
enum reg_class
ix86_preferred_reload_class (x, class)
     rtx x;
     enum reg_class class;
{
  if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
    {
      /* SSE can't load any constant directly yet.  */
      if (SSE_CLASS_P (class))
        return NO_REGS;
      /* Floats can load 0 and 1.  */
      if (MAYBE_FLOAT_CLASS_P (class) && standard_80387_constant_p (x))
        {
          /* Limit class to non-SSE.  Use GENERAL_REGS if possible.  */
          if (MAYBE_SSE_CLASS_P (class))
            return (reg_class_subset_p (class, GENERAL_REGS)
                    ? GENERAL_REGS : FLOAT_REGS);
          else
            return class;
        }
      /* General regs can load everything.  */
      if (reg_class_subset_p (class, GENERAL_REGS))
        return GENERAL_REGS;
      /* In case we haven't resolved FLOAT or SSE yet, give up.  */
      if (MAYBE_FLOAT_CLASS_P (class) || MAYBE_SSE_CLASS_P (class))
        return NO_REGS;
    }
  if (MAYBE_MMX_CLASS_P (class) && CONSTANT_P (x))
    return NO_REGS;
  if (GET_MODE (x) == QImode && ! reg_class_subset_p (class, Q_REGS))
    return Q_REGS;
  return class;
}

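/* For example, reloading a double constant into an SSE class yields
   NO_REGS (forcing a constant-pool load), while reloading 0.0 or 1.0
   into an x87 class keeps FLOAT_REGS, since fldz/fld1 can materialize
   those constants directly.  */
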
/* If we are copying between general and FP registers, we need a memory
   location.  The same is true for SSE and MMX registers.

   The macro can't work reliably when one of the CLASSES is a class
   containing registers from multiple units (SSE, MMX, integer).  We
   avoid this by never combining those units in a single alternative in
   the machine description.  Ensure that this constraint holds to avoid
   unexpected surprises.

   When STRICT is false, we are being called from REGISTER_MOVE_COST, so
   do not enforce these sanity checks.  */
int
ix86_secondary_memory_needed (class1, class2, mode, strict)
     enum reg_class class1, class2;
     enum machine_mode mode;
     int strict;
{
  if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
      || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
      || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
      || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
      || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
      || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
    {
      if (strict)
        abort ();
      else
        return 1;
    }
  return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)
          || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)
              && (mode) != SImode)
          || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
              && (mode) != SImode));
}

/* Return the cost of moving data from a register in class CLASS1 to
   one in class CLASS2.

   It is not required that the cost always equal 2 when FROM is the same
   as TO; on some machines it is expensive to move between registers if
   they are not general registers.  */
int
ix86_register_move_cost (mode, class1, class2)
     enum machine_mode mode;
     enum reg_class class1, class2;
{
  /* In case we require secondary memory, compute the cost of the store
     followed by the load.  In order to avoid bad register allocation
     choices, we need for this to be *at least* as high as the symmetric
     MEMORY_MOVE_COST.  */
  if (ix86_secondary_memory_needed (class1, class2, mode, 0))
    {
      int cost = 1;

      cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
                   MEMORY_MOVE_COST (mode, class1, 1));
      cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
                   MEMORY_MOVE_COST (mode, class2, 1));

      /* In case of copying from a general purpose register we may emit
         multiple stores followed by a single load, causing a memory size
         mismatch stall.  Count this as an arbitrarily high cost of 20.  */
      if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
        cost += 20;

      /* In the case of FP/MMX moves, the registers actually overlap, and we
         have to switch modes in order to treat them differently.  */
      if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
          || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
        cost += 20;

      return cost;
    }

  /* Moves between the SSE/MMX units and the integer unit are expensive.  */
  if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
      || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
    return ix86_cost->mmxsse_to_integer;
  if (MAYBE_FLOAT_CLASS_P (class1))
    return ix86_cost->fp_move;
  if (MAYBE_SSE_CLASS_P (class1))
    return ix86_cost->sse_move;
  if (MAYBE_MMX_CLASS_P (class1))
    return ix86_cost->mmx_move;
  return 2;
}

/* Return 1 if hard register REGNO can hold a value of machine-mode MODE.  */
int
ix86_hard_regno_mode_ok (regno, mode)
     int regno;
     enum machine_mode mode;
{
  /* Flags, and only flags, can hold CCmode values.  */
  if (CC_REGNO_P (regno))
    return GET_MODE_CLASS (mode) == MODE_CC;
  if (GET_MODE_CLASS (mode) == MODE_CC
      || GET_MODE_CLASS (mode) == MODE_RANDOM
      || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
    return 0;
  if (FP_REGNO_P (regno))
    return VALID_FP_MODE_P (mode);
  if (SSE_REGNO_P (regno))
    return VALID_SSE_REG_MODE (mode);
  if (MMX_REGNO_P (regno))
    return VALID_MMX_REG_MODE (mode) || VALID_MMX_REG_MODE_3DNOW (mode);
  /* We handle both integers and floats in the general purpose registers.
     In future we should be able to handle vector modes as well.  */
  if (!VALID_INT_MODE_P (mode) && !VALID_FP_MODE_P (mode))
    return 0;
  /* Take care with QImode values - they can be in non-QI regs, but then
     they do cause partial register stalls.  */
  if (regno < 4 || mode != QImode || TARGET_64BIT)
    return 1;
  return reload_in_progress || reload_completed || !TARGET_PARTIAL_REG_STALL;
}

13701
13702/* Return the cost of moving data of mode M between a
13703 register and memory. A value of 2 is the default; this cost is
13704 relative to those in `REGISTER_MOVE_COST'.
13705
13706 If moving between registers and memory is more expensive than
13707 between two registers, you should define this macro to express the
a4f31c00
AJ
13708 relative cost.
13709
fa79946e
JH
13710 Model also increased moving costs of QImode registers in non
13711 Q_REGS classes.
13712 */
13713int
13714ix86_memory_move_cost (mode, class, in)
13715 enum machine_mode mode;
13716 enum reg_class class;
13717 int in;
13718{
13719 if (FLOAT_CLASS_P (class))
13720 {
13721 int index;
13722 switch (mode)
13723 {
13724 case SFmode:
13725 index = 0;
13726 break;
13727 case DFmode:
13728 index = 1;
13729 break;
13730 case XFmode:
13731 case TFmode:
13732 index = 2;
13733 break;
13734 default:
13735 return 100;
13736 }
13737 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
13738 }
13739 if (SSE_CLASS_P (class))
13740 {
13741 int index;
13742 switch (GET_MODE_SIZE (mode))
13743 {
13744 case 4:
13745 index = 0;
13746 break;
13747 case 8:
13748 index = 1;
13749 break;
13750 case 16:
13751 index = 2;
13752 break;
13753 default:
13754 return 100;
13755 }
13756 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
13757 }
13758 if (MMX_CLASS_P (class))
13759 {
13760 int index;
13761 switch (GET_MODE_SIZE (mode))
13762 {
13763 case 4:
13764 index = 0;
13765 break;
13766 case 8:
13767 index = 1;
13768 break;
13769 default:
13770 return 100;
13771 }
13772 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
13773 }
13774 switch (GET_MODE_SIZE (mode))
13775 {
13776 case 1:
13777 if (in)
13778 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
13779 : ix86_cost->movzbl_load);
13780 else
13781 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
13782 : ix86_cost->int_store[0] + 4);
13783 break;
13784 case 2:
13785 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
13786 default:
13787 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
13788 if (mode == TFmode)
13789 mode = XFmode;
3bb7e126 13790 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
fa79946e
JH
13791 * (int) GET_MODE_SIZE (mode) / 4);
13792 }
13793}
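
/* Worked example of the integer fallback above: a DImode value in a
   general register class moves as GET_MODE_SIZE (DImode) / 4 = 2
   SImode pieces, so it costs twice the 32-bit int_load/int_store
   entry.  */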

#if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
static void
ix86_svr3_asm_out_constructor (symbol, priority)
     rtx symbol;
     int priority ATTRIBUTE_UNUSED;
{
  init_section ();
  fputs ("\tpushl $", asm_out_file);
  assemble_name (asm_out_file, XSTR (symbol, 0));
  fputc ('\n', asm_out_file);
}
#endif

#if TARGET_MACHO

static int current_machopic_label_num;

/* Given a symbol name and its associated stub, write out the
   definition of the stub.  */

void
machopic_output_stub (file, symb, stub)
     FILE *file;
     const char *symb, *stub;
{
  unsigned int length;
  char *binder_name, *symbol_name, lazy_ptr_name[32];
  int label = ++current_machopic_label_num;

  /* Lose our funky encoding stuff so it doesn't contaminate the stub.  */
  symb = (*targetm.strip_name_encoding) (symb);

  length = strlen (stub);
  binder_name = alloca (length + 32);
  GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);

  length = strlen (symb);
  symbol_name = alloca (length + 32);
  GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);

  sprintf (lazy_ptr_name, "L%d$lz", label);

  if (MACHOPIC_PURE)
    machopic_picsymbol_stub_section ();
  else
    machopic_symbol_stub_section ();

  fprintf (file, "%s:\n", stub);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);

  if (MACHOPIC_PURE)
    {
      fprintf (file, "\tcall LPC$%d\nLPC$%d:\tpopl %%eax\n", label, label);
      fprintf (file, "\tmovl %s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
      fprintf (file, "\tjmp %%edx\n");
    }
  else
    fprintf (file, "\tjmp *%s\n", lazy_ptr_name);

  fprintf (file, "%s:\n", binder_name);

  if (MACHOPIC_PURE)
    {
      fprintf (file, "\tlea %s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
      fprintf (file, "\tpushl %%eax\n");
    }
  else
    fprintf (file, "\t pushl $%s\n", lazy_ptr_name);

  fprintf (file, "\tjmp dyld_stub_binding_helper\n");

  machopic_lazy_symbol_ptr_section ();
  fprintf (file, "%s:\n", lazy_ptr_name);
  fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
  fprintf (file, "\t.long %s\n", binder_name);
}
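
/* For reference, a MACHOPIC_PURE stub comes out roughly as follows
   (a sketch; the exact stub, binder and label names depend on the
   GEN_*_NAME macros and the label counter):

	L_foo$stub:
		.indirect_symbol _foo
		call LPC$1
	LPC$1:	popl %eax
		movl L1$lz-LPC$1(%eax),%edx
		jmp %edx
	L_foo$stub_binder:
		lea L1$lz-LPC$1(%eax),%eax
		pushl %eax
		jmp dyld_stub_binding_helper
	L1$lz:
		.indirect_symbol _foo
		.long L_foo$stub_binder  */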
#endif /* TARGET_MACHO */

/* Order the registers for the register allocator.  */

void
x86_order_regs_for_local_alloc ()
{
  int pos = 0;
  int i;

  /* First allocate the local general purpose registers.  */
  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    if (GENERAL_REGNO_P (i) && call_used_regs[i])
      reg_alloc_order [pos++] = i;

  /* Global general purpose registers.  */
  for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    if (GENERAL_REGNO_P (i) && !call_used_regs[i])
      reg_alloc_order [pos++] = i;

  /* x87 registers come first in case we are doing FP math
     using them.  */
  if (!TARGET_SSE_MATH)
    for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
      reg_alloc_order [pos++] = i;

  /* SSE registers.  */
  for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
    reg_alloc_order [pos++] = i;
  for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
    reg_alloc_order [pos++] = i;

  /* x87 registers.  */
  if (TARGET_SSE_MATH)
    for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
      reg_alloc_order [pos++] = i;

  for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
    reg_alloc_order [pos++] = i;

  /* Initialize the rest of the array as we do not allocate some
     registers at all.  */
  while (pos < FIRST_PSEUDO_REGISTER)
    reg_alloc_order [pos++] = 0;
}

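/* Output the assembly for a thunk: add DELTA to the incoming `this'
   pointer (in a register or on the stack, depending on the ABI and
   regparm setting), then tail-jump to FUNCTION.  */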
void
x86_output_mi_thunk (file, delta, function)
     FILE *file;
     int delta;
     tree function;
{
  tree parm;
  rtx xops[3];

  if (ix86_regparm > 0)
    parm = TYPE_ARG_TYPES (TREE_TYPE (function));
  else
    parm = NULL_TREE;
  for (; parm; parm = TREE_CHAIN (parm))
    if (TREE_VALUE (parm) == void_type_node)
      break;

  xops[0] = GEN_INT (delta);
  if (TARGET_64BIT)
    {
      int n = aggregate_value_p (TREE_TYPE (TREE_TYPE (function))) != 0;
      xops[1] = gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
      output_asm_insn ("add{q} {%0, %1|%1, %0}", xops);
      if (flag_pic)
        {
          fprintf (file, "\tjmp *");
          assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
          fprintf (file, "@GOTPCREL(%%rip)\n");
        }
      else
        {
          fprintf (file, "\tjmp ");
          assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
          fprintf (file, "\n");
        }
    }
  else
    {
      if (parm)
        xops[1] = gen_rtx_REG (SImode, 0);
      else if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function))))
        xops[1] = gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
      else
        xops[1] = gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
      output_asm_insn ("add{l} {%0, %1|%1, %0}", xops);

      if (flag_pic)
        {
          xops[0] = pic_offset_table_rtx;
          xops[1] = gen_label_rtx ();
          xops[2] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);

          if (ix86_regparm > 2)
            abort ();
          output_asm_insn ("push{l}\t%0", xops);
          output_asm_insn ("call\t%P1", xops);
          ASM_OUTPUT_INTERNAL_LABEL (file, "L", CODE_LABEL_NUMBER (xops[1]));
          output_asm_insn ("pop{l}\t%0", xops);
          output_asm_insn
            ("add{l}\t{%2+[.-%P1], %0|%0, OFFSET FLAT: %2+[.-%P1]}", xops);
          xops[0] = gen_rtx_MEM (SImode, XEXP (DECL_RTL (function), 0));
          output_asm_insn
            ("mov{l}\t{%0@GOT(%%ebx), %%ecx|%%ecx, %0@GOT[%%ebx]}", xops);
          asm_fprintf (file, "\tpop{l\t%%ebx|\t%%ebx}\n");
          asm_fprintf (file, "\tjmp\t{*%%ecx|%%ecx}\n");
        }
      else
        {
          fprintf (file, "\tjmp ");
          assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
          fprintf (file, "\n");
        }
    }
}
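
/* For instance, a 32-bit non-PIC thunk with DELTA 8 and a
   stack-passed `this' comes out as (AT&T syntax sketch):

	addl $8, 4(%esp)
	jmp function  */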

int
x86_field_alignment (field, computed)
     tree field;
     int computed;
{
  enum machine_mode mode;
  tree type = TREE_TYPE (field);

  if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
    return computed;
  mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
                    ? get_inner_array_type (type) : type);
  if (mode == DFmode || mode == DCmode
      || GET_MODE_CLASS (mode) == MODE_INT
      || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
    return MIN (32, computed);
  return computed;
}
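
/* E.g. on ia32 without -malign-double, a structure field of type
   double is capped at 32-bit alignment even though the type's natural
   alignment is 64 bits.  */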

/* Implement machine specific optimizations.
   At the moment we implement a single transformation: AMD Athlon works
   faster when RET is not the destination of a conditional jump or
   directly preceded by another jump instruction.  We avoid the penalty
   by inserting a NOP just before the RET instructions in such cases.  */
void
x86_machine_dependent_reorg (first)
     rtx first ATTRIBUTE_UNUSED;
{
  edge e;

  if (!TARGET_ATHLON || !optimize || optimize_size)
    return;
  for (e = EXIT_BLOCK_PTR->pred; e; e = e->pred_next)
    {
      basic_block bb = e->src;
      rtx ret = bb->end;
      rtx prev;
      bool insert = false;

      if (!returnjump_p (ret) || !maybe_hot_bb_p (bb))
        continue;
      prev = prev_nonnote_insn (ret);
      if (prev && GET_CODE (prev) == CODE_LABEL)
        {
          edge e;
          for (e = bb->pred; e; e = e->pred_next)
            if (EDGE_FREQUENCY (e) && e->src->index > 0
                && !(e->flags & EDGE_FALLTHRU))
              insert = 1;
        }
      if (!insert)
        {
          prev = prev_real_insn (ret);
          if (prev && GET_CODE (prev) == JUMP_INSN
              && any_condjump_p (prev))
            insert = 1;
        }
      if (insert)
        emit_insn_before (gen_nop (), ret);
    }
}

#include "gt-i386.h"